Trinity-RFT
Tutorial
Quick Start
Off-Policy RFT
Asynchronous RFT
Multi-Turn RFT
Offline DPO
Data Processing
Configuration Guide
Developer Guide
API Reference
trinity.buffer
trinity.explorer
trinity.trainer
trinity.manager
trinity.common
trinity.utils
Trinity-RFT
Index
Edit on GitHub
Index
_
|
A
|
B
|
C
|
D
|
E
|
F
|
G
|
H
|
I
|
K
|
L
|
M
|
N
|
O
|
P
|
Q
|
R
|
S
|
T
|
U
|
V
|
W
_
__init__() (trinity.buffer.reader.file_reader.DPODataReader method)
(trinity.buffer.reader.file_reader.RolloutDataReader method)
(trinity.buffer.reader.file_reader.SFTDataReader method)
(trinity.buffer.reader.queue_reader.QueueReader method)
(trinity.buffer.reader.sql_reader.SQLReader method)
(trinity.buffer.schema.Base method)
(trinity.buffer.writer.queue_writer.QueueWriter method)
(trinity.buffer.writer.sql_writer.SQLWriter method)
(trinity.common.config.AlgorithmConfig method)
(trinity.common.config.BufferConfig method)
(trinity.common.config.ClusterConfig method)
(trinity.common.config.Config method)
(trinity.common.config.DataProcessorConfig method)
(trinity.common.config.ExplorerConfig method)
(trinity.common.config.ExplorerInput method)
(trinity.common.config.FormatConfig method)
(trinity.common.config.GenerationConfig method)
(trinity.common.config.InferenceModelConfig method)
(trinity.common.config.ModelConfig method)
(trinity.common.config.MonitorConfig method)
(trinity.common.config.StorageConfig method)
(trinity.common.config.SynchronizerConfig method)
(trinity.common.config.TrainerConfig method)
(trinity.common.config.TrainerInput method)
(trinity.common.experience.Experience method)
(trinity.common.experience.Experiences method)
(trinity.common.models.model.ModelWrapper method)
(trinity.common.models.vllm_async_model.vLLMAysncRolloutModel method)
(trinity.common.models.vllm_model.vLLMRolloutModel method)
(trinity.common.rewards.accuracy_reward.AccuracyRewardShapper method)
(trinity.common.rewards.AccuracyReward method)
(trinity.common.rewards.composite_reward.CompositeRewardShapper method)
(trinity.common.rewards.format_reward.FormatRewardShapper method)
(trinity.common.rewards.FormatReward method)
(trinity.common.rewards.reward_fn.AccuracyReward method)
(trinity.common.rewards.reward_fn.CountDownRewardFn method)
(trinity.common.rewards.reward_fn.FormatReward method)
(trinity.common.rewards.reward_fn.MathRewardFn method)
(trinity.common.schema.DPODataModel method)
(trinity.common.schema.ExperienceModel method)
(trinity.common.schema.RftDatasetModel method)
(trinity.common.schema.SFTDataModel method)
(trinity.common.schema.TaskModel method)
(trinity.common.verl_config.Actor method)
(trinity.common.verl_config.ActorModel method)
(trinity.common.verl_config.ActorRolloutRef method)
(trinity.common.verl_config.Algorithm method)
(trinity.common.verl_config.Checkpoint method)
(trinity.common.verl_config.Critic method)
(trinity.common.verl_config.CriticModel method)
(trinity.common.verl_config.CustomRewardFunction method)
(trinity.common.verl_config.Data method)
(trinity.common.verl_config.FSDPConfig method)
(trinity.common.verl_config.KL_Ctrl method)
(trinity.common.verl_config.Optim method)
(trinity.common.verl_config.Ref method)
(trinity.common.verl_config.RewardModel method)
(trinity.common.verl_config.Rollout method)
(trinity.common.verl_config.Trainer method)
(trinity.common.verl_config.veRLConfig method)
(trinity.common.verl_config.WrapPolicy method)
(trinity.common.workflows.AlfworldWorkflow method)
(trinity.common.workflows.MathWorkflow method)
(trinity.common.workflows.SciWorldWorkflow method)
(trinity.common.workflows.SimpleWorkflow method)
(trinity.common.workflows.Task method)
(trinity.common.workflows.WebShopWorkflow method)
(trinity.common.workflows.workflow.MathWorkflow method)
(trinity.common.workflows.workflow.MultiTurnWorkflow method)
(trinity.common.workflows.workflow.SimpleWorkflow method)
(trinity.common.workflows.workflow.Task method)
(trinity.common.workflows.workflow.Workflow method)
(trinity.explorer.runner_pool.RunnerPool method)
(trinity.explorer.RunnerPool method)
(trinity.explorer.workflow_runner.Status method)
(trinity.manager.config_manager.ConfigManager method)
(trinity.manager.config_registry.config_registry.ConfigRegistry method)
(trinity.manager.manager.CacheManager method)
(trinity.trainer.verl.core_algos.AdaptiveKLController method)
(trinity.trainer.verl.core_algos.FixedKLController method)
(trinity.trainer.verl.fsdp_workers.ActorRolloutRefWorker method)
(trinity.trainer.verl.fsdp_workers.CriticWorker method)
(trinity.trainer.verl.fsdp_workers.RewardModelWorker method)
(trinity.trainer.verl.ray_trainer.RayPPOTrainer method)
(trinity.trainer.verl.ray_trainer.ResourcePoolManager method)
(trinity.trainer.verl_trainer.VerlPPOTrainerWrapper method)
(trinity.utils.log.NewLineFormatter method)
(trinity.utils.monitor.Monitor method)
(trinity.utils.monitor.TensorboardMonitor method)
(trinity.utils.monitor.WandbMonitor method)
(trinity.utils.registry.Registry method)
A
AccuracyReward (class in trinity.common.rewards)
(class in trinity.common.rewards.reward_fn)
AccuracyRewardShapper (class in trinity.common.rewards.accuracy_reward)
action_mask (trinity.common.experience.Experience attribute)
action_masks (trinity.common.experience.Experiences attribute)
Actor (class in trinity.common.verl_config)
actor (trinity.common.verl_config.ActorRolloutRef attribute)
Actor (trinity.trainer.verl.ray_trainer.Role attribute)
actor_clip_ratio (trinity.common.config.TrainerConfig attribute)
actor_entropy_coef (trinity.common.config.TrainerConfig attribute)
actor_grad_clip (trinity.common.config.TrainerConfig attribute)
actor_kl_loss_coef (trinity.common.config.TrainerConfig attribute)
actor_rollout_ref (trinity.common.verl_config.veRLConfig attribute)
actor_use_kl_loss (trinity.common.config.TrainerConfig attribute)
ActorModel (class in trinity.common.verl_config)
ActorRollout (trinity.trainer.verl.ray_trainer.Role attribute)
ActorRolloutRef (class in trinity.common.verl_config)
(trinity.trainer.verl.ray_trainer.Role attribute)
ActorRolloutRefWorker (class in trinity.trainer.verl.fsdp_workers)
AdaptiveKLController (class in trinity.trainer.verl.core_algos)
adv_estimator (trinity.common.verl_config.Algorithm attribute)
AdvantageEstimator (class in trinity.trainer.verl.ray_trainer)
agent_model_config (trinity.common.config.DataProcessorConfig attribute)
agent_model_name (trinity.common.config.DataProcessorConfig attribute)
AlfworldWorkflow (class in trinity.common.workflows)
Algorithm (class in trinity.common.verl_config)
algorithm (trinity.common.config.Config attribute)
(trinity.common.verl_config.veRLConfig attribute)
algorithm_type (trinity.common.config.AlgorithmConfig attribute)
(trinity.common.config.StorageConfig attribute)
(trinity.common.verl_config.Actor attribute)
AlgorithmConfig (class in trinity.common.config)
AlgorithmType (class in trinity.common.constants)
api_server_ready() (trinity.common.models.vllm_async_model.vLLMAysncRolloutModel method)
apply_kl_penalty() (in module trinity.trainer.verl.ray_trainer)
attention_masks (trinity.common.experience.Experiences attribute)
auxiliary_models (trinity.common.config.ExplorerConfig attribute)
B
balance_batch (trinity.common.verl_config.Trainer attribute)
Base (class in trinity.buffer.schema)
batch_shape() (trinity.common.rewards.accuracy_reward.AccuracyRewardShapper method)
(trinity.common.rewards.base.RewardShapper method)
(trinity.common.rewards.format_reward.FormatRewardShapper method)
batch_size (trinity.common.config.BufferConfig attribute)
(trinity.common.experience.Experiences property)
beginner_mode() (trinity.manager.config_manager.ConfigManager method)
beta1 (trinity.common.verl_config.Optim attribute)
beta2 (trinity.common.verl_config.Optim attribute)
buffer (trinity.common.config.Config attribute)
(trinity.common.verl_config.veRLConfig attribute)
BufferConfig (class in trinity.common.config)
BufferReader (class in trinity.buffer.buffer_reader)
BufferWriter (class in trinity.buffer.buffer_writer)
bundle_indices (trinity.common.config.InferenceModelConfig attribute)
C
cache_dir (trinity.common.config.MonitorConfig attribute)
CacheManager (class in trinity.manager.manager)
calculate_metrics() (trinity.utils.monitor.Monitor method)
CaseInsensitiveEnum (class in trinity.common.constants)
CaseInsensitiveEnumMeta (class in trinity.common.constants)
chat() (trinity.common.models.model.InferenceModel method)
(trinity.common.models.model.ModelWrapper method)
(trinity.common.models.vllm_model.vLLMRolloutModel method)
chat_async() (trinity.common.models.model.InferenceModel method)
(trinity.common.models.vllm_async_model.vLLMAysncRolloutModel method)
chat_template (trinity.common.config.FormatConfig attribute)
(trinity.common.config.InferenceModelConfig attribute)
CHATPAIR (trinity.common.constants.PromptType attribute)
check_and_update() (trinity.common.config.Config method)
check_auxiliary_models() (in module trinity.manager.config_registry.explorer_config_manager)
check_checkpoint_root_dir() (in module trinity.manager.config_registry.model_config_manager)
check_experience_buffer_path() (in module trinity.manager.config_registry.buffer_config_manager)
check_model_path() (in module trinity.manager.config_registry.model_config_manager)
check_resume_from_path() (in module trinity.manager.config_registry.trainer_config_manager)
check_sft_warmup_dataset_path() (in module trinity.manager.config_registry.buffer_config_manager)
check_taskset_path() (in module trinity.manager.config_registry.buffer_config_manager)
check_tensor_parallel_size() (in module trinity.manager.config_registry.explorer_config_manager)
check_train_batch_size() (in module trinity.manager.config_registry.buffer_config_manager)
Checkpoint (class in trinity.common.verl_config)
CHECKPOINT (trinity.common.constants.SyncMethod attribute)
checkpoint (trinity.common.verl_config.Actor attribute)
(trinity.common.verl_config.Critic attribute)
checkpoint_job_dir (trinity.common.config.Config attribute)
checkpoint_root_dir (trinity.common.config.Config attribute)
chosen (trinity.buffer.schema.sql_schema.DPODataModel attribute)
(trinity.common.experience.Experience attribute)
(trinity.common.schema.DPODataModel attribute)
(trinity.common.schema.RftDatasetModel attribute)
chosen_key (trinity.common.config.FormatConfig attribute)
clean_strategy (trinity.common.config.DataProcessorConfig attribute)
clear_optimizer_state() (trinity.trainer.verl.fsdp_workers.ActorRolloutRefWorker method)
(trinity.trainer.verl.fsdp_workers.CriticWorker method)
clip_ratio (trinity.common.verl_config.Actor attribute)
cliprange_value (trinity.common.verl_config.Critic attribute)
close() (trinity.utils.monitor.Monitor method)
(trinity.utils.monitor.TensorboardMonitor method)
(trinity.utils.monitor.WandbMonitor method)
cluster (trinity.common.config.Config attribute)
ClusterConfig (class in trinity.common.config)
CompositeRewardShapper (class in trinity.common.rewards.composite_reward)
compute_advantage() (in module trinity.trainer.verl.ray_trainer)
compute_advantage_opmd() (in module trinity.trainer.verl.ray_trainer)
compute_advantage_ppo() (in module trinity.trainer.verl.ray_trainer)
compute_entropy_loss() (in module trinity.trainer.verl.core_algos)
compute_gae_advantage_return() (in module trinity.trainer.verl.core_algos)
compute_grpo_outcome_advantage() (in module trinity.trainer.verl.core_algos)
compute_log_prob() (trinity.trainer.verl.fsdp_workers.ActorRolloutRefWorker method)
compute_opmd_outcome_advantage() (in module trinity.trainer.verl.core_algos)
compute_policy_loss() (in module trinity.trainer.verl.core_algos)
compute_policy_loss_dpo() (in module trinity.trainer.verl.core_algos)
compute_policy_loss_opmd() (in module trinity.trainer.verl.core_algos)
compute_policy_loss_pairwise_opmd() (in module trinity.trainer.verl.core_algos)
compute_policy_loss_ppo() (in module trinity.trainer.verl.core_algos)
compute_policy_loss_sft() (in module trinity.trainer.verl.core_algos)
compute_ref_log_prob() (trinity.trainer.verl.fsdp_workers.ActorRolloutRefWorker method)
compute_reinforce_plus_plus_outcome_advantage() (in module trinity.trainer.verl.core_algos)
compute_remax_outcome_advantage() (in module trinity.trainer.verl.core_algos)
compute_response_mask() (in module trinity.trainer.verl.ray_trainer)
compute_rewards() (in module trinity.trainer.verl.core_algos)
compute_rloo_outcome_advantage() (in module trinity.trainer.verl.core_algos)
compute_rm_score() (trinity.trainer.verl.fsdp_workers.RewardModelWorker method)
compute_value_loss() (in module trinity.trainer.verl.core_algos)
compute_values() (trinity.trainer.verl.fsdp_workers.CriticWorker method)
Config (class in trinity.common.config)
ConfigManager (class in trinity.manager.config_manager)
ConfigRegistry (class in trinity.manager.config_registry.config_registry)
consumed (trinity.buffer.schema.sql_schema.DPODataModel attribute)
(trinity.buffer.schema.sql_schema.ExperienceModel attribute)
(trinity.buffer.schema.sql_schema.SFTDataModel attribute)
(trinity.common.schema.DPODataModel attribute)
(trinity.common.schema.ExperienceModel attribute)
(trinity.common.schema.SFTDataModel attribute)
consumed_cnt (trinity.common.schema.RftDatasetModel attribute)
contents (trinity.common.verl_config.Checkpoint attribute)
convert_messages_to_experience() (trinity.common.models.model.InferenceModel method)
(trinity.common.models.model.ModelWrapper method)
(trinity.common.models.vllm_model.vLLMRolloutModel method)
convert_messages_to_experience_async() (trinity.common.models.model.InferenceModel method)
(trinity.common.models.vllm_async_model.vLLMAysncRolloutModel method)
CountDownRewardFn (class in trinity.common.rewards.reward_fn)
create_device_mesh() (in module trinity.trainer.verl.fsdp_workers)
create_dynamic_table() (in module trinity.buffer.schema)
(in module trinity.buffer.schema.sql_schema)
create_inference_models() (in module trinity.common.models)
create_resource_pool() (trinity.trainer.verl.ray_trainer.ResourcePoolManager method)
Critic (class in trinity.common.verl_config)
critic (trinity.common.verl_config.veRLConfig attribute)
Critic (trinity.trainer.verl.ray_trainer.Role attribute)
critic_model_path (trinity.common.config.ModelConfig attribute)
critic_warmup (trinity.common.verl_config.Trainer attribute)
CriticModel (class in trinity.common.verl_config)
CriticWorker (class in trinity.trainer.verl.fsdp_workers)
custom_reward_function (trinity.common.verl_config.veRLConfig attribute)
CustomRewardFunction (class in trinity.common.verl_config)
D
Data (class in trinity.common.verl_config)
data (trinity.common.verl_config.veRLConfig attribute)
data_dist (trinity.common.config.DataProcessorConfig attribute)
data_processor (trinity.common.config.Config attribute)
data_workflow_url (trinity.common.config.DataProcessorConfig attribute)
DataProcessorConfig (class in trinity.common.config)
db_url (trinity.common.config.DataProcessorConfig attribute)
DEFAULT (trinity.common.constants.ReadStrategy attribute)
DEFAULT_ANSWER_PARSER() (trinity.common.rewards.reward_fn.MathRewardFn method)
default_config (trinity.manager.config_registry.config_registry.ConfigRegistry property)
DEFAULT_FORMAT_PATTERN (trinity.common.rewards.reward_fn.MathRewardFn attribute)
default_hdfs_dir (trinity.common.verl_config.Trainer attribute)
default_local_dir (trinity.common.verl_config.Trainer attribute)
default_reward_fn_type (trinity.common.config.ExplorerInput attribute)
(trinity.common.config.StorageConfig attribute)
default_workflow_type (trinity.common.config.ExplorerInput attribute)
(trinity.common.config.StorageConfig attribute)
del_local_ckpt_after_load (trinity.common.verl_config.Trainer attribute)
deserialize() (trinity.common.experience.Experience static method)
difficulty_score (trinity.common.schema.RftDatasetModel attribute)
difficulty_score_detail (trinity.common.schema.RftDatasetModel attribute)
diversity_score (trinity.common.schema.RftDatasetModel attribute)
diversity_score_detail (trinity.common.schema.RftDatasetModel attribute)
dj_config_path (trinity.common.config.DataProcessorConfig attribute)
dj_process_desc (trinity.common.config.DataProcessorConfig attribute)
DPO (trinity.common.constants.AlgorithmType attribute)
DPODataModel (class in trinity.buffer.schema.sql_schema)
(class in trinity.common.schema)
DPODataReader (class in trinity.buffer.reader.file_reader)
dtype (trinity.common.config.InferenceModelConfig attribute)
dummy_add_signal_handler() (in module trinity.common.models.openai_api)
E
enable (trinity.common.verl_config.RewardModel attribute)
enable_chunked_prefill (trinity.common.config.InferenceModelConfig attribute)
enable_exp_buffer (trinity.common.verl_config.Trainer attribute)
enable_gradient_checkpointing (trinity.common.verl_config.ActorModel attribute)
(trinity.common.verl_config.CriticModel attribute)
enable_openai_api (trinity.common.config.InferenceModelConfig attribute)
enable_prefix_caching (trinity.common.config.InferenceModelConfig attribute)
enable_preview (trinity.common.config.TrainerConfig attribute)
(trinity.common.verl_config.veRLConfig attribute)
enable_thinking (trinity.common.config.InferenceModelConfig attribute)
enforce_eager (trinity.common.config.InferenceModelConfig attribute)
engine_num (trinity.common.config.InferenceModelConfig attribute)
engine_type (trinity.common.config.InferenceModelConfig attribute)
entropy_coeff (trinity.common.verl_config.Actor attribute)
EVAL (trinity.common.constants.TaskType attribute)
eval_interval (trinity.common.config.ExplorerConfig attribute)
eval_on_latest_checkpoint (trinity.common.config.ExplorerConfig attribute)
eval_tasksets (trinity.common.config.ExplorerInput attribute)
evaluate_equation() (in module trinity.utils.eval_utils)
Experience (class in trinity.common.experience)
experience_buffer (trinity.common.config.TrainerInput attribute)
ExperienceModel (class in trinity.buffer.schema.sql_schema)
(class in trinity.common.schema)
Experiences (class in trinity.common.experience)
experiment_name (trinity.common.verl_config.Trainer attribute)
expert_mode() (trinity.manager.config_manager.ConfigManager method)
EXPLORE (trinity.common.constants.TaskType attribute)
explorer (trinity.common.config.Config attribute)
explorer_input (trinity.common.config.BufferConfig attribute)
explorer_output (trinity.common.config.BufferConfig attribute)
explorer_visible() (in module trinity.manager.config_registry.explorer_config_manager)
explorer_world_size (trinity.common.config.SynchronizerConfig attribute)
ExplorerConfig (class in trinity.common.config)
ExplorerInput (class in trinity.common.config)
external_lib (trinity.common.verl_config.ActorModel attribute)
(trinity.common.verl_config.CriticModel attribute)
extract_solution() (in module trinity.utils.eval_utils)
F
FIFO (trinity.common.constants.ReadStrategy attribute)
FILE (trinity.common.constants.StorageType attribute)
find_boxed_answer() (in module trinity.utils.eval_utils)
finish() (trinity.buffer.buffer_writer.BufferWriter method)
(trinity.buffer.writer.queue_writer.QueueWriter method)
(trinity.buffer.writer.sql_writer.SQLWriter method)
fit() (trinity.trainer.verl.ray_trainer.RayPPOTrainer method)
FixedKLController (class in trinity.trainer.verl.core_algos)
format (trinity.common.config.DataProcessorConfig attribute)
(trinity.common.config.StorageConfig attribute)
format() (trinity.utils.log.NewLineFormatter method)
format_args (trinity.common.workflows.Task attribute)
(trinity.common.workflows.workflow.Task attribute)
format_messages() (trinity.common.workflows.SimpleWorkflow method)
(trinity.common.workflows.workflow.SimpleWorkflow method)
FormatConfig (class in trinity.common.config)
FormatReward (class in trinity.common.rewards)
(class in trinity.common.rewards.reward_fn)
FormatRewardShapper (class in trinity.common.rewards.format_reward)
forward_max_token_len_per_gpu (trinity.common.verl_config.Critic attribute)
(trinity.common.verl_config.RewardModel attribute)
forward_micro_batch_size (trinity.common.verl_config.Critic attribute)
forward_micro_batch_size_per_gpu (trinity.common.verl_config.Critic attribute)
from_experience() (trinity.buffer.schema.sql_schema.ExperienceModel class method)
(trinity.common.schema.ExperienceModel static method)
from_id (trinity.common.schema.RftDatasetModel attribute)
from_messages() (trinity.buffer.schema.sql_schema.SFTDataModel class method)
(trinity.common.schema.SFTDataModel class method)
from_model (trinity.common.schema.RftDatasetModel attribute)
from_recipe (trinity.common.schema.RftDatasetModel attribute)
fsdp_config (trinity.common.verl_config.Actor attribute)
(trinity.common.verl_config.CriticModel attribute)
(trinity.common.verl_config.Ref attribute)
fsdp_size (trinity.common.verl_config.FSDPConfig attribute)
FSDPConfig (class in trinity.common.verl_config)
G
GAE (trinity.trainer.verl.ray_trainer.AdvantageEstimator attribute)
gamma (trinity.common.config.AlgorithmConfig attribute)
(trinity.common.verl_config.Algorithm attribute)
gather_dpo_experiences() (trinity.common.experience.Experiences class method)
gather_experiences() (trinity.common.experience.Experiences class method)
generate() (trinity.common.models.model.InferenceModel method)
(trinity.common.models.model.ModelWrapper method)
(trinity.common.models.vllm_model.vLLMRolloutModel method)
generate_async() (trinity.common.models.model.InferenceModel method)
(trinity.common.models.vllm_async_model.vLLMAysncRolloutModel method)
generate_config() (trinity.manager.config_manager.ConfigManager method)
generate_env_inference_samples() (trinity.common.workflows.AlfworldWorkflow method)
(trinity.common.workflows.SciWorldWorkflow method)
(trinity.common.workflows.WebShopWorkflow method)
generate_sequences() (trinity.trainer.verl.fsdp_workers.ActorRolloutRefWorker method)
GenerationConfig (class in trinity.common.config)
get() (trinity.manager.config_registry.config_registry.ConfigRegistry method)
(trinity.utils.registry.Registry method)
get_available_address() (trinity.common.models.model.InferenceModel method)
get_buffer_reader() (in module trinity.buffer)
(in module trinity.buffer.buffer)
get_buffer_writer() (in module trinity.buffer)
(in module trinity.buffer.buffer)
get_check_func() (trinity.manager.config_registry.config_registry.ConfigRegistry method)
get_checkpoint_dir_with_step_num() (in module trinity.common.models.utils)
get_ckp_version() (trinity.common.models.model.InferenceModel method)
(trinity.common.models.model.ModelWrapper method)
(trinity.common.models.vllm_async_model.vLLMAysncRolloutModel method)
(trinity.common.models.vllm_model.vLLMRolloutModel method)
get_configs() (trinity.manager.config_manager.ConfigManager method)
(trinity.manager.config_registry.config_registry.ConfigRegistry method)
get_dlc_env_vars() (in module trinity.utils.dlc_utils)
get_kl_controller() (in module trinity.trainer.verl.core_algos)
get_logger() (in module trinity.utils.log)
get_model_response() (trinity.common.workflows.AlfworldWorkflow method)
(trinity.common.workflows.SciWorldWorkflow method)
(trinity.common.workflows.WebShopWorkflow method)
get_model_response_text() (trinity.common.workflows.AlfworldWorkflow method)
(trinity.common.workflows.SciWorldWorkflow method)
(trinity.common.workflows.WebShopWorkflow method)
get_n_gpus() (trinity.trainer.verl.ray_trainer.ResourcePoolManager method)
get_next() (trinity.explorer.runner_pool.RunnerPool method)
(trinity.explorer.RunnerPool method)
get_next_unorder() (trinity.explorer.runner_pool.RunnerPool method)
(trinity.explorer.RunnerPool method)
get_openai_client() (trinity.common.models.model.ModelWrapper method)
get_resource_pool() (trinity.trainer.verl.ray_trainer.ResourcePoolManager method)
get_sharding_strategy() (in module trinity.trainer.verl.fsdp_workers)
get_trainer_wrapper() (in module trinity.manager)
(in module trinity.trainer)
(in module trinity.trainer.trainer)
get_verl_checkpoint_dir() (in module trinity.common.models.utils)
gpu_memory_utilization (trinity.common.config.InferenceModelConfig attribute)
gpu_per_node (trinity.common.config.ClusterConfig attribute)
grad_clip (trinity.common.verl_config.Actor attribute)
(trinity.common.verl_config.Critic attribute)
GRPO (trinity.common.constants.AlgorithmType attribute)
(trinity.trainer.verl.ray_trainer.AdvantageEstimator attribute)
H
has_api_server() (trinity.common.models.vllm_async_model.vLLMAysncRolloutModel method)
(trinity.common.models.vllm_model.vLLMRolloutModel method)
has_free() (trinity.explorer.runner_pool.RunnerPool method)
(trinity.explorer.RunnerPool method)
has_next() (trinity.explorer.runner_pool.RunnerPool method)
(trinity.explorer.RunnerPool method)
horizon (trinity.common.verl_config.KL_Ctrl attribute)
hybrid_engine (trinity.common.verl_config.ActorRolloutRef attribute)
I
id (trinity.buffer.schema.sql_schema.DPODataModel attribute)
(trinity.buffer.schema.sql_schema.ExperienceModel attribute)
(trinity.buffer.schema.sql_schema.SFTDataModel attribute)
(trinity.buffer.schema.sql_schema.TaskModel attribute)
(trinity.common.schema.DPODataModel attribute)
(trinity.common.schema.ExperienceModel attribute)
(trinity.common.schema.RftDatasetModel attribute)
(trinity.common.schema.SFTDataModel attribute)
(trinity.common.schema.TaskModel attribute)
index (trinity.common.config.StorageConfig attribute)
InferenceModel (class in trinity.common.models.model)
InferenceModelConfig (class in trinity.common.config)
info (trinity.common.experience.Experience attribute)
init_model() (trinity.trainer.verl.fsdp_workers.ActorRolloutRefWorker method)
(trinity.trainer.verl.fsdp_workers.CriticWorker method)
(trinity.trainer.verl.fsdp_workers.RewardModelWorker method)
init_process_group() (in module trinity.utils.distributed)
(trinity.common.models.vllm_async_model.vLLMAysncRolloutModel method)
(trinity.common.models.vllm_model.vLLMRolloutModel method)
(trinity.common.models.vllm_worker.WorkerExtension method)
init_workers() (trinity.trainer.verl.ray_trainer.RayPPOTrainer method)
is_dpo() (trinity.common.constants.AlgorithmType method)
is_eval (trinity.common.workflows.Task attribute)
(trinity.common.workflows.workflow.Task attribute)
is_ipv6_address() (in module trinity.utils.distributed)
is_rft() (trinity.common.constants.AlgorithmType method)
is_running() (in module trinity.utils.dlc_utils)
is_sft() (trinity.common.constants.AlgorithmType method)
K
kl_coef (trinity.common.verl_config.KL_Ctrl attribute)
KL_Ctrl (class in trinity.common.verl_config)
kl_ctrl (trinity.common.verl_config.Algorithm attribute)
kl_loss_coef (trinity.common.verl_config.Actor attribute)
kl_loss_type (trinity.common.verl_config.Actor attribute)
kl_penalty (trinity.common.verl_config.Algorithm attribute)
kl_penalty() (in module trinity.trainer.verl.core_algos)
KLController (class in trinity.trainer.verl.core_algos)
L
label (trinity.common.schema.RftDatasetModel attribute)
label_key (trinity.common.config.FormatConfig attribute)
lam (trinity.common.config.AlgorithmConfig attribute)
(trinity.common.verl_config.Algorithm attribute)
last_modified_date (trinity.common.schema.RftDatasetModel attribute)
LFU (trinity.common.constants.ReadStrategy attribute)
list() (trinity.utils.registry.Registry method)
load_checkpoint() (trinity.trainer.verl.fsdp_workers.ActorRolloutRefWorker method)
(trinity.trainer.verl.fsdp_workers.CriticWorker method)
load_config() (in module trinity.common.config)
(in module trinity.common.verl_config)
load_explorer() (trinity.manager.manager.CacheManager method)
load_kwargs (trinity.common.config.DataProcessorConfig attribute)
load_state_dict() (in module trinity.common.models.utils)
load_state_dict_from_verl_checkpoint() (in module trinity.common.models.utils)
load_trainer() (trinity.manager.manager.CacheManager method)
log() (trinity.utils.monitor.Monitor method)
(trinity.utils.monitor.TensorboardMonitor method)
(trinity.utils.monitor.WandbMonitor method)
log_prob_max_token_len_per_gpu (trinity.common.verl_config.Ref attribute)
log_prob_micro_batch_size (trinity.common.verl_config.Ref attribute)
log_prob_micro_batch_size_per_gpu (trinity.common.verl_config.Ref attribute)
log_prob_use_dynamic_bsz (trinity.common.verl_config.Ref attribute)
log_table() (trinity.utils.monitor.Monitor method)
(trinity.utils.monitor.TensorboardMonitor method)
(trinity.utils.monitor.WandbMonitor method)
logger (trinity.common.verl_config.Trainer attribute)
logprobs (trinity.common.config.GenerationConfig attribute)
(trinity.common.experience.Experience attribute)
(trinity.common.experience.Experiences attribute)
logprobs() (trinity.common.models.model.InferenceModel method)
(trinity.common.models.model.ModelWrapper method)
(trinity.common.models.vllm_model.vLLMRolloutModel method)
logprobs_async() (trinity.common.models.model.InferenceModel method)
(trinity.common.models.vllm_async_model.vLLMAysncRolloutModel method)
lr (trinity.common.verl_config.Optim attribute)
lr_warmup_steps (trinity.common.verl_config.Optim attribute)
lr_warmup_steps_ratio (trinity.common.verl_config.Optim attribute)
LRU (trinity.common.constants.ReadStrategy attribute)
M
maintain_session_state() (trinity.manager.config_manager.ConfigManager method)
mapping (trinity.trainer.verl.ray_trainer.ResourcePoolManager attribute)
MathRewardFn (class in trinity.common.rewards.reward_fn)
MathWorkflow (class in trinity.common.workflows)
(class in trinity.common.workflows.workflow)
max_actor_ckpt_to_keep (trinity.common.verl_config.Trainer attribute)
max_critic_ckpt_to_keep (trinity.common.verl_config.Trainer attribute)
max_length (trinity.common.verl_config.RewardModel attribute)
max_prompt_tokens (trinity.common.config.InferenceModelConfig attribute)
(trinity.common.config.ModelConfig attribute)
max_response_tokens (trinity.common.config.InferenceModelConfig attribute)
(trinity.common.config.ModelConfig attribute)
max_retry_interval (trinity.common.config.BufferConfig attribute)
(trinity.common.config.DataProcessorConfig attribute)
max_retry_times (trinity.common.config.BufferConfig attribute)
(trinity.common.config.DataProcessorConfig attribute)
(trinity.common.config.ExplorerConfig attribute)
max_timeout (trinity.common.config.ExplorerConfig attribute)
merge_by_placement() (in module trinity.common.models.utils)
message (trinity.explorer.workflow_runner.Status attribute)
messages (trinity.buffer.schema.sql_schema.SFTDataModel attribute)
MESSAGES (trinity.common.constants.PromptType attribute)
messages (trinity.common.schema.SFTDataModel attribute)
messages_key (trinity.common.config.FormatConfig attribute)
metadata (trinity.buffer.schema.Base attribute)
metric (trinity.explorer.workflow_runner.Status attribute)
metrics (trinity.common.experience.Experience attribute)
micro_batch_size_per_gpu (trinity.common.verl_config.RewardModel attribute)
min_lr_ratio (trinity.common.verl_config.Optim attribute)
min_num_params (trinity.common.verl_config.FSDPConfig attribute)
(trinity.common.verl_config.WrapPolicy attribute)
min_priority_score (trinity.common.config.DataProcessorConfig attribute)
min_size_ratio (trinity.common.config.DataProcessorConfig attribute)
mode (trinity.common.config.Config attribute)
model (trinity.common.config.Config attribute)
(trinity.common.verl_config.ActorRolloutRef attribute)
(trinity.common.verl_config.Critic attribute)
(trinity.common.verl_config.RewardModel attribute)
model_path (trinity.common.config.InferenceModelConfig attribute)
(trinity.common.config.ModelConfig attribute)
ModelConfig (class in trinity.common.config)
ModelWrapper (class in trinity.common.models.model)
module
trinity
trinity.buffer
trinity.buffer.buffer
trinity.buffer.buffer_reader
trinity.buffer.buffer_writer
trinity.buffer.queue
trinity.buffer.reader
trinity.buffer.reader.file_reader
trinity.buffer.reader.queue_reader
trinity.buffer.reader.sql_reader
trinity.buffer.schema
trinity.buffer.schema.sql_schema
trinity.buffer.utils
trinity.buffer.writer
trinity.buffer.writer.queue_writer
trinity.buffer.writer.sql_writer
trinity.common
trinity.common.config
trinity.common.constants
trinity.common.experience
trinity.common.models
trinity.common.models.model
trinity.common.models.openai_api
trinity.common.models.utils
trinity.common.models.vllm_async_model
trinity.common.models.vllm_model
trinity.common.models.vllm_worker
trinity.common.rewards
trinity.common.rewards.accuracy_reward
trinity.common.rewards.agents_reward
trinity.common.rewards.base
trinity.common.rewards.composite_reward
trinity.common.rewards.format_reward
trinity.common.rewards.human_reward
trinity.common.rewards.reward_fn
trinity.common.rewards.tool_reward
trinity.common.schema
trinity.common.verl_config
trinity.common.workflows
trinity.common.workflows.workflow
trinity.explorer
trinity.explorer.explorer
trinity.explorer.runner_pool
trinity.explorer.workflow_runner
trinity.manager
trinity.manager.config_manager
trinity.manager.config_registry
trinity.manager.config_registry.buffer_config_manager
trinity.manager.config_registry.config_registry
trinity.manager.config_registry.explorer_config_manager
trinity.manager.config_registry.model_config_manager
trinity.manager.config_registry.trainer_config_manager
trinity.manager.manager
trinity.trainer
trinity.trainer.trainer
trinity.trainer.verl
trinity.trainer.verl.core_algos
trinity.trainer.verl.fsdp_workers
trinity.trainer.verl.ray_trainer
trinity.trainer.verl_trainer
trinity.utils
trinity.utils.distributed
trinity.utils.dlc_utils
trinity.utils.eval_utils
trinity.utils.log
trinity.utils.monitor
trinity.utils.registry
modules (trinity.utils.registry.Registry property)
Monitor (class in trinity.utils.monitor)
monitor (trinity.common.config.Config attribute)
monitor_args (trinity.common.config.MonitorConfig attribute)
monitor_type (trinity.common.config.MonitorConfig attribute)
MonitorConfig (class in trinity.common.config)
MonitorType (class in trinity.common.constants)
MultiTurnWorkflow (class in trinity.common.workflows.workflow)
N
n (trinity.common.config.GenerationConfig attribute)
(trinity.common.verl_config.Rollout attribute)
n_gpus_per_node (trinity.common.verl_config.Trainer attribute)
name (trinity.common.config.Config attribute)
(trinity.common.config.StorageConfig attribute)
(trinity.common.verl_config.CustomRewardFunction attribute)
(trinity.utils.registry.Registry property)
NCCL (trinity.common.constants.SyncMethod attribute)
NewLineFormatter (class in trinity.utils.log)
nnodes (trinity.common.verl_config.Trainer attribute)
node_num (trinity.common.config.ClusterConfig attribute)
norm_adv_by_std_in_grpo (trinity.common.verl_config.Algorithm attribute)
O
ok (trinity.explorer.workflow_runner.Status attribute)
OPMD (trinity.common.constants.AlgorithmType attribute)
opmd_baseline (trinity.common.verl_config.Actor attribute)
Optim (class in trinity.common.verl_config)
optim (trinity.common.verl_config.Actor attribute)
(trinity.common.verl_config.Critic attribute)
optimizer_offload (trinity.common.verl_config.FSDPConfig attribute)
override_config (trinity.common.verl_config.ActorModel attribute)
(trinity.common.verl_config.CriticModel attribute)
P
pad_token_id (trinity.common.config.BufferConfig attribute)
PAIRWISE_OPMD (trinity.common.constants.AlgorithmType attribute)
param_offload (trinity.common.verl_config.FSDPConfig attribute)
patch_and_serve_http() (in module trinity.common.models.openai_api)
path (trinity.common.config.StorageConfig attribute)
(trinity.common.verl_config.ActorModel attribute)
(trinity.common.verl_config.CriticModel attribute)
(trinity.common.verl_config.CustomRewardFunction attribute)
PLAINTEXT (trinity.common.constants.PromptType attribute)
pop_idle() (trinity.explorer.runner_pool.RunnerPool method)
(trinity.explorer.RunnerPool method)
PPO (trinity.common.constants.AlgorithmType attribute)
ppo_epochs (trinity.common.verl_config.Actor attribute)
(trinity.common.verl_config.Critic attribute)
ppo_max_token_len_per_gpu (trinity.common.verl_config.Actor attribute)
(trinity.common.verl_config.Critic attribute)
ppo_micro_batch_size (trinity.common.verl_config.Actor attribute)
(trinity.common.verl_config.Critic attribute)
ppo_micro_batch_size_per_gpu (trinity.common.verl_config.Actor attribute)
(trinity.common.verl_config.Critic attribute)
ppo_mini_batch_size (trinity.common.verl_config.Actor attribute)
(trinity.common.verl_config.Critic attribute)
prepare() (trinity.manager.TrainEngineWrapper method)
(trinity.trainer.TrainEngineWrapper method)
(trinity.trainer.trainer.TrainEngineWrapper method)
(trinity.trainer.verl_trainer.VerlPPOTrainerWrapper method)
priority (trinity.buffer.schema.sql_schema.ExperienceModel attribute)
PRIORITY (trinity.common.constants.ReadStrategy attribute)
priority (trinity.common.schema.ExperienceModel attribute)
(trinity.common.schema.RftDatasetModel attribute)
priority_weights (trinity.common.config.DataProcessorConfig attribute)
process_messages_to_experience() (trinity.common.workflows.workflow.MultiTurnWorkflow method)
project (trinity.common.config.Config attribute)
project_name (trinity.common.verl_config.Trainer attribute)
prompt (trinity.buffer.schema.sql_schema.ExperienceModel attribute)
(trinity.common.schema.ExperienceModel attribute)
(trinity.common.schema.RftDatasetModel attribute)
prompt_key (trinity.common.config.FormatConfig attribute)
prompt_length (trinity.common.experience.Experience attribute)
(trinity.common.experience.Experiences attribute)
prompt_text (trinity.common.experience.Experience attribute)
prompt_type (trinity.common.config.FormatConfig attribute)
PromptType (class in trinity.common.constants)
Q
quality_score (trinity.common.schema.RftDatasetModel attribute)
quality_score_detail (trinity.common.schema.RftDatasetModel attribute)
QUEUE (trinity.common.constants.StorageType attribute)
QueueReader (class in trinity.buffer.reader.queue_reader)
QueueWriter (class in trinity.buffer.writer.queue_writer)
R
RANDOM (trinity.common.constants.ReadStrategy attribute)
raw_task (trinity.common.workflows.Task attribute)
(trinity.common.workflows.workflow.Task attribute)
RayPPOTrainer (class in trinity.trainer.verl.ray_trainer)
read() (trinity.buffer.buffer_reader.BufferReader method)
(trinity.buffer.reader.file_reader.DPODataReader method)
(trinity.buffer.reader.file_reader.RolloutDataReader method)
(trinity.buffer.reader.file_reader.SFTDataReader method)
(trinity.buffer.reader.queue_reader.QueueReader method)
(trinity.buffer.reader.sql_reader.SQLReader method)
read_batch_size (trinity.common.config.BufferConfig attribute)
read_experience_strategy (trinity.common.config.TrainerInput attribute)
ReadStrategy (class in trinity.common.constants)
Ref (class in trinity.common.verl_config)
ref (trinity.common.verl_config.ActorRolloutRef attribute)
RefPolicy (trinity.trainer.verl.ray_trainer.Role attribute)
register_check() (trinity.manager.config_registry.config_registry.ConfigRegistry method)
register_config() (trinity.manager.config_registry.config_registry.ConfigRegistry method)
register_module() (trinity.utils.registry.Registry method)
Registry (class in trinity.utils.registry)
registry (trinity.buffer.schema.Base attribute)
REINFORCE_PLUS_PLUS (trinity.trainer.verl.ray_trainer.AdvantageEstimator attribute)
rejected (trinity.buffer.schema.sql_schema.DPODataModel attribute)
(trinity.common.experience.Experience attribute)
(trinity.common.schema.DPODataModel attribute)
(trinity.common.schema.RftDatasetModel attribute)
rejected_key (trinity.common.config.FormatConfig attribute)
REMAX (trinity.trainer.verl.ray_trainer.AdvantageEstimator attribute)
remove_previous_ckpt_in_save (trinity.common.verl_config.Trainer attribute)
repeat_times (trinity.common.config.AlgorithmConfig attribute)
reply_prefix (trinity.common.config.ExplorerInput attribute)
(trinity.common.config.FormatConfig attribute)
reset() (trinity.common.workflows.MathWorkflow method)
(trinity.common.workflows.SimpleWorkflow method)
(trinity.common.workflows.WebShopWorkflow method)
(trinity.common.workflows.workflow.MathWorkflow method)
(trinity.common.workflows.workflow.SimpleWorkflow method)
(trinity.common.workflows.workflow.Workflow method)
reset_experiences_example_table() (trinity.trainer.verl_trainer.VerlPPOTrainerWrapper method)
reset_prefix_cache() (trinity.common.models.vllm_async_model.vLLMAysncRolloutModel method)
(trinity.common.models.vllm_model.vLLMRolloutModel method)
reset_session_state() (trinity.manager.config_manager.ConfigManager method)
resettable (trinity.common.workflows.SimpleWorkflow property)
(trinity.common.workflows.WebShopWorkflow property)
(trinity.common.workflows.workflow.SimpleWorkflow property)
(trinity.common.workflows.workflow.Workflow property)
resource_pool_dict (trinity.trainer.verl.ray_trainer.ResourcePoolManager attribute)
resource_pool_spec (trinity.trainer.verl.ray_trainer.ResourcePoolManager attribute)
ResourcePoolManager (class in trinity.trainer.verl.ray_trainer)
response (trinity.buffer.schema.sql_schema.ExperienceModel attribute)
(trinity.common.schema.ExperienceModel attribute)
(trinity.common.schema.RftDatasetModel attribute)
response_key (trinity.common.config.FormatConfig attribute)
response_text (trinity.common.experience.Experience attribute)
resume_from_path (trinity.common.verl_config.Trainer attribute)
resume_mode (trinity.common.verl_config.Trainer attribute)
retry_session() (in module trinity.buffer.utils)
reward (trinity.buffer.schema.sql_schema.ExperienceModel attribute)
(trinity.common.experience.Experience attribute)
(trinity.common.schema.ExperienceModel attribute)
(trinity.common.schema.RftDatasetModel attribute)
reward_fn (trinity.common.schema.RftDatasetModel attribute)
(trinity.common.workflows.Task attribute)
(trinity.common.workflows.workflow.Task attribute)
reward_fn_key (trinity.common.config.FormatConfig attribute)
reward_key (trinity.common.config.FormatConfig attribute)
reward_manager (trinity.common.verl_config.RewardModel attribute)
reward_model (trinity.common.verl_config.veRLConfig attribute)
reward_type (trinity.buffer.schema.sql_schema.TaskModel attribute)
(trinity.common.schema.TaskModel attribute)
RewardFn (class in trinity.common.rewards)
(class in trinity.common.rewards.reward_fn)
RewardModel (class in trinity.common.verl_config)
(trinity.trainer.verl.ray_trainer.Role attribute)
RewardModelWorker (class in trinity.trainer.verl.fsdp_workers)
rewards (trinity.common.experience.Experiences attribute)
RewardShapper (class in trinity.common.rewards.base)
RftDatasetModel (class in trinity.common.schema)
RLOO (trinity.trainer.verl.ray_trainer.AdvantageEstimator attribute)
Role (class in trinity.trainer.verl.ray_trainer)
Rollout (class in trinity.common.verl_config)
rollout (trinity.common.verl_config.ActorRolloutRef attribute)
Rollout (trinity.trainer.verl.ray_trainer.Role attribute)
rollout_args (trinity.common.config.StorageConfig attribute)
(trinity.common.workflows.Task attribute)
(trinity.common.workflows.workflow.Task attribute)
rollout_model (trinity.common.config.ExplorerConfig attribute)
rollout_n (trinity.common.verl_config.Critic attribute)
RolloutDataReader (class in trinity.buffer.reader.file_reader)
run() (trinity.common.workflows.AlfworldWorkflow method)
(trinity.common.workflows.SciWorldWorkflow method)
(trinity.common.workflows.SimpleWorkflow method)
(trinity.common.workflows.WebShopWorkflow method)
(trinity.common.workflows.workflow.MultiTurnWorkflow method)
(trinity.common.workflows.workflow.SimpleWorkflow method)
(trinity.common.workflows.workflow.Workflow method)
run_api_server() (trinity.common.models.vllm_async_model.vLLMAysncRolloutModel method)
run_api_server_in_ray_actor() (in module trinity.common.models.openai_api)
run_config() (trinity.manager.config_manager.ConfigManager method)
run_id (trinity.common.experience.Experience attribute)
run_ids (trinity.common.experience.Experiences attribute)
run_server_in_ray() (in module trinity.common.models.openai_api)
run_tasks() (trinity.explorer.runner_pool.RunnerPool method)
(trinity.explorer.RunnerPool method)
runner_num (trinity.common.config.ExplorerConfig attribute)
RunnerPool (class in trinity.explorer)
(class in trinity.explorer.runner_pool)
S
save() (trinity.common.config.Config method)
save_checkpoint() (trinity.manager.TrainEngineWrapper method)
(trinity.trainer.TrainEngineWrapper method)
(trinity.trainer.trainer.TrainEngineWrapper method)
(trinity.trainer.verl.fsdp_workers.ActorRolloutRefWorker method)
(trinity.trainer.verl.fsdp_workers.CriticWorker method)
(trinity.trainer.verl_trainer.VerlPPOTrainerWrapper method)
save_explorer() (trinity.manager.manager.CacheManager method)
save_freq (trinity.common.verl_config.Trainer attribute)
save_interval (trinity.common.config.TrainerConfig attribute)
save_trainer() (trinity.manager.manager.CacheManager method)
SciWorldWorkflow (class in trinity.common.workflows)
seed (trinity.common.config.InferenceModelConfig attribute)
serialize() (trinity.common.experience.Experience method)
serialized_exp (trinity.buffer.schema.sql_schema.DPODataModel attribute)
(trinity.buffer.schema.sql_schema.ExperienceModel attribute)
(trinity.buffer.schema.sql_schema.SFTDataModel attribute)
(trinity.common.schema.DPODataModel attribute)
(trinity.common.schema.ExperienceModel attribute)
(trinity.common.schema.SFTDataModel attribute)
set_actor_checkpoint() (in module trinity.manager.config_registry.trainer_config_manager)
set_actor_clip_ratio() (in module trinity.manager.config_registry.trainer_config_manager)
set_actor_entropy_coef() (in module trinity.manager.config_registry.trainer_config_manager)
set_actor_grad_clip() (in module trinity.manager.config_registry.trainer_config_manager)
set_actor_kl_loss_coef() (in module trinity.manager.config_registry.trainer_config_manager)
set_actor_kl_loss_type() (in module trinity.manager.config_registry.trainer_config_manager)
set_actor_lr() (in module trinity.manager.config_registry.trainer_config_manager)
set_actor_lr_warmup_steps_ratio() (in module trinity.manager.config_registry.trainer_config_manager)
set_actor_opmd_baseline() (in module trinity.manager.config_registry.trainer_config_manager)
set_actor_ppo_micro_batch_size_per_gpu() (in module trinity.manager.config_registry.trainer_config_manager)
set_actor_tau() (in module trinity.manager.config_registry.trainer_config_manager)
set_actor_ulysses_sequence_parallel_size() (in module trinity.manager.config_registry.trainer_config_manager)
set_actor_use_kl_loss() (in module trinity.manager.config_registry.trainer_config_manager)
set_actor_use_uid() (in module trinity.manager.config_registry.trainer_config_manager)
set_actor_warmup_style() (in module trinity.manager.config_registry.trainer_config_manager)
set_algorithm_type() (in module trinity.manager.config_registry.model_config_manager)
set_auxiliary_models() (in module trinity.manager.config_registry.explorer_config_manager)
set_buffer_max_retry_times() (in module trinity.manager.config_registry.buffer_config_manager)
set_checkpoint_root_dir() (in module trinity.manager.config_registry.model_config_manager)
set_critic_checkpoint() (in module trinity.manager.config_registry.trainer_config_manager)
set_critic_cliprange_value() (in module trinity.manager.config_registry.trainer_config_manager)
set_critic_grad_clip() (in module trinity.manager.config_registry.trainer_config_manager)
set_critic_lr() (in module trinity.manager.config_registry.trainer_config_manager)
set_critic_lr_warmup_steps_ratio() (in module trinity.manager.config_registry.trainer_config_manager)
set_critic_model_path() (in module trinity.manager.config_registry.model_config_manager)
set_critic_ppo_micro_batch_size_per_gpu() (in module trinity.manager.config_registry.trainer_config_manager)
set_critic_ulysses_sequence_parallel_size() (in module trinity.manager.config_registry.trainer_config_manager)
set_critic_warmup() (in module trinity.manager.config_registry.trainer_config_manager)
set_critic_warmup_style() (in module trinity.manager.config_registry.trainer_config_manager)
set_default_hdfs_dir() (in module trinity.manager.config_registry.trainer_config_manager)
set_default_reward_fn_type() (in module trinity.manager.config_registry.buffer_config_manager)
set_default_workflow_type() (in module trinity.manager.config_registry.buffer_config_manager)
set_del_local_ckpt_after_load() (in module trinity.manager.config_registry.trainer_config_manager)
set_dpo_dataset_kwargs() (in module trinity.manager.config_registry.buffer_config_manager)
set_dtype() (in module trinity.manager.config_registry.explorer_config_manager)
set_enable_chunked_prefill() (in module trinity.manager.config_registry.explorer_config_manager)
set_enable_openai_api() (in module trinity.manager.config_registry.explorer_config_manager)
set_enable_prefix_caching() (in module trinity.manager.config_registry.explorer_config_manager)
set_enable_preview() (in module trinity.manager.config_registry.trainer_config_manager)
set_enable_thinking() (in module trinity.manager.config_registry.explorer_config_manager)
set_enforce_eager() (in module trinity.manager.config_registry.explorer_config_manager)
set_engine_num() (in module trinity.manager.config_registry.explorer_config_manager)
set_engine_type() (in module trinity.manager.config_registry.explorer_config_manager)
set_eval_interval() (in module trinity.manager.config_registry.explorer_config_manager)
set_eval_on_latest_checkpoint() (in module trinity.manager.config_registry.explorer_config_manager)
set_eval_tasksets() (in module trinity.manager.config_registry.buffer_config_manager)
set_exp_name() (in module trinity.manager.config_registry.model_config_manager)
set_experience_buffer_path() (in module trinity.manager.config_registry.buffer_config_manager)
set_explorer_max_retry_times() (in module trinity.manager.config_registry.explorer_config_manager)
set_gamma() (in module trinity.manager.config_registry.model_config_manager)
set_gpu_memory_utilization() (in module trinity.manager.config_registry.explorer_config_manager)
set_gpu_per_node() (in module trinity.manager.config_registry.model_config_manager)
set_horizon() (in module trinity.manager.config_registry.trainer_config_manager)
set_kl_ctrl_coef() (in module trinity.manager.config_registry.trainer_config_manager)
set_kl_ctrl_type() (in module trinity.manager.config_registry.trainer_config_manager)
set_kl_penalty() (in module trinity.manager.config_registry.trainer_config_manager)
set_lam() (in module trinity.manager.config_registry.model_config_manager)
set_max_actor_ckpt_to_keep() (in module trinity.manager.config_registry.trainer_config_manager)
set_max_critic_ckpt_to_keep() (in module trinity.manager.config_registry.trainer_config_manager)
set_max_prompt_tokens() (in module trinity.manager.config_registry.model_config_manager)
set_max_response_tokens() (in module trinity.manager.config_registry.model_config_manager)
set_max_retry_interval() (in module trinity.manager.config_registry.buffer_config_manager)
set_max_timeout() (in module trinity.manager.config_registry.explorer_config_manager)
set_mode() (trinity.manager.TrainEngineWrapper method)
(trinity.trainer.TrainEngineWrapper method)
(trinity.trainer.trainer.TrainEngineWrapper method)
(trinity.trainer.verl.fsdp_workers.ActorRolloutRefWorker method)
(trinity.trainer.verl_trainer.VerlPPOTrainerWrapper method)
set_model_path() (in module trinity.manager.config_registry.model_config_manager)
set_monitor_type() (in module trinity.manager.config_registry.model_config_manager)
set_node_num() (in module trinity.manager.config_registry.model_config_manager)
set_norm_adv_by_std_in_grpo() (in module trinity.manager.config_registry.trainer_config_manager)
set_optimizer_offload() (in module trinity.manager.config_registry.trainer_config_manager)
set_param_offload() (in module trinity.manager.config_registry.trainer_config_manager)
set_ppo_epochs() (in module trinity.manager.config_registry.trainer_config_manager)
set_project() (in module trinity.manager.config_registry.model_config_manager)
set_ref_log_prob_micro_batch_size_per_gpu() (in module trinity.manager.config_registry.trainer_config_manager)
set_remove_previous_ckpt_in_save() (in module trinity.manager.config_registry.trainer_config_manager)
set_repeat_times() (in module trinity.manager.config_registry.model_config_manager)
set_reply_prefix() (in module trinity.manager.config_registry.buffer_config_manager)
set_resume_from_path() (in module trinity.manager.config_registry.trainer_config_manager)
set_resume_mode() (in module trinity.manager.config_registry.trainer_config_manager)
set_runner_num() (in module trinity.manager.config_registry.explorer_config_manager)
set_save_interval() (in module trinity.manager.config_registry.trainer_config_manager)
set_seed() (in module trinity.manager.config_registry.explorer_config_manager)
set_sft_warmup_dataset_args() (in module trinity.manager.config_registry.buffer_config_manager)
set_sft_warmup_dataset_path() (in module trinity.manager.config_registry.buffer_config_manager)
set_sft_warmup_steps() (in module trinity.manager.config_registry.buffer_config_manager)
set_storage_type() (in module trinity.manager.config_registry.buffer_config_manager)
set_sync_interval() (in module trinity.manager.config_registry.explorer_config_manager)
set_sync_method() (in module trinity.manager.config_registry.explorer_config_manager)
set_sync_timeout() (in module trinity.manager.config_registry.explorer_config_manager)
set_system_prompt() (in module trinity.manager.config_registry.buffer_config_manager)
set_target_kl() (in module trinity.manager.config_registry.trainer_config_manager)
set_taskset_args() (in module trinity.manager.config_registry.buffer_config_manager)
set_taskset_path() (in module trinity.manager.config_registry.buffer_config_manager)
set_tensor_parallel_size() (in module trinity.manager.config_registry.explorer_config_manager)
set_total_epochs() (in module trinity.manager.config_registry.buffer_config_manager)
set_total_gpu_num() (in module trinity.manager.config_registry.model_config_manager)
set_total_training_steps() (in module trinity.manager.config_registry.trainer_config_manager)
set_train_batch_size() (in module trinity.manager.config_registry.buffer_config_manager)
set_trainer_gpu_num() (in module trinity.manager.config_registry.model_config_manager)
set_trainer_type() (in module trinity.manager.config_registry.trainer_config_manager)
set_training_args() (in module trinity.manager.config_registry.trainer_config_manager)
set_training_strategy() (in module trinity.manager.config_registry.trainer_config_manager)
set_unfinished_fields() (trinity.manager.config_registry.config_registry.ConfigRegistry method)
set_use_kl_in_reward() (in module trinity.manager.config_registry.trainer_config_manager)
set_use_v1() (in module trinity.manager.config_registry.explorer_config_manager)
setup_ray_cluster() (in module trinity.utils.dlc_utils)
setup_weight_sync_group() (trinity.trainer.verl.fsdp_workers.ActorRolloutRefWorker method)
SFT (trinity.common.constants.AlgorithmType attribute)
sft_to_rft() (trinity.trainer.verl_trainer.VerlPPOTrainerWrapper method)
sft_warmup_dataset (trinity.common.config.TrainerInput attribute)
sft_warmup_steps (trinity.common.config.TrainerInput attribute)
(trinity.common.verl_config.Trainer attribute)
SFTDataModel (class in trinity.buffer.schema.sql_schema)
(class in trinity.common.schema)
SFTDataReader (class in trinity.buffer.reader.file_reader)
shape() (trinity.common.rewards.accuracy_reward.AccuracyRewardShapper method)
(trinity.common.rewards.base.RewardShapper method)
(trinity.common.rewards.composite_reward.CompositeRewardShapper method)
(trinity.common.rewards.format_reward.FormatRewardShapper method)
shuffle (trinity.common.verl_config.Actor attribute)
(trinity.common.verl_config.Critic attribute)
shutdown() (trinity.common.models.vllm_async_model.vLLMAysncRolloutModel method)
(trinity.manager.TrainEngineWrapper method)
(trinity.trainer.TrainEngineWrapper method)
(trinity.trainer.trainer.TrainEngineWrapper method)
(trinity.trainer.verl_trainer.VerlPPOTrainerWrapper method)
simple_answer_parser() (in module trinity.utils.eval_utils)
SimpleWorkflow (class in trinity.common.workflows)
(class in trinity.common.workflows.workflow)
sleep() (trinity.common.models.vllm_async_model.vLLMAysncRolloutModel method)
(trinity.common.models.vllm_model.vLLMRolloutModel method)
solution (trinity.common.schema.RftDatasetModel attribute)
solution_key (trinity.common.config.FormatConfig attribute)
source_data_path (trinity.common.config.DataProcessorConfig attribute)
split (trinity.common.config.StorageConfig attribute)
SQL (trinity.common.constants.StorageType attribute)
SQLReader (class in trinity.buffer.reader.sql_reader)
SQLWriter (class in trinity.buffer.writer.sql_writer)
Status (class in trinity.explorer.workflow_runner)
storage_type (trinity.common.config.StorageConfig attribute)
StorageConfig (class in trinity.common.config)
StorageType (class in trinity.common.constants)
strategy (trinity.common.verl_config.Actor attribute)
(trinity.common.verl_config.Critic attribute)
(trinity.common.verl_config.RewardModel attribute)
subset_name (trinity.common.config.StorageConfig attribute)
sync_freq (trinity.common.verl_config.Trainer attribute)
sync_interval (trinity.common.config.SynchronizerConfig attribute)
sync_method (trinity.common.config.SynchronizerConfig attribute)
sync_model() (trinity.common.models.vllm_async_model.vLLMAysncRolloutModel method)
(trinity.common.models.vllm_model.vLLMRolloutModel method)
sync_timeout (trinity.common.config.SynchronizerConfig attribute)
sync_weight() (trinity.manager.TrainEngineWrapper method)
(trinity.trainer.TrainEngineWrapper method)
(trinity.trainer.trainer.TrainEngineWrapper method)
(trinity.trainer.verl.fsdp_workers.ActorRolloutRefWorker method)
(trinity.trainer.verl_trainer.VerlPPOTrainerWrapper method)
synchronize_config() (trinity.common.verl_config.veRLConfig method)
synchronizer (trinity.common.config.Config attribute)
(trinity.common.verl_config.ActorRolloutRef attribute)
(trinity.common.verl_config.veRLConfig attribute)
SynchronizerConfig (class in trinity.common.config)
SyncMethod (class in trinity.common.constants)
SyncMethodEnumMeta (class in trinity.common.constants)
system_prompt (trinity.common.config.ExplorerInput attribute)
(trinity.common.config.FormatConfig attribute)
T
target_kl (trinity.common.verl_config.KL_Ctrl attribute)
Task (class in trinity.common.workflows)
(class in trinity.common.workflows.workflow)
task_desc (trinity.buffer.schema.sql_schema.TaskModel attribute)
(trinity.common.schema.TaskModel attribute)
(trinity.common.workflows.Task property)
(trinity.common.workflows.workflow.Task property)
task_type (trinity.common.config.StorageConfig attribute)
TaskModel (class in trinity.buffer.schema.sql_schema)
(class in trinity.common.schema)
taskset (trinity.common.config.ExplorerInput attribute)
TaskType (class in trinity.common.constants)
tau (trinity.common.verl_config.Actor attribute)
temperature (trinity.common.config.GenerationConfig attribute)
(trinity.common.verl_config.Rollout attribute)
tensor_parallel_size (trinity.common.config.InferenceModelConfig attribute)
TENSORBOARD (trinity.common.constants.MonitorType attribute)
TensorboardMonitor (class in trinity.utils.monitor)
test_freq (trinity.common.verl_config.Trainer attribute)
to_dict() (trinity.common.schema.RftDatasetModel method)
to_experience() (trinity.buffer.schema.sql_schema.DPODataModel method)
(trinity.buffer.schema.sql_schema.ExperienceModel method)
(trinity.buffer.schema.sql_schema.SFTDataModel method)
(trinity.common.schema.DPODataModel method)
(trinity.common.schema.ExperienceModel method)
(trinity.common.schema.SFTDataModel method)
to_workflow() (trinity.common.workflows.Task method)
(trinity.common.workflows.workflow.Task method)
tokenize_and_mask_messages_default() (in module trinity.common.models.utils)
tokenize_and_mask_messages_hf() (in module trinity.common.models.utils)
tokenizer_path (trinity.common.config.BufferConfig attribute)
(trinity.common.verl_config.CriticModel attribute)
tokens (trinity.common.experience.Experience attribute)
(trinity.common.experience.Experiences attribute)
top_k (trinity.common.config.GenerationConfig attribute)
top_p (trinity.common.config.GenerationConfig attribute)
total_epochs (trinity.common.config.BufferConfig attribute)
(trinity.common.config.StorageConfig attribute)
(trinity.common.verl_config.Trainer attribute)
total_training_steps (trinity.common.verl_config.Optim attribute)
(trinity.common.verl_config.Trainer attribute)
train_batch_size (trinity.common.verl_config.Data attribute)
train_dpo_step() (trinity.manager.TrainEngineWrapper method)
(trinity.trainer.TrainEngineWrapper method)
(trinity.trainer.trainer.TrainEngineWrapper method)
(trinity.trainer.verl_trainer.VerlPPOTrainerWrapper method)
train_rft_step() (trinity.manager.TrainEngineWrapper method)
(trinity.trainer.TrainEngineWrapper method)
(trinity.trainer.trainer.TrainEngineWrapper method)
(trinity.trainer.verl_trainer.VerlPPOTrainerWrapper method)
train_sft_step() (trinity.manager.TrainEngineWrapper method)
(trinity.trainer.TrainEngineWrapper method)
(trinity.trainer.trainer.TrainEngineWrapper method)
(trinity.trainer.verl_trainer.VerlPPOTrainerWrapper method)
TrainEngineWrapper (class in trinity.manager)
(class in trinity.trainer)
(class in trinity.trainer.trainer)
Trainer (class in trinity.common.verl_config)
trainer (trinity.common.config.Config attribute)
(trinity.common.verl_config.veRLConfig attribute)
trainer_config (trinity.common.config.TrainerConfig attribute)
trainer_config_path (trinity.common.config.TrainerConfig attribute)
trainer_input (trinity.common.config.BufferConfig attribute)
trainer_type (trinity.common.config.TrainerConfig attribute)
TrainerConfig (class in trinity.common.config)
TrainerInput (class in trinity.common.config)
training_rollout_mode (trinity.common.verl_config.Trainer attribute)
trinity
module
trinity.buffer
module
trinity.buffer.buffer
module
trinity.buffer.buffer_reader
module
trinity.buffer.buffer_writer
module
trinity.buffer.queue
module
trinity.buffer.reader
module
trinity.buffer.reader.file_reader
module
trinity.buffer.reader.queue_reader
module
trinity.buffer.reader.sql_reader
module
trinity.buffer.schema
module
trinity.buffer.schema.sql_schema
module
trinity.buffer.utils
module
trinity.buffer.writer
module
trinity.buffer.writer.queue_writer
module
trinity.buffer.writer.sql_writer
module
trinity.common
module
trinity.common.config
module
trinity.common.constants
module
trinity.common.experience
module
trinity.common.models
module
trinity.common.models.model
module
trinity.common.models.openai_api
module
trinity.common.models.utils
module
trinity.common.models.vllm_async_model
module
trinity.common.models.vllm_model
module
trinity.common.models.vllm_worker
module
trinity.common.rewards
module
trinity.common.rewards.accuracy_reward
module
trinity.common.rewards.agents_reward
module
trinity.common.rewards.base
module
trinity.common.rewards.composite_reward
module
trinity.common.rewards.format_reward
module
trinity.common.rewards.human_reward
module
trinity.common.rewards.reward_fn
module
trinity.common.rewards.tool_reward
module
trinity.common.schema
module
trinity.common.verl_config
module
trinity.common.workflows
module
trinity.common.workflows.workflow
module
trinity.explorer
module
trinity.explorer.explorer
module
trinity.explorer.runner_pool
module
trinity.explorer.workflow_runner
module
trinity.manager
module
trinity.manager.config_manager
module
trinity.manager.config_registry
module
trinity.manager.config_registry.buffer_config_manager
module
trinity.manager.config_registry.config_registry
module
trinity.manager.config_registry.explorer_config_manager
module
trinity.manager.config_registry.model_config_manager
module
trinity.manager.config_registry.trainer_config_manager
module
trinity.manager.manager
module
trinity.trainer
module
trinity.trainer.trainer
module
trinity.trainer.verl
module
trinity.trainer.verl.core_algos
module
trinity.trainer.verl.fsdp_workers
module
trinity.trainer.verl.ray_trainer
module
trinity.trainer.verl_trainer
module
trinity.utils
module
trinity.utils.distributed
module
trinity.utils.dlc_utils
module
trinity.utils.eval_utils
module
trinity.utils.log
module
trinity.utils.monitor
module
trinity.utils.registry
module
truth (trinity.common.workflows.Task property)
(trinity.common.workflows.workflow.Task property)
type (trinity.common.verl_config.KL_Ctrl attribute)
U
ulysses_sequence_parallel_size (trinity.common.verl_config.Actor attribute)
(trinity.common.verl_config.Critic attribute)
(trinity.common.verl_config.Ref attribute)
(trinity.common.verl_config.RewardModel attribute)
update() (trinity.trainer.verl.core_algos.AdaptiveKLController method)
(trinity.trainer.verl.core_algos.FixedKLController method)
(trinity.trainer.verl.core_algos.KLController method)
update_actor() (trinity.trainer.verl.fsdp_workers.ActorRolloutRefWorker method)
update_critic() (trinity.trainer.verl.fsdp_workers.CriticWorker method)
update_weight() (trinity.common.models.vllm_async_model.vLLMAysncRolloutModel method)
(trinity.common.models.vllm_model.vLLMRolloutModel method)
(trinity.common.models.vllm_worker.WorkerExtension method)
use_critic() (in module trinity.manager.config_registry.trainer_config_manager)
use_dynamic_bsz (trinity.common.verl_config.Actor attribute)
(trinity.common.verl_config.Critic attribute)
(trinity.common.verl_config.RewardModel attribute)
use_fsdp() (in module trinity.manager.config_registry.trainer_config_manager)
use_kl_in_reward (trinity.common.verl_config.Algorithm attribute)
use_kl_loss (trinity.common.verl_config.Actor attribute)
use_remove_padding (trinity.common.verl_config.ActorModel attribute)
(trinity.common.verl_config.CriticModel attribute)
use_uid (trinity.common.verl_config.Actor attribute)
use_v1 (trinity.common.config.InferenceModelConfig attribute)
V
val_before_train (trinity.common.verl_config.Trainer attribute)
val_generations_to_log_to_wandb (trinity.common.verl_config.Trainer attribute)
validate_equation() (in module trinity.utils.eval_utils)
veRLConfig (class in trinity.common.verl_config)
VerlPPOTrainerWrapper (class in trinity.trainer.verl_trainer)
vLLMAysncRolloutModel (class in trinity.common.models.vllm_async_model)
vLLMRolloutModel (class in trinity.common.models.vllm_model)
W
wait_for_checkpoint (trinity.common.config.SynchronizerConfig attribute)
wait_for_ray_setup() (in module trinity.utils.dlc_utils)
wait_for_ray_worker_nodes() (in module trinity.utils.dlc_utils)
wake_up() (trinity.common.models.vllm_async_model.vLLMAysncRolloutModel method)
(trinity.common.models.vllm_model.vLLMRolloutModel method)
WANDB (trinity.common.constants.MonitorType attribute)
WandbMonitor (class in trinity.utils.monitor)
warmup_style (trinity.common.verl_config.Optim attribute)
WebShopWorkflow (class in trinity.common.workflows)
WorkerExtension (class in trinity.common.models.vllm_worker)
Workflow (class in trinity.common.workflows.workflow)
workflow (trinity.common.schema.RftDatasetModel attribute)
(trinity.common.workflows.Task attribute)
(trinity.common.workflows.workflow.Task attribute)
workflow_key (trinity.common.config.FormatConfig attribute)
workflow_type (trinity.buffer.schema.sql_schema.TaskModel attribute)
(trinity.common.schema.TaskModel attribute)
wrap_policy (trinity.common.verl_config.FSDPConfig attribute)
WrapPolicy (class in trinity.common.verl_config)
write() (trinity.buffer.buffer_writer.BufferWriter method)
(trinity.buffer.writer.queue_writer.QueueWriter method)
(trinity.buffer.writer.sql_writer.SQLWriter method)