Trinity-RFT
Examples
Quick Start
Off-Policy RFT
Asynchronous RFT
Concatenated Multi-Turn RFT
General Multi-Step RFT
Multi-Step ReAct
Offline DPO and SFT
Data Processing
Guidelines
Developer Guide
Configuration Guide
Algorithm Development
FAQ
FAQ
API Reference
API Reference
Trinity-RFT
Index
Edit on GitHub
Index
_ | A | B | C | D | E | F | G | H | I | J | K | L | M | N | O | P | Q | R | S | T | U | V | W
_
__init__() (trinity.algorithm.add_strategy.add_strategy.AddStrategy method)
(trinity.algorithm.add_strategy.add_strategy.GRPOAddStrategy method)
(trinity.algorithm.add_strategy.add_strategy.OPMDAddStrategy method)
(trinity.algorithm.add_strategy.add_strategy.RewardVarianceAddStrategy method)
(trinity.algorithm.add_strategy.AddStrategy method)
(trinity.algorithm.add_strategy.correct_bias_add_strategy.CorrectBiasAddStrategy method)
(trinity.algorithm.add_strategy.CorrectBiasAddStrategy method)
(trinity.algorithm.add_strategy.duplicate_add_strategy.DuplicateInformativeAddStrategy method)
(trinity.algorithm.add_strategy.DuplicateInformativeAddStrategy method)
(trinity.algorithm.add_strategy.GRPOAddStrategy method)
(trinity.algorithm.add_strategy.OPMDAddStrategy method)
(trinity.algorithm.add_strategy.RewardVarianceAddStrategy method)
(trinity.algorithm.add_strategy.step_wise_add_strategy.StepWiseGRPOStrategy method)
(trinity.algorithm.add_strategy.StepWiseGRPOStrategy method)
(trinity.algorithm.AddStrategy method)
(trinity.algorithm.advantage_fn.grpo_advantage.GRPOAdvantageFn method)
(trinity.algorithm.advantage_fn.GRPOAdvantageFn method)
(trinity.algorithm.advantage_fn.opmd_advantage.OPMDAdvantageFn method)
(trinity.algorithm.advantage_fn.OPMDAdvantageFn method)
(trinity.algorithm.advantage_fn.ppo_advantage.PPOAdvantageFn method)
(trinity.algorithm.advantage_fn.PPOAdvantageFn method)
(trinity.algorithm.advantage_fn.reinforce_plus_plus_advantage.REINFORCEPLUSPLUSAdvantageFn method)
(trinity.algorithm.advantage_fn.REINFORCEPLUSPLUSAdvantageFn method)
(trinity.algorithm.advantage_fn.remax_advantage.REMAXAdvantageFn method)
(trinity.algorithm.advantage_fn.REMAXAdvantageFn method)
(trinity.algorithm.advantage_fn.rloo_advantage.RLOOAdvantageFn method)
(trinity.algorithm.advantage_fn.RLOOAdvantageFn method)
(trinity.algorithm.algorithm_manager.AlgorithmManager method)
(trinity.algorithm.entropy_loss_fn.entropy_loss_fn.DefaultEntropyLossFn method)
(trinity.algorithm.entropy_loss_fn.entropy_loss_fn.DummyEntropyLossFn method)
(trinity.algorithm.key_mapper.KeyMapper method)
(trinity.algorithm.kl_fn.kl_fn.KLFn method)
(trinity.algorithm.kl_fn.KLFn method)
(trinity.algorithm.KLFn method)
(trinity.algorithm.policy_loss_fn.dpo_loss.DPOLossFn method)
(trinity.algorithm.policy_loss_fn.DPOLossFn method)
(trinity.algorithm.policy_loss_fn.gspo_policy_loss.GSPOLossFn method)
(trinity.algorithm.policy_loss_fn.GSPOLossFn method)
(trinity.algorithm.policy_loss_fn.mix_policy_loss.MIXPolicyLossFn method)
(trinity.algorithm.policy_loss_fn.MIXPolicyLossFn method)
(trinity.algorithm.policy_loss_fn.opmd_policy_loss.OPMDPolicyLossFn method)
(trinity.algorithm.policy_loss_fn.OPMDPolicyLossFn method)
(trinity.algorithm.policy_loss_fn.policy_loss_fn.PolicyLossFn method)
(trinity.algorithm.policy_loss_fn.PolicyLossFn method)
(trinity.algorithm.policy_loss_fn.ppo_policy_loss.PPOPolicyLossFn method)
(trinity.algorithm.policy_loss_fn.PPOPolicyLossFn method)
(trinity.algorithm.policy_loss_fn.sft_loss.SFTLossFn method)
(trinity.algorithm.policy_loss_fn.SFTLossFn method)
(trinity.algorithm.PolicyLossFn method)
(trinity.algorithm.sample_strategy.DefaultSampleStrategy method)
(trinity.algorithm.sample_strategy.mix_sample_strategy.MixSampleStrategy method)
(trinity.algorithm.sample_strategy.MixSampleStrategy method)
(trinity.algorithm.sample_strategy.sample_strategy.DefaultSampleStrategy method)
(trinity.algorithm.sample_strategy.sample_strategy.SampleStrategy method)
(trinity.algorithm.sample_strategy.sample_strategy.WarmupSampleStrategy method)
(trinity.algorithm.sample_strategy.SampleStrategy method)
(trinity.algorithm.sample_strategy.WarmupSampleStrategy method)
(trinity.algorithm.SampleStrategy method)
(trinity.buffer.queue.AsyncPriorityQueue method)
(trinity.buffer.queue.AsyncQueue method)
(trinity.buffer.ray_wrapper.DBWrapper method)
(trinity.buffer.ray_wrapper.FileWrapper method)
(trinity.buffer.ray_wrapper.QueueWrapper method)
(trinity.buffer.reader.file_reader.DPODataReader method)
(trinity.buffer.reader.file_reader.DummyProgressBar method)
(trinity.buffer.reader.file_reader.RawDataReader method)
(trinity.buffer.reader.file_reader.RolloutDataReader method)
(trinity.buffer.reader.file_reader.SFTDataReader method)
(trinity.buffer.reader.queue_reader.QueueReader method)
(trinity.buffer.reader.sql_reader.SQLReader method)
(trinity.buffer.schema.Base method)
(trinity.buffer.writer.file_writer.JSONWriter method)
(trinity.buffer.writer.queue_writer.QueueWriter method)
(trinity.buffer.writer.sql_writer.SQLWriter method)
(trinity.common.config.AlgorithmConfig method)
(trinity.common.config.BufferConfig method)
(trinity.common.config.ClusterConfig method)
(trinity.common.config.Config method)
(trinity.common.config.DataPipelineConfig method)
(trinity.common.config.DataProcessorConfig method)
(trinity.common.config.ExplorerConfig method)
(trinity.common.config.ExplorerInput method)
(trinity.common.config.FormatConfig method)
(trinity.common.config.GenerationConfig method)
(trinity.common.config.InferenceModelConfig method)
(trinity.common.config.ModelConfig method)
(trinity.common.config.MonitorConfig method)
(trinity.common.config.RewardShapingConfig method)
(trinity.common.config.StorageConfig method)
(trinity.common.config.SynchronizerConfig method)
(trinity.common.config.TrainerConfig method)
(trinity.common.config.TrainerInput method)
(trinity.common.experience.CustomField method)
(trinity.common.experience.EID method)
(trinity.common.experience.Experience method)
(trinity.common.experience.Experiences method)
(trinity.common.models.model.ModelWrapper method)
(trinity.common.models.vllm_model.vLLMRolloutModel method)
(trinity.common.rewards.accuracy_reward.AccuracyReward method)
(trinity.common.rewards.AccuracyReward method)
(trinity.common.rewards.countdown_reward.CountDownRewardFn method)
(trinity.common.rewards.CountDownRewardFn method)
(trinity.common.rewards.dapo_reward.MathDAPORewardFn method)
(trinity.common.rewards.format_reward.FormatReward method)
(trinity.common.rewards.FormatReward method)
(trinity.common.rewards.math_reward.MathBoxedRewardFn method)
(trinity.common.rewards.math_reward.MathRewardFn method)
(trinity.common.rewards.MathBoxedRewardFn method)
(trinity.common.rewards.MathDAPORewardFn method)
(trinity.common.rewards.MathRewardFn method)
(trinity.common.rewards.reward_fn.RewardFn method)
(trinity.common.rewards.reward_fn.RMGalleryFn method)
(trinity.common.rewards.RewardFn method)
(trinity.common.rewards.RMGalleryFn method)
(trinity.common.verl_config.Actor method)
(trinity.common.verl_config.ActorModel method)
(trinity.common.verl_config.ActorRolloutRef method)
(trinity.common.verl_config.Algorithm method)
(trinity.common.verl_config.Checkpoint method)
(trinity.common.verl_config.Critic method)
(trinity.common.verl_config.CriticModel method)
(trinity.common.verl_config.CustomRewardFunction method)
(trinity.common.verl_config.Data method)
(trinity.common.verl_config.FSDPConfig method)
(trinity.common.verl_config.FusedKernelOptions method)
(trinity.common.verl_config.KL_Ctrl method)
(trinity.common.verl_config.Optim method)
(trinity.common.verl_config.Ref method)
(trinity.common.verl_config.RewardModel method)
(trinity.common.verl_config.Rollout method)
(trinity.common.verl_config.Trainer method)
(trinity.common.verl_config.veRLConfig method)
(trinity.common.verl_config.WrapPolicy method)
(trinity.common.workflows.AgentScopeReactV2MathWorkflow method)
(trinity.common.workflows.AlfworldWorkflow method)
(trinity.common.workflows.eval_workflow.MathEvalWorkflow method)
(trinity.common.workflows.math_rm_workflow.MathRMWorkflow method)
(trinity.common.workflows.MathEvalWorkflow method)
(trinity.common.workflows.MathRMWorkflow method)
(trinity.common.workflows.MathWorkflow method)
(trinity.common.workflows.SciWorldWorkflow method)
(trinity.common.workflows.SimpleWorkflow method)
(trinity.common.workflows.step_wise_workflow.RewardPropagationWorkflow method)
(trinity.common.workflows.step_wise_workflow.StepWiseRewardWorkflow method)
(trinity.common.workflows.StepWiseAlfworldWorkflow method)
(trinity.common.workflows.Task method)
(trinity.common.workflows.WebShopWorkflow method)
(trinity.common.workflows.Workflow method)
(trinity.common.workflows.workflow.MathWorkflow method)
(trinity.common.workflows.workflow.MultiTurnWorkflow method)
(trinity.common.workflows.workflow.SimpleWorkflow method)
(trinity.common.workflows.workflow.Task method)
(trinity.common.workflows.workflow.Workflow method)
(trinity.explorer.Explorer method)
(trinity.explorer.explorer.Explorer method)
(trinity.explorer.scheduler.RunnerWrapper method)
(trinity.explorer.scheduler.Scheduler method)
(trinity.explorer.scheduler.TaskWrapper method)
(trinity.explorer.workflow_runner.Status method)
(trinity.explorer.workflow_runner.WorkflowRunner method)
(trinity.manager.CacheManager method)
(trinity.manager.config_manager.ConfigManager method)
(trinity.manager.config_registry.config_registry.ConfigRegistry method)
(trinity.manager.manager.CacheManager method)
(trinity.manager.Synchronizer method)
(trinity.manager.synchronizer.Synchronizer method)
(trinity.trainer.Trainer method)
(trinity.trainer.trainer.Trainer method)
(trinity.trainer.verl.dp_actor.DataParallelPPOActor method)
(trinity.trainer.verl.fsdp_checkpoint_manager.FSDPCheckpointManager method)
(trinity.trainer.verl.fsdp_workers.ActorRolloutRefWorker method)
(trinity.trainer.verl.fsdp_workers.CriticWorker method)
(trinity.trainer.verl_trainer.VerlPPOTrainerWrapper method)
(trinity.utils.dlc_utils.ClusterStatus method)
(trinity.utils.log.NewLineFormatter method)
(trinity.utils.monitor.MlflowMonitor method)
(trinity.utils.monitor.Monitor method)
(trinity.utils.monitor.TensorboardMonitor method)
(trinity.utils.monitor.WandbMonitor method)
(trinity.utils.registry.Registry method)
(trinity.utils.timer.Timer method)
_ready_condition (trinity.manager.Synchronizer attribute)
(trinity.manager.synchronizer.Synchronizer attribute)
A
AbsFn (class in trinity.algorithm.kl_fn.kl_fn)
AccuracyReward (class in trinity.common.rewards)
(class in trinity.common.rewards.accuracy_reward)
acquire() (trinity.buffer.buffer_writer.BufferWriter method)
(trinity.buffer.BufferWriter method)
(trinity.buffer.ray_wrapper.DBWrapper method)
(trinity.buffer.ray_wrapper.FileWrapper method)
(trinity.buffer.ray_wrapper.QueueWrapper method)
(trinity.buffer.writer.file_writer.JSONWriter method)
(trinity.buffer.writer.queue_writer.QueueWriter method)
(trinity.buffer.writer.sql_writer.SQLWriter method)
action_mask (trinity.common.experience.Experience attribute)
action_masks (trinity.common.experience.Experiences attribute)
Actor (class in trinity.common.verl_config)
actor (trinity.common.verl_config.ActorRolloutRef attribute)
actor_grad_clip (trinity.common.config.TrainerConfig attribute)
actor_rollout_ref (trinity.common.verl_config.veRLConfig attribute)
ActorModel (class in trinity.common.verl_config)
ActorRolloutRef (class in trinity.common.verl_config)
ActorRolloutRefWorker (class in trinity.trainer.verl.fsdp_workers)
ADD (trinity.common.constants.OpType attribute)
add() (trinity.algorithm.add_strategy.add_strategy.AddStrategy method)
(trinity.algorithm.add_strategy.add_strategy.GroupAdvantageStrategy method)
(trinity.algorithm.add_strategy.add_strategy.RewardVarianceAddStrategy method)
(trinity.algorithm.add_strategy.AddStrategy method)
(trinity.algorithm.add_strategy.duplicate_add_strategy.DuplicateInformativeAddStrategy method)
(trinity.algorithm.add_strategy.DuplicateInformativeAddStrategy method)
(trinity.algorithm.add_strategy.RewardVarianceAddStrategy method)
(trinity.algorithm.add_strategy.step_wise_add_strategy.StepWiseGRPOStrategy method)
(trinity.algorithm.add_strategy.StepWiseGRPOStrategy method)
(trinity.algorithm.AddStrategy method)
add_module() (trinity.manager.Synchronizer method)
(trinity.manager.synchronizer.Synchronizer method)
add_strategy (trinity.common.config.AlgorithmConfig attribute)
add_strategy_args (trinity.common.config.AlgorithmConfig attribute)
AddStrategy (class in trinity.algorithm)
(class in trinity.algorithm.add_strategy)
(class in trinity.algorithm.add_strategy.add_strategy)
adv_estimator (trinity.common.verl_config.Algorithm attribute)
advantage_fn (trinity.common.config.AlgorithmConfig attribute)
advantage_fn_args (trinity.common.config.AlgorithmConfig attribute)
AdvantageFn (class in trinity.algorithm)
(class in trinity.algorithm.advantage_fn)
(class in trinity.algorithm.advantage_fn.advantage_fn)
advantages (trinity.common.experience.Experience attribute)
(trinity.common.experience.Experiences attribute)
agent_model_name (trinity.common.config.DataPipelineConfig attribute)
AgentScopeReactV2MathWorkflow (class in trinity.common.workflows)
AlfworldWorkflow (class in trinity.common.workflows)
Algorithm (class in trinity.common.verl_config)
algorithm (trinity.common.config.Config attribute)
(trinity.common.verl_config.veRLConfig attribute)
algorithm_type (trinity.common.config.AlgorithmConfig attribute)
(trinity.common.config.StorageConfig attribute)
AlgorithmConfig (class in trinity.common.config)
AlgorithmManager (class in trinity.algorithm.algorithm_manager)
AlgorithmType (class in trinity.algorithm)
(class in trinity.algorithm.algorithm)
api_server_ready() (trinity.common.models.vllm_model.vLLMRolloutModel method)
apply_kl_penalty_to_reward() (trinity.algorithm.kl_fn.kl_fn.DummyKLFn method)
(trinity.algorithm.kl_fn.kl_fn.KLFn method)
(trinity.algorithm.kl_fn.KLFn method)
(trinity.algorithm.KLFn method)
AsyncPriorityQueue (class in trinity.buffer.queue)
AsyncQueue (class in trinity.buffer.queue)
attention_masks (trinity.common.experience.Experiences attribute)
auxiliary_models (trinity.common.config.ExplorerConfig attribute)
B
balance_batch (trinity.common.verl_config.Trainer attribute)
Base (class in trinity.buffer.schema)
BaseFileReader (class in trinity.buffer.reader.file_reader)
batch (trinity.common.experience.EID attribute)
batch_id (trinity.common.workflows.Task attribute)
(trinity.common.workflows.workflow.Task attribute)
(trinity.explorer.scheduler.TaskWrapper attribute)
batch_size (trinity.common.config.BufferConfig attribute)
(trinity.common.experience.Experiences property)
beginner_mode() (trinity.manager.config_manager.ConfigManager method)
bench_on_latest_checkpoint (trinity.common.config.ExplorerConfig attribute)
benchmark() (trinity.explorer.Explorer method)
(trinity.explorer.explorer.Explorer method)
betas (trinity.common.verl_config.Optim attribute)
broadcast_advantages() (trinity.algorithm.add_strategy.step_wise_add_strategy.StepWiseGRPOStrategy method)
(trinity.algorithm.add_strategy.StepWiseGRPOStrategy method)
buffer (trinity.common.config.Config attribute)
(trinity.common.verl_config.veRLConfig attribute)
BufferConfig (class in trinity.common.config)
BufferReader (class in trinity.buffer)
(class in trinity.buffer.buffer_reader)
BufferWriter (class in trinity.buffer)
(class in trinity.buffer.buffer_writer)
bundle_indices (trinity.common.config.InferenceModelConfig attribute)
C
cache_dir (trinity.common.config.BufferConfig attribute)
(trinity.common.config.MonitorConfig attribute)
CacheManager (class in trinity.manager)
(class in trinity.manager.manager)
calculate_group_advantage() (trinity.algorithm.add_strategy.add_strategy.GroupAdvantageStrategy method)
(trinity.algorithm.add_strategy.add_strategy.GRPOAddStrategy method)
(trinity.algorithm.add_strategy.add_strategy.OPMDAddStrategy method)
(trinity.algorithm.add_strategy.correct_bias_add_strategy.CorrectBiasAddStrategy method)
(trinity.algorithm.add_strategy.CorrectBiasAddStrategy method)
(trinity.algorithm.add_strategy.GRPOAddStrategy method)
(trinity.algorithm.add_strategy.OPMDAddStrategy method)
(trinity.algorithm.add_strategy.step_wise_add_strategy.StepWiseGRPOStrategy method)
(trinity.algorithm.add_strategy.StepWiseGRPOStrategy method)
calculate_kl() (trinity.algorithm.kl_fn.kl_fn.AbsFn method)
(trinity.algorithm.kl_fn.kl_fn.DummyKLFn method)
(trinity.algorithm.kl_fn.kl_fn.K1Fn method)
(trinity.algorithm.kl_fn.kl_fn.K2Fn method)
(trinity.algorithm.kl_fn.kl_fn.K3Fn method)
(trinity.algorithm.kl_fn.kl_fn.KLFn method)
(trinity.algorithm.kl_fn.KLFn method)
(trinity.algorithm.KLFn method)
calculate_kl_loss() (trinity.algorithm.kl_fn.kl_fn.DummyKLFn method)
(trinity.algorithm.kl_fn.kl_fn.KLFn method)
(trinity.algorithm.kl_fn.KLFn method)
(trinity.algorithm.KLFn method)
calculate_metrics() (trinity.utils.monitor.Monitor method)
can_balance_batch (trinity.algorithm.algorithm.AlgorithmType attribute)
(trinity.algorithm.algorithm.DPOAlgorithm attribute)
(trinity.algorithm.algorithm.GRPOAlgorithm attribute)
(trinity.algorithm.algorithm.MIXAlgorithm attribute)
(trinity.algorithm.algorithm.OPMDAlgorithm attribute)
(trinity.algorithm.algorithm.PPOAlgorithm attribute)
(trinity.algorithm.algorithm.SFTAlgorithm attribute)
(trinity.algorithm.AlgorithmType attribute)
capacity (trinity.buffer.queue.AsyncPriorityQueue attribute)
(trinity.common.config.StorageConfig attribute)
CaseInsensitiveEnum (class in trinity.common.constants)
CaseInsensitiveEnumMeta (class in trinity.common.constants)
chat() (trinity.common.models.model.InferenceModel method)
(trinity.common.models.model.ModelWrapper method)
(trinity.common.models.vllm_model.vLLMRolloutModel method)
chat_async() (trinity.common.models.model.ModelWrapper method)
chat_template (trinity.common.config.FormatConfig attribute)
(trinity.common.config.InferenceModelConfig attribute)
CHATPAIR (trinity.common.constants.PromptType attribute)
check_and_update() (trinity.common.config.Config method)
check_auxiliary_models() (in module trinity.manager.config_registry.explorer_config_manager)
check_checkpoint_root_dir() (in module trinity.manager.config_registry.model_config_manager)
check_config() (trinity.algorithm.algorithm.AlgorithmType class method)
(trinity.algorithm.algorithm.DPOAlgorithm class method)
(trinity.algorithm.AlgorithmType class method)
check_experience_buffer_path() (in module trinity.manager.config_registry.buffer_config_manager)
check_model_path() (in module trinity.manager.config_registry.model_config_manager)
check_resume_from_path() (in module trinity.manager.config_registry.trainer_config_manager)
check_sft_warmup_dataset_path() (in module trinity.manager.config_registry.buffer_config_manager)
check_taskset_path() (in module trinity.manager.config_registry.buffer_config_manager)
check_tensor_parallel_size() (in module trinity.manager.config_registry.explorer_config_manager)
check_train_batch_size() (in module trinity.manager.config_registry.buffer_config_manager)
Checkpoint (class in trinity.common.verl_config)
CHECKPOINT (trinity.common.constants.SyncMethod attribute)
checkpoint (trinity.common.verl_config.Actor attribute)
(trinity.common.verl_config.Critic attribute)
(trinity.common.verl_config.Ref attribute)
checkpoint_job_dir (trinity.common.config.Config attribute)
checkpoint_root_dir (trinity.common.config.Config attribute)
checkpoint_shard_counter (trinity.manager.Synchronizer attribute)
(trinity.manager.synchronizer.Synchronizer attribute)
chosen (trinity.buffer.schema.sql_schema.DPODataModel attribute)
(trinity.common.experience.Experience attribute)
chosen_key (trinity.common.config.FormatConfig attribute)
chosen_text (trinity.common.experience.Experience attribute)
clean_strategy (trinity.common.config.DataPipelineConfig attribute)
clear_optimizer_state() (trinity.trainer.verl.fsdp_workers.ActorRolloutRefWorker method)
(trinity.trainer.verl.fsdp_workers.CriticWorker method)
clip_ratio (trinity.common.verl_config.Actor attribute)
cliprange_value (trinity.common.verl_config.Critic attribute)
close() (trinity.buffer.queue.AsyncPriorityQueue method)
(trinity.buffer.queue.AsyncQueue method)
(trinity.buffer.queue.QueueBuffer method)
(trinity.buffer.reader.file_reader.DummyProgressBar method)
(trinity.utils.monitor.MlflowMonitor method)
(trinity.utils.monitor.Monitor method)
(trinity.utils.monitor.TensorboardMonitor method)
(trinity.utils.monitor.WandbMonitor method)
cluster (trinity.common.config.Config attribute)
ClusterConfig (class in trinity.common.config)
ClusterStatus (class in trinity.utils.dlc_utils)
collect_experiences (trinity.common.config.ExplorerConfig attribute)
compute_advantage_in_trainer (trinity.algorithm.algorithm.AlgorithmType attribute)
(trinity.algorithm.algorithm.DPOAlgorithm attribute)
(trinity.algorithm.algorithm.GRPOAlgorithm attribute)
(trinity.algorithm.algorithm.MIXAlgorithm attribute)
(trinity.algorithm.algorithm.OPMDAlgorithm attribute)
(trinity.algorithm.algorithm.PPOAlgorithm attribute)
(trinity.algorithm.algorithm.SFTAlgorithm attribute)
(trinity.algorithm.AlgorithmType attribute)
compute_data_metrics() (in module trinity.trainer.verl.utils)
compute_log_prob() (trinity.trainer.verl.fsdp_workers.ActorRolloutRefWorker method)
compute_overlong_penalty() (trinity.common.rewards.dapo_reward.MathDAPORewardFn method)
(trinity.common.rewards.MathDAPORewardFn method)
compute_ref_log_prob() (trinity.trainer.verl.fsdp_workers.ActorRolloutRefWorker method)
compute_score() (in module trinity.utils.eval_utils)
compute_score_v0() (in module trinity.common.workflows.customized_toolcall_workflows)
compute_toolcall_reward() (in module trinity.common.workflows.customized_toolcall_workflows)
compute_values() (trinity.trainer.verl.fsdp_workers.CriticWorker method)
Config (class in trinity.common.config)
ConfigManager (class in trinity.manager.config_manager)
ConfigRegistry (class in trinity.manager.config_registry.config_registry)
ConstantMeta (class in trinity.algorithm.algorithm)
construct_prompt() (in module trinity.common.workflows.customized_toolcall_workflows)
consumed (trinity.buffer.schema.sql_schema.DPODataModel attribute)
(trinity.buffer.schema.sql_schema.ExperienceModel attribute)
(trinity.buffer.schema.sql_schema.SFTDataModel attribute)
continue_from_checkpoint (trinity.common.config.Config attribute)
convert_api_output_to_experience() (in module trinity.common.models.model)
convert_messages_to_experience() (trinity.common.models.model.InferenceModel method)
(trinity.common.models.model.ModelWrapper method)
(trinity.common.models.vllm_model.vLLMRolloutModel method)
convert_messages_to_experience_async() (trinity.common.models.model.ModelWrapper method)
convert_word_number() (in module trinity.utils.math_eval_utils)
CorrectBiasAddStrategy (class in trinity.algorithm.add_strategy)
(class in trinity.algorithm.add_strategy.correct_bias_add_strategy)
CountDownRewardFn (class in trinity.common.rewards)
(class in trinity.common.rewards.countdown_reward)
create_dynamic_table() (in module trinity.buffer.schema)
(in module trinity.buffer.schema.sql_schema)
create_inference_models() (in module trinity.common.models)
Critic (class in trinity.common.verl_config)
critic (trinity.common.verl_config.veRLConfig attribute)
critic_model_path (trinity.common.config.ModelConfig attribute)
critic_warmup (trinity.common.verl_config.Trainer attribute)
CriticModel (class in trinity.common.verl_config)
CriticWorker (class in trinity.trainer.verl.fsdp_workers)
custom_chat_template (trinity.common.config.ModelConfig attribute)
(trinity.common.verl_config.ActorModel attribute)
custom_fields (trinity.common.experience.Experiences attribute)
custom_reward_function (trinity.common.verl_config.veRLConfig attribute)
CustomField (class in trinity.common.experience)
CustomRewardFunction (class in trinity.common.verl_config)
D
Data (class in trinity.common.verl_config)
data (trinity.common.verl_config.veRLConfig attribute)
data_dist (trinity.common.config.DataPipelineConfig attribute)
data_processor (trinity.common.config.Config attribute)
data_processor_url (trinity.common.config.DataProcessorConfig attribute)
data_type (trinity.common.experience.CustomField attribute)
DataParallelPPOActor (class in trinity.trainer.verl.dp_actor)
DataPipelineConfig (class in trinity.common.config)
DataProcessorConfig (class in trinity.common.config)
DataProcessorPipelineType (class in trinity.common.constants)
DBWrapper (class in trinity.buffer.ray_wrapper)
DEFAULT (trinity.common.constants.ReadStrategy attribute)
DEFAULT_ANSWER_PARSER() (trinity.common.rewards.math_reward.MathRewardFn method)
(trinity.common.rewards.MathRewardFn method)
default_args() (trinity.algorithm.add_strategy.add_strategy.AddStrategy class method)
(trinity.algorithm.add_strategy.add_strategy.GRPOAddStrategy class method)
(trinity.algorithm.add_strategy.add_strategy.OPMDAddStrategy class method)
(trinity.algorithm.add_strategy.add_strategy.RewardVarianceAddStrategy class method)
(trinity.algorithm.add_strategy.AddStrategy class method)
(trinity.algorithm.add_strategy.correct_bias_add_strategy.CorrectBiasAddStrategy class method)
(trinity.algorithm.add_strategy.CorrectBiasAddStrategy class method)
(trinity.algorithm.add_strategy.duplicate_add_strategy.DuplicateInformativeAddStrategy class method)
(trinity.algorithm.add_strategy.DuplicateInformativeAddStrategy class method)
(trinity.algorithm.add_strategy.GRPOAddStrategy class method)
(trinity.algorithm.add_strategy.OPMDAddStrategy class method)
(trinity.algorithm.add_strategy.RewardVarianceAddStrategy class method)
(trinity.algorithm.add_strategy.step_wise_add_strategy.StepWiseGRPOStrategy class method)
(trinity.algorithm.add_strategy.StepWiseGRPOStrategy class method)
(trinity.algorithm.AddStrategy class method)
(trinity.algorithm.advantage_fn.advantage_fn.AdvantageFn class method)
(trinity.algorithm.advantage_fn.AdvantageFn class method)
(trinity.algorithm.advantage_fn.grpo_advantage.GRPOAdvantageFn class method)
(trinity.algorithm.advantage_fn.GRPOAdvantageFn class method)
(trinity.algorithm.advantage_fn.opmd_advantage.OPMDAdvantageFn class method)
(trinity.algorithm.advantage_fn.OPMDAdvantageFn class method)
(trinity.algorithm.advantage_fn.ppo_advantage.PPOAdvantageFn class method)
(trinity.algorithm.advantage_fn.PPOAdvantageFn class method)
(trinity.algorithm.advantage_fn.reinforce_plus_plus_advantage.REINFORCEPLUSPLUSAdvantageFn class method)
(trinity.algorithm.advantage_fn.REINFORCEPLUSPLUSAdvantageFn class method)
(trinity.algorithm.advantage_fn.remax_advantage.REMAXAdvantageFn class method)
(trinity.algorithm.advantage_fn.REMAXAdvantageFn class method)
(trinity.algorithm.advantage_fn.rloo_advantage.RLOOAdvantageFn class method)
(trinity.algorithm.advantage_fn.RLOOAdvantageFn class method)
(trinity.algorithm.AdvantageFn class method)
(trinity.algorithm.entropy_loss_fn.entropy_loss_fn.EntropyLossFn class method)
(trinity.algorithm.entropy_loss_fn.EntropyLossFn class method)
(trinity.algorithm.EntropyLossFn class method)
(trinity.algorithm.kl_fn.kl_fn.KLFn class method)
(trinity.algorithm.kl_fn.KLFn class method)
(trinity.algorithm.KLFn class method)
(trinity.algorithm.policy_loss_fn.dpo_loss.DPOLossFn class method)
(trinity.algorithm.policy_loss_fn.DPOLossFn class method)
(trinity.algorithm.policy_loss_fn.gspo_policy_loss.GSPOLossFn class method)
(trinity.algorithm.policy_loss_fn.GSPOLossFn class method)
(trinity.algorithm.policy_loss_fn.mix_policy_loss.MIXPolicyLossFn class method)
(trinity.algorithm.policy_loss_fn.MIXPolicyLossFn class method)
(trinity.algorithm.policy_loss_fn.opmd_policy_loss.OPMDPolicyLossFn class method)
(trinity.algorithm.policy_loss_fn.OPMDPolicyLossFn class method)
(trinity.algorithm.policy_loss_fn.policy_loss_fn.PolicyLossFn class method)
(trinity.algorithm.policy_loss_fn.PolicyLossFn class method)
(trinity.algorithm.policy_loss_fn.ppo_policy_loss.PPOPolicyLossFn class method)
(trinity.algorithm.policy_loss_fn.PPOPolicyLossFn class method)
(trinity.algorithm.policy_loss_fn.sft_loss.SFTLossFn class method)
(trinity.algorithm.policy_loss_fn.SFTLossFn class method)
(trinity.algorithm.PolicyLossFn class method)
(trinity.algorithm.sample_strategy.DefaultSampleStrategy class method)
(trinity.algorithm.sample_strategy.mix_sample_strategy.MixSampleStrategy class method)
(trinity.algorithm.sample_strategy.MixSampleStrategy class method)
(trinity.algorithm.sample_strategy.sample_strategy.DefaultSampleStrategy class method)
(trinity.algorithm.sample_strategy.sample_strategy.SampleStrategy class method)
(trinity.algorithm.sample_strategy.sample_strategy.WarmupSampleStrategy class method)
(trinity.algorithm.sample_strategy.SampleStrategy class method)
(trinity.algorithm.sample_strategy.WarmupSampleStrategy class method)
(trinity.algorithm.SampleStrategy class method)
(trinity.utils.monitor.MlflowMonitor class method)
(trinity.utils.monitor.Monitor class method)
(trinity.utils.monitor.WandbMonitor class method)
default_config (trinity.manager.config_registry.config_registry.ConfigRegistry property)
default_config() (trinity.algorithm.algorithm.AlgorithmType class method)
(trinity.algorithm.algorithm.DPOAlgorithm class method)
(trinity.algorithm.algorithm.GRPOAlgorithm class method)
(trinity.algorithm.algorithm.MIXAlgorithm class method)
(trinity.algorithm.algorithm.OPMDAlgorithm class method)
(trinity.algorithm.algorithm.PPOAlgorithm class method)
(trinity.algorithm.algorithm.SFTAlgorithm class method)
(trinity.algorithm.AlgorithmType class method)
default_eval_workflow_type (trinity.common.config.ExplorerInput attribute)
(trinity.common.config.StorageConfig attribute)
DEFAULT_FORMAT_PATTERN (trinity.common.rewards.math_reward.MathRewardFn attribute)
(trinity.common.rewards.MathRewardFn attribute)
default_hdfs_dir (trinity.common.verl_config.Trainer attribute)
default_local_dir (trinity.common.verl_config.Trainer attribute)
default_reward_fn_type (trinity.common.config.ExplorerInput attribute)
(trinity.common.config.StorageConfig attribute)
default_storage_path() (in module trinity.buffer.utils)
default_workflow_type (trinity.common.config.ExplorerInput attribute)
(trinity.common.config.StorageConfig attribute)
DefaultEntropyLossFn (class in trinity.algorithm.entropy_loss_fn.entropy_loss_fn)
DefaultSampleStrategy (class in trinity.algorithm.sample_strategy)
(class in trinity.algorithm.sample_strategy.sample_strategy)
del_local_ckpt_after_load (trinity.common.verl_config.Trainer attribute)
deserialize() (trinity.common.experience.Experience class method)
destination_field (trinity.common.experience.CustomField attribute)
DIV (trinity.common.constants.OpType attribute)
dj_config_path (trinity.common.config.DataPipelineConfig attribute)
dj_process_desc (trinity.common.config.DataPipelineConfig attribute)
DPO (trinity.common.experience.ExperienceType attribute)
DPOAlgorithm (class in trinity.algorithm.algorithm)
DPODataModel (class in trinity.buffer.schema.sql_schema)
DPODataReader (class in trinity.buffer.reader.file_reader)
DPOLossFn (class in trinity.algorithm.policy_loss_fn)
(class in trinity.algorithm.policy_loss_fn.dpo_loss)
dtype (trinity.common.config.InferenceModelConfig attribute)
DummyEntropyLossFn (class in trinity.algorithm.entropy_loss_fn.entropy_loss_fn)
DummyKLFn (class in trinity.algorithm.kl_fn.kl_fn)
DummyProgressBar (class in trinity.buffer.reader.file_reader)
DuplicateInformativeAddStrategy (class in trinity.algorithm.add_strategy)
(class in trinity.algorithm.add_strategy.duplicate_add_strategy)
DYNAMIC_BY_EXPLORER (trinity.common.constants.SyncStyle attribute)
DYNAMIC_BY_TRAINER (trinity.common.constants.SyncStyle attribute)
E
EID (class in trinity.common.experience)
eid (trinity.common.experience.Experience attribute)
eids (trinity.common.experience.Experiences attribute)
empty_experiences() (in module trinity.common.experience)
enable (trinity.common.verl_config.RewardModel attribute)
enable_auto_tool_choice (trinity.common.config.InferenceModelConfig attribute)
enable_chunked_prefill (trinity.common.config.InferenceModelConfig attribute)
enable_exp_buffer (trinity.common.verl_config.Trainer attribute)
enable_gradient_checkpointing (trinity.common.verl_config.ActorModel attribute)
(trinity.common.verl_config.CriticModel attribute)
enable_history (trinity.common.config.InferenceModelConfig attribute)
enable_openai_api (trinity.common.config.InferenceModelConfig attribute)
enable_prefix_caching (trinity.common.config.InferenceModelConfig attribute)
enable_preview (trinity.common.config.TrainerConfig attribute)
(trinity.common.verl_config.veRLConfig attribute)
enable_progress_bar (trinity.common.config.StorageConfig attribute)
enable_ray_timeline (trinity.common.config.MonitorConfig attribute)
enable_thinking (trinity.common.config.InferenceModelConfig attribute)
enforce_eager (trinity.common.config.InferenceModelConfig attribute)
engine_num (trinity.common.config.InferenceModelConfig attribute)
engine_type (trinity.common.config.InferenceModelConfig attribute)
entropy_checkpointing (trinity.common.verl_config.Actor attribute)
(trinity.common.verl_config.Ref attribute)
entropy_coeff (trinity.common.verl_config.Actor attribute)
entropy_from_logits_with_chunking (trinity.common.verl_config.Actor attribute)
(trinity.common.verl_config.Ref attribute)
entropy_loss_fn (trinity.common.config.AlgorithmConfig attribute)
entropy_loss_fn_args (trinity.common.config.AlgorithmConfig attribute)
EntropyLossFn (class in trinity.algorithm)
(class in trinity.algorithm.entropy_loss_fn)
(class in trinity.algorithm.entropy_loss_fn.entropy_loss_fn)
env_vars (trinity.common.config.ExplorerConfig attribute)
EVAL (trinity.common.constants.TaskType attribute)
eval() (trinity.explorer.Explorer method)
(trinity.explorer.explorer.Explorer method)
eval_interval (trinity.common.config.ExplorerConfig attribute)
eval_on_startup (trinity.common.config.ExplorerConfig attribute)
eval_tasksets (trinity.common.config.ExplorerInput attribute)
evaluate_equation() (in module trinity.utils.eval_utils)
Experience (class in trinity.common.experience)
EXPERIENCE (trinity.common.constants.DataProcessorPipelineType attribute)
experience_buffer (trinity.common.config.TrainerInput attribute)
experience_pipeline (trinity.common.config.DataProcessorConfig attribute)
experience_type (trinity.common.experience.Experience attribute)
ExperienceModel (class in trinity.buffer.schema.sql_schema)
Experiences (class in trinity.common.experience)
ExperienceType (class in trinity.common.experience)
experiment_name (trinity.common.verl_config.Trainer attribute)
expert_mode() (trinity.manager.config_manager.ConfigManager method)
EXPLORE (trinity.common.constants.TaskType attribute)
explore() (trinity.explorer.Explorer method)
(trinity.explorer.explorer.Explorer method)
explore_step() (trinity.explorer.Explorer method)
(trinity.explorer.explorer.Explorer method)
Explorer (class in trinity.explorer)
(class in trinity.explorer.explorer)
explorer (trinity.common.config.Config attribute)
explorer_input (trinity.common.config.BufferConfig attribute)
explorer_name (trinity.common.verl_config.ActorRolloutRef attribute)
explorer_output (trinity.common.config.BufferConfig attribute)
explorer_status_counts (trinity.manager.Synchronizer attribute)
(trinity.manager.synchronizer.Synchronizer attribute)
explorer_visible() (in module trinity.manager.config_registry.explorer_config_manager)
explorer_world_size (trinity.common.config.SynchronizerConfig attribute)
ExplorerConfig (class in trinity.common.config)
ExplorerInput (class in trinity.common.config)
external_lib (trinity.common.verl_config.ActorModel attribute)
(trinity.common.verl_config.CriticModel attribute)
extract_answer() (in module trinity.utils.math_eval_utils)
extract_experience_from_history() (trinity.common.models.model.ModelWrapper method)
extract_logprobs() (in module trinity.common.models.model)
extract_solution() (in module trinity.utils.eval_utils)
extract_solution_v0() (in module trinity.common.workflows.customized_toolcall_workflows)
F
FIFO (trinity.common.constants.ReadStrategy attribute)
FILE (trinity.common.constants.StorageType attribute)
FileWrapper (class in trinity.buffer.ray_wrapper)
find_boxed_answer() (in module trinity.utils.eval_utils)
finish() (trinity.utils.dlc_utils.ClusterStatus method)
fix_a_slash_b() (in module trinity.utils.math_eval_utils)
fix_fracs() (in module trinity.utils.math_eval_utils)
fix_sqrt() (in module trinity.utils.math_eval_utils)
FIXED (trinity.common.constants.SyncStyle attribute)
flatten() (trinity.common.config.Config method)
format (trinity.common.config.DataPipelineConfig attribute)
(trinity.common.config.StorageConfig attribute)
format() (trinity.utils.log.NewLineFormatter method)
format_args (trinity.common.workflows.Task attribute)
(trinity.common.workflows.workflow.Task attribute)
format_messages() (trinity.common.workflows.eval_workflow.MathEvalWorkflow method)
(trinity.common.workflows.MathEvalWorkflow method)
(trinity.common.workflows.SimpleWorkflow method)
(trinity.common.workflows.workflow.SimpleWorkflow method)
format_prompt() (trinity.common.workflows.customized_math_workflows.MathBoxedWorkflow method)
(trinity.common.workflows.customized_toolcall_workflows.ToolCallWorkflow method)
(trinity.common.workflows.MathBoxedWorkflow method)
(trinity.common.workflows.ToolCallWorkflow method)
FormatConfig (class in trinity.common.config)
FormatReward (class in trinity.common.rewards)
(class in trinity.common.rewards.format_reward)
forward_max_token_len_per_gpu (trinity.common.verl_config.Critic attribute)
(trinity.common.verl_config.RewardModel attribute)
forward_micro_batch_size (trinity.common.verl_config.Critic attribute)
forward_micro_batch_size_per_gpu (trinity.common.verl_config.Critic attribute)
forward_prefetch (trinity.common.verl_config.FSDPConfig attribute)
from_experience() (trinity.buffer.schema.sql_schema.ExperienceModel class method)
from_messages() (trinity.buffer.schema.sql_schema.SFTDataModel class method)
from_trinity() (trinity.algorithm.key_mapper.KeyMapper method)
fsdp_config (trinity.common.verl_config.Actor attribute)
(trinity.common.verl_config.CriticModel attribute)
(trinity.common.verl_config.Ref attribute)
fsdp_size (trinity.common.verl_config.FSDPConfig attribute)
FSDPCheckpointManager (class in trinity.trainer.verl.fsdp_checkpoint_manager)
FSDPConfig (class in trinity.common.verl_config)
fused_kernel_options (trinity.common.verl_config.ActorModel attribute)
FusedKernelOptions (class in trinity.common.verl_config)
G
gamma (trinity.common.verl_config.Algorithm attribute)
gather() (trinity.common.experience.Experience class method)
gather_action_masks() (in module trinity.common.experience)
gather_advantages() (in module trinity.common.experience)
gather_attention_masks() (in module trinity.common.experience)
gather_experiences() (trinity.common.experience.Experiences class method)
gather_logprobs() (in module trinity.common.experience)
gather_metrics() (in module trinity.utils.monitor)
gather_returns() (in module trinity.common.experience)
gather_token_ids() (in module trinity.common.experience)
generate() (trinity.common.models.model.InferenceModel method)
(trinity.common.models.model.ModelWrapper method)
(trinity.common.models.vllm_model.vLLMRolloutModel method)
generate_async() (trinity.common.models.model.ModelWrapper method)
generate_config() (trinity.manager.config_manager.ConfigManager method)
generate_env_inference_samples() (trinity.common.workflows.AlfworldWorkflow method)
(trinity.common.workflows.SciWorldWorkflow method)
(trinity.common.workflows.WebShopWorkflow method)
GenerationConfig (class in trinity.common.config)
get() (trinity.buffer.queue.AsyncPriorityQueue method)
(trinity.buffer.queue.QueueBuffer method)
(trinity.manager.config_registry.config_registry.ConfigRegistry method)
(trinity.utils.registry.Registry method)
get_actor() (trinity.manager.Synchronizer class method)
(trinity.manager.synchronizer.Synchronizer class method)
get_available_address() (trinity.common.models.model.InferenceModel method)
get_batch() (trinity.buffer.ray_wrapper.QueueWrapper method)
get_buffer_reader() (in module trinity.buffer)
(in module trinity.buffer.buffer)
get_buffer_writer() (in module trinity.buffer)
(in module trinity.buffer.buffer)
get_check_func() (trinity.manager.config_registry.config_registry.ConfigRegistry method)
get_checkpoint_dir_with_step_num() (in module trinity.common.models.utils)
get_configs() (trinity.manager.config_manager.ConfigManager method)
(trinity.manager.config_registry.config_registry.ConfigRegistry method)
get_current_algorithm_config() (trinity.algorithm.algorithm_manager.AlgorithmManager method)
get_dlc_env_vars() (in module trinity.utils.dlc_utils)
get_explorer_status_counts() (trinity.manager.Synchronizer method)
(trinity.manager.synchronizer.Synchronizer method)
get_logger() (in module trinity.utils.log)
get_model_response() (trinity.common.workflows.AlfworldWorkflow method)
(trinity.common.workflows.SciWorldWorkflow method)
(trinity.common.workflows.WebShopWorkflow method)
get_model_response_text() (trinity.common.workflows.AlfworldWorkflow method)
(trinity.common.workflows.SciWorldWorkflow method)
(trinity.common.workflows.WebShopWorkflow method)
get_model_state_dict() (trinity.manager.Synchronizer method)
(trinity.manager.synchronizer.Synchronizer method)
get_model_version() (trinity.common.models.model.InferenceModel method)
(trinity.common.models.vllm_model.vLLMRolloutModel method)
get_openai_client() (trinity.common.models.model.ModelWrapper method)
get_queue() (trinity.buffer.queue.QueueBuffer class method)
get_results() (trinity.explorer.scheduler.Scheduler method)
get_state_dict_meta() (trinity.manager.Synchronizer method)
(trinity.manager.synchronizer.Synchronizer method)
get_trainer_status() (trinity.manager.Synchronizer method)
(trinity.manager.synchronizer.Synchronizer method)
get_trainer_wrapper() (in module trinity.trainer)
(in module trinity.trainer.trainer)
get_verl_checkpoint_info() (in module trinity.common.models.utils)
get_weight() (trinity.explorer.Explorer method)
(trinity.explorer.explorer.Explorer method)
get_wrapper() (trinity.buffer.ray_wrapper.DBWrapper class method)
(trinity.buffer.ray_wrapper.FileWrapper class method)
(trinity.buffer.ray_wrapper.QueueWrapper class method)
gpu_memory_utilization (trinity.common.config.InferenceModelConfig attribute)
gpu_per_node (trinity.common.config.ClusterConfig attribute)
grad_clip (trinity.common.verl_config.Actor attribute)
(trinity.common.verl_config.Critic attribute)
group (trinity.common.config.Config attribute)
group_by() (in module trinity.algorithm.add_strategy.add_strategy)
group_experiences() (trinity.algorithm.add_strategy.add_strategy.GroupAdvantageStrategy method)
(trinity.algorithm.add_strategy.add_strategy.GRPOAddStrategy method)
(trinity.algorithm.add_strategy.add_strategy.OPMDAddStrategy method)
(trinity.algorithm.add_strategy.GRPOAddStrategy method)
(trinity.algorithm.add_strategy.OPMDAddStrategy method)
group_name (trinity.common.verl_config.Trainer attribute)
GroupAdvantageStrategy (class in trinity.algorithm.add_strategy.add_strategy)
GRPOAddStrategy (class in trinity.algorithm.add_strategy)
(class in trinity.algorithm.add_strategy.add_strategy)
GRPOAdvantageFn (class in trinity.algorithm.advantage_fn)
(class in trinity.algorithm.advantage_fn.grpo_advantage)
GRPOAlgorithm (class in trinity.algorithm.algorithm)
GSPOLossFn (class in trinity.algorithm.policy_loss_fn)
(class in trinity.algorithm.policy_loss_fn.gspo_policy_loss)
H
has_api_server() (trinity.common.models.vllm_model.vLLMRolloutModel method)
has_step() (trinity.explorer.scheduler.Scheduler method)
horizon (trinity.common.verl_config.KL_Ctrl attribute)
hybrid_engine (trinity.common.verl_config.ActorRolloutRef attribute)
I
id (trinity.buffer.schema.sql_schema.DPODataModel attribute)
(trinity.buffer.schema.sql_schema.ExperienceModel attribute)
(trinity.buffer.schema.sql_schema.SFTDataModel attribute)
(trinity.buffer.schema.sql_schema.TaskModel attribute)
ignore_keys (trinity.algorithm.policy_loss_fn.policy_loss_fn.PolicyLossFnMeta attribute)
impl_backend (trinity.common.verl_config.FusedKernelOptions attribute)
index (trinity.common.config.StorageConfig attribute)
InferenceModel (class in trinity.common.models.model)
InferenceModelConfig (class in trinity.common.config)
info (trinity.common.experience.Experience attribute)
init_model() (trinity.trainer.verl.fsdp_workers.ActorRolloutRefWorker method)
(trinity.trainer.verl.fsdp_workers.CriticWorker method)
init_process_group() (in module trinity.utils.distributed)
(trinity.common.models.vllm_model.vLLMRolloutModel method)
(trinity.common.models.vllm_worker.WorkerExtension method)
init_workers() (trinity.trainer.verl_trainer.VerlPPOTrainerWrapper method)
input_buffers (trinity.common.config.DataPipelineConfig attribute)
is_alive() (trinity.explorer.Explorer method)
(trinity.explorer.explorer.Explorer method)
(trinity.explorer.workflow_runner.WorkflowRunner method)
(trinity.trainer.Trainer method)
(trinity.trainer.trainer.Trainer method)
is_database_url() (in module trinity.buffer.ray_wrapper)
is_equiv() (in module trinity.utils.eval_utils)
is_eval (trinity.common.workflows.Task attribute)
(trinity.common.workflows.workflow.Task attribute)
is_ipv6_address() (in module trinity.utils.distributed)
is_json_file() (in module trinity.buffer.ray_wrapper)
is_running() (in module trinity.utils.dlc_utils)
J
JSONWriter (class in trinity.buffer.writer.file_writer)
K
K1Fn (class in trinity.algorithm.kl_fn.kl_fn)
K2Fn (class in trinity.algorithm.kl_fn.kl_fn)
K3Fn (class in trinity.algorithm.kl_fn.kl_fn)
KeyMapper (class in trinity.algorithm.key_mapper)
kl_coef (trinity.common.verl_config.KL_Ctrl attribute)
KL_Ctrl (class in trinity.common.verl_config)
kl_ctrl (trinity.common.verl_config.Algorithm attribute)
kl_loss_coef (trinity.common.verl_config.Actor attribute)
kl_loss_fn (trinity.common.config.AlgorithmConfig attribute)
kl_loss_fn_args (trinity.common.config.AlgorithmConfig attribute)
kl_loss_type (trinity.common.verl_config.Actor attribute)
kl_penalty (trinity.common.verl_config.Algorithm attribute)
kl_penalty_fn (trinity.common.config.AlgorithmConfig attribute)
kl_penalty_fn_args (trinity.common.config.AlgorithmConfig attribute)
KLFn (class in trinity.algorithm)
(class in trinity.algorithm.kl_fn)
(class in trinity.algorithm.kl_fn.kl_fn)
L
label_key (trinity.common.config.FormatConfig attribute)
lam (trinity.common.verl_config.Algorithm attribute)
last_boxed_only_string() (in module trinity.utils.eval_utils)
length() (trinity.buffer.ray_wrapper.QueueWrapper method)
LFU (trinity.common.constants.ReadStrategy attribute)
linear_decay_priority() (in module trinity.buffer.queue)
list() (trinity.utils.registry.Registry method)
load_checkpoint() (trinity.trainer.verl.fsdp_workers.ActorRolloutRefWorker method)
(trinity.trainer.verl.fsdp_workers.CriticWorker method)
load_config() (in module trinity.common.config)
(in module trinity.common.verl_config)
load_contents (trinity.common.verl_config.Checkpoint attribute)
load_explorer() (trinity.manager.CacheManager method)
(trinity.manager.manager.CacheManager method)
load_from_file() (in module trinity.utils.plugin_loader)
load_plugins() (in module trinity.utils.plugin_loader)
load_state_dict() (in module trinity.common.models.utils)
load_state_dict_from_verl_checkpoint() (in module trinity.common.models.utils)
load_trainer() (trinity.manager.CacheManager method)
(trinity.manager.manager.CacheManager method)
log() (trinity.utils.monitor.MlflowMonitor method)
(trinity.utils.monitor.Monitor method)
(trinity.utils.monitor.TensorboardMonitor method)
(trinity.utils.monitor.WandbMonitor method)
log_prob_max_token_len_per_gpu (trinity.common.verl_config.Ref attribute)
log_prob_micro_batch_size (trinity.common.verl_config.Ref attribute)
(trinity.common.verl_config.Rollout attribute)
log_prob_micro_batch_size_per_gpu (trinity.common.verl_config.Ref attribute)
(trinity.common.verl_config.Rollout attribute)
log_prob_use_dynamic_bsz (trinity.common.verl_config.Ref attribute)
log_table() (trinity.utils.monitor.MlflowMonitor method)
(trinity.utils.monitor.Monitor method)
(trinity.utils.monitor.TensorboardMonitor method)
(trinity.utils.monitor.WandbMonitor method)
logger (trinity.common.verl_config.Trainer attribute)
logprobs (trinity.common.config.GenerationConfig attribute)
(trinity.common.experience.Experience attribute)
(trinity.common.experience.Experiences attribute)
logprobs() (trinity.common.models.model.InferenceModel method)
(trinity.common.models.model.ModelWrapper method)
(trinity.common.models.vllm_model.vLLMRolloutModel method)
logprobs_async() (trinity.common.models.model.ModelWrapper method)
loss_agg_mode (trinity.common.verl_config.Actor attribute)
(trinity.common.verl_config.Critic attribute)
lr (trinity.common.verl_config.Optim attribute)
lr_warmup_steps (trinity.common.verl_config.Optim attribute)
lr_warmup_steps_ratio (trinity.common.verl_config.Optim attribute)
LRU (trinity.common.constants.ReadStrategy attribute)
M
maintain_session_state() (trinity.manager.config_manager.ConfigManager method)
masked_mean() (in module trinity.algorithm.utils)
masked_sum() (in module trinity.algorithm.utils)
masked_var() (in module trinity.algorithm.utils)
masked_whiten() (in module trinity.algorithm.utils)
math_equal() (in module trinity.utils.math_eval_utils)
MathBoxedRewardFn (class in trinity.common.rewards)
(class in trinity.common.rewards.math_reward)
MathBoxedWorkflow (class in trinity.common.workflows)
(class in trinity.common.workflows.customized_math_workflows)
MathDAPORewardFn (class in trinity.common.rewards)
(class in trinity.common.rewards.dapo_reward)
MathEvalWorkflow (class in trinity.common.workflows)
(class in trinity.common.workflows.eval_workflow)
MathRewardFn (class in trinity.common.rewards)
(class in trinity.common.rewards.math_reward)
MathRMWorkflow (class in trinity.common.workflows)
(class in trinity.common.workflows.math_rm_workflow)
MathWorkflow (class in trinity.common.workflows)
(class in trinity.common.workflows.workflow)
max_actor_ckpt_to_keep (trinity.common.verl_config.Trainer attribute)
max_critic_ckpt_to_keep (trinity.common.verl_config.Trainer attribute)
max_length (trinity.common.verl_config.RewardModel attribute)
max_model_len (trinity.common.config.InferenceModelConfig attribute)
(trinity.common.config.ModelConfig attribute)
max_prompt_tokens (trinity.common.config.InferenceModelConfig attribute)
(trinity.common.config.ModelConfig attribute)
max_read_timeout (trinity.common.config.StorageConfig attribute)
max_repeat_times_per_runner (trinity.common.config.ExplorerConfig attribute)
max_response_tokens (trinity.common.config.InferenceModelConfig attribute)
(trinity.common.config.ModelConfig attribute)
max_retry_interval (trinity.common.config.BufferConfig attribute)
max_retry_times (trinity.common.config.BufferConfig attribute)
(trinity.common.config.ExplorerConfig attribute)
max_step_num (trinity.common.workflows.step_wise_workflow.RewardPropagationWorkflow property)
(trinity.common.workflows.step_wise_workflow.StepWiseRewardWorkflow property)
(trinity.common.workflows.StepWiseAlfworldWorkflow property)
max_timeout (trinity.common.config.ExplorerConfig attribute)
MEMORY (trinity.common.constants.SyncMethod attribute)
merge_by_placement() (in module trinity.common.models.utils)
message (trinity.explorer.workflow_runner.Status attribute)
messages (trinity.buffer.schema.sql_schema.SFTDataModel attribute)
MESSAGES (trinity.common.constants.PromptType attribute)
messages (trinity.common.experience.Experience attribute)
messages_key (trinity.common.config.FormatConfig attribute)
metadata (trinity.buffer.schema.Base attribute)
metric (trinity.explorer.workflow_runner.Status attribute)
metrics (trinity.common.experience.Experience attribute)
micro_batch_size_per_gpu (trinity.common.verl_config.RewardModel attribute)
min_lr_ratio (trinity.common.verl_config.Optim attribute)
min_num_params (trinity.common.verl_config.FSDPConfig attribute)
(trinity.common.verl_config.WrapPolicy attribute)
min_priority_score (trinity.common.config.DataPipelineConfig attribute)
min_size_ratio (trinity.common.config.DataPipelineConfig attribute)
MIXAlgorithm (class in trinity.algorithm.algorithm)
MIXPolicyLossFn (class in trinity.algorithm.policy_loss_fn)
(class in trinity.algorithm.policy_loss_fn.mix_policy_loss)
MixSampleStrategy (class in trinity.algorithm.sample_strategy)
(class in trinity.algorithm.sample_strategy.mix_sample_strategy)
MlflowMonitor (class in trinity.utils.monitor)
mode (trinity.common.config.Config attribute)
model (trinity.common.config.Config attribute)
(trinity.common.verl_config.ActorRolloutRef attribute)
(trinity.common.verl_config.Critic attribute)
(trinity.common.verl_config.RewardModel attribute)
model_path (trinity.common.config.InferenceModelConfig attribute)
(trinity.common.config.ModelConfig attribute)
model_state_dict (trinity.manager.Synchronizer attribute)
(trinity.manager.synchronizer.Synchronizer attribute)
model_version (trinity.common.models.model.ModelWrapper property)
(trinity.manager.Synchronizer attribute)
(trinity.manager.synchronizer.Synchronizer attribute)
ModelConfig (class in trinity.common.config)
ModelWrapper (class in trinity.common.models.model)
module
trinity
trinity.algorithm
trinity.algorithm.add_strategy
trinity.algorithm.add_strategy.add_strategy
trinity.algorithm.add_strategy.correct_bias_add_strategy
trinity.algorithm.add_strategy.duplicate_add_strategy
trinity.algorithm.add_strategy.step_wise_add_strategy
trinity.algorithm.advantage_fn
trinity.algorithm.advantage_fn.advantage_fn
trinity.algorithm.advantage_fn.grpo_advantage
trinity.algorithm.advantage_fn.opmd_advantage
trinity.algorithm.advantage_fn.ppo_advantage
trinity.algorithm.advantage_fn.reinforce_plus_plus_advantage
trinity.algorithm.advantage_fn.remax_advantage
trinity.algorithm.advantage_fn.rloo_advantage
trinity.algorithm.algorithm
trinity.algorithm.algorithm_manager
trinity.algorithm.entropy_loss_fn
trinity.algorithm.entropy_loss_fn.entropy_loss_fn
trinity.algorithm.key_mapper
trinity.algorithm.kl_fn
trinity.algorithm.kl_fn.kl_fn
trinity.algorithm.policy_loss_fn
trinity.algorithm.policy_loss_fn.dpo_loss
trinity.algorithm.policy_loss_fn.gspo_policy_loss
trinity.algorithm.policy_loss_fn.mix_policy_loss
trinity.algorithm.policy_loss_fn.opmd_policy_loss
trinity.algorithm.policy_loss_fn.policy_loss_fn
trinity.algorithm.policy_loss_fn.ppo_policy_loss
trinity.algorithm.policy_loss_fn.sft_loss
trinity.algorithm.sample_strategy
trinity.algorithm.sample_strategy.mix_sample_strategy
trinity.algorithm.sample_strategy.sample_strategy
trinity.algorithm.sample_strategy.utils
trinity.algorithm.utils
trinity.buffer
trinity.buffer.buffer
trinity.buffer.buffer_reader
trinity.buffer.buffer_writer
trinity.buffer.queue
trinity.buffer.ray_wrapper
trinity.buffer.reader
trinity.buffer.reader.file_reader
trinity.buffer.reader.queue_reader
trinity.buffer.reader.sql_reader
trinity.buffer.schema
trinity.buffer.schema.sql_schema
trinity.buffer.utils
trinity.buffer.writer
trinity.buffer.writer.file_writer
trinity.buffer.writer.queue_writer
trinity.buffer.writer.sql_writer
trinity.common
trinity.common.config
trinity.common.constants
trinity.common.experience
trinity.common.models
trinity.common.models.model
trinity.common.models.utils
trinity.common.models.vllm_model
trinity.common.models.vllm_worker
trinity.common.rewards
trinity.common.rewards.accuracy_reward
trinity.common.rewards.agents_reward
trinity.common.rewards.countdown_reward
trinity.common.rewards.dapo_reward
trinity.common.rewards.format_reward
trinity.common.rewards.human_reward
trinity.common.rewards.math_reward
trinity.common.rewards.reward_fn
trinity.common.rewards.tool_reward
trinity.common.rewards.utils
trinity.common.verl_config
trinity.common.workflows
trinity.common.workflows.customized_math_workflows
trinity.common.workflows.customized_toolcall_workflows
trinity.common.workflows.eval_workflow
trinity.common.workflows.math_rm_workflow
trinity.common.workflows.step_wise_workflow
trinity.common.workflows.workflow
trinity.explorer
trinity.explorer.explorer
trinity.explorer.scheduler
trinity.explorer.workflow_runner
trinity.manager
trinity.manager.config_manager
trinity.manager.config_registry
trinity.manager.config_registry.algorithm_config_manager
trinity.manager.config_registry.buffer_config_manager
trinity.manager.config_registry.config_registry
trinity.manager.config_registry.explorer_config_manager
trinity.manager.config_registry.model_config_manager
trinity.manager.config_registry.trainer_config_manager
trinity.manager.manager
trinity.manager.synchronizer
trinity.plugins
trinity.trainer
trinity.trainer.trainer
trinity.trainer.verl
trinity.trainer.verl.dp_actor
trinity.trainer.verl.fsdp_checkpoint_manager
trinity.trainer.verl.fsdp_workers
trinity.trainer.verl.utils
trinity.trainer.verl_trainer
trinity.utils
trinity.utils.distributed
trinity.utils.dlc_utils
trinity.utils.eval_utils
trinity.utils.log
trinity.utils.math_eval_utils
trinity.utils.monitor
trinity.utils.plugin_loader
trinity.utils.registry
trinity.utils.timer
modules (trinity.utils.registry.Registry property)
Monitor (class in trinity.utils.monitor)
monitor (trinity.common.config.Config attribute)
monitor_args (trinity.common.config.MonitorConfig attribute)
monitor_type (trinity.common.config.MonitorConfig attribute)
MonitorConfig (class in trinity.common.config)
MonitorType (class in trinity.common.constants)
MUL (trinity.common.constants.OpType attribute)
MULTI_TURN (trinity.common.experience.ExperienceType attribute)
multi_turn (trinity.common.verl_config.Rollout attribute)
MultiTurnWorkflow (class in trinity.common.workflows.workflow)
N
n (trinity.common.config.GenerationConfig attribute)
(trinity.common.verl_config.Rollout attribute)
n_gpus_per_node (trinity.common.verl_config.Trainer attribute)
name (trinity.common.config.Config attribute)
(trinity.common.config.ExplorerConfig attribute)
(trinity.common.config.StorageConfig attribute)
(trinity.common.config.TrainerConfig attribute)
(trinity.common.verl_config.CustomRewardFunction attribute)
(trinity.utils.registry.Registry property)
name() (trinity.algorithm.algorithm.AlgorithmType class method)
(trinity.algorithm.AlgorithmType class method)
NCCL (trinity.common.constants.SyncMethod attribute)
need_eval() (trinity.explorer.Explorer method)
(trinity.explorer.explorer.Explorer method)
need_save() (trinity.algorithm.algorithm_manager.AlgorithmManager method)
need_sync() (trinity.explorer.Explorer method)
(trinity.explorer.explorer.Explorer method)
(trinity.trainer.Trainer method)
(trinity.trainer.trainer.Trainer method)
NewLineFormatter (class in trinity.utils.log)
nnodes (trinity.common.verl_config.Trainer attribute)
node_num (trinity.common.config.ClusterConfig attribute)
norm_adv_by_std_in_grpo (trinity.common.verl_config.Algorithm attribute)
numeric_equal() (in module trinity.utils.math_eval_utils)
O
ok (trinity.explorer.workflow_runner.Status attribute)
op_type (trinity.common.config.RewardShapingConfig attribute)
OPMDAddStrategy (class in trinity.algorithm.add_strategy)
(class in trinity.algorithm.add_strategy.add_strategy)
OPMDAdvantageFn (class in trinity.algorithm.advantage_fn)
(class in trinity.algorithm.advantage_fn.opmd_advantage)
OPMDAlgorithm (class in trinity.algorithm.algorithm)
OPMDPolicyLossFn (class in trinity.algorithm.policy_loss_fn)
(class in trinity.algorithm.policy_loss_fn.opmd_policy_loss)
Optim (class in trinity.common.verl_config)
optim (trinity.common.verl_config.Actor attribute)
(trinity.common.verl_config.Critic attribute)
optimizer_offload (trinity.common.verl_config.FSDPConfig attribute)
OpType (class in trinity.common.constants)
output_buffer (trinity.common.config.DataPipelineConfig attribute)
override_config (trinity.common.verl_config.ActorModel attribute)
(trinity.common.verl_config.CriticModel attribute)
P
pad_token_id (trinity.common.config.BufferConfig attribute)
param_offload (trinity.common.verl_config.FSDPConfig attribute)
path (trinity.common.config.StorageConfig attribute)
(trinity.common.verl_config.ActorModel attribute)
(trinity.common.verl_config.CriticModel attribute)
(trinity.common.verl_config.CustomRewardFunction attribute)
PLAINTEXT (trinity.common.constants.PromptType attribute)
policy_loss_fn (trinity.common.config.AlgorithmConfig attribute)
policy_loss_fn_args (trinity.common.config.AlgorithmConfig attribute)
PolicyLossFn (class in trinity.algorithm)
(class in trinity.algorithm.policy_loss_fn)
(class in trinity.algorithm.policy_loss_fn.policy_loss_fn)
PolicyLossFnMeta (class in trinity.algorithm.policy_loss_fn.policy_loss_fn)
ppo_epochs (trinity.common.verl_config.Actor attribute)
(trinity.common.verl_config.Critic attribute)
ppo_max_token_len_per_gpu (trinity.common.verl_config.Actor attribute)
(trinity.common.verl_config.Critic attribute)
ppo_micro_batch_size (trinity.common.verl_config.Actor attribute)
(trinity.common.verl_config.Critic attribute)
ppo_micro_batch_size_per_gpu (trinity.common.verl_config.Actor attribute)
(trinity.common.verl_config.Critic attribute)
ppo_mini_batch_size (trinity.common.verl_config.Actor attribute)
(trinity.common.verl_config.Critic attribute)
PPOAdvantageFn (class in trinity.algorithm.advantage_fn)
(class in trinity.algorithm.advantage_fn.ppo_advantage)
PPOAlgorithm (class in trinity.algorithm.algorithm)
PPOPolicyLossFn (class in trinity.algorithm.policy_loss_fn)
(class in trinity.algorithm.policy_loss_fn.ppo_policy_loss)
prefix_metrics() (in module trinity.algorithm.utils)
prepare() (trinity.explorer.Explorer method)
(trinity.explorer.explorer.Explorer method)
(trinity.trainer.TrainEngineWrapper method)
(trinity.trainer.Trainer method)
(trinity.trainer.trainer.TrainEngineWrapper method)
(trinity.trainer.trainer.Trainer method)
(trinity.trainer.verl_trainer.VerlPPOTrainerWrapper method)
priority (trinity.buffer.schema.sql_schema.ExperienceModel attribute)
PRIORITY (trinity.common.constants.ReadStrategy attribute)
priority_fn (trinity.buffer.queue.AsyncPriorityQueue attribute)
priority_groups (trinity.buffer.queue.AsyncPriorityQueue attribute)
priority_weights (trinity.common.config.DataPipelineConfig attribute)
process_messages_to_experience() (trinity.common.workflows.workflow.MultiTurnWorkflow method)
project (trinity.common.config.Config attribute)
project_name (trinity.common.verl_config.Trainer attribute)
prompt (trinity.buffer.schema.sql_schema.ExperienceModel attribute)
prompt_key (trinity.common.config.FormatConfig attribute)
prompt_length (trinity.common.experience.Experience attribute)
(trinity.common.experience.Experiences attribute)
prompt_text (trinity.common.experience.Experience attribute)
prompt_type (trinity.common.config.FormatConfig attribute)
PromptType (class in trinity.common.constants)
put() (trinity.buffer.queue.AsyncPriorityQueue method)
(trinity.buffer.queue.QueueBuffer method)
put_batch() (trinity.buffer.ray_wrapper.QueueWrapper method)
Q
qsize() (trinity.buffer.queue.AsyncPriorityQueue method)
(trinity.buffer.queue.QueueBuffer method)
QUEUE (trinity.common.constants.StorageType attribute)
QueueBuffer (class in trinity.buffer.queue)
QueueReader (class in trinity.buffer.reader.queue_reader)
QueueWrapper (class in trinity.buffer.ray_wrapper)
QueueWriter (class in trinity.buffer.writer.queue_writer)
R
RANDOM (trinity.common.constants.ReadStrategy attribute)
raw (trinity.common.config.StorageConfig attribute)
raw_task (trinity.common.workflows.Task attribute)
(trinity.common.workflows.workflow.Task attribute)
RawDataReader (class in trinity.buffer.reader.file_reader)
ray_namespace (trinity.common.config.Config attribute)
(trinity.common.config.StorageConfig attribute)
(trinity.common.config.SynchronizerConfig attribute)
read() (trinity.buffer.buffer_reader.BufferReader method)
(trinity.buffer.BufferReader method)
(trinity.buffer.ray_wrapper.DBWrapper method)
(trinity.buffer.ray_wrapper.FileWrapper method)
(trinity.buffer.reader.file_reader.DPODataReader method)
(trinity.buffer.reader.file_reader.RawDataReader method)
(trinity.buffer.reader.file_reader.RolloutDataReader method)
(trinity.buffer.reader.file_reader.SFTDataReader method)
(trinity.buffer.reader.queue_reader.QueueReader method)
(trinity.buffer.reader.sql_reader.SQLReader method)
read_async() (trinity.buffer.buffer_reader.BufferReader method)
(trinity.buffer.BufferReader method)
(trinity.buffer.reader.file_reader.BaseFileReader method)
(trinity.buffer.reader.queue_reader.QueueReader method)
(trinity.buffer.reader.sql_reader.SQLReader method)
read_experience_strategy (trinity.common.config.TrainerInput attribute)
ReadStrategy (class in trinity.common.constants)
ready_to_nccl_sync() (trinity.manager.Synchronizer method)
(trinity.manager.synchronizer.Synchronizer method)
reasoning_parser (trinity.common.config.InferenceModelConfig attribute)
Ref (class in trinity.common.verl_config)
ref (trinity.common.verl_config.ActorRolloutRef attribute)
register_check() (trinity.manager.config_registry.config_registry.ConfigRegistry method)
register_config() (trinity.manager.config_registry.config_registry.ConfigRegistry method)
register_module() (trinity.utils.registry.Registry method)
Registry (class in trinity.utils.registry)
registry (trinity.buffer.schema.Base attribute)
REINFORCEPLUSPLUSAdvantageFn (class in trinity.algorithm.advantage_fn)
(class in trinity.algorithm.advantage_fn.reinforce_plus_plus_advantage)
rejected (trinity.buffer.schema.sql_schema.DPODataModel attribute)
(trinity.common.experience.Experience attribute)
rejected_key (trinity.common.config.FormatConfig attribute)
rejected_text (trinity.common.experience.Experience attribute)
release() (trinity.buffer.buffer_writer.BufferWriter method)
(trinity.buffer.BufferWriter method)
(trinity.buffer.ray_wrapper.DBWrapper method)
(trinity.buffer.ray_wrapper.FileWrapper method)
(trinity.buffer.ray_wrapper.QueueWrapper method)
(trinity.buffer.writer.file_writer.JSONWriter method)
(trinity.buffer.writer.queue_writer.QueueWriter method)
(trinity.buffer.writer.sql_writer.SQLWriter method)
REMAXAdvantageFn (class in trinity.algorithm.advantage_fn)
(class in trinity.algorithm.advantage_fn.remax_advantage)
remove_boxed() (in module trinity.utils.eval_utils)
remove_previous_ckpt_in_save (trinity.common.verl_config.Trainer attribute)
remove_right_units() (in module trinity.utils.eval_utils)
repeat_times (trinity.common.config.AlgorithmConfig attribute)
(trinity.common.config.StorageConfig attribute)
(trinity.common.workflows.Task attribute)
(trinity.common.workflows.workflow.Task attribute)
(trinity.explorer.scheduler.TaskWrapper attribute)
repeatable (trinity.common.workflows.AgentScopeReactV2MathWorkflow property)
(trinity.common.workflows.eval_workflow.MathEvalWorkflow property)
(trinity.common.workflows.MathEvalWorkflow property)
(trinity.common.workflows.step_wise_workflow.RewardPropagationWorkflow property)
(trinity.common.workflows.step_wise_workflow.StepWiseRewardWorkflow property)
(trinity.common.workflows.Workflow property)
(trinity.common.workflows.workflow.Workflow property)
replay_buffer_kwargs (trinity.common.config.StorageConfig attribute)
reply_prefix (trinity.common.config.ExplorerInput attribute)
(trinity.common.config.FormatConfig attribute)
representative_sample() (in module trinity.algorithm.sample_strategy.utils)
REQUIRE_SYNC (trinity.common.constants.RunningStatus attribute)
reset() (trinity.common.workflows.AgentScopeReactV2MathWorkflow method)
(trinity.common.workflows.customized_math_workflows.MathBoxedWorkflow method)
(trinity.common.workflows.customized_toolcall_workflows.ToolCallWorkflow method)
(trinity.common.workflows.MathBoxedWorkflow method)
(trinity.common.workflows.MathWorkflow method)
(trinity.common.workflows.SimpleWorkflow method)
(trinity.common.workflows.ToolCallWorkflow method)
(trinity.common.workflows.WebShopWorkflow method)
(trinity.common.workflows.Workflow method)
(trinity.common.workflows.workflow.MathWorkflow method)
(trinity.common.workflows.workflow.SimpleWorkflow method)
(trinity.common.workflows.workflow.Workflow method)
reset_prefix_cache() (trinity.common.models.vllm_model.vLLMRolloutModel method)
reset_session_state() (trinity.manager.config_manager.ConfigManager method)
resettable (trinity.common.workflows.AgentScopeReactV2MathWorkflow property)
(trinity.common.workflows.eval_workflow.MathEvalWorkflow property)
(trinity.common.workflows.MathEvalWorkflow property)
(trinity.common.workflows.SimpleWorkflow property)
(trinity.common.workflows.WebShopWorkflow property)
(trinity.common.workflows.Workflow property)
(trinity.common.workflows.workflow.SimpleWorkflow property)
(trinity.common.workflows.workflow.Workflow property)
response (trinity.buffer.schema.sql_schema.ExperienceModel attribute)
response_key (trinity.common.config.FormatConfig attribute)
response_text (trinity.common.experience.Experience attribute)
restart_runner() (trinity.explorer.scheduler.RunnerWrapper method)
resume_from_path (trinity.common.verl_config.Trainer attribute)
resume_mode (trinity.common.verl_config.Trainer attribute)
retry_session() (in module trinity.buffer.utils)
returns (trinity.common.experience.Experience attribute)
(trinity.common.experience.Experiences attribute)
reuse_cooldown_time (trinity.buffer.queue.AsyncPriorityQueue attribute)
(trinity.common.config.StorageConfig attribute)
reward (trinity.buffer.schema.sql_schema.ExperienceModel attribute)
(trinity.common.experience.Experience attribute)
reward() (trinity.common.workflows.step_wise_workflow.RewardPropagationWorkflow method)
(trinity.common.workflows.step_wise_workflow.StepWiseRewardWorkflow method)
(trinity.common.workflows.StepWiseAlfworldWorkflow method)
reward_fn (trinity.common.workflows.Task attribute)
(trinity.common.workflows.workflow.Task attribute)
reward_fn_args (trinity.common.config.StorageConfig attribute)
(trinity.common.workflows.Task attribute)
(trinity.common.workflows.workflow.Task attribute)
reward_fn_key (trinity.common.config.FormatConfig attribute)
reward_key (trinity.common.config.FormatConfig attribute)
reward_manager (trinity.common.verl_config.RewardModel attribute)
reward_model (trinity.common.verl_config.veRLConfig attribute)
reward_shaping (trinity.common.config.DataPipelineConfig attribute)
reward_type (trinity.buffer.schema.sql_schema.TaskModel attribute)
RewardFn (class in trinity.common.rewards)
(class in trinity.common.rewards.reward_fn)
RewardModel (class in trinity.common.verl_config)
RewardPropagationWorkflow (class in trinity.common.workflows.step_wise_workflow)
rewards (trinity.common.experience.Experiences attribute)
RewardShapingConfig (class in trinity.common.config)
RewardVarianceAddStrategy (class in trinity.algorithm.add_strategy)
(class in trinity.algorithm.add_strategy.add_strategy)
rid (trinity.common.experience.EID property)
RLOOAdvantageFn (class in trinity.algorithm.advantage_fn)
(class in trinity.algorithm.advantage_fn.rloo_advantage)
RMGalleryFn (class in trinity.common.rewards)
(class in trinity.common.rewards.reward_fn)
Rollout (class in trinity.common.verl_config)
rollout (trinity.common.verl_config.ActorRolloutRef attribute)
rollout_args (trinity.common.config.StorageConfig attribute)
(trinity.common.workflows.Task attribute)
(trinity.common.workflows.Workflow property)
(trinity.common.workflows.workflow.Task attribute)
(trinity.common.workflows.workflow.Workflow property)
rollout_model (trinity.common.config.ExplorerConfig attribute)
rollout_n (trinity.common.verl_config.Critic attribute)
RolloutDataReader (class in trinity.buffer.reader.file_reader)
run (trinity.common.experience.EID attribute)
run() (trinity.common.workflows.AgentScopeReactV2MathWorkflow method)
(trinity.common.workflows.AlfworldWorkflow method)
(trinity.common.workflows.customized_math_workflows.MathBoxedWorkflow method)
(trinity.common.workflows.customized_toolcall_workflows.ToolCallWorkflow method)
(trinity.common.workflows.eval_workflow.MathEvalWorkflow method)
(trinity.common.workflows.math_rm_workflow.MathRMWorkflow method)
(trinity.common.workflows.MathBoxedWorkflow method)
(trinity.common.workflows.MathEvalWorkflow method)
(trinity.common.workflows.MathRMWorkflow method)
(trinity.common.workflows.SciWorldWorkflow method)
(trinity.common.workflows.SimpleWorkflow method)
(trinity.common.workflows.step_wise_workflow.RewardPropagationWorkflow method)
(trinity.common.workflows.step_wise_workflow.StepWiseRewardWorkflow method)
(trinity.common.workflows.StepWiseAlfworldWorkflow method)
(trinity.common.workflows.ToolCallWorkflow method)
(trinity.common.workflows.WebShopWorkflow method)
(trinity.common.workflows.Workflow method)
(trinity.common.workflows.workflow.MultiTurnWorkflow method)
(trinity.common.workflows.workflow.SimpleWorkflow method)
(trinity.common.workflows.workflow.Workflow method)
run_api_server() (trinity.common.models.vllm_model.vLLMRolloutModel method)
run_config() (trinity.manager.config_manager.ConfigManager method)
run_id_base (trinity.explorer.scheduler.TaskWrapper attribute)
run_task() (trinity.explorer.workflow_runner.WorkflowRunner method)
run_with_retry() (trinity.explorer.scheduler.RunnerWrapper method)
runner_num (trinity.common.config.ExplorerConfig attribute)
runner_per_model (trinity.common.config.ExplorerConfig attribute)
RunnerWrapper (class in trinity.explorer.scheduler)
RUNNING (trinity.common.constants.RunningStatus attribute)
running() (trinity.utils.dlc_utils.ClusterStatus method)
RunningStatus (class in trinity.common.constants)
S
sample() (trinity.algorithm.sample_strategy.DefaultSampleStrategy method)
(trinity.algorithm.sample_strategy.mix_sample_strategy.MixSampleStrategy method)
(trinity.algorithm.sample_strategy.MixSampleStrategy method)
(trinity.algorithm.sample_strategy.sample_strategy.DefaultSampleStrategy method)
(trinity.algorithm.sample_strategy.sample_strategy.SampleStrategy method)
(trinity.algorithm.sample_strategy.sample_strategy.WarmupSampleStrategy method)
(trinity.algorithm.sample_strategy.SampleStrategy method)
(trinity.algorithm.sample_strategy.WarmupSampleStrategy method)
(trinity.algorithm.SampleStrategy method)
sample_strategy (trinity.common.config.AlgorithmConfig attribute)
sample_strategy_args (trinity.common.config.AlgorithmConfig attribute)
SampleStrategy (class in trinity.algorithm)
(class in trinity.algorithm.sample_strategy)
(class in trinity.algorithm.sample_strategy.sample_strategy)
save() (trinity.common.config.Config method)
save_checkpoint() (trinity.explorer.Explorer method)
(trinity.explorer.explorer.Explorer method)
(trinity.trainer.TrainEngineWrapper method)
(trinity.trainer.trainer.TrainEngineWrapper method)
(trinity.trainer.verl.fsdp_checkpoint_manager.FSDPCheckpointManager method)
(trinity.trainer.verl.fsdp_workers.ActorRolloutRefWorker method)
(trinity.trainer.verl.fsdp_workers.CriticWorker method)
(trinity.trainer.verl_trainer.VerlPPOTrainerWrapper method)
save_contents (trinity.common.verl_config.Checkpoint attribute)
save_explorer() (trinity.manager.CacheManager method)
(trinity.manager.manager.CacheManager method)
save_freq (trinity.common.verl_config.Trainer attribute)
save_interval (trinity.common.config.TrainerConfig attribute)
save_state_dict() (trinity.trainer.TrainEngineWrapper method)
(trinity.trainer.trainer.TrainEngineWrapper method)
(trinity.trainer.verl_trainer.VerlPPOTrainerWrapper method)
save_trainer() (trinity.manager.CacheManager method)
(trinity.manager.manager.CacheManager method)
schedule() (trinity.explorer.scheduler.Scheduler method)
Scheduler (class in trinity.explorer.scheduler)
schema (trinity.algorithm.algorithm.AlgorithmType attribute)
(trinity.algorithm.algorithm.DPOAlgorithm attribute)
(trinity.algorithm.algorithm.GRPOAlgorithm attribute)
(trinity.algorithm.algorithm.MIXAlgorithm attribute)
(trinity.algorithm.algorithm.OPMDAlgorithm attribute)
(trinity.algorithm.algorithm.PPOAlgorithm attribute)
(trinity.algorithm.algorithm.SFTAlgorithm attribute)
(trinity.algorithm.AlgorithmType attribute)
SciWorldWorkflow (class in trinity.common.workflows)
seed (trinity.common.config.InferenceModelConfig attribute)
select_keys (trinity.algorithm.policy_loss_fn.dpo_loss.DPOLossFn property)
(trinity.algorithm.policy_loss_fn.DPOLossFn property)
(trinity.algorithm.policy_loss_fn.gspo_policy_loss.GSPOLossFn property)
(trinity.algorithm.policy_loss_fn.GSPOLossFn property)
(trinity.algorithm.policy_loss_fn.mix_policy_loss.MIXPolicyLossFn property)
(trinity.algorithm.policy_loss_fn.MIXPolicyLossFn property)
(trinity.algorithm.policy_loss_fn.opmd_policy_loss.OPMDPolicyLossFn property)
(trinity.algorithm.policy_loss_fn.OPMDPolicyLossFn property)
(trinity.algorithm.policy_loss_fn.policy_loss_fn.PolicyLossFn property)
(trinity.algorithm.policy_loss_fn.PolicyLossFn property)
(trinity.algorithm.policy_loss_fn.ppo_policy_loss.PPOPolicyLossFn property)
(trinity.algorithm.policy_loss_fn.PPOPolicyLossFn property)
(trinity.algorithm.policy_loss_fn.sft_loss.SFTLossFn property)
(trinity.algorithm.policy_loss_fn.SFTLossFn property)
(trinity.algorithm.PolicyLossFn property)
serialize() (trinity.common.experience.Experience method)
serialized_exp (trinity.buffer.schema.sql_schema.DPODataModel attribute)
(trinity.buffer.schema.sql_schema.ExperienceModel attribute)
(trinity.buffer.schema.sql_schema.SFTDataModel attribute)
set_actor_checkpoint() (in module trinity.manager.config_registry.trainer_config_manager)
set_actor_entropy_checkpointing() (in module trinity.manager.config_registry.trainer_config_manager)
set_actor_entropy_from_logits_with_chunking() (in module trinity.manager.config_registry.trainer_config_manager)
set_actor_grad_clip() (in module trinity.manager.config_registry.trainer_config_manager)
set_actor_lr() (in module trinity.manager.config_registry.trainer_config_manager)
set_actor_lr_warmup_steps_ratio() (in module trinity.manager.config_registry.trainer_config_manager)
set_actor_ppo_micro_batch_size_per_gpu() (in module trinity.manager.config_registry.trainer_config_manager)
set_actor_ulysses_sequence_parallel_size() (in module trinity.manager.config_registry.trainer_config_manager)
set_actor_warmup_style() (in module trinity.manager.config_registry.trainer_config_manager)
set_adaptive_in_kl_penalty_fn() (in module trinity.manager.config_registry.algorithm_config_manager)
set_advantage_fn() (in module trinity.manager.config_registry.algorithm_config_manager)
set_algorithm() (trinity.trainer.verl.dp_actor.DataParallelPPOActor method)
(trinity.trainer.verl.fsdp_workers.ActorRolloutRefWorker method)
set_algorithm_type() (in module trinity.manager.config_registry.algorithm_config_manager)
set_auxiliary_models() (in module trinity.manager.config_registry.explorer_config_manager)
set_bench_on_latest_checkpoint() (in module trinity.manager.config_registry.explorer_config_manager)
set_beta_in_policy_loss_fn() (in module trinity.manager.config_registry.algorithm_config_manager)
set_buffer_max_retry_times() (in module trinity.manager.config_registry.buffer_config_manager)
set_checkpoint_root_dir() (in module trinity.manager.config_registry.model_config_manager)
set_clip_range_in_policy_loss_fn() (in module trinity.manager.config_registry.algorithm_config_manager)
set_critic_checkpoint() (in module trinity.manager.config_registry.trainer_config_manager)
set_critic_cliprange_value() (in module trinity.manager.config_registry.trainer_config_manager)
set_critic_grad_clip() (in module trinity.manager.config_registry.trainer_config_manager)
set_critic_lr() (in module trinity.manager.config_registry.trainer_config_manager)
set_critic_lr_warmup_steps_ratio() (in module trinity.manager.config_registry.trainer_config_manager)
set_critic_model_path() (in module trinity.manager.config_registry.model_config_manager)
set_critic_ppo_micro_batch_size_per_gpu() (in module trinity.manager.config_registry.trainer_config_manager)
set_critic_ulysses_sequence_parallel_size() (in module trinity.manager.config_registry.trainer_config_manager)
set_critic_warmup() (in module trinity.manager.config_registry.trainer_config_manager)
set_critic_warmup_style() (in module trinity.manager.config_registry.trainer_config_manager)
set_default_eval_workflow_type() (in module trinity.manager.config_registry.buffer_config_manager)
set_default_hdfs_dir() (in module trinity.manager.config_registry.trainer_config_manager)
set_default_reward_fn_type() (in module trinity.manager.config_registry.buffer_config_manager)
set_default_workflow_type() (in module trinity.manager.config_registry.buffer_config_manager)
set_del_local_ckpt_after_load() (in module trinity.manager.config_registry.trainer_config_manager)
set_dpo_dataset_kwargs() (in module trinity.manager.config_registry.buffer_config_manager)
set_dtype() (in module trinity.manager.config_registry.explorer_config_manager)
set_enable_auto_tool_choice() (in module trinity.manager.config_registry.explorer_config_manager)
set_enable_chunked_prefill() (in module trinity.manager.config_registry.explorer_config_manager)
set_enable_openai_api() (in module trinity.manager.config_registry.explorer_config_manager)
set_enable_prefix_caching() (in module trinity.manager.config_registry.explorer_config_manager)
set_enable_preview() (in module trinity.manager.config_registry.trainer_config_manager)
set_enable_thinking() (in module trinity.manager.config_registry.explorer_config_manager)
set_enforce_eager() (in module trinity.manager.config_registry.explorer_config_manager)
set_engine_num() (in module trinity.manager.config_registry.explorer_config_manager)
set_engine_type() (in module trinity.manager.config_registry.explorer_config_manager)
set_entropy_coef_in_entropy_loss_fn() (in module trinity.manager.config_registry.algorithm_config_manager)
set_entropy_loss_fn() (in module trinity.manager.config_registry.algorithm_config_manager)
set_epsilon_in_advantage_fn() (in module trinity.manager.config_registry.algorithm_config_manager)
set_eval_interval() (in module trinity.manager.config_registry.explorer_config_manager)
set_eval_tasksets() (in module trinity.manager.config_registry.buffer_config_manager)
set_exp_name() (in module trinity.manager.config_registry.model_config_manager)
set_experience_buffer_path() (in module trinity.manager.config_registry.buffer_config_manager)
set_expert_data_ratio_in_sample_strategy() (in module trinity.manager.config_registry.algorithm_config_manager)
set_explore_batch_size() (in module trinity.manager.config_registry.buffer_config_manager)
set_explorer_max_retry_times() (in module trinity.manager.config_registry.explorer_config_manager)
set_explorer_status() (trinity.manager.Synchronizer method)
(trinity.manager.synchronizer.Synchronizer method)
set_forward_prefetch() (in module trinity.manager.config_registry.trainer_config_manager)
set_gamma_in_advantage_fn() (in module trinity.manager.config_registry.algorithm_config_manager)
set_gpu_memory_utilization() (in module trinity.manager.config_registry.explorer_config_manager)
set_gpu_per_node() (in module trinity.manager.config_registry.model_config_manager)
set_horizon() (in module trinity.manager.config_registry.trainer_config_manager)
set_impl_backend() (in module trinity.manager.config_registry.trainer_config_manager)
set_kl_coef_in_kl_loss_fn() (in module trinity.manager.config_registry.algorithm_config_manager)
set_kl_coef_in_kl_penalty_fn() (in module trinity.manager.config_registry.algorithm_config_manager)
set_kl_ctrl_coef() (in module trinity.manager.config_registry.trainer_config_manager)
set_kl_ctrl_type() (in module trinity.manager.config_registry.trainer_config_manager)
set_kl_loss_fn() (in module trinity.manager.config_registry.algorithm_config_manager)
set_kl_penalty() (in module trinity.manager.config_registry.trainer_config_manager)
set_kl_penalty_fn() (in module trinity.manager.config_registry.algorithm_config_manager)
set_label_smoothing_in_policy_loss_fn() (in module trinity.manager.config_registry.algorithm_config_manager)
set_lam_in_advantage_fn() (in module trinity.manager.config_registry.algorithm_config_manager)
set_max_actor_ckpt_to_keep() (in module trinity.manager.config_registry.trainer_config_manager)
set_max_critic_ckpt_to_keep() (in module trinity.manager.config_registry.trainer_config_manager)
set_max_model_len() (in module trinity.manager.config_registry.model_config_manager)
set_max_response_tokens() (in module trinity.manager.config_registry.model_config_manager)
set_max_retry_interval() (in module trinity.manager.config_registry.buffer_config_manager)
set_max_timeout() (in module trinity.manager.config_registry.explorer_config_manager)
set_model_path() (in module trinity.manager.config_registry.model_config_manager)
set_model_state_dict() (trinity.manager.Synchronizer method)
(trinity.manager.synchronizer.Synchronizer method)
set_model_state_dict_with_step_num() (trinity.manager.Synchronizer method)
(trinity.manager.synchronizer.Synchronizer method)
set_monitor_type() (in module trinity.manager.config_registry.model_config_manager)
set_mu_in_policy_loss_fn() (in module trinity.manager.config_registry.algorithm_config_manager)
set_node_num() (in module trinity.manager.config_registry.model_config_manager)
set_norm_adv_by_std_in_grpo() (in module trinity.manager.config_registry.trainer_config_manager)
set_opmd_baseline_in_advantage_fn() (in module trinity.manager.config_registry.algorithm_config_manager)
set_optimizer_offload() (in module trinity.manager.config_registry.trainer_config_manager)
set_param_offload() (in module trinity.manager.config_registry.trainer_config_manager)
set_policy_loss_fn() (in module trinity.manager.config_registry.algorithm_config_manager)
set_ppo_epochs() (in module trinity.manager.config_registry.trainer_config_manager)
set_priority_decay() (in module trinity.manager.config_registry.buffer_config_manager)
set_priority_fn() (in module trinity.manager.config_registry.buffer_config_manager)
set_project() (in module trinity.manager.config_registry.model_config_manager)
set_reasoning_parser() (in module trinity.manager.config_registry.explorer_config_manager)
set_ref_log_prob_micro_batch_size_per_gpu() (in module trinity.manager.config_registry.trainer_config_manager)
set_remove_previous_ckpt_in_save() (in module trinity.manager.config_registry.trainer_config_manager)
set_repeat_times() (in module trinity.manager.config_registry.algorithm_config_manager)
(trinity.common.workflows.SimpleWorkflow method)
(trinity.common.workflows.Workflow method)
(trinity.common.workflows.workflow.MultiTurnWorkflow method)
(trinity.common.workflows.workflow.SimpleWorkflow method)
(trinity.common.workflows.workflow.Workflow method)
set_reply_prefix() (in module trinity.manager.config_registry.buffer_config_manager)
set_resume_from_path() (in module trinity.manager.config_registry.trainer_config_manager)
set_resume_mode() (in module trinity.manager.config_registry.trainer_config_manager)
set_reuse_cooldown_time() (in module trinity.manager.config_registry.buffer_config_manager)
set_runner_per_model() (in module trinity.manager.config_registry.explorer_config_manager)
set_sample_strategy() (in module trinity.manager.config_registry.algorithm_config_manager)
set_save_interval() (in module trinity.manager.config_registry.trainer_config_manager)
set_seed() (in module trinity.manager.config_registry.explorer_config_manager)
set_sft_warmup_dataset_args() (in module trinity.manager.config_registry.buffer_config_manager)
set_sft_warmup_dataset_path() (in module trinity.manager.config_registry.buffer_config_manager)
set_sft_warmup_steps() (in module trinity.manager.config_registry.buffer_config_manager)
set_storage_type() (in module trinity.manager.config_registry.buffer_config_manager)
set_sync_interval() (in module trinity.manager.config_registry.explorer_config_manager)
set_sync_method() (in module trinity.manager.config_registry.explorer_config_manager)
set_sync_timeout() (in module trinity.manager.config_registry.explorer_config_manager)
set_system_prompt() (in module trinity.manager.config_registry.buffer_config_manager)
set_target_kl() (in module trinity.manager.config_registry.trainer_config_manager)
set_taskset_args() (in module trinity.manager.config_registry.buffer_config_manager)
set_taskset_path() (in module trinity.manager.config_registry.buffer_config_manager)
set_tau_in_advantage_fn() (in module trinity.manager.config_registry.algorithm_config_manager)
set_tau_in_policy_loss_fn() (in module trinity.manager.config_registry.algorithm_config_manager)
set_tensor_parallel_size() (in module trinity.manager.config_registry.explorer_config_manager)
set_tool_call_parser() (in module trinity.manager.config_registry.explorer_config_manager)
set_total_epochs() (in module trinity.manager.config_registry.buffer_config_manager)
set_total_gpu_num() (in module trinity.manager.config_registry.model_config_manager)
set_total_training_steps() (in module trinity.manager.config_registry.trainer_config_manager)
set_train_batch_size() (in module trinity.manager.config_registry.buffer_config_manager)
set_trainer_gpu_num() (in module trinity.manager.config_registry.model_config_manager)
set_trainer_status() (trinity.manager.Synchronizer method)
(trinity.manager.synchronizer.Synchronizer method)
set_trainer_type() (in module trinity.manager.config_registry.trainer_config_manager)
set_training_args() (in module trinity.manager.config_registry.trainer_config_manager)
set_training_strategy() (in module trinity.manager.config_registry.trainer_config_manager)
set_unfinished_fields() (trinity.manager.config_registry.config_registry.ConfigRegistry method)
set_use_kl_in_reward() (in module trinity.manager.config_registry.trainer_config_manager)
set_use_priority_queue() (in module trinity.manager.config_registry.buffer_config_manager)
set_use_token_level_loss_in_policy_loss_fn() (in module trinity.manager.config_registry.algorithm_config_manager)
set_use_v1() (in module trinity.manager.config_registry.explorer_config_manager)
setup_ray_cluster() (in module trinity.utils.dlc_utils)
setup_weight_sync_group() (trinity.explorer.Explorer method)
(trinity.explorer.explorer.Explorer method)
(trinity.manager.Synchronizer method)
(trinity.manager.synchronizer.Synchronizer method)
(trinity.trainer.verl.fsdp_workers.ActorRolloutRefWorker method)
sft_to_rft() (trinity.trainer.verl_trainer.VerlPPOTrainerWrapper method)
sft_warmup_dataset (trinity.common.config.TrainerInput attribute)
sft_warmup_steps (trinity.common.config.TrainerInput attribute)
(trinity.common.verl_config.Trainer attribute)
SFTAlgorithm (class in trinity.algorithm.algorithm)
SFTDataModel (class in trinity.buffer.schema.sql_schema)
SFTDataReader (class in trinity.buffer.reader.file_reader)
SFTLossFn (class in trinity.algorithm.policy_loss_fn)
(class in trinity.algorithm.policy_loss_fn.sft_loss)
shuffle (trinity.common.verl_config.Actor attribute)
(trinity.common.verl_config.Critic attribute)
shutdown() (trinity.common.models.vllm_model.vLLMRolloutModel method)
(trinity.explorer.Explorer method)
(trinity.explorer.explorer.Explorer method)
(trinity.trainer.Trainer method)
(trinity.trainer.trainer.Trainer method)
sid (trinity.common.experience.EID property)
simple_answer_parser() (in module trinity.utils.eval_utils)
SimpleWorkflow (class in trinity.common.workflows)
(class in trinity.common.workflows.workflow)
SINGLE_TURN (trinity.common.experience.ExperienceType attribute)
sleep() (trinity.common.models.vllm_model.vLLMRolloutModel method)
solution_key (trinity.common.config.FormatConfig attribute)
sort_batch_id() (in module trinity.explorer.scheduler)
source_field (trinity.common.experience.CustomField attribute)
split (trinity.common.config.StorageConfig attribute)
split_dpo_experience_to_single_turn() (in module trinity.common.experience)
SQL (trinity.common.constants.StorageType attribute)
SQLReader (class in trinity.buffer.reader.sql_reader)
SQLWriter (class in trinity.buffer.writer.sql_writer)
start() (trinity.explorer.scheduler.Scheduler method)
stats_key (trinity.common.config.RewardShapingConfig attribute)
Status (class in trinity.explorer.workflow_runner)
step (trinity.common.experience.EID attribute)
step() (trinity.common.workflows.step_wise_workflow.RewardPropagationWorkflow method)
(trinity.common.workflows.step_wise_workflow.StepWiseRewardWorkflow method)
(trinity.common.workflows.StepWiseAlfworldWorkflow method)
StepWiseAlfworldWorkflow (class in trinity.common.workflows)
StepWiseGRPOStrategy (class in trinity.algorithm.add_strategy)
(class in trinity.algorithm.add_strategy.step_wise_add_strategy)
StepWiseRewardWorkflow (class in trinity.common.workflows.step_wise_workflow)
stop() (trinity.explorer.scheduler.Scheduler method)
stop_ray_cluster() (in module trinity.utils.dlc_utils)
STOPPED (trinity.common.constants.RunningStatus attribute)
stopped() (trinity.buffer.queue.AsyncPriorityQueue method)
(trinity.buffer.queue.AsyncQueue method)
(trinity.buffer.queue.QueueBuffer method)
storage_type (trinity.common.config.StorageConfig attribute)
StorageConfig (class in trinity.common.config)
StorageType (class in trinity.common.constants)
strategy (trinity.common.verl_config.Actor attribute)
(trinity.common.verl_config.Critic attribute)
(trinity.common.verl_config.RewardModel attribute)
strip_string() (in module trinity.utils.math_eval_utils)
SUB (trinity.common.constants.OpType attribute)
subset_name (trinity.common.config.StorageConfig attribute)
suffix (trinity.common.experience.EID attribute)
symbolic_equal() (in module trinity.utils.math_eval_utils)
sync_freq (trinity.common.verl_config.Trainer attribute)
sync_interval (trinity.common.config.SynchronizerConfig attribute)
sync_method (trinity.common.config.SynchronizerConfig attribute)
sync_model() (trinity.common.models.vllm_model.vLLMRolloutModel method)
sync_offset (trinity.common.config.SynchronizerConfig attribute)
sync_style (trinity.common.config.SynchronizerConfig attribute)
sync_timeout (trinity.common.config.SynchronizerConfig attribute)
sync_weight() (trinity.explorer.Explorer method)
(trinity.explorer.explorer.Explorer method)
(trinity.trainer.TrainEngineWrapper method)
(trinity.trainer.Trainer method)
(trinity.trainer.trainer.TrainEngineWrapper method)
(trinity.trainer.trainer.Trainer method)
(trinity.trainer.verl.fsdp_workers.ActorRolloutRefWorker method)
(trinity.trainer.verl_trainer.VerlPPOTrainerWrapper method)
synchronize_config() (trinity.common.verl_config.veRLConfig method)
Synchronizer (class in trinity.manager)
(class in trinity.manager.synchronizer)
synchronizer (trinity.common.config.Config attribute)
(trinity.common.verl_config.ActorRolloutRef attribute)
(trinity.common.verl_config.veRLConfig attribute)
SynchronizerConfig (class in trinity.common.config)
SyncMethod (class in trinity.common.constants)
SyncMethodEnumMeta (class in trinity.common.constants)
SyncStyle (class in trinity.common.constants)
system_prompt (trinity.common.config.ExplorerInput attribute)
(trinity.common.config.FormatConfig attribute)
T
target_kl (trinity.common.verl_config.KL_Ctrl attribute)
Task (class in trinity.common.workflows)
(class in trinity.common.workflows.workflow)
TASK (trinity.common.constants.DataProcessorPipelineType attribute)
task (trinity.common.experience.EID attribute)
(trinity.explorer.scheduler.TaskWrapper attribute)
task_desc (trinity.buffer.schema.sql_schema.TaskModel attribute)
(trinity.common.workflows.Task property)
(trinity.common.workflows.workflow.Task property)
task_done_callback() (trinity.explorer.scheduler.Scheduler method)
task_id (trinity.common.workflows.Task attribute)
(trinity.common.workflows.workflow.Task attribute)
task_pipeline (trinity.common.config.DataProcessorConfig attribute)
task_type (trinity.common.config.StorageConfig attribute)
TaskModel (class in trinity.buffer.schema.sql_schema)
taskset (trinity.common.config.ExplorerInput attribute)
TaskType (class in trinity.common.constants)
TaskWrapper (class in trinity.explorer.scheduler)
temperature (trinity.common.config.GenerationConfig attribute)
(trinity.common.verl_config.Rollout attribute)
tensor_parallel_size (trinity.common.config.InferenceModelConfig attribute)
TENSORBOARD (trinity.common.constants.MonitorType attribute)
TensorboardMonitor (class in trinity.utils.monitor)
test_freq (trinity.common.verl_config.Trainer attribute)
tid (trinity.common.experience.EID property)
Timer (class in trinity.utils.timer)
to_data_proto() (in module trinity.trainer.verl.utils)
to_dict() (trinity.common.experience.EID method)
(trinity.common.experience.Experience method)
(trinity.common.workflows.Task method)
(trinity.common.workflows.workflow.Task method)
to_experience() (trinity.buffer.schema.sql_schema.DPODataModel method)
(trinity.buffer.schema.sql_schema.ExperienceModel method)
(trinity.buffer.schema.sql_schema.SFTDataModel method)
to_rm_gallery_messages() (in module trinity.common.rewards.utils)
to_trinity() (trinity.algorithm.key_mapper.KeyMapper method)
to_workflow() (trinity.common.workflows.Task method)
(trinity.common.workflows.workflow.Task method)
tokenize_and_mask_messages_default() (in module trinity.common.models.utils)
tokenize_and_mask_messages_hf() (in module trinity.common.models.utils)
tokenizer_path (trinity.common.config.BufferConfig attribute)
(trinity.common.verl_config.CriticModel attribute)
tokens (trinity.common.experience.Experience attribute)
(trinity.common.experience.Experiences attribute)
tool_call_parser (trinity.common.config.InferenceModelConfig attribute)
ToolCallWorkflow (class in trinity.common.workflows)
(class in trinity.common.workflows.customized_toolcall_workflows)
top_k (trinity.common.config.GenerationConfig attribute)
top_p (trinity.common.config.GenerationConfig attribute)
total_epochs (trinity.common.config.BufferConfig attribute)
(trinity.common.config.StorageConfig attribute)
(trinity.common.verl_config.Trainer attribute)
total_steps (trinity.common.config.BufferConfig attribute)
(trinity.common.config.StorageConfig attribute)
total_training_steps (trinity.common.verl_config.Optim attribute)
(trinity.common.verl_config.Trainer attribute)
train() (trinity.trainer.Trainer method)
(trinity.trainer.trainer.Trainer method)
train_batch_size (trinity.common.config.BufferConfig attribute)
(trinity.common.verl_config.Data attribute)
train_step() (trinity.trainer.TrainEngineWrapper method)
(trinity.trainer.Trainer method)
(trinity.trainer.trainer.TrainEngineWrapper method)
(trinity.trainer.trainer.Trainer method)
(trinity.trainer.verl_trainer.VerlPPOTrainerWrapper method)
train_step_num (trinity.trainer.TrainEngineWrapper property)
(trinity.trainer.Trainer property)
(trinity.trainer.trainer.TrainEngineWrapper property)
(trinity.trainer.trainer.Trainer property)
(trinity.trainer.verl_trainer.VerlPPOTrainerWrapper property)
TrainEngineWrapper (class in trinity.trainer)
(class in trinity.trainer.trainer)
Trainer (class in trinity.common.verl_config)
(class in trinity.trainer)
(class in trinity.trainer.trainer)
trainer (trinity.common.config.Config attribute)
(trinity.common.verl_config.veRLConfig attribute)
trainer_config (trinity.common.config.TrainerConfig attribute)
trainer_config_path (trinity.common.config.TrainerConfig attribute)
trainer_input (trinity.common.config.BufferConfig attribute)
trainer_status (trinity.manager.Synchronizer attribute)
(trinity.manager.synchronizer.Synchronizer attribute)
trainer_type (trinity.common.config.TrainerConfig attribute)
TrainerConfig (class in trinity.common.config)
TrainerInput (class in trinity.common.config)
training_rollout_mode (trinity.common.verl_config.Trainer attribute)
trinity
module
trinity.algorithm
module
trinity.algorithm.add_strategy
module
trinity.algorithm.add_strategy.add_strategy
module
trinity.algorithm.add_strategy.correct_bias_add_strategy
module
trinity.algorithm.add_strategy.duplicate_add_strategy
module
trinity.algorithm.add_strategy.step_wise_add_strategy
module
trinity.algorithm.advantage_fn
module
trinity.algorithm.advantage_fn.advantage_fn
module
trinity.algorithm.advantage_fn.grpo_advantage
module
trinity.algorithm.advantage_fn.opmd_advantage
module
trinity.algorithm.advantage_fn.ppo_advantage
module
trinity.algorithm.advantage_fn.reinforce_plus_plus_advantage
module
trinity.algorithm.advantage_fn.remax_advantage
module
trinity.algorithm.advantage_fn.rloo_advantage
module
trinity.algorithm.algorithm
module
trinity.algorithm.algorithm_manager
module
trinity.algorithm.entropy_loss_fn
module
trinity.algorithm.entropy_loss_fn.entropy_loss_fn
module
trinity.algorithm.key_mapper
module
trinity.algorithm.kl_fn
module
trinity.algorithm.kl_fn.kl_fn
module
trinity.algorithm.policy_loss_fn
module
trinity.algorithm.policy_loss_fn.dpo_loss
module
trinity.algorithm.policy_loss_fn.gspo_policy_loss
module
trinity.algorithm.policy_loss_fn.mix_policy_loss
module
trinity.algorithm.policy_loss_fn.opmd_policy_loss
module
trinity.algorithm.policy_loss_fn.policy_loss_fn
module
trinity.algorithm.policy_loss_fn.ppo_policy_loss
module
trinity.algorithm.policy_loss_fn.sft_loss
module
trinity.algorithm.sample_strategy
module
trinity.algorithm.sample_strategy.mix_sample_strategy
module
trinity.algorithm.sample_strategy.sample_strategy
module
trinity.algorithm.sample_strategy.utils
module
trinity.algorithm.utils
module
trinity.buffer
module
trinity.buffer.buffer
module
trinity.buffer.buffer_reader
module
trinity.buffer.buffer_writer
module
trinity.buffer.queue
module
trinity.buffer.ray_wrapper
module
trinity.buffer.reader
module
trinity.buffer.reader.file_reader
module
trinity.buffer.reader.queue_reader
module
trinity.buffer.reader.sql_reader
module
trinity.buffer.schema
module
trinity.buffer.schema.sql_schema
module
trinity.buffer.utils
module
trinity.buffer.writer
module
trinity.buffer.writer.file_writer
module
trinity.buffer.writer.queue_writer
module
trinity.buffer.writer.sql_writer
module
trinity.common
module
trinity.common.config
module
trinity.common.constants
module
trinity.common.experience
module
trinity.common.models
module
trinity.common.models.model
module
trinity.common.models.utils
module
trinity.common.models.vllm_model
module
trinity.common.models.vllm_worker
module
trinity.common.rewards
module
trinity.common.rewards.accuracy_reward
module
trinity.common.rewards.agents_reward
module
trinity.common.rewards.countdown_reward
module
trinity.common.rewards.dapo_reward
module
trinity.common.rewards.format_reward
module
trinity.common.rewards.human_reward
module
trinity.common.rewards.math_reward
module
trinity.common.rewards.reward_fn
module
trinity.common.rewards.tool_reward
module
trinity.common.rewards.utils
module
trinity.common.verl_config
module
trinity.common.workflows
module
trinity.common.workflows.customized_math_workflows
module
trinity.common.workflows.customized_toolcall_workflows
module
trinity.common.workflows.eval_workflow
module
trinity.common.workflows.math_rm_workflow
module
trinity.common.workflows.step_wise_workflow
module
trinity.common.workflows.workflow
module
trinity.explorer
module
trinity.explorer.explorer
module
trinity.explorer.scheduler
module
trinity.explorer.workflow_runner
module
trinity.manager
module
trinity.manager.config_manager
module
trinity.manager.config_registry
module
trinity.manager.config_registry.algorithm_config_manager
module
trinity.manager.config_registry.buffer_config_manager
module
trinity.manager.config_registry.config_registry
module
trinity.manager.config_registry.explorer_config_manager
module
trinity.manager.config_registry.model_config_manager
module
trinity.manager.config_registry.trainer_config_manager
module
trinity.manager.manager
module
trinity.manager.synchronizer
module
trinity.plugins
module
trinity.trainer
module
trinity.trainer.trainer
module
trinity.trainer.verl
module
trinity.trainer.verl.dp_actor
module
trinity.trainer.verl.fsdp_checkpoint_manager
module
trinity.trainer.verl.fsdp_workers
module
trinity.trainer.verl.utils
module
trinity.trainer.verl_trainer
module
trinity.utils
module
trinity.utils.distributed
module
trinity.utils.dlc_utils
module
trinity.utils.eval_utils
module
trinity.utils.log
module
trinity.utils.math_eval_utils
module
trinity.utils.monitor
module
trinity.utils.plugin_loader
module
trinity.utils.registry
module
trinity.utils.timer
module
truth (trinity.common.workflows.Task property)
(trinity.common.workflows.workflow.Task property)
type (trinity.common.verl_config.KL_Ctrl attribute)
U
uid (trinity.common.experience.EID property)
ulysses_sequence_parallel_size (trinity.common.verl_config.Actor attribute)
(trinity.common.verl_config.Critic attribute)
(trinity.common.verl_config.Ref attribute)
(trinity.common.verl_config.RewardModel attribute)
update() (trinity.buffer.reader.file_reader.DummyProgressBar method)
update_actor() (trinity.trainer.verl.fsdp_workers.ActorRolloutRefWorker method)
update_critic() (trinity.trainer.verl.fsdp_workers.CriticWorker method)
update_kl_coef() (trinity.algorithm.kl_fn.kl_fn.KLFn method)
(trinity.algorithm.kl_fn.KLFn method)
(trinity.algorithm.KLFn method)
update_policy() (trinity.trainer.verl.dp_actor.DataParallelPPOActor method)
update_weight() (trinity.common.models.vllm_worker.WorkerExtension method)
upload_state_dict() (trinity.trainer.TrainEngineWrapper method)
(trinity.trainer.trainer.TrainEngineWrapper method)
(trinity.trainer.verl.fsdp_checkpoint_manager.FSDPCheckpointManager method)
(trinity.trainer.verl.fsdp_workers.ActorRolloutRefWorker method)
(trinity.trainer.verl_trainer.VerlPPOTrainerWrapper method)
use_critic (trinity.algorithm.algorithm.AlgorithmType attribute)
(trinity.algorithm.algorithm.DPOAlgorithm attribute)
(trinity.algorithm.algorithm.GRPOAlgorithm attribute)
(trinity.algorithm.algorithm.MIXAlgorithm attribute)
(trinity.algorithm.algorithm.OPMDAlgorithm attribute)
(trinity.algorithm.algorithm.PPOAlgorithm attribute)
(trinity.algorithm.algorithm.SFTAlgorithm attribute)
(trinity.algorithm.AlgorithmType attribute)
use_critic() (in module trinity.manager.config_registry.trainer_config_manager)
use_dynamic_bsz (trinity.common.verl_config.Actor attribute)
(trinity.common.verl_config.Critic attribute)
(trinity.common.verl_config.RewardModel attribute)
use_fsdp() (in module trinity.manager.config_registry.trainer_config_manager)
use_fused_kernels (trinity.common.verl_config.ActorModel attribute)
use_kl_in_reward (trinity.common.verl_config.Algorithm attribute)
use_kl_loss (trinity.common.verl_config.Actor attribute)
use_priority_queue (trinity.common.config.StorageConfig attribute)
use_reference (trinity.algorithm.algorithm.AlgorithmType attribute)
(trinity.algorithm.algorithm.DPOAlgorithm attribute)
(trinity.algorithm.algorithm.GRPOAlgorithm attribute)
(trinity.algorithm.algorithm.MIXAlgorithm attribute)
(trinity.algorithm.algorithm.OPMDAlgorithm attribute)
(trinity.algorithm.algorithm.PPOAlgorithm attribute)
(trinity.algorithm.algorithm.SFTAlgorithm attribute)
(trinity.algorithm.AlgorithmType attribute)
use_remove_padding (trinity.common.verl_config.ActorModel attribute)
(trinity.common.verl_config.CriticModel attribute)
use_rollout (trinity.algorithm.algorithm.MIXAlgorithm attribute)
use_token_level_loss (trinity.common.config.AlgorithmConfig attribute)
use_v1 (trinity.common.config.InferenceModelConfig attribute)
V
val_before_train (trinity.common.verl_config.Trainer attribute)
val_generations_to_log_to_wandb (trinity.common.verl_config.Trainer attribute)
val_kwargs (trinity.common.verl_config.Rollout attribute)
validate_equation() (in module trinity.utils.eval_utils)
validate_format() (in module trinity.common.workflows.customized_toolcall_workflows)
validate_result() (in module trinity.common.workflows.customized_toolcall_workflows)
validate_think_pattern() (in module trinity.utils.eval_utils)
verify_math_answer() (in module trinity.utils.math_eval_utils)
veRLConfig (class in trinity.common.verl_config)
VerlPPOTrainerWrapper (class in trinity.trainer.verl_trainer)
vLLMRolloutModel (class in trinity.common.models.vllm_model)
W
wait_all() (trinity.explorer.scheduler.Scheduler method)
wait_for_checkpoint (trinity.common.config.SynchronizerConfig attribute)
wait_for_ray_setup() (in module trinity.utils.dlc_utils)
wait_for_ray_worker_nodes() (in module trinity.utils.dlc_utils)
wait_new_model_state_dict() (trinity.manager.Synchronizer method)
(trinity.manager.synchronizer.Synchronizer method)
wait_on_save_thread() (trinity.trainer.verl.fsdp_checkpoint_manager.FSDPCheckpointManager method)
(trinity.trainer.verl.fsdp_workers.ActorRolloutRefWorker method)
(trinity.trainer.verl.fsdp_workers.CriticWorker method)
WAITING_SYNC (trinity.common.constants.RunningStatus attribute)
wake_up() (trinity.common.models.vllm_model.vLLMRolloutModel method)
WANDB (trinity.common.constants.MonitorType attribute)
WandbMonitor (class in trinity.utils.monitor)
warmup_style (trinity.common.verl_config.Optim attribute)
WarmupSampleStrategy (class in trinity.algorithm.sample_strategy)
(class in trinity.algorithm.sample_strategy.sample_strategy)
WebShopWorkflow (class in trinity.common.workflows)
weight (trinity.common.config.RewardShapingConfig attribute)
WorkerExtension (class in trinity.common.models.vllm_worker)
Workflow (class in trinity.common.workflows)
(class in trinity.common.workflows.workflow)
workflow (trinity.common.workflows.Task attribute)
(trinity.common.workflows.workflow.Task attribute)
workflow_args (trinity.common.config.StorageConfig attribute)
(trinity.common.workflows.Task attribute)
(trinity.common.workflows.workflow.Task attribute)
workflow_key (trinity.common.config.FormatConfig attribute)
workflow_type (trinity.buffer.schema.sql_schema.TaskModel attribute)
WorkflowRunner (class in trinity.explorer.workflow_runner)
wrap_in_ray (trinity.common.config.StorageConfig attribute)
wrap_policy (trinity.common.verl_config.FSDPConfig attribute)
WrapPolicy (class in trinity.common.verl_config)
write() (trinity.buffer.buffer_writer.BufferWriter method)
(trinity.buffer.BufferWriter method)
(trinity.buffer.ray_wrapper.DBWrapper method)
(trinity.buffer.ray_wrapper.FileWrapper method)
(trinity.buffer.writer.file_writer.JSONWriter method)
(trinity.buffer.writer.queue_writer.QueueWriter method)
(trinity.buffer.writer.sql_writer.SQLWriter method)
write_async() (trinity.buffer.buffer_writer.BufferWriter method)
(trinity.buffer.BufferWriter method)
(trinity.buffer.writer.file_writer.JSONWriter method)
(trinity.buffer.writer.queue_writer.QueueWriter method)
(trinity.buffer.writer.sql_writer.SQLWriter method)