trinity.common package
Subpackages
- trinity.common.models package
- trinity.common.rewards package
- Submodules
- trinity.common.rewards.accuracy_reward module
- trinity.common.rewards.agents_reward module
- trinity.common.rewards.base module
- trinity.common.rewards.composite_reward module
- trinity.common.rewards.format_reward module
- trinity.common.rewards.human_reward module
- trinity.common.rewards.reward_fn module
- trinity.common.rewards.tool_reward module
- Module contents
- Submodules
- trinity.common.workflows package
Submodules
- trinity.common.config module
FormatConfig
FormatConfig.prompt_type
FormatConfig.prompt_key
FormatConfig.response_key
FormatConfig.messages_key
FormatConfig.tools_key
FormatConfig.chat_template
FormatConfig.system_prompt
FormatConfig.reply_prefix
FormatConfig.reward_fn_key
FormatConfig.workflow_key
FormatConfig.solution_key
FormatConfig.reward_key
FormatConfig.chosen_key
FormatConfig.rejected_key
FormatConfig.label_key
FormatConfig.__init__()
GenerationConfig
StorageConfig
StorageConfig.name
StorageConfig.storage_type
StorageConfig.path
StorageConfig.repeat_times
StorageConfig.raw
StorageConfig.split
StorageConfig.subset_name
StorageConfig.format
StorageConfig.index
StorageConfig.wrap_in_ray
StorageConfig.capacity
StorageConfig.max_read_timeout
StorageConfig.use_priority_queue
StorageConfig.reuse_cooldown_time
StorageConfig.replay_buffer_kwargs
StorageConfig.default_workflow_type
StorageConfig.default_eval_workflow_type
StorageConfig.default_reward_fn_type
StorageConfig.rollout_args
StorageConfig.workflow_args
StorageConfig.reward_fn_args
StorageConfig.enable_progress_bar
StorageConfig.ray_namespace
StorageConfig.algorithm_type
StorageConfig.total_epochs
StorageConfig.total_steps
StorageConfig.task_type
StorageConfig.__init__()
OperatorConfig
ExperiencePipelineConfig
TaskPipelineConfig
DataProcessorConfig
ModelConfig
InferenceModelConfig
InferenceModelConfig.model_path
InferenceModelConfig.engine_type
InferenceModelConfig.engine_num
InferenceModelConfig.tensor_parallel_size
InferenceModelConfig.use_v1
InferenceModelConfig.enforce_eager
InferenceModelConfig.enable_prefix_caching
InferenceModelConfig.enable_chunked_prefill
InferenceModelConfig.gpu_memory_utilization
InferenceModelConfig.dtype
InferenceModelConfig.seed
InferenceModelConfig.max_model_len
InferenceModelConfig.max_prompt_tokens
InferenceModelConfig.max_response_tokens
InferenceModelConfig.min_response_tokens
InferenceModelConfig.ignore_eos
InferenceModelConfig.chat_template
InferenceModelConfig.enable_thinking
InferenceModelConfig.enable_history
InferenceModelConfig.enable_openai_api
InferenceModelConfig.enable_auto_tool_choice
InferenceModelConfig.tool_call_parser
InferenceModelConfig.reasoning_parser
InferenceModelConfig.bundle_indices
InferenceModelConfig.__init__()
AlgorithmConfig
AlgorithmConfig.algorithm_type
AlgorithmConfig.repeat_times
AlgorithmConfig.sample_strategy
AlgorithmConfig.sample_strategy_args
AlgorithmConfig.advantage_fn
AlgorithmConfig.advantage_fn_args
AlgorithmConfig.kl_penalty_fn
AlgorithmConfig.kl_penalty_fn_args
AlgorithmConfig.policy_loss_fn
AlgorithmConfig.policy_loss_fn_args
AlgorithmConfig.kl_loss_fn
AlgorithmConfig.kl_loss_fn_args
AlgorithmConfig.entropy_loss_fn
AlgorithmConfig.entropy_loss_fn_args
AlgorithmConfig.use_token_level_loss
AlgorithmConfig.__init__()
ClusterConfig
ExplorerInput
TrainerInput
BufferConfig
BufferConfig.batch_size
BufferConfig.train_batch_size
BufferConfig.total_epochs
BufferConfig.total_steps
BufferConfig.explorer_input
BufferConfig.trainer_input
BufferConfig.max_retry_times
BufferConfig.max_retry_interval
BufferConfig.explorer_output
BufferConfig.tokenizer_path
BufferConfig.pad_token_id
BufferConfig.cache_dir
BufferConfig.__init__()
ExplorerConfig
ExplorerConfig.name
ExplorerConfig.runner_per_model
ExplorerConfig.max_timeout
ExplorerConfig.max_retry_times
ExplorerConfig.env_vars
ExplorerConfig.max_repeat_times_per_runner
ExplorerConfig.runner_num
ExplorerConfig.rollout_model
ExplorerConfig.auxiliary_models
ExplorerConfig.eval_interval
ExplorerConfig.eval_on_startup
ExplorerConfig.bench_on_latest_checkpoint
ExplorerConfig.__init__()
TrainerConfig
MonitorConfig
SynchronizerConfig
DataJuicerServiceConfig
ServiceConfig
Config
Config.mode
Config.project
Config.group
Config.name
Config.checkpoint_root_dir
Config.checkpoint_job_dir
Config.ray_namespace
Config.continue_from_checkpoint
Config.algorithm
Config.data_processor
Config.model
Config.cluster
Config.buffer
Config.explorer
Config.trainer
Config.monitor
Config.synchronizer
Config.service
Config.save()
Config.check_and_update()
Config.flatten()
Config.__init__()
load_config()
- trinity.common.constants module
- trinity.common.experience module
EID
CustomField
Experience
Experience.__init__()
Experience.eid
Experience.reward
Experience.advantages
Experience.returns
Experience.info
Experience.metrics
Experience.prompt_length
Experience.response_text
Experience.prompt_text
Experience.messages
Experience.tools
Experience.chosen_text
Experience.rejected_text
Experience.tokens
Experience.logprobs
Experience.action_mask
Experience.chosen
Experience.rejected
Experience.serialize()
Experience.deserialize()
Experience.to_dict()
Experience.gather()
split_dpo_experience_to_single_turn()
Experiences
Experiences.__init__()
Experiences.eids
Experiences.tokens
Experiences.rewards
Experiences.advantages
Experiences.returns
Experiences.attention_masks
Experiences.action_masks
Experiences.prompt_length
Experiences.logprobs
Experiences.custom_fields
Experiences.batch_size
Experiences.gather_experiences()
empty_experiences()
gather_token_ids()
gather_action_masks()
gather_attention_masks()
gather_logprobs()
gather_advantages()
gather_returns()
group_by()
to_hf_datasets()
from_hf_datasets()
- trinity.common.schema module
- trinity.common.verl_config module
Data
FusedKernelOptions
ActorModel
Optim
WrapPolicy
FSDPConfig
Checkpoint
Actor
Actor.strategy
Actor.ppo_mini_batch_size
Actor.ppo_micro_batch_size
Actor.ppo_micro_batch_size_per_gpu
Actor.use_dynamic_bsz
Actor.ppo_max_token_len_per_gpu
Actor.grad_clip
Actor.ppo_epochs
Actor.shuffle
Actor.ulysses_sequence_parallel_size
Actor.entropy_from_logits_with_chunking
Actor.entropy_checkpointing
Actor.checkpoint
Actor.optim
Actor.fsdp_config
Actor.loss_agg_mode
Actor.clip_ratio
Actor.entropy_coeff
Actor.use_kl_loss
Actor.kl_loss_coef
Actor.kl_loss_type
Actor.__init__()
Ref
Rollout
ActorRolloutRef
CriticModel
Critic
Critic.strategy
Critic.optim
Critic.model
Critic.ppo_mini_batch_size
Critic.ppo_micro_batch_size
Critic.ppo_micro_batch_size_per_gpu
Critic.forward_micro_batch_size
Critic.forward_micro_batch_size_per_gpu
Critic.use_dynamic_bsz
Critic.ppo_max_token_len_per_gpu
Critic.forward_max_token_len_per_gpu
Critic.ulysses_sequence_parallel_size
Critic.ppo_epochs
Critic.shuffle
Critic.grad_clip
Critic.cliprange_value
Critic.checkpoint
Critic.rollout_n
Critic.loss_agg_mode
Critic.__init__()
RewardModel
CustomRewardFunction
KL_Ctrl
Algorithm
Trainer
Trainer.balance_batch
Trainer.total_epochs
Trainer.total_training_steps
Trainer.project_name
Trainer.group_name
Trainer.experiment_name
Trainer.logger
Trainer.val_generations_to_log_to_wandb
Trainer.nnodes
Trainer.n_gpus_per_node
Trainer.save_freq
Trainer.resume_mode
Trainer.resume_from_path
Trainer.test_freq
Trainer.critic_warmup
Trainer.default_hdfs_dir
Trainer.remove_previous_ckpt_in_save
Trainer.del_local_ckpt_after_load
Trainer.default_local_dir
Trainer.val_before_train
Trainer.training_rollout_mode
Trainer.enable_exp_buffer
Trainer.sync_freq
Trainer.sft_warmup_steps
Trainer.max_actor_ckpt_to_keep
Trainer.max_critic_ckpt_to_keep
Trainer.device
Trainer.__init__()
veRLConfig
load_config()