trinity.common.verl_config module
- class trinity.common.verl_config.Data(train_batch_size: int = 1024)[source]
Bases: object
- train_batch_size: int = 1024
- __init__(train_batch_size: int = 1024) -> None
- class trinity.common.verl_config.FusedKernelOptions(impl_backend: str | None = None)[source]
Bases: object
- impl_backend: str | None = None
- __init__(impl_backend: str | None = None) -> None
- class trinity.common.verl_config.ActorModel(path: str = '', external_lib: Optional[str] = None, override_config: Dict[str, Any] = <factory>, enable_gradient_checkpointing: bool = True, use_remove_padding: bool = False, use_fused_kernels: bool = False, fused_kernel_options: trinity.common.verl_config.FusedKernelOptions = <factory>, custom_chat_template: Optional[str] = None)[source]
Bases: object
- path: str = ''
- external_lib: str | None = None
- override_config: Dict[str, Any]
- enable_gradient_checkpointing: bool = True
- use_remove_padding: bool = False
- use_fused_kernels: bool = False
- fused_kernel_options: FusedKernelOptions
- custom_chat_template: str | None = None
- __init__(path: str = '', external_lib: str | None = None, override_config: Dict[str, Any] = <factory>, enable_gradient_checkpointing: bool = True, use_remove_padding: bool = False, use_fused_kernels: bool = False, fused_kernel_options: FusedKernelOptions = <factory>, custom_chat_template: str | None = None) -> None
- class trinity.common.verl_config.Optim(lr: float = 1e-06, lr_warmup_steps: int = -1, lr_warmup_steps_ratio: float = 0.0, min_lr_ratio: Optional[float] = 0.0, warmup_style: str = 'constant', total_training_steps: int = -1, betas: List[float] = <factory>)[source]
Bases: object
- lr: float = 1e-06
- lr_warmup_steps: int = -1
- lr_warmup_steps_ratio: float = 0.0
- min_lr_ratio: float | None = 0.0
- warmup_style: str = 'constant'
- total_training_steps: int = -1
- betas: List[float]
- __init__(lr: float = 1e-06, lr_warmup_steps: int = -1, lr_warmup_steps_ratio: float = 0.0, min_lr_ratio: float | None = 0.0, warmup_style: str = 'constant', total_training_steps: int = -1, betas: List[float] = <factory>) -> None
- class trinity.common.verl_config.WrapPolicy(min_num_params: int = 0)[source]
Bases: object
- min_num_params: int = 0
- __init__(min_num_params: int = 0) -> None
- class trinity.common.verl_config.FSDPConfig(param_offload: bool = False, optimizer_offload: bool = False, offload_policy: bool = False, reshard_after_forward: bool = True, wrap_policy: trinity.common.verl_config.WrapPolicy = <factory>, fsdp_size: int = -1, forward_prefetch: bool = False)[source]
Bases: object
- param_offload: bool = False
- optimizer_offload: bool = False
- offload_policy: bool = False
- reshard_after_forward: bool = True
- wrap_policy: WrapPolicy
- fsdp_size: int = -1
- forward_prefetch: bool = False
- __init__(param_offload: bool = False, optimizer_offload: bool = False, offload_policy: bool = False, reshard_after_forward: bool = True, wrap_policy: WrapPolicy = <factory>, fsdp_size: int = -1, forward_prefetch: bool = False) -> None
- class trinity.common.verl_config.Checkpoint(load_contents: List[str] = <factory>, save_contents: List[str] = <factory>)[source]
Bases: object
- load_contents: List[str]
- save_contents: List[str]
- __init__(load_contents: List[str] = <factory>, save_contents: List[str] = <factory>) -> None
- class trinity.common.verl_config.Actor(strategy: str = 'fsdp', ppo_mini_batch_size: int = 256, ppo_micro_batch_size: Optional[int] = None, ppo_micro_batch_size_per_gpu: int = 1, use_dynamic_bsz: bool = False, ppo_max_token_len_per_gpu: int = 16384, grad_clip: float = 1.0, ppo_epochs: int = 1, shuffle: bool = False, ulysses_sequence_parallel_size: int = 1, entropy_from_logits_with_chunking: bool = False, entropy_checkpointing: bool = False, checkpoint: trinity.common.verl_config.Checkpoint = <factory>, optim: trinity.common.verl_config.Optim = <factory>, fsdp_config: trinity.common.verl_config.FSDPConfig = <factory>, loss_agg_mode: str = 'token-mean', clip_ratio: float = 0.2, entropy_coeff: float = 0.001, use_kl_loss: bool = False, kl_loss_coef: float = 0.001, kl_loss_type: str = 'low_var_kl')[source]
Bases: object
- strategy: str = 'fsdp'
- ppo_mini_batch_size: int = 256
- ppo_micro_batch_size: int | None = None
- ppo_micro_batch_size_per_gpu: int = 1
- use_dynamic_bsz: bool = False
- ppo_max_token_len_per_gpu: int = 16384
- grad_clip: float = 1.0
- ppo_epochs: int = 1
- shuffle: bool = False
- ulysses_sequence_parallel_size: int = 1
- entropy_from_logits_with_chunking: bool = False
- entropy_checkpointing: bool = False
- checkpoint: Checkpoint
- optim: Optim
- fsdp_config: FSDPConfig
- loss_agg_mode: str = 'token-mean'
- clip_ratio: float = 0.2
- entropy_coeff: float = 0.001
- use_kl_loss: bool = False
- kl_loss_coef: float = 0.001
- kl_loss_type: str = 'low_var_kl'
- __init__(strategy: str = 'fsdp', ppo_mini_batch_size: int = 256, ppo_micro_batch_size: int | None = None, ppo_micro_batch_size_per_gpu: int = 1, use_dynamic_bsz: bool = False, ppo_max_token_len_per_gpu: int = 16384, grad_clip: float = 1.0, ppo_epochs: int = 1, shuffle: bool = False, ulysses_sequence_parallel_size: int = 1, entropy_from_logits_with_chunking: bool = False, entropy_checkpointing: bool = False, checkpoint: Checkpoint = <factory>, optim: Optim = <factory>, fsdp_config: FSDPConfig = <factory>, loss_agg_mode: str = 'token-mean', clip_ratio: float = 0.2, entropy_coeff: float = 0.001, use_kl_loss: bool = False, kl_loss_coef: float = 0.001, kl_loss_type: str = 'low_var_kl') -> None
- class trinity.common.verl_config.Ref(fsdp_config: trinity.common.verl_config.FSDPConfig = <factory>, log_prob_micro_batch_size: Optional[int] = None, log_prob_micro_batch_size_per_gpu: int = 1, log_prob_use_dynamic_bsz: bool = False, log_prob_max_token_len_per_gpu: int = 0, ulysses_sequence_parallel_size: int = 1, entropy_from_logits_with_chunking: bool = False, entropy_checkpointing: bool = False, checkpoint: trinity.common.verl_config.Checkpoint = <factory>)[source]
Bases: object
- fsdp_config: FSDPConfig
- log_prob_micro_batch_size: int | None = None
- log_prob_micro_batch_size_per_gpu: int = 1
- log_prob_use_dynamic_bsz: bool = False
- log_prob_max_token_len_per_gpu: int = 0
- ulysses_sequence_parallel_size: int = 1
- entropy_from_logits_with_chunking: bool = False
- entropy_checkpointing: bool = False
- checkpoint: Checkpoint
- __init__(fsdp_config: FSDPConfig = <factory>, log_prob_micro_batch_size: int | None = None, log_prob_micro_batch_size_per_gpu: int = 1, log_prob_use_dynamic_bsz: bool = False, log_prob_max_token_len_per_gpu: int = 0, ulysses_sequence_parallel_size: int = 1, entropy_from_logits_with_chunking: bool = False, entropy_checkpointing: bool = False, checkpoint: Checkpoint = <factory>) -> None
- class trinity.common.verl_config.Rollout(val_kwargs: trinity.common.verl_config._ValKwargs = <factory>, multi_turn: trinity.common.verl_config._MultiTurn = <factory>, temperature: float = 1.0, n: int = 1, log_prob_micro_batch_size: Optional[int] = None, log_prob_micro_batch_size_per_gpu: int = 1)[source]
Bases: object
- val_kwargs: _ValKwargs
- multi_turn: _MultiTurn
- temperature: float = 1.0
- n: int = 1
- log_prob_micro_batch_size: int | None = None
- log_prob_micro_batch_size_per_gpu: int = 1
- __init__(val_kwargs: _ValKwargs = <factory>, multi_turn: _MultiTurn = <factory>, temperature: float = 1.0, n: int = 1, log_prob_micro_batch_size: int | None = None, log_prob_micro_batch_size_per_gpu: int = 1) -> None
- class trinity.common.verl_config.ActorRolloutRef(hybrid_engine: bool = True, model: trinity.common.verl_config.ActorModel = <factory>, actor: trinity.common.verl_config.Actor = <factory>, ref: trinity.common.verl_config.Ref = <factory>, rollout: trinity.common.verl_config.Rollout = <factory>, synchronizer: Optional[trinity.common.config.SynchronizerConfig] = None, explorer_name: str = 'explorer')[source]
Bases: object
- hybrid_engine: bool = True
- model: ActorModel
- actor: Actor
- ref: Ref
- rollout: Rollout
- synchronizer: SynchronizerConfig | None = None
- explorer_name: str = 'explorer'
- __init__(hybrid_engine: bool = True, model: ActorModel = <factory>, actor: Actor = <factory>, ref: Ref = <factory>, rollout: Rollout = <factory>, synchronizer: SynchronizerConfig | None = None, explorer_name: str = 'explorer') -> None
- class trinity.common.verl_config.CriticModel(path: str = '', tokenizer_path: str = '', override_config: Dict[str, str] = <factory>, external_lib: Optional[str] = None, enable_gradient_checkpointing: bool = True, use_remove_padding: bool = False, fsdp_config: trinity.common.verl_config.FSDPConfig = <factory>)[source]
Bases: object
- path: str = ''
- tokenizer_path: str = ''
- override_config: Dict[str, str]
- external_lib: str | None = None
- enable_gradient_checkpointing: bool = True
- use_remove_padding: bool = False
- fsdp_config: FSDPConfig
- __init__(path: str = '', tokenizer_path: str = '', override_config: Dict[str, str] = <factory>, external_lib: str | None = None, enable_gradient_checkpointing: bool = True, use_remove_padding: bool = False, fsdp_config: FSDPConfig = <factory>) -> None
- class trinity.common.verl_config.Critic(strategy: str = 'fsdp', optim: trinity.common.verl_config.Optim = <factory>, model: trinity.common.verl_config.CriticModel = <factory>, ppo_mini_batch_size: int = 0, ppo_micro_batch_size: Optional[int] = None, ppo_micro_batch_size_per_gpu: int = 1, forward_micro_batch_size: Optional[int] = None, forward_micro_batch_size_per_gpu: Optional[int] = None, use_dynamic_bsz: bool = False, ppo_max_token_len_per_gpu: int = 0, forward_max_token_len_per_gpu: int = 0, ulysses_sequence_parallel_size: int = 1, ppo_epochs: int = 0, shuffle: bool = False, grad_clip: float = 0.0, cliprange_value: float = 0.0, checkpoint: trinity.common.verl_config.Checkpoint = <factory>, rollout_n: int = 1, loss_agg_mode: str = 'token-mean')[source]
Bases: object
- strategy: str = 'fsdp'
- optim: Optim
- model: CriticModel
- ppo_mini_batch_size: int = 0
- ppo_micro_batch_size: int | None = None
- ppo_micro_batch_size_per_gpu: int = 1
- forward_micro_batch_size: int | None = None
- forward_micro_batch_size_per_gpu: int | None = None
- use_dynamic_bsz: bool = False
- ppo_max_token_len_per_gpu: int = 0
- forward_max_token_len_per_gpu: int = 0
- ulysses_sequence_parallel_size: int = 1
- ppo_epochs: int = 0
- shuffle: bool = False
- grad_clip: float = 0.0
- cliprange_value: float = 0.0
- checkpoint: Checkpoint
- rollout_n: int = 1
- loss_agg_mode: str = 'token-mean'
- __init__(strategy: str = 'fsdp', optim: Optim = <factory>, model: CriticModel = <factory>, ppo_mini_batch_size: int = 0, ppo_micro_batch_size: int | None = None, ppo_micro_batch_size_per_gpu: int = 1, forward_micro_batch_size: int | None = None, forward_micro_batch_size_per_gpu: int | None = None, use_dynamic_bsz: bool = False, ppo_max_token_len_per_gpu: int = 0, forward_max_token_len_per_gpu: int = 0, ulysses_sequence_parallel_size: int = 1, ppo_epochs: int = 0, shuffle: bool = False, grad_clip: float = 0.0, cliprange_value: float = 0.0, checkpoint: Checkpoint = <factory>, rollout_n: int = 1, loss_agg_mode: str = 'token-mean') -> None
- class trinity.common.verl_config.RewardModel(enable: bool = False, strategy: str = 'fsdp', model: trinity.common.verl_config._RewardModel = <factory>, micro_batch_size_per_gpu: int = 1, max_length: Optional[int] = None, ulysses_sequence_parallel_size: int = 1, use_dynamic_bsz: bool = False, forward_max_token_len_per_gpu: int = 0, reward_manager: str = 'naive')[source]
Bases: object
- enable: bool = False
- strategy: str = 'fsdp'
- model: _RewardModel
- micro_batch_size_per_gpu: int = 1
- max_length: int | None = None
- ulysses_sequence_parallel_size: int = 1
- use_dynamic_bsz: bool = False
- forward_max_token_len_per_gpu: int = 0
- reward_manager: str = 'naive'
- __init__(enable: bool = False, strategy: str = 'fsdp', model: _RewardModel = <factory>, micro_batch_size_per_gpu: int = 1, max_length: int | None = None, ulysses_sequence_parallel_size: int = 1, use_dynamic_bsz: bool = False, forward_max_token_len_per_gpu: int = 0, reward_manager: str = 'naive') -> None
- class trinity.common.verl_config.CustomRewardFunction(path: str | None = None, name: str = 'compute_score')[source]
Bases: object
- path: str | None = None
- name: str = 'compute_score'
- __init__(path: str | None = None, name: str = 'compute_score') -> None
- class trinity.common.verl_config.KL_Ctrl(type: str = 'fixed', kl_coef: float = 0.001, horizon: float = 10000, target_kl: float = 0.1)[source]
Bases: object
- type: str = 'fixed'
- kl_coef: float = 0.001
- horizon: float = 10000
- target_kl: float = 0.1
- __init__(type: str = 'fixed', kl_coef: float = 0.001, horizon: float = 10000, target_kl: float = 0.1) -> None
- class trinity.common.verl_config.Algorithm(gamma: float = 1.0, lam: float = 1.0, adv_estimator: str = 'gae', norm_adv_by_std_in_grpo: bool = True, use_kl_in_reward: bool = False, kl_penalty: str = 'kl', kl_ctrl: trinity.common.verl_config.KL_Ctrl = <factory>)[source]
Bases: object
- gamma: float = 1.0
- lam: float = 1.0
- adv_estimator: str = 'gae'
- norm_adv_by_std_in_grpo: bool = True
- use_kl_in_reward: bool = False
- kl_penalty: str = 'kl'
- kl_ctrl: KL_Ctrl
- __init__(gamma: float = 1.0, lam: float = 1.0, adv_estimator: str = 'gae', norm_adv_by_std_in_grpo: bool = True, use_kl_in_reward: bool = False, kl_penalty: str = 'kl', kl_ctrl: KL_Ctrl = <factory>) -> None
- class trinity.common.verl_config.Trainer(balance_batch: bool = True, total_epochs: int = 30, total_training_steps: Optional[int] = None, project_name: str = '', group_name: str = '', experiment_name: str = '', logger: List[str] = <factory>, val_generations_to_log_to_wandb: int = 0, nnodes: int = 0, n_gpus_per_node: int = 0, save_freq: int = 0, resume_mode: str = 'auto', resume_from_path: str = '', test_freq: int = 0, critic_warmup: int = 0, default_hdfs_dir: Optional[str] = None, remove_previous_ckpt_in_save: bool = False, del_local_ckpt_after_load: bool = False, default_local_dir: str = '', val_before_train: bool = False, training_rollout_mode: str = 'parallel', enable_exp_buffer: bool = True, sync_freq: int = 0, sft_warmup_steps: int = 0, max_actor_ckpt_to_keep: Optional[int] = None, max_critic_ckpt_to_keep: Optional[int] = None, device: str = 'cuda')[source]
Bases: object
- balance_batch: bool = True
- total_epochs: int = 30
- total_training_steps: int | None = None
- project_name: str = ''
- group_name: str = ''
- experiment_name: str = ''
- logger: List[str]
- val_generations_to_log_to_wandb: int = 0
- nnodes: int = 0
- n_gpus_per_node: int = 0
- save_freq: int = 0
- resume_mode: str = 'auto'
- resume_from_path: str = ''
- test_freq: int = 0
- critic_warmup: int = 0
- default_hdfs_dir: str | None = None
- remove_previous_ckpt_in_save: bool = False
- del_local_ckpt_after_load: bool = False
- default_local_dir: str = ''
- val_before_train: bool = False
- training_rollout_mode: str = 'parallel'
- enable_exp_buffer: bool = True
- sync_freq: int = 0
- sft_warmup_steps: int = 0
- max_actor_ckpt_to_keep: int | None = None
- max_critic_ckpt_to_keep: int | None = None
- device: str = 'cuda'
- __init__(balance_batch: bool = True, total_epochs: int = 30, total_training_steps: int | None = None, project_name: str = '', group_name: str = '', experiment_name: str = '', logger: List[str] = <factory>, val_generations_to_log_to_wandb: int = 0, nnodes: int = 0, n_gpus_per_node: int = 0, save_freq: int = 0, resume_mode: str = 'auto', resume_from_path: str = '', test_freq: int = 0, critic_warmup: int = 0, default_hdfs_dir: str | None = None, remove_previous_ckpt_in_save: bool = False, del_local_ckpt_after_load: bool = False, default_local_dir: str = '', val_before_train: bool = False, training_rollout_mode: str = 'parallel', enable_exp_buffer: bool = True, sync_freq: int = 0, sft_warmup_steps: int = 0, max_actor_ckpt_to_keep: int | None = None, max_critic_ckpt_to_keep: int | None = None, device: str = 'cuda') -> None
- class trinity.common.verl_config.veRLConfig(data: trinity.common.verl_config.Data = <factory>, actor_rollout_ref: trinity.common.verl_config.ActorRolloutRef = <factory>, critic: trinity.common.verl_config.Critic = <factory>, reward_model: trinity.common.verl_config.RewardModel = <factory>, custom_reward_function: trinity.common.verl_config.CustomRewardFunction = <factory>, algorithm: trinity.common.verl_config.Algorithm = <factory>, trainer: trinity.common.verl_config.Trainer = <factory>, buffer: trinity.common.config.BufferConfig = <factory>, synchronizer: Optional[trinity.common.config.SynchronizerConfig] = None, enable_preview: bool = True)[source]
Bases: object
- data: Data
- actor_rollout_ref: ActorRolloutRef
- critic: Critic
- reward_model: RewardModel
- custom_reward_function: CustomRewardFunction
- algorithm: Algorithm
- trainer: Trainer
- buffer: BufferConfig
- synchronizer: SynchronizerConfig | None = None
- enable_preview: bool = True
- __init__(data: Data = <factory>, actor_rollout_ref: ActorRolloutRef = <factory>, critic: Critic = <factory>, reward_model: RewardModel = <factory>, custom_reward_function: CustomRewardFunction = <factory>, algorithm: Algorithm = <factory>, trainer: Trainer = <factory>, buffer: BufferConfig = <factory>, synchronizer: SynchronizerConfig | None = None, enable_preview: bool = True) -> None
- trinity.common.verl_config.load_config(config_path: str) -> veRLConfig [source]