trinity.common
Subpackages
- trinity.common.models
- Submodules
- trinity.common.models.model module
InferenceModel
ModelWrapper
ModelWrapper.__init__()
ModelWrapper.generate()
ModelWrapper.generate_async()
ModelWrapper.chat()
ModelWrapper.chat_async()
ModelWrapper.logprobs()
ModelWrapper.logprobs_async()
ModelWrapper.convert_messages_to_experience()
ModelWrapper.convert_messages_to_experience_async()
ModelWrapper.model_version
ModelWrapper.get_openai_client()
ModelWrapper.extract_experience_from_history()
convert_api_output_to_experience()
extract_logprobs()
- trinity.common.models.utils module
- trinity.common.models.vllm_model module
vLLMRolloutModel
vLLMRolloutModel.__init__()
vLLMRolloutModel.chat()
vLLMRolloutModel.generate()
vLLMRolloutModel.logprobs()
vLLMRolloutModel.convert_messages_to_experience()
vLLMRolloutModel.shutdown()
vLLMRolloutModel.sync_model()
vLLMRolloutModel.init_process_group()
vLLMRolloutModel.run_api_server()
vLLMRolloutModel.has_api_server()
vLLMRolloutModel.api_server_ready()
vLLMRolloutModel.reset_prefix_cache()
vLLMRolloutModel.get_model_version()
vLLMRolloutModel.sleep()
vLLMRolloutModel.wake_up()
- trinity.common.models.vllm_worker module
- Module contents
- trinity.common.rewards
- Submodules
- trinity.common.rewards.accuracy_reward module
- trinity.common.rewards.agents_reward module
- trinity.common.rewards.countdown_reward module
- trinity.common.rewards.dapo_reward module
- trinity.common.rewards.format_reward module
- trinity.common.rewards.human_reward module
- trinity.common.rewards.math_reward module
- trinity.common.rewards.reward_fn module
- trinity.common.rewards.tool_reward module
- trinity.common.rewards.utils module
- Module contents
- trinity.common.workflows
- Submodules
- trinity.common.workflows.customized_math_workflows module
- trinity.common.workflows.customized_toolcall_workflows module
- trinity.common.workflows.eval_workflow module
- trinity.common.workflows.math_rm_workflow module
- trinity.common.workflows.step_wise_workflow module
- trinity.common.workflows.workflow module
- Module contents
Submodules
trinity.common.config module
Configs for RFT.
- class trinity.common.config.FormatConfig(prompt_type: PromptType = PromptType.MESSAGES, prompt_key: str = 'prompt', response_key: str = 'response', messages_key: str = 'message', chat_template: str = '', system_prompt: str | None = None, reply_prefix: str | None = None, reward_fn_key: str = '', workflow_key: str = '', solution_key: str = 'solution', reward_key: str = 'reward', chosen_key: str = 'chosen', rejected_key: str = 'rejected', label_key: str = '')[source]
Bases:
object
Configuration for data formatting
- prompt_type: PromptType = 'messages'
- prompt_key: str = 'prompt'
- response_key: str = 'response'
- messages_key: str = 'message'
- chat_template: str = ''
- system_prompt: str | None = None
- reply_prefix: str | None = None
- reward_fn_key: str = ''
- workflow_key: str = ''
- solution_key: str = 'solution'
- reward_key: str = 'reward'
- chosen_key: str = 'chosen'
- rejected_key: str = 'rejected'
- label_key: str = ''
- __init__(prompt_type: PromptType = PromptType.MESSAGES, prompt_key: str = 'prompt', response_key: str = 'response', messages_key: str = 'message', chat_template: str = '', system_prompt: str | None = None, reply_prefix: str | None = None, reward_fn_key: str = '', workflow_key: str = '', solution_key: str = 'solution', reward_key: str = 'reward', chosen_key: str = 'chosen', rejected_key: str = 'rejected', label_key: str = '') None
- class trinity.common.config.GenerationConfig(temperature: float = 1.0, top_p: float = 1.0, top_k: int = -1, logprobs: int = 0, n: int = 1)[source]
Bases:
object
- temperature: float = 1.0
- top_p: float = 1.0
- top_k: int = -1
- logprobs: int = 0
- n: int = 1
- __init__(temperature: float = 1.0, top_p: float = 1.0, top_k: int = -1, logprobs: int = 0, n: int = 1) None
- class trinity.common.config.StorageConfig(name: str = '', storage_type: ~trinity.common.constants.StorageType = StorageType.FILE, path: str | None = None, repeat_times: int | None = None, raw: bool = False, split: str = 'train', subset_name: str | None = None, format: ~trinity.common.config.FormatConfig = <factory>, index: int = 0, wrap_in_ray: bool = True, capacity: int = 10000, max_read_timeout: float = 1800, use_priority_queue: bool = False, reuse_cooldown_time: float | None = None, replay_buffer_kwargs: dict = <factory>, default_workflow_type: str | None = None, default_eval_workflow_type: str | None = None, default_reward_fn_type: str | None = None, rollout_args: ~trinity.common.config.GenerationConfig = <factory>, workflow_args: dict = <factory>, reward_fn_args: dict = <factory>, enable_progress_bar: bool | None = False, ray_namespace: str | None = None, algorithm_type: str | None = None, total_epochs: int = 1, total_steps: int | None = None, task_type: ~trinity.common.constants.TaskType = TaskType.EXPLORE)[source]
Bases:
object
Storage config.
- name: str = ''
- storage_type: StorageType = 'file'
- path: str | None = None
- repeat_times: int | None = None
- raw: bool = False
- split: str = 'train'
- subset_name: str | None = None
- format: FormatConfig
- index: int = 0
- wrap_in_ray: bool = True
- capacity: int = 10000
- max_read_timeout: float = 1800
- use_priority_queue: bool = False
- reuse_cooldown_time: float | None = None
- replay_buffer_kwargs: dict
- default_workflow_type: str | None = None
- default_eval_workflow_type: str | None = None
- default_reward_fn_type: str | None = None
- rollout_args: GenerationConfig
- workflow_args: dict
- reward_fn_args: dict
- enable_progress_bar: bool | None = False
- ray_namespace: str | None = None
- algorithm_type: str | None = None
- total_epochs: int = 1
- total_steps: int | None = None
- __init__(name: str = '', storage_type: ~trinity.common.constants.StorageType = StorageType.FILE, path: str | None = None, repeat_times: int | None = None, raw: bool = False, split: str = 'train', subset_name: str | None = None, format: ~trinity.common.config.FormatConfig = <factory>, index: int = 0, wrap_in_ray: bool = True, capacity: int = 10000, max_read_timeout: float = 1800, use_priority_queue: bool = False, reuse_cooldown_time: float | None = None, replay_buffer_kwargs: dict = <factory>, default_workflow_type: str | None = None, default_eval_workflow_type: str | None = None, default_reward_fn_type: str | None = None, rollout_args: ~trinity.common.config.GenerationConfig = <factory>, workflow_args: dict = <factory>, reward_fn_args: dict = <factory>, enable_progress_bar: bool | None = False, ray_namespace: str | None = None, algorithm_type: str | None = None, total_epochs: int = 1, total_steps: int | None = None, task_type: ~trinity.common.constants.TaskType = TaskType.EXPLORE) None
- class trinity.common.config.RewardShapingConfig(stats_key: str = '', op_type: OpType = OpType.ADD, weight: float = 1.0)[source]
Bases:
object
Config for reward shaping.
- stats_key: str = ''
- weight: float = 1.0
- class trinity.common.config.DataPipelineConfig(input_buffers: ~typing.List[~trinity.common.config.StorageConfig] = <factory>, output_buffer: ~trinity.common.config.StorageConfig = <factory>, format: ~trinity.common.config.FormatConfig = <factory>, dj_config_path: str | None = None, dj_process_desc: str | None = None, agent_model_name: str | None = None, clean_strategy: str = 'iterative', min_size_ratio: float | None = None, min_priority_score: float | None = 0.0, priority_weights: ~typing.Dict[str, float] | None = None, data_dist: str | None = 'gaussian', reward_shaping: ~typing.List[~trinity.common.config.RewardShapingConfig] | None = <factory>)[source]
Bases:
object
Config for data pipeline.
- input_buffers: List[StorageConfig]
- output_buffer: StorageConfig
- format: FormatConfig
- dj_config_path: str | None = None
- dj_process_desc: str | None = None
- agent_model_name: str | None = None
- clean_strategy: str = 'iterative'
- min_size_ratio: float | None = None
- min_priority_score: float | None = 0.0
- priority_weights: Dict[str, float] | None = None
- data_dist: str | None = 'gaussian'
- reward_shaping: List[RewardShapingConfig] | None
- __init__(input_buffers: ~typing.List[~trinity.common.config.StorageConfig] = <factory>, output_buffer: ~trinity.common.config.StorageConfig = <factory>, format: ~trinity.common.config.FormatConfig = <factory>, dj_config_path: str | None = None, dj_process_desc: str | None = None, agent_model_name: str | None = None, clean_strategy: str = 'iterative', min_size_ratio: float | None = None, min_priority_score: float | None = 0.0, priority_weights: ~typing.Dict[str, float] | None = None, data_dist: str | None = 'gaussian', reward_shaping: ~typing.List[~trinity.common.config.RewardShapingConfig] | None = <factory>) None
- class trinity.common.config.DataProcessorConfig(data_processor_url: str | None = None, task_pipeline: DataPipelineConfig | None = None, experience_pipeline: DataPipelineConfig | None = None)[source]
Bases:
object
Data-Juicer config
- data_processor_url: str | None = None
- task_pipeline: DataPipelineConfig | None = None
- experience_pipeline: DataPipelineConfig | None = None
- __init__(data_processor_url: str | None = None, task_pipeline: DataPipelineConfig | None = None, experience_pipeline: DataPipelineConfig | None = None) None
- class trinity.common.config.ModelConfig(model_path: str = '', critic_model_path: str = '', max_model_len: int | None = None, max_prompt_tokens: int | None = None, max_response_tokens: int | None = None, custom_chat_template: str | None = None)[source]
Bases:
object
- model_path: str = ''
- critic_model_path: str = ''
- max_model_len: int | None = None
- max_prompt_tokens: int | None = None
- max_response_tokens: int | None = None
- custom_chat_template: str | None = None
- __init__(model_path: str = '', critic_model_path: str = '', max_model_len: int | None = None, max_prompt_tokens: int | None = None, max_response_tokens: int | None = None, custom_chat_template: str | None = None) None
- class trinity.common.config.InferenceModelConfig(model_path: str = '', engine_type: str = 'vllm_async', engine_num: int = 1, tensor_parallel_size: int = 1, use_v1: bool = True, enforce_eager: bool = True, enable_prefix_caching: bool = False, enable_chunked_prefill: bool = False, gpu_memory_utilization: float = 0.9, dtype: str = 'bfloat16', seed: int = 42, max_model_len: int | None = None, max_prompt_tokens: int | None = None, max_response_tokens: int | None = None, chat_template: str | None = None, enable_thinking: bool = False, enable_history: bool = False, enable_openai_api: bool = False, enable_auto_tool_choice: bool = False, tool_call_parser: str | None = None, reasoning_parser: str | None = None, bundle_indices: str = '')[source]
Bases:
object
- model_path: str = ''
- engine_type: str = 'vllm_async'
- engine_num: int = 1
- tensor_parallel_size: int = 1
- use_v1: bool = True
- enforce_eager: bool = True
- enable_prefix_caching: bool = False
- enable_chunked_prefill: bool = False
- gpu_memory_utilization: float = 0.9
- dtype: str = 'bfloat16'
- seed: int = 42
- max_model_len: int | None = None
- max_prompt_tokens: int | None = None
- max_response_tokens: int | None = None
- chat_template: str | None = None
- enable_thinking: bool = False
- enable_history: bool = False
- enable_openai_api: bool = False
- enable_auto_tool_choice: bool = False
- tool_call_parser: str | None = None
- reasoning_parser: str | None = None
- bundle_indices: str = ''
- __init__(model_path: str = '', engine_type: str = 'vllm_async', engine_num: int = 1, tensor_parallel_size: int = 1, use_v1: bool = True, enforce_eager: bool = True, enable_prefix_caching: bool = False, enable_chunked_prefill: bool = False, gpu_memory_utilization: float = 0.9, dtype: str = 'bfloat16', seed: int = 42, max_model_len: int | None = None, max_prompt_tokens: int | None = None, max_response_tokens: int | None = None, chat_template: str | None = None, enable_thinking: bool = False, enable_history: bool = False, enable_openai_api: bool = False, enable_auto_tool_choice: bool = False, tool_call_parser: str | None = None, reasoning_parser: str | None = None, bundle_indices: str = '') None
- class trinity.common.config.AlgorithmConfig(algorithm_type: str = 'ppo', repeat_times: int = 1, add_strategy: str | None = None, add_strategy_args: dict | None = None, sample_strategy: str | None = None, sample_strategy_args: dict | None = None, advantage_fn: str | None = None, advantage_fn_args: dict | None = None, kl_penalty_fn: str | None = None, kl_penalty_fn_args: dict | None = None, policy_loss_fn: str | None = None, policy_loss_fn_args: dict | None = None, kl_loss_fn: str | None = None, kl_loss_fn_args: dict | None = None, entropy_loss_fn: str | None = None, entropy_loss_fn_args: dict | None = None, use_token_level_loss: bool = True)[source]
Bases:
object
Config for algorithm.
- algorithm_type: str = 'ppo'
- repeat_times: int = 1
- add_strategy: str | None = None
- add_strategy_args: dict | None = None
- sample_strategy: str | None = None
- sample_strategy_args: dict | None = None
- advantage_fn: str | None = None
- advantage_fn_args: dict | None = None
- kl_penalty_fn: str | None = None
- kl_penalty_fn_args: dict | None = None
- policy_loss_fn: str | None = None
- policy_loss_fn_args: dict | None = None
- kl_loss_fn: str | None = None
- kl_loss_fn_args: dict | None = None
- entropy_loss_fn: str | None = None
- entropy_loss_fn_args: dict | None = None
- use_token_level_loss: bool = True
- __init__(algorithm_type: str = 'ppo', repeat_times: int = 1, add_strategy: str | None = None, add_strategy_args: dict | None = None, sample_strategy: str | None = None, sample_strategy_args: dict | None = None, advantage_fn: str | None = None, advantage_fn_args: dict | None = None, kl_penalty_fn: str | None = None, kl_penalty_fn_args: dict | None = None, policy_loss_fn: str | None = None, policy_loss_fn_args: dict | None = None, kl_loss_fn: str | None = None, kl_loss_fn_args: dict | None = None, entropy_loss_fn: str | None = None, entropy_loss_fn_args: dict | None = None, use_token_level_loss: bool = True) None
- class trinity.common.config.ClusterConfig(node_num: int = 1, gpu_per_node: int = 8)[source]
Bases:
object
Config for the cluster.
- node_num: int = 1
- gpu_per_node: int = 8
- __init__(node_num: int = 1, gpu_per_node: int = 8) None
- class trinity.common.config.ExplorerInput(taskset: ~trinity.common.config.StorageConfig = <factory>, eval_tasksets: ~typing.List[~trinity.common.config.StorageConfig] = <factory>, default_workflow_type: str | None = None, default_eval_workflow_type: str | None = None, default_reward_fn_type: str | None = None, system_prompt: str | None = None, reply_prefix: str | None = None)[source]
Bases:
object
Config for explorer input.
- taskset: StorageConfig
- eval_tasksets: List[StorageConfig]
- default_workflow_type: str | None = None
- default_eval_workflow_type: str | None = None
- default_reward_fn_type: str | None = None
- system_prompt: str | None = None
- reply_prefix: str | None = None
- __init__(taskset: ~trinity.common.config.StorageConfig = <factory>, eval_tasksets: ~typing.List[~trinity.common.config.StorageConfig] = <factory>, default_workflow_type: str | None = None, default_eval_workflow_type: str | None = None, default_reward_fn_type: str | None = None, system_prompt: str | None = None, reply_prefix: str | None = None) None
- class trinity.common.config.TrainerInput(experience_buffer: StorageConfig | None = None, sft_warmup_dataset: StorageConfig | None = None, read_experience_strategy: ReadStrategy | None = None, sft_warmup_steps: int = 0)[source]
Bases:
object
Config for trainer input.
- experience_buffer: StorageConfig | None = None
- sft_warmup_dataset: StorageConfig | None = None
- read_experience_strategy: ReadStrategy | None = None
- sft_warmup_steps: int = 0
- __init__(experience_buffer: StorageConfig | None = None, sft_warmup_dataset: StorageConfig | None = None, read_experience_strategy: ReadStrategy | None = None, sft_warmup_steps: int = 0) None
- class trinity.common.config.BufferConfig(batch_size: int = 1, train_batch_size: int = 0, total_epochs: int = 1, total_steps: int | None = None, explorer_input: ~trinity.common.config.ExplorerInput = <factory>, explorer_output: ~trinity.common.config.StorageConfig | None = None, trainer_input: ~trinity.common.config.TrainerInput = <factory>, max_retry_times: int = 3, max_retry_interval: int = 1, tokenizer_path: str | None = None, pad_token_id: int | None = None, cache_dir: str | None = None)[source]
Bases:
object
Config for buffer.
- batch_size: int = 1
- train_batch_size: int = 0
- total_epochs: int = 1
- total_steps: int | None = None
- explorer_input: ExplorerInput
- explorer_output: StorageConfig | None = None
- trainer_input: TrainerInput
- max_retry_times: int = 3
- max_retry_interval: int = 1
- tokenizer_path: str | None = None
- pad_token_id: int | None = None
- cache_dir: str | None = None
- __init__(batch_size: int = 1, train_batch_size: int = 0, total_epochs: int = 1, total_steps: int | None = None, explorer_input: ~trinity.common.config.ExplorerInput = <factory>, explorer_output: ~trinity.common.config.StorageConfig | None = None, trainer_input: ~trinity.common.config.TrainerInput = <factory>, max_retry_times: int = 3, max_retry_interval: int = 1, tokenizer_path: str | None = None, pad_token_id: int | None = None, cache_dir: str | None = None) None
- class trinity.common.config.ExplorerConfig(name: str = 'explorer', runner_per_model: int = 8, max_timeout: int = 1800, max_retry_times: int = 2, env_vars: dict = <factory>, max_repeat_times_per_runner: int | None = None, runner_num: int | None = None, rollout_model: ~trinity.common.config.InferenceModelConfig = <factory>, auxiliary_models: ~typing.List[~trinity.common.config.InferenceModelConfig] = <factory>, eval_interval: int = 100, eval_on_startup: bool = True, bench_on_latest_checkpoint: bool = False, collect_experiences: bool = False)[source]
Bases:
object
Config for explorer.
- name: str = 'explorer'
- runner_per_model: int = 8
- max_timeout: int = 1800
- max_retry_times: int = 2
- env_vars: dict
- max_repeat_times_per_runner: int | None = None
- runner_num: int | None = None
- rollout_model: InferenceModelConfig
- auxiliary_models: List[InferenceModelConfig]
- eval_interval: int = 100
- eval_on_startup: bool = True
- bench_on_latest_checkpoint: bool = False
- collect_experiences: bool = False
- __init__(name: str = 'explorer', runner_per_model: int = 8, max_timeout: int = 1800, max_retry_times: int = 2, env_vars: dict = <factory>, max_repeat_times_per_runner: int | None = None, runner_num: int | None = None, rollout_model: ~trinity.common.config.InferenceModelConfig = <factory>, auxiliary_models: ~typing.List[~trinity.common.config.InferenceModelConfig] = <factory>, eval_interval: int = 100, eval_on_startup: bool = True, bench_on_latest_checkpoint: bool = False, collect_experiences: bool = False) None
- class trinity.common.config.TrainerConfig(name: str = 'trainer', trainer_type: str = 'verl', save_interval: int = 0, enable_preview: bool = True, actor_grad_clip: Optional[float] = None, trainer_config: Any = <factory>, trainer_config_path: str = '')[source]
Bases:
object
- name: str = 'trainer'
- trainer_type: str = 'verl'
- save_interval: int = 0
- enable_preview: bool = True
- actor_grad_clip: float | None = None
- trainer_config: Any
- trainer_config_path: str = ''
- __init__(name: str = 'trainer', trainer_type: str = 'verl', save_interval: int = 0, enable_preview: bool = True, actor_grad_clip: float | None = None, trainer_config: ~typing.Any = <factory>, trainer_config_path: str = '') None
- class trinity.common.config.MonitorConfig(monitor_type: str = 'tensorboard', monitor_args: Dict | None = None, enable_ray_timeline: bool = False, cache_dir: str = '')[source]
Bases:
object
- monitor_type: str = 'tensorboard'
- monitor_args: Dict | None = None
- enable_ray_timeline: bool = False
- cache_dir: str = ''
- __init__(monitor_type: str = 'tensorboard', monitor_args: Dict | None = None, enable_ray_timeline: bool = False, cache_dir: str = '') None
- class trinity.common.config.SynchronizerConfig(sync_method: SyncMethod = SyncMethod.NCCL, sync_style: SyncStyle = SyncStyle.FIXED, sync_interval: int = 1, sync_offset: int = 0, sync_timeout: int = 3600, wait_for_checkpoint: bool = False, explorer_world_size: int | None = None, ray_namespace: str = '')[source]
Bases:
object
Configs for model weight synchronization.
- sync_method: SyncMethod = 'nccl'
- sync_interval: int = 1
- sync_offset: int = 0
- sync_timeout: int = 3600
- wait_for_checkpoint: bool = False
- explorer_world_size: int | None = None
- ray_namespace: str = ''
- __init__(sync_method: SyncMethod = SyncMethod.NCCL, sync_style: SyncStyle = SyncStyle.FIXED, sync_interval: int = 1, sync_offset: int = 0, sync_timeout: int = 3600, wait_for_checkpoint: bool = False, explorer_world_size: int | None = None, ray_namespace: str = '') None
- class trinity.common.config.Config(mode: str = 'both', project: str = 'Trinity-RFT', group: str = '', name: str = 'rft', checkpoint_root_dir: str = '', checkpoint_job_dir: str = '', ray_namespace: str = '', continue_from_checkpoint: bool = True, algorithm: ~trinity.common.config.AlgorithmConfig = <factory>, data_processor: ~trinity.common.config.DataProcessorConfig = <factory>, model: ~trinity.common.config.ModelConfig = <factory>, cluster: ~trinity.common.config.ClusterConfig = <factory>, buffer: ~trinity.common.config.BufferConfig = <factory>, explorer: ~trinity.common.config.ExplorerConfig = <factory>, trainer: ~trinity.common.config.TrainerConfig = <factory>, monitor: ~trinity.common.config.MonitorConfig = <factory>, synchronizer: ~trinity.common.config.SynchronizerConfig = <factory>)[source]
Bases:
object
Global Configuration
- mode: str = 'both'
- project: str = 'Trinity-RFT'
- group: str = ''
- name: str = 'rft'
- checkpoint_root_dir: str = ''
- checkpoint_job_dir: str = ''
- ray_namespace: str = ''
- continue_from_checkpoint: bool = True
- algorithm: AlgorithmConfig
- data_processor: DataProcessorConfig
- model: ModelConfig
- cluster: ClusterConfig
- buffer: BufferConfig
- explorer: ExplorerConfig
- trainer: TrainerConfig
- monitor: MonitorConfig
- synchronizer: SynchronizerConfig
- flatten() Dict[str, Any] [source]
Flatten the config into a single-level dict with dot-separated keys for nested fields.
- __init__(mode: str = 'both', project: str = 'Trinity-RFT', group: str = '', name: str = 'rft', checkpoint_root_dir: str = '', checkpoint_job_dir: str = '', ray_namespace: str = '', continue_from_checkpoint: bool = True, algorithm: ~trinity.common.config.AlgorithmConfig = <factory>, data_processor: ~trinity.common.config.DataProcessorConfig = <factory>, model: ~trinity.common.config.ModelConfig = <factory>, cluster: ~trinity.common.config.ClusterConfig = <factory>, buffer: ~trinity.common.config.BufferConfig = <factory>, explorer: ~trinity.common.config.ExplorerConfig = <factory>, trainer: ~trinity.common.config.TrainerConfig = <factory>, monitor: ~trinity.common.config.MonitorConfig = <factory>, synchronizer: ~trinity.common.config.SynchronizerConfig = <factory>) None
trinity.common.constants module
Constants.
- class trinity.common.constants.CaseInsensitiveEnumMeta(cls, bases, classdict, **kwds)[source]
Bases:
EnumMeta
- class trinity.common.constants.CaseInsensitiveEnum(value, *args, **kwargs)[source]
Bases:
Enum
An enumeration.
- class trinity.common.constants.PromptType(value, *args, **kwargs)[source]
Bases:
CaseInsensitiveEnum
Prompt Type.
- MESSAGES = 'messages'
- CHATPAIR = 'chatpair'
- PLAINTEXT = 'plaintext'
- class trinity.common.constants.TaskType(value)[source]
Bases:
Enum
Task Type.
- EXPLORE = 0
- EVAL = 1
- class trinity.common.constants.ReadStrategy(value, *args, **kwargs)[source]
Bases:
CaseInsensitiveEnum
Pop Strategy.
- DEFAULT = None
- FIFO = 'fifo'
- RANDOM = 'random'
- LRU = 'lru'
- LFU = 'lfu'
- PRIORITY = 'priority'
- class trinity.common.constants.StorageType(value, *args, **kwargs)[source]
Bases:
CaseInsensitiveEnum
Storage Type.
- SQL = 'sql'
- QUEUE = 'queue'
- FILE = 'file'
- class trinity.common.constants.MonitorType(value, *args, **kwargs)[source]
Bases:
CaseInsensitiveEnum
Monitor Type.
- WANDB = 'wandb'
- TENSORBOARD = 'tensorboard'
- class trinity.common.constants.SyncMethodEnumMeta(cls, bases, classdict, **kwds)[source]
Bases:
CaseInsensitiveEnumMeta
- class trinity.common.constants.SyncMethod(value, *args, **kwargs)[source]
Bases:
CaseInsensitiveEnum
Sync Method.
- NCCL = 'nccl'
- CHECKPOINT = 'checkpoint'
- MEMORY = 'memory'
- class trinity.common.constants.RunningStatus(value)[source]
Bases:
Enum
Running status of explorer and trainer.
- RUNNING = 'running'
- REQUIRE_SYNC = 'require_sync'
- WAITING_SYNC = 'waiting_sync'
- STOPPED = 'stopped'
- class trinity.common.constants.DataProcessorPipelineType(value)[source]
Bases:
Enum
Data processor pipeline type.
- EXPERIENCE = 'experience_pipeline'
- TASK = 'task_pipeline'
trinity.common.experience module
Experience Class.
- class trinity.common.experience.EID(batch: int = 0, task: int = 0, run: int = 0, step: int = 0, suffix: str = <factory>)[source]
Bases:
object
Experience ID class to uniquely identify an experience.
To enable the full functionality of experience grouping, users should manually set the run and step fields in custom workflows.
- batch: int = 0
- task: int = 0
- run: int = 0
- step: int = 0
- suffix: str
- property uid: str
A unique identifier for the experience.
- property sid: str
Step ID of the experience.
For example, experiences generated by all runs of the same task at the same step will have the same sid.
- property rid: str
Run ID of the experience.
For example, experiences generated by one run of a task at all steps will have the same rid.
- property tid: str
Task ID for the experience.
For example, experiences generated by all runs of the same task in GRPO-like algorithms will have the same tid.
- __init__(batch: int = 0, task: int = 0, run: int = 0, step: int = 0, suffix: str = <factory>) None
- class trinity.common.experience.ExperienceType(value)[source]
Bases:
Enum
Enum for experience types.
- SINGLE_TURN = 'single_turn'
- MULTI_TURN = 'multi_turn'
- DPO = 'dpo'
- class trinity.common.experience.CustomField(source_field: str, destination_field: str, data_type: dtype)[source]
Bases:
object
Custom field for Experiences.
This is used to store additional information into the Experiences class.
- source_field: str
- destination_field: str
- data_type: dtype
- __init__(source_field: str, destination_field: str, data_type: dtype) None
- class trinity.common.experience.Experience(*, eid=None, tokens, logprobs=None, reward=None, advantages=None, returns=None, info=None, metrics=None, prompt_length=1, response_text=None, prompt_text=None, action_mask=None, messages=None, chosen=None, rejected=None, chosen_text=None, rejected_text=None)[source]
Bases:
object
- __init__(*, eid=None, tokens, logprobs=None, reward=None, advantages=None, returns=None, info=None, metrics=None, prompt_length=1, response_text=None, prompt_text=None, action_mask=None, messages=None, chosen=None, rejected=None, chosen_text=None, rejected_text=None)[source]
- reward: float | None = None
- advantages: Tensor | None = None
- returns: Tensor | None = None
- experience_type: ExperienceType = 'single_turn'
- info: dict
- metrics: dict[str, float]
- prompt_length: int = 1
- response_text: str | None = None
- prompt_text: str | None = None
- messages: List[dict] | None = None
- chosen_text: str | None = None
- rejected_text: str | None = None
- tokens: Tensor | None = None
- logprobs: Tensor | None = None
- action_mask: Tensor | None = None
- chosen: Tensor | None = None
- rejected: Tensor | None = None
- classmethod deserialize(data: bytes) Experience [source]
- classmethod gather(experiences: List[Experience], pad_token_id: int = 0, custom_fields: List[CustomField] | None = None) Experiences [source]
- trinity.common.experience.split_dpo_experience_to_single_turn(experiences: List[Experience]) List[Experience] [source]
- class trinity.common.experience.Experiences(eids: ~typing.List[~trinity.common.experience.EID], tokens: ~torch.Tensor, rewards: ~torch.Tensor, advantages: ~torch.Tensor | None, returns: ~torch.Tensor | None, attention_masks: ~torch.Tensor, action_masks: ~torch.Tensor | None, prompt_length: int, logprobs: ~torch.Tensor | None, custom_fields: ~typing.List[str] = <factory>)[source]
Bases:
object
A container for a batch of experiences, designed for high-performance communication.
Example
>>>             |<- prompt_length ->|               |
>>> tokens: ('P' represents prompt, 'O' represents output)
>>> exp1:       |........PPPPPPPPPPP|OOOOOOOOOO.....|
>>> exp2:       |......PPPPPPPPPPPPP|OOOOOOO........|
>>>
>>> attention_masks: ('.' represents False and '1' represents True)
>>> exp1:       |........11111111111|1111111111.....|
>>> exp2:       |......1111111111111|1111111........|
- __init__(eids: ~typing.List[~trinity.common.experience.EID], tokens: ~torch.Tensor, rewards: ~torch.Tensor, advantages: ~torch.Tensor | None, returns: ~torch.Tensor | None, attention_masks: ~torch.Tensor, action_masks: ~torch.Tensor | None, prompt_length: int, logprobs: ~torch.Tensor | None, custom_fields: ~typing.List[str] = <factory>) None
- tokens: Tensor
- rewards: Tensor
- advantages: Tensor | None
- returns: Tensor | None
- attention_masks: Tensor
- action_masks: Tensor | None
- prompt_length: int
- logprobs: Tensor | None
- custom_fields: List[str]
- property batch_size: int
Get the batch size.
- classmethod gather_experiences(experiences: list[Experience], pad_token_id: int = 0, custom_fields: List[CustomField] | None = None) Experiences [source]
Gather a batch of experiences from a list of experiences.
This method will automatically pad the tokens and logprobs of input experiences to the same length.
- Parameters:
experiences (list[Experience]) – A list of experiences to gather.
pad_token_id (int) – The token ID to use for padding. Default is 0.
custom_fields (Optional[List[CustomField]]) – Custom fields to include in the gathered experiences.
- trinity.common.experience.empty_experiences(custom_fields: List[CustomField] | None) Experiences [source]
- trinity.common.experience.gather_token_ids(experiences, max_prompt_length: int, max_response_length: int, pad_token_id: int) Tensor [source]
- trinity.common.experience.gather_action_masks(experiences, max_response_length: int) Tensor [source]
- trinity.common.experience.gather_attention_masks(experiences, max_prompt_length: int, max_response_length: int) Tensor [source]
trinity.common.verl_config module
- class trinity.common.verl_config.Data(train_batch_size: int = 1024)[source]
Bases:
object
- train_batch_size: int = 1024
- __init__(train_batch_size: int = 1024) None
- class trinity.common.verl_config.FusedKernelOptions(impl_backend: str | None = None)[source]
Bases:
object
- impl_backend: str | None = None
- __init__(impl_backend: str | None = None) None
- class trinity.common.verl_config.ActorModel(path: str = '', external_lib: Optional[str] = None, override_config: Dict[str, Any] = <factory>, enable_gradient_checkpointing: bool = True, use_remove_padding: bool = False, use_fused_kernels: bool = False, fused_kernel_options: trinity.common.verl_config.FusedKernelOptions = <factory>, custom_chat_template: Optional[str] = None)[source]
Bases:
object
- path: str = ''
- external_lib: str | None = None
- override_config: Dict[str, Any]
- enable_gradient_checkpointing: bool = True
- use_remove_padding: bool = False
- use_fused_kernels: bool = False
- fused_kernel_options: FusedKernelOptions
- custom_chat_template: str | None = None
- __init__(path: str = '', external_lib: str | None = None, override_config: ~typing.Dict[str, ~typing.Any] = <factory>, enable_gradient_checkpointing: bool = True, use_remove_padding: bool = False, use_fused_kernels: bool = False, fused_kernel_options: ~trinity.common.verl_config.FusedKernelOptions = <factory>, custom_chat_template: str | None = None) None
- class trinity.common.verl_config.Optim(lr: float = 1e-06, lr_warmup_steps: int = -1, lr_warmup_steps_ratio: float = 0.0, min_lr_ratio: Optional[float] = 0.0, warmup_style: str = 'constant', total_training_steps: int = -1, betas: List[float] = <factory>)[source]
Bases: object
- lr: float = 1e-06
- lr_warmup_steps: int = -1
- lr_warmup_steps_ratio: float = 0.0
- min_lr_ratio: float | None = 0.0
- warmup_style: str = 'constant'
- total_training_steps: int = -1
- betas: List[float]
- __init__(lr: float = 1e-06, lr_warmup_steps: int = -1, lr_warmup_steps_ratio: float = 0.0, min_lr_ratio: float | None = 0.0, warmup_style: str = 'constant', total_training_steps: int = -1, betas: List[float] = <factory>) → None
- class trinity.common.verl_config.WrapPolicy(min_num_params: int = 0)[source]
Bases: object
- min_num_params: int = 0
- __init__(min_num_params: int = 0) → None
- class trinity.common.verl_config.FSDPConfig(wrap_policy: trinity.common.verl_config.WrapPolicy = <factory>, min_num_params: int = 0, param_offload: bool = False, optimizer_offload: bool = False, fsdp_size: int = -1, forward_prefetch: bool = False)[source]
Bases: object
- wrap_policy: WrapPolicy
- min_num_params: int = 0
- param_offload: bool = False
- optimizer_offload: bool = False
- fsdp_size: int = -1
- forward_prefetch: bool = False
- __init__(wrap_policy: WrapPolicy = <factory>, min_num_params: int = 0, param_offload: bool = False, optimizer_offload: bool = False, fsdp_size: int = -1, forward_prefetch: bool = False) → None
- class trinity.common.verl_config.Checkpoint(load_contents: List[str] = <factory>, save_contents: List[str] = <factory>)[source]
Bases: object
- load_contents: List[str]
- save_contents: List[str]
- __init__(load_contents: List[str] = <factory>, save_contents: List[str] = <factory>) → None
- class trinity.common.verl_config.Actor(strategy: str = 'fsdp', ppo_mini_batch_size: int = 256, ppo_micro_batch_size: Optional[int] = None, ppo_micro_batch_size_per_gpu: int = 1, use_dynamic_bsz: bool = False, ppo_max_token_len_per_gpu: int = 16384, grad_clip: float = 1.0, ppo_epochs: int = 1, shuffle: bool = False, ulysses_sequence_parallel_size: int = 1, entropy_from_logits_with_chunking: bool = False, entropy_checkpointing: bool = False, checkpoint: trinity.common.verl_config.Checkpoint = <factory>, optim: trinity.common.verl_config.Optim = <factory>, fsdp_config: trinity.common.verl_config.FSDPConfig = <factory>, loss_agg_mode: str = 'token-mean', clip_ratio: float = 0.2, entropy_coeff: float = 0.001, use_kl_loss: bool = False, kl_loss_coef: float = 0.001, kl_loss_type: str = 'low_var_kl')[source]
Bases: object
- strategy: str = 'fsdp'
- ppo_mini_batch_size: int = 256
- ppo_micro_batch_size: int | None = None
- ppo_micro_batch_size_per_gpu: int = 1
- use_dynamic_bsz: bool = False
- ppo_max_token_len_per_gpu: int = 16384
- grad_clip: float = 1.0
- ppo_epochs: int = 1
- shuffle: bool = False
- ulysses_sequence_parallel_size: int = 1
- entropy_from_logits_with_chunking: bool = False
- entropy_checkpointing: bool = False
- checkpoint: Checkpoint
- optim: Optim
- fsdp_config: FSDPConfig
- loss_agg_mode: str = 'token-mean'
- clip_ratio: float = 0.2
- entropy_coeff: float = 0.001
- use_kl_loss: bool = False
- kl_loss_coef: float = 0.001
- kl_loss_type: str = 'low_var_kl'
- __init__(strategy: str = 'fsdp', ppo_mini_batch_size: int = 256, ppo_micro_batch_size: int | None = None, ppo_micro_batch_size_per_gpu: int = 1, use_dynamic_bsz: bool = False, ppo_max_token_len_per_gpu: int = 16384, grad_clip: float = 1.0, ppo_epochs: int = 1, shuffle: bool = False, ulysses_sequence_parallel_size: int = 1, entropy_from_logits_with_chunking: bool = False, entropy_checkpointing: bool = False, checkpoint: Checkpoint = <factory>, optim: Optim = <factory>, fsdp_config: FSDPConfig = <factory>, loss_agg_mode: str = 'token-mean', clip_ratio: float = 0.2, entropy_coeff: float = 0.001, use_kl_loss: bool = False, kl_loss_coef: float = 0.001, kl_loss_type: str = 'low_var_kl') → None
- class trinity.common.verl_config.Ref(fsdp_config: trinity.common.verl_config.FSDPConfig = <factory>, log_prob_micro_batch_size: Optional[int] = None, log_prob_micro_batch_size_per_gpu: int = 1, log_prob_use_dynamic_bsz: bool = False, log_prob_max_token_len_per_gpu: int = 0, ulysses_sequence_parallel_size: int = 1, entropy_from_logits_with_chunking: bool = False, entropy_checkpointing: bool = False, checkpoint: trinity.common.verl_config.Checkpoint = <factory>)[source]
Bases: object
- fsdp_config: FSDPConfig
- log_prob_micro_batch_size: int | None = None
- log_prob_micro_batch_size_per_gpu: int = 1
- log_prob_use_dynamic_bsz: bool = False
- log_prob_max_token_len_per_gpu: int = 0
- ulysses_sequence_parallel_size: int = 1
- entropy_from_logits_with_chunking: bool = False
- entropy_checkpointing: bool = False
- checkpoint: Checkpoint
- __init__(fsdp_config: FSDPConfig = <factory>, log_prob_micro_batch_size: int | None = None, log_prob_micro_batch_size_per_gpu: int = 1, log_prob_use_dynamic_bsz: bool = False, log_prob_max_token_len_per_gpu: int = 0, ulysses_sequence_parallel_size: int = 1, entropy_from_logits_with_chunking: bool = False, entropy_checkpointing: bool = False, checkpoint: Checkpoint = <factory>) → None
- class trinity.common.verl_config.Rollout(val_kwargs: trinity.common.verl_config._ValKwargs = <factory>, multi_turn: trinity.common.verl_config._MultiTurn = <factory>, temperature: float = 1.0, n: int = 1, log_prob_micro_batch_size: Optional[int] = None, log_prob_micro_batch_size_per_gpu: int = 1)[source]
Bases: object
- val_kwargs: _ValKwargs
- multi_turn: _MultiTurn
- temperature: float = 1.0
- n: int = 1
- log_prob_micro_batch_size: int | None = None
- log_prob_micro_batch_size_per_gpu: int = 1
- __init__(val_kwargs: _ValKwargs = <factory>, multi_turn: _MultiTurn = <factory>, temperature: float = 1.0, n: int = 1, log_prob_micro_batch_size: int | None = None, log_prob_micro_batch_size_per_gpu: int = 1) → None
- class trinity.common.verl_config.ActorRolloutRef(hybrid_engine: bool = True, model: trinity.common.verl_config.ActorModel = <factory>, actor: trinity.common.verl_config.Actor = <factory>, ref: trinity.common.verl_config.Ref = <factory>, rollout: trinity.common.verl_config.Rollout = <factory>, synchronizer: Optional[trinity.common.config.SynchronizerConfig] = None, explorer_name: str = 'explorer')[source]
Bases: object
- hybrid_engine: bool = True
- model: ActorModel
- actor: Actor
- ref: Ref
- rollout: Rollout
- synchronizer: SynchronizerConfig | None = None
- explorer_name: str = 'explorer'
- __init__(hybrid_engine: bool = True, model: ActorModel = <factory>, actor: Actor = <factory>, ref: Ref = <factory>, rollout: Rollout = <factory>, synchronizer: SynchronizerConfig | None = None, explorer_name: str = 'explorer') → None
- class trinity.common.verl_config.CriticModel(path: str = '', tokenizer_path: str = '', override_config: Dict[str, str] = <factory>, external_lib: Optional[str] = None, enable_gradient_checkpointing: bool = True, use_remove_padding: bool = False, fsdp_config: trinity.common.verl_config.FSDPConfig = <factory>)[source]
Bases: object
- path: str = ''
- tokenizer_path: str = ''
- override_config: Dict[str, str]
- external_lib: str | None = None
- enable_gradient_checkpointing: bool = True
- use_remove_padding: bool = False
- fsdp_config: FSDPConfig
- __init__(path: str = '', tokenizer_path: str = '', override_config: Dict[str, str] = <factory>, external_lib: str | None = None, enable_gradient_checkpointing: bool = True, use_remove_padding: bool = False, fsdp_config: FSDPConfig = <factory>) → None
- class trinity.common.verl_config.Critic(strategy: str = 'fsdp', optim: trinity.common.verl_config.Optim = <factory>, model: trinity.common.verl_config.CriticModel = <factory>, ppo_mini_batch_size: int = 0, ppo_micro_batch_size: Optional[int] = None, ppo_micro_batch_size_per_gpu: int = 1, forward_micro_batch_size: Optional[int] = None, forward_micro_batch_size_per_gpu: Optional[int] = None, use_dynamic_bsz: bool = False, ppo_max_token_len_per_gpu: int = 0, forward_max_token_len_per_gpu: int = 0, ulysses_sequence_parallel_size: int = 1, ppo_epochs: int = 0, shuffle: bool = False, grad_clip: float = 0.0, cliprange_value: float = 0.0, checkpoint: trinity.common.verl_config.Checkpoint = <factory>, rollout_n: int = 1, loss_agg_mode: str = 'token-mean')[source]
Bases: object
- strategy: str = 'fsdp'
- optim: Optim
- model: CriticModel
- ppo_mini_batch_size: int = 0
- ppo_micro_batch_size: int | None = None
- ppo_micro_batch_size_per_gpu: int = 1
- forward_micro_batch_size: int | None = None
- forward_micro_batch_size_per_gpu: int | None = None
- use_dynamic_bsz: bool = False
- ppo_max_token_len_per_gpu: int = 0
- forward_max_token_len_per_gpu: int = 0
- ulysses_sequence_parallel_size: int = 1
- ppo_epochs: int = 0
- shuffle: bool = False
- grad_clip: float = 0.0
- cliprange_value: float = 0.0
- checkpoint: Checkpoint
- rollout_n: int = 1
- loss_agg_mode: str = 'token-mean'
- __init__(strategy: str = 'fsdp', optim: Optim = <factory>, model: CriticModel = <factory>, ppo_mini_batch_size: int = 0, ppo_micro_batch_size: int | None = None, ppo_micro_batch_size_per_gpu: int = 1, forward_micro_batch_size: int | None = None, forward_micro_batch_size_per_gpu: int | None = None, use_dynamic_bsz: bool = False, ppo_max_token_len_per_gpu: int = 0, forward_max_token_len_per_gpu: int = 0, ulysses_sequence_parallel_size: int = 1, ppo_epochs: int = 0, shuffle: bool = False, grad_clip: float = 0.0, cliprange_value: float = 0.0, checkpoint: Checkpoint = <factory>, rollout_n: int = 1, loss_agg_mode: str = 'token-mean') → None
- class trinity.common.verl_config.RewardModel(enable: bool = False, strategy: str = 'fsdp', model: trinity.common.verl_config._RewardModel = <factory>, micro_batch_size_per_gpu: int = 1, max_length: Optional[int] = None, ulysses_sequence_parallel_size: int = 1, use_dynamic_bsz: bool = False, forward_max_token_len_per_gpu: int = 0, reward_manager: str = 'naive')[source]
Bases: object
- enable: bool = False
- strategy: str = 'fsdp'
- model: _RewardModel
- micro_batch_size_per_gpu: int = 1
- max_length: int | None = None
- ulysses_sequence_parallel_size: int = 1
- use_dynamic_bsz: bool = False
- forward_max_token_len_per_gpu: int = 0
- reward_manager: str = 'naive'
- __init__(enable: bool = False, strategy: str = 'fsdp', model: _RewardModel = <factory>, micro_batch_size_per_gpu: int = 1, max_length: int | None = None, ulysses_sequence_parallel_size: int = 1, use_dynamic_bsz: bool = False, forward_max_token_len_per_gpu: int = 0, reward_manager: str = 'naive') → None
- class trinity.common.verl_config.CustomRewardFunction(path: str | None = None, name: str = 'compute_score')[source]
Bases: object
- path: str | None = None
- name: str = 'compute_score'
- __init__(path: str | None = None, name: str = 'compute_score') → None
- class trinity.common.verl_config.KL_Ctrl(type: str = 'fixed', kl_coef: float = 0.001, horizon: float = 10000, target_kl: float = 0.1)[source]
Bases: object
- type: str = 'fixed'
- kl_coef: float = 0.001
- horizon: float = 10000
- target_kl: float = 0.1
- __init__(type: str = 'fixed', kl_coef: float = 0.001, horizon: float = 10000, target_kl: float = 0.1) → None
- class trinity.common.verl_config.Algorithm(gamma: float = 1.0, lam: float = 1.0, adv_estimator: str = 'gae', norm_adv_by_std_in_grpo: bool = True, use_kl_in_reward: bool = False, kl_penalty: str = 'kl', kl_ctrl: trinity.common.verl_config.KL_Ctrl = <factory>)[source]
Bases: object
- gamma: float = 1.0
- lam: float = 1.0
- adv_estimator: str = 'gae'
- norm_adv_by_std_in_grpo: bool = True
- use_kl_in_reward: bool = False
- kl_penalty: str = 'kl'
- kl_ctrl: KL_Ctrl
- __init__(gamma: float = 1.0, lam: float = 1.0, adv_estimator: str = 'gae', norm_adv_by_std_in_grpo: bool = True, use_kl_in_reward: bool = False, kl_penalty: str = 'kl', kl_ctrl: KL_Ctrl = <factory>) → None
- class trinity.common.verl_config.Trainer(balance_batch: bool = True, total_epochs: int = 30, total_training_steps: Optional[int] = None, project_name: str = '', group_name: str = '', experiment_name: str = '', logger: List[str] = <factory>, val_generations_to_log_to_wandb: int = 0, nnodes: int = 0, n_gpus_per_node: int = 0, save_freq: int = 0, resume_mode: str = 'auto', resume_from_path: str = '', test_freq: int = 0, critic_warmup: int = 0, default_hdfs_dir: Optional[str] = None, remove_previous_ckpt_in_save: bool = False, del_local_ckpt_after_load: bool = False, default_local_dir: str = '', val_before_train: bool = False, training_rollout_mode: str = 'parallel', enable_exp_buffer: bool = True, sync_freq: int = 0, sft_warmup_steps: int = 0, max_actor_ckpt_to_keep: Optional[int] = None, max_critic_ckpt_to_keep: Optional[int] = None)[source]
Bases: object
- balance_batch: bool = True
- total_epochs: int = 30
- total_training_steps: int | None = None
- project_name: str = ''
- group_name: str = ''
- experiment_name: str = ''
- logger: List[str]
- val_generations_to_log_to_wandb: int = 0
- nnodes: int = 0
- n_gpus_per_node: int = 0
- save_freq: int = 0
- resume_mode: str = 'auto'
- resume_from_path: str = ''
- test_freq: int = 0
- critic_warmup: int = 0
- default_hdfs_dir: str | None = None
- remove_previous_ckpt_in_save: bool = False
- del_local_ckpt_after_load: bool = False
- default_local_dir: str = ''
- val_before_train: bool = False
- training_rollout_mode: str = 'parallel'
- enable_exp_buffer: bool = True
- sync_freq: int = 0
- sft_warmup_steps: int = 0
- max_actor_ckpt_to_keep: int | None = None
- max_critic_ckpt_to_keep: int | None = None
- __init__(balance_batch: bool = True, total_epochs: int = 30, total_training_steps: int | None = None, project_name: str = '', group_name: str = '', experiment_name: str = '', logger: List[str] = <factory>, val_generations_to_log_to_wandb: int = 0, nnodes: int = 0, n_gpus_per_node: int = 0, save_freq: int = 0, resume_mode: str = 'auto', resume_from_path: str = '', test_freq: int = 0, critic_warmup: int = 0, default_hdfs_dir: str | None = None, remove_previous_ckpt_in_save: bool = False, del_local_ckpt_after_load: bool = False, default_local_dir: str = '', val_before_train: bool = False, training_rollout_mode: str = 'parallel', enable_exp_buffer: bool = True, sync_freq: int = 0, sft_warmup_steps: int = 0, max_actor_ckpt_to_keep: int | None = None, max_critic_ckpt_to_keep: int | None = None) → None
- class trinity.common.verl_config.veRLConfig(data: trinity.common.verl_config.Data = <factory>, actor_rollout_ref: trinity.common.verl_config.ActorRolloutRef = <factory>, critic: trinity.common.verl_config.Critic = <factory>, reward_model: trinity.common.verl_config.RewardModel = <factory>, custom_reward_function: trinity.common.verl_config.CustomRewardFunction = <factory>, algorithm: trinity.common.verl_config.Algorithm = <factory>, trainer: trinity.common.verl_config.Trainer = <factory>, buffer: trinity.common.config.BufferConfig = <factory>, synchronizer: Optional[trinity.common.config.SynchronizerConfig] = None, enable_preview: bool = True)[source]
Bases: object
- data: Data
- actor_rollout_ref: ActorRolloutRef
- critic: Critic
- reward_model: RewardModel
- custom_reward_function: CustomRewardFunction
- algorithm: Algorithm
- trainer: Trainer
- buffer: BufferConfig
- synchronizer: SynchronizerConfig | None = None
- enable_preview: bool = True
- __init__(data: Data = <factory>, actor_rollout_ref: ActorRolloutRef = <factory>, critic: Critic = <factory>, reward_model: RewardModel = <factory>, custom_reward_function: CustomRewardFunction = <factory>, algorithm: Algorithm = <factory>, trainer: Trainer = <factory>, buffer: BufferConfig = <factory>, synchronizer: SynchronizerConfig | None = None, enable_preview: bool = True) → None
- trinity.common.verl_config.load_config(config_path: str) → veRLConfig [source]