trinity.common

Subpackages

Submodules

trinity.common.config module

Configs for RFT.

class trinity.common.config.FormatConfig(prompt_type: PromptType = PromptType.MESSAGES, prompt_key: str = 'prompt', response_key: str = 'response', messages_key: str = 'message', chat_template: str = '', system_prompt: str | None = None, reply_prefix: str | None = None, reward_fn_key: str = '', workflow_key: str = '', solution_key: str = 'solution', reward_key: str = 'reward', chosen_key: str = 'chosen', rejected_key: str = 'rejected', label_key: str = '')[source]

Bases: object

Configuration for data formatting

prompt_type: PromptType = 'messages'
prompt_key: str = 'prompt'
response_key: str = 'response'
messages_key: str = 'message'
chat_template: str = ''
system_prompt: str | None = None
reply_prefix: str | None = None
reward_fn_key: str = ''
workflow_key: str = ''
solution_key: str = 'solution'
reward_key: str = 'reward'
chosen_key: str = 'chosen'
rejected_key: str = 'rejected'
label_key: str = ''
__init__(prompt_type: PromptType = PromptType.MESSAGES, prompt_key: str = 'prompt', response_key: str = 'response', messages_key: str = 'message', chat_template: str = '', system_prompt: str | None = None, reply_prefix: str | None = None, reward_fn_key: str = '', workflow_key: str = '', solution_key: str = 'solution', reward_key: str = 'reward', chosen_key: str = 'chosen', rejected_key: str = 'rejected', label_key: str = '') None
class trinity.common.config.GenerationConfig(temperature: float = 1.0, top_p: float = 1.0, top_k: int = -1, logprobs: int = 0, n: int = 1)[source]

Bases: object

temperature: float = 1.0
top_p: float = 1.0
top_k: int = -1
logprobs: int = 0
n: int = 1
__init__(temperature: float = 1.0, top_p: float = 1.0, top_k: int = -1, logprobs: int = 0, n: int = 1) None
class trinity.common.config.StorageConfig(name: str = '', storage_type: ~trinity.common.constants.StorageType = StorageType.FILE, path: str | None = None, repeat_times: int | None = None, raw: bool = False, split: str = 'train', subset_name: str | None = None, format: ~trinity.common.config.FormatConfig = <factory>, index: int = 0, wrap_in_ray: bool = True, capacity: int = 10000, max_read_timeout: float = 1800, use_priority_queue: bool = False, reuse_cooldown_time: float | None = None, replay_buffer_kwargs: dict = <factory>, default_workflow_type: str | None = None, default_eval_workflow_type: str | None = None, default_reward_fn_type: str | None = None, rollout_args: ~trinity.common.config.GenerationConfig = <factory>, workflow_args: dict = <factory>, reward_fn_args: dict = <factory>, enable_progress_bar: bool | None = False, ray_namespace: str | None = None, algorithm_type: str | None = None, total_epochs: int = 1, total_steps: int | None = None, task_type: ~trinity.common.constants.TaskType = TaskType.EXPLORE)[source]

Bases: object

Storage config.

name: str = ''
storage_type: StorageType = 'file'
path: str | None = None
repeat_times: int | None = None
raw: bool = False
split: str = 'train'
subset_name: str | None = None
format: FormatConfig
index: int = 0
wrap_in_ray: bool = True
capacity: int = 10000
max_read_timeout: float = 1800
use_priority_queue: bool = False
reuse_cooldown_time: float | None = None
replay_buffer_kwargs: dict
default_workflow_type: str | None = None
default_eval_workflow_type: str | None = None
default_reward_fn_type: str | None = None
rollout_args: GenerationConfig
workflow_args: dict
reward_fn_args: dict
enable_progress_bar: bool | None = False
ray_namespace: str | None = None
algorithm_type: str | None = None
total_epochs: int = 1
total_steps: int | None = None
task_type: TaskType = 0
__init__(name: str = '', storage_type: ~trinity.common.constants.StorageType = StorageType.FILE, path: str | None = None, repeat_times: int | None = None, raw: bool = False, split: str = 'train', subset_name: str | None = None, format: ~trinity.common.config.FormatConfig = <factory>, index: int = 0, wrap_in_ray: bool = True, capacity: int = 10000, max_read_timeout: float = 1800, use_priority_queue: bool = False, reuse_cooldown_time: float | None = None, replay_buffer_kwargs: dict = <factory>, default_workflow_type: str | None = None, default_eval_workflow_type: str | None = None, default_reward_fn_type: str | None = None, rollout_args: ~trinity.common.config.GenerationConfig = <factory>, workflow_args: dict = <factory>, reward_fn_args: dict = <factory>, enable_progress_bar: bool | None = False, ray_namespace: str | None = None, algorithm_type: str | None = None, total_epochs: int = 1, total_steps: int | None = None, task_type: ~trinity.common.constants.TaskType = TaskType.EXPLORE) None
class trinity.common.config.RewardShapingConfig(stats_key: str = '', op_type: OpType = OpType.ADD, weight: float = 1.0)[source]

Bases: object

Config for reward shaping.

stats_key: str = ''
op_type: OpType = 'add'
weight: float = 1.0
__init__(stats_key: str = '', op_type: OpType = OpType.ADD, weight: float = 1.0) None
class trinity.common.config.DataPipelineConfig(input_buffers: ~typing.List[~trinity.common.config.StorageConfig] = <factory>, output_buffer: ~trinity.common.config.StorageConfig = <factory>, format: ~trinity.common.config.FormatConfig = <factory>, dj_config_path: str | None = None, dj_process_desc: str | None = None, agent_model_name: str | None = None, clean_strategy: str = 'iterative', min_size_ratio: float | None = None, min_priority_score: float | None = 0.0, priority_weights: ~typing.Dict[str, float] | None = None, data_dist: str | None = 'gaussian', reward_shaping: ~typing.List[~trinity.common.config.RewardShapingConfig] | None = <factory>)[source]

Bases: object

Config for data pipeline.

input_buffers: List[StorageConfig]
output_buffer: StorageConfig
format: FormatConfig
dj_config_path: str | None = None
dj_process_desc: str | None = None
agent_model_name: str | None = None
clean_strategy: str = 'iterative'
min_size_ratio: float | None = None
min_priority_score: float | None = 0.0
priority_weights: Dict[str, float] | None = None
data_dist: str | None = 'gaussian'
reward_shaping: List[RewardShapingConfig] | None
__init__(input_buffers: ~typing.List[~trinity.common.config.StorageConfig] = <factory>, output_buffer: ~trinity.common.config.StorageConfig = <factory>, format: ~trinity.common.config.FormatConfig = <factory>, dj_config_path: str | None = None, dj_process_desc: str | None = None, agent_model_name: str | None = None, clean_strategy: str = 'iterative', min_size_ratio: float | None = None, min_priority_score: float | None = 0.0, priority_weights: ~typing.Dict[str, float] | None = None, data_dist: str | None = 'gaussian', reward_shaping: ~typing.List[~trinity.common.config.RewardShapingConfig] | None = <factory>) None
class trinity.common.config.DataProcessorConfig(data_processor_url: str | None = None, task_pipeline: DataPipelineConfig | None = None, experience_pipeline: DataPipelineConfig | None = None)[source]

Bases: object

Data-Juicer config

data_processor_url: str | None = None
task_pipeline: DataPipelineConfig | None = None
experience_pipeline: DataPipelineConfig | None = None
__init__(data_processor_url: str | None = None, task_pipeline: DataPipelineConfig | None = None, experience_pipeline: DataPipelineConfig | None = None) None
class trinity.common.config.ModelConfig(model_path: str = '', critic_model_path: str = '', max_model_len: int | None = None, max_prompt_tokens: int | None = None, max_response_tokens: int | None = None, custom_chat_template: str | None = None)[source]

Bases: object

model_path: str = ''
critic_model_path: str = ''
max_model_len: int | None = None
max_prompt_tokens: int | None = None
max_response_tokens: int | None = None
custom_chat_template: str | None = None
__init__(model_path: str = '', critic_model_path: str = '', max_model_len: int | None = None, max_prompt_tokens: int | None = None, max_response_tokens: int | None = None, custom_chat_template: str | None = None) None
class trinity.common.config.InferenceModelConfig(model_path: str = '', engine_type: str = 'vllm_async', engine_num: int = 1, tensor_parallel_size: int = 1, use_v1: bool = True, enforce_eager: bool = True, enable_prefix_caching: bool = False, enable_chunked_prefill: bool = False, gpu_memory_utilization: float = 0.9, dtype: str = 'bfloat16', seed: int = 42, max_model_len: int | None = None, max_prompt_tokens: int | None = None, max_response_tokens: int | None = None, chat_template: str | None = None, enable_thinking: bool = False, enable_history: bool = False, enable_openai_api: bool = False, enable_auto_tool_choice: bool = False, tool_call_parser: str | None = None, reasoning_parser: str | None = None, bundle_indices: str = '')[source]

Bases: object

model_path: str = ''
engine_type: str = 'vllm_async'
engine_num: int = 1
tensor_parallel_size: int = 1
use_v1: bool = True
enforce_eager: bool = True
enable_prefix_caching: bool = False
enable_chunked_prefill: bool = False
gpu_memory_utilization: float = 0.9
dtype: str = 'bfloat16'
seed: int = 42
max_model_len: int | None = None
max_prompt_tokens: int | None = None
max_response_tokens: int | None = None
chat_template: str | None = None
enable_thinking: bool = False
enable_history: bool = False
enable_openai_api: bool = False
enable_auto_tool_choice: bool = False
tool_call_parser: str | None = None
reasoning_parser: str | None = None
bundle_indices: str = ''
__init__(model_path: str = '', engine_type: str = 'vllm_async', engine_num: int = 1, tensor_parallel_size: int = 1, use_v1: bool = True, enforce_eager: bool = True, enable_prefix_caching: bool = False, enable_chunked_prefill: bool = False, gpu_memory_utilization: float = 0.9, dtype: str = 'bfloat16', seed: int = 42, max_model_len: int | None = None, max_prompt_tokens: int | None = None, max_response_tokens: int | None = None, chat_template: str | None = None, enable_thinking: bool = False, enable_history: bool = False, enable_openai_api: bool = False, enable_auto_tool_choice: bool = False, tool_call_parser: str | None = None, reasoning_parser: str | None = None, bundle_indices: str = '') None
class trinity.common.config.AlgorithmConfig(algorithm_type: str = 'ppo', repeat_times: int = 1, add_strategy: str | None = None, add_strategy_args: dict | None = None, sample_strategy: str | None = None, sample_strategy_args: dict | None = None, advantage_fn: str | None = None, advantage_fn_args: dict | None = None, kl_penalty_fn: str | None = None, kl_penalty_fn_args: dict | None = None, policy_loss_fn: str | None = None, policy_loss_fn_args: dict | None = None, kl_loss_fn: str | None = None, kl_loss_fn_args: dict | None = None, entropy_loss_fn: str | None = None, entropy_loss_fn_args: dict | None = None, use_token_level_loss: bool = True)[source]

Bases: object

Config for algorithm.

algorithm_type: str = 'ppo'
repeat_times: int = 1
add_strategy: str | None = None
add_strategy_args: dict | None = None
sample_strategy: str | None = None
sample_strategy_args: dict | None = None
advantage_fn: str | None = None
advantage_fn_args: dict | None = None
kl_penalty_fn: str | None = None
kl_penalty_fn_args: dict | None = None
policy_loss_fn: str | None = None
policy_loss_fn_args: dict | None = None
kl_loss_fn: str | None = None
kl_loss_fn_args: dict | None = None
entropy_loss_fn: str | None = None
entropy_loss_fn_args: dict | None = None
use_token_level_loss: bool = True
__init__(algorithm_type: str = 'ppo', repeat_times: int = 1, add_strategy: str | None = None, add_strategy_args: dict | None = None, sample_strategy: str | None = None, sample_strategy_args: dict | None = None, advantage_fn: str | None = None, advantage_fn_args: dict | None = None, kl_penalty_fn: str | None = None, kl_penalty_fn_args: dict | None = None, policy_loss_fn: str | None = None, policy_loss_fn_args: dict | None = None, kl_loss_fn: str | None = None, kl_loss_fn_args: dict | None = None, entropy_loss_fn: str | None = None, entropy_loss_fn_args: dict | None = None, use_token_level_loss: bool = True) None
class trinity.common.config.ClusterConfig(node_num: int = 1, gpu_per_node: int = 8)[source]

Bases: object

Config for the cluster.

node_num: int = 1
gpu_per_node: int = 8
__init__(node_num: int = 1, gpu_per_node: int = 8) None
class trinity.common.config.ExplorerInput(taskset: ~trinity.common.config.StorageConfig = <factory>, eval_tasksets: ~typing.List[~trinity.common.config.StorageConfig] = <factory>, default_workflow_type: str | None = None, default_eval_workflow_type: str | None = None, default_reward_fn_type: str | None = None, system_prompt: str | None = None, reply_prefix: str | None = None)[source]

Bases: object

Config for explorer input.

taskset: StorageConfig
eval_tasksets: List[StorageConfig]
default_workflow_type: str | None = None
default_eval_workflow_type: str | None = None
default_reward_fn_type: str | None = None
system_prompt: str | None = None
reply_prefix: str | None = None
__init__(taskset: ~trinity.common.config.StorageConfig = <factory>, eval_tasksets: ~typing.List[~trinity.common.config.StorageConfig] = <factory>, default_workflow_type: str | None = None, default_eval_workflow_type: str | None = None, default_reward_fn_type: str | None = None, system_prompt: str | None = None, reply_prefix: str | None = None) None
class trinity.common.config.TrainerInput(experience_buffer: StorageConfig | None = None, sft_warmup_dataset: StorageConfig | None = None, read_experience_strategy: ReadStrategy | None = None, sft_warmup_steps: int = 0)[source]

Bases: object

Config for trainer input.

experience_buffer: StorageConfig | None = None
sft_warmup_dataset: StorageConfig | None = None
read_experience_strategy: ReadStrategy | None = None
sft_warmup_steps: int = 0
__init__(experience_buffer: StorageConfig | None = None, sft_warmup_dataset: StorageConfig | None = None, read_experience_strategy: ReadStrategy | None = None, sft_warmup_steps: int = 0) None
class trinity.common.config.BufferConfig(batch_size: int = 1, train_batch_size: int = 0, total_epochs: int = 1, total_steps: int | None = None, explorer_input: ~trinity.common.config.ExplorerInput = <factory>, explorer_output: ~trinity.common.config.StorageConfig | None = None, trainer_input: ~trinity.common.config.TrainerInput = <factory>, max_retry_times: int = 3, max_retry_interval: int = 1, tokenizer_path: str | None = None, pad_token_id: int | None = None, cache_dir: str | None = None)[source]

Bases: object

Config for buffer.

batch_size: int = 1
train_batch_size: int = 0
total_epochs: int = 1
total_steps: int | None = None
explorer_input: ExplorerInput
explorer_output: StorageConfig | None = None
trainer_input: TrainerInput
max_retry_times: int = 3
max_retry_interval: int = 1
tokenizer_path: str | None = None
pad_token_id: int | None = None
cache_dir: str | None = None
__init__(batch_size: int = 1, train_batch_size: int = 0, total_epochs: int = 1, total_steps: int | None = None, explorer_input: ~trinity.common.config.ExplorerInput = <factory>, explorer_output: ~trinity.common.config.StorageConfig | None = None, trainer_input: ~trinity.common.config.TrainerInput = <factory>, max_retry_times: int = 3, max_retry_interval: int = 1, tokenizer_path: str | None = None, pad_token_id: int | None = None, cache_dir: str | None = None) None
class trinity.common.config.ExplorerConfig(name: str = 'explorer', runner_per_model: int = 8, max_timeout: int = 1800, max_retry_times: int = 2, env_vars: dict = <factory>, max_repeat_times_per_runner: int | None = None, runner_num: int | None = None, rollout_model: ~trinity.common.config.InferenceModelConfig = <factory>, auxiliary_models: ~typing.List[~trinity.common.config.InferenceModelConfig] = <factory>, eval_interval: int = 100, eval_on_startup: bool = True, bench_on_latest_checkpoint: bool = False, collect_experiences: bool = False)[source]

Bases: object

Config for explorer.

name: str = 'explorer'
runner_per_model: int = 8
max_timeout: int = 1800
max_retry_times: int = 2
env_vars: dict
max_repeat_times_per_runner: int | None = None
runner_num: int | None = None
rollout_model: InferenceModelConfig
auxiliary_models: List[InferenceModelConfig]
eval_interval: int = 100
eval_on_startup: bool = True
bench_on_latest_checkpoint: bool = False
collect_experiences: bool = False
__init__(name: str = 'explorer', runner_per_model: int = 8, max_timeout: int = 1800, max_retry_times: int = 2, env_vars: dict = <factory>, max_repeat_times_per_runner: int | None = None, runner_num: int | None = None, rollout_model: ~trinity.common.config.InferenceModelConfig = <factory>, auxiliary_models: ~typing.List[~trinity.common.config.InferenceModelConfig] = <factory>, eval_interval: int = 100, eval_on_startup: bool = True, bench_on_latest_checkpoint: bool = False, collect_experiences: bool = False) None
class trinity.common.config.TrainerConfig(name: str = 'trainer', trainer_type: str = 'verl', save_interval: int = 0, enable_preview: bool = True, actor_grad_clip: Optional[float] = None, trainer_config: Any = <factory>, trainer_config_path: str = '')[source]

Bases: object

name: str = 'trainer'
trainer_type: str = 'verl'
save_interval: int = 0
enable_preview: bool = True
actor_grad_clip: float | None = None
trainer_config: Any
trainer_config_path: str = ''
__init__(name: str = 'trainer', trainer_type: str = 'verl', save_interval: int = 0, enable_preview: bool = True, actor_grad_clip: float | None = None, trainer_config: ~typing.Any = <factory>, trainer_config_path: str = '') None
class trinity.common.config.MonitorConfig(monitor_type: str = 'tensorboard', monitor_args: Dict | None = None, enable_ray_timeline: bool = False, cache_dir: str = '')[source]

Bases: object

monitor_type: str = 'tensorboard'
monitor_args: Dict | None = None
enable_ray_timeline: bool = False
cache_dir: str = ''
__init__(monitor_type: str = 'tensorboard', monitor_args: Dict | None = None, enable_ray_timeline: bool = False, cache_dir: str = '') None
class trinity.common.config.SynchronizerConfig(sync_method: SyncMethod = SyncMethod.NCCL, sync_style: SyncStyle = SyncStyle.FIXED, sync_interval: int = 1, sync_offset: int = 0, sync_timeout: int = 3600, wait_for_checkpoint: bool = False, explorer_world_size: int | None = None, ray_namespace: str = '')[source]

Bases: object

Configs for model weight synchronization.

sync_method: SyncMethod = 'nccl'
sync_style: SyncStyle = 'fixed'
sync_interval: int = 1
sync_offset: int = 0
sync_timeout: int = 3600
wait_for_checkpoint: bool = False
explorer_world_size: int | None = None
ray_namespace: str = ''
__init__(sync_method: SyncMethod = SyncMethod.NCCL, sync_style: SyncStyle = SyncStyle.FIXED, sync_interval: int = 1, sync_offset: int = 0, sync_timeout: int = 3600, wait_for_checkpoint: bool = False, explorer_world_size: int | None = None, ray_namespace: str = '') None
class trinity.common.config.Config(mode: str = 'both', project: str = 'Trinity-RFT', group: str = '', name: str = 'rft', checkpoint_root_dir: str = '', checkpoint_job_dir: str = '', ray_namespace: str = '', continue_from_checkpoint: bool = True, algorithm: ~trinity.common.config.AlgorithmConfig = <factory>, data_processor: ~trinity.common.config.DataProcessorConfig = <factory>, model: ~trinity.common.config.ModelConfig = <factory>, cluster: ~trinity.common.config.ClusterConfig = <factory>, buffer: ~trinity.common.config.BufferConfig = <factory>, explorer: ~trinity.common.config.ExplorerConfig = <factory>, trainer: ~trinity.common.config.TrainerConfig = <factory>, monitor: ~trinity.common.config.MonitorConfig = <factory>, synchronizer: ~trinity.common.config.SynchronizerConfig = <factory>)[source]

Bases: object

Global Configuration

mode: str = 'both'
project: str = 'Trinity-RFT'
group: str = ''
name: str = 'rft'
checkpoint_root_dir: str = ''
checkpoint_job_dir: str = ''
ray_namespace: str = ''
continue_from_checkpoint: bool = True
algorithm: AlgorithmConfig
data_processor: DataProcessorConfig
model: ModelConfig
cluster: ClusterConfig
buffer: BufferConfig
explorer: ExplorerConfig
trainer: TrainerConfig
monitor: MonitorConfig
synchronizer: SynchronizerConfig
save(config_path: str) None[source]

Save config to file.

check_and_update() None[source]

Check and update the config.

flatten() Dict[str, Any][source]

Flatten the config into a single-level dict with dot-separated keys for nested fields.

__init__(mode: str = 'both', project: str = 'Trinity-RFT', group: str = '', name: str = 'rft', checkpoint_root_dir: str = '', checkpoint_job_dir: str = '', ray_namespace: str = '', continue_from_checkpoint: bool = True, algorithm: ~trinity.common.config.AlgorithmConfig = <factory>, data_processor: ~trinity.common.config.DataProcessorConfig = <factory>, model: ~trinity.common.config.ModelConfig = <factory>, cluster: ~trinity.common.config.ClusterConfig = <factory>, buffer: ~trinity.common.config.BufferConfig = <factory>, explorer: ~trinity.common.config.ExplorerConfig = <factory>, trainer: ~trinity.common.config.TrainerConfig = <factory>, monitor: ~trinity.common.config.MonitorConfig = <factory>, synchronizer: ~trinity.common.config.SynchronizerConfig = <factory>) None
trinity.common.config.load_config(config_path: str) Config[source]

Load the configuration from the given path.

trinity.common.constants module

Constants.

class trinity.common.constants.CaseInsensitiveEnumMeta(cls, bases, classdict, **kwds)[source]

Bases: EnumMeta

class trinity.common.constants.CaseInsensitiveEnum(value, *args, **kwargs)[source]

Bases: Enum

An enumeration.

class trinity.common.constants.PromptType(value, *args, **kwargs)[source]

Bases: CaseInsensitiveEnum

Prompt Type.

MESSAGES = 'messages'
CHATPAIR = 'chatpair'
PLAINTEXT = 'plaintext'
class trinity.common.constants.TaskType(value)[source]

Bases: Enum

Task Type.

EXPLORE = 0
EVAL = 1
class trinity.common.constants.ReadStrategy(value, *args, **kwargs)[source]

Bases: CaseInsensitiveEnum

Read Strategy.

DEFAULT = None
FIFO = 'fifo'
RANDOM = 'random'
LRU = 'lru'
LFU = 'lfu'
PRIORITY = 'priority'
class trinity.common.constants.StorageType(value, *args, **kwargs)[source]

Bases: CaseInsensitiveEnum

Storage Type.

SQL = 'sql'
QUEUE = 'queue'
FILE = 'file'
class trinity.common.constants.MonitorType(value, *args, **kwargs)[source]

Bases: CaseInsensitiveEnum

Monitor Type.

WANDB = 'wandb'
TENSORBOARD = 'tensorboard'
class trinity.common.constants.SyncMethodEnumMeta(cls, bases, classdict, **kwds)[source]

Bases: CaseInsensitiveEnumMeta

class trinity.common.constants.SyncMethod(value, *args, **kwargs)[source]

Bases: CaseInsensitiveEnum

Sync Method.

NCCL = 'nccl'
CHECKPOINT = 'checkpoint'
MEMORY = 'memory'
class trinity.common.constants.RunningStatus(value)[source]

Bases: Enum

Running status of explorer and trainer.

RUNNING = 'running'
REQUIRE_SYNC = 'require_sync'
WAITING_SYNC = 'waiting_sync'
STOPPED = 'stopped'
class trinity.common.constants.DataProcessorPipelineType(value)[source]

Bases: Enum

Data processor pipeline type.

EXPERIENCE = 'experience_pipeline'
TASK = 'task_pipeline'
class trinity.common.constants.OpType(value)[source]

Bases: Enum

Operator type for reward shaping.

ADD = 'add'
SUB = 'sub'
MUL = 'mul'
DIV = 'div'
class trinity.common.constants.SyncStyle(value, *args, **kwargs)[source]

Bases: CaseInsensitiveEnum

An enumeration.

FIXED = 'fixed'
DYNAMIC_BY_TRAINER = 'dynamic_by_trainer'
DYNAMIC_BY_EXPLORER = 'dynamic_by_explorer'

trinity.common.experience module

Experience Class.

class trinity.common.experience.EID(batch: int = 0, task: int = 0, run: int = 0, step: int = 0, suffix: str = <factory>)[source]

Bases: object

Experience ID class to uniquely identify an experience.

To enable the full functionality of experience grouping, users should manually set the run and step fields in custom workflows.

batch: int = 0
task: int = 0
run: int = 0
step: int = 0
suffix: str
property uid: str

A unique identifier for the experience.

property sid: str

Step ID of the experience.

For example, experiences generated by all runs of the same task at the same step will have the same sid.

property rid: str

Run ID of the experience.

For example, experiences generated by one run of a task at all steps will have the same rid.

property tid: str

Task ID for the experience.

For example, experiences generated by all runs of the same task in GRPO-like algorithms will have the same tid.

to_dict() dict[source]

Convert the EID to a dictionary.

__init__(batch: int = 0, task: int = 0, run: int = 0, step: int = 0, suffix: str = <factory>) None
class trinity.common.experience.ExperienceType(value)[source]

Bases: Enum

Enum for experience types.

SINGLE_TURN = 'single_turn'
MULTI_TURN = 'multi_turn'
DPO = 'dpo'
class trinity.common.experience.CustomField(source_field: str, destination_field: str, data_type: dtype)[source]

Bases: object

Custom field for Experiences.

This is used to store additional information into the Experiences class.

source_field: str
destination_field: str
data_type: dtype
__init__(source_field: str, destination_field: str, data_type: dtype) None
class trinity.common.experience.Experience(*, eid=None, tokens, logprobs=None, reward=None, advantages=None, returns=None, info=None, metrics=None, prompt_length=1, response_text=None, prompt_text=None, action_mask=None, messages=None, chosen=None, rejected=None, chosen_text=None, rejected_text=None)[source]

Bases: object

__init__(*, eid=None, tokens, logprobs=None, reward=None, advantages=None, returns=None, info=None, metrics=None, prompt_length=1, response_text=None, prompt_text=None, action_mask=None, messages=None, chosen=None, rejected=None, chosen_text=None, rejected_text=None)[source]
eid: EID
reward: float | None = None
advantages: Tensor | None = None
returns: Tensor | None = None
experience_type: ExperienceType = 'single_turn'
info: dict
metrics: dict[str, float]
prompt_length: int = 1
response_text: str | None = None
prompt_text: str | None = None
messages: List[dict] | None = None
chosen_text: str | None = None
rejected_text: str | None = None
tokens: Tensor | None = None
logprobs: Tensor | None = None
action_mask: Tensor | None = None
chosen: Tensor | None = None
rejected: Tensor | None = None
serialize() bytes[source]

Serialize the experience to bytes.

classmethod deserialize(data: bytes) Experience[source]
to_dict() dict[source]

Convert the experience to a dictionary.

classmethod gather(experiences: List[Experience], pad_token_id: int = 0, custom_fields: List[CustomField] | None = None) Experiences[source]
trinity.common.experience.split_dpo_experience_to_single_turn(experiences: List[Experience]) List[Experience][source]
class trinity.common.experience.Experiences(eids: ~typing.List[~trinity.common.experience.EID], tokens: ~torch.Tensor, rewards: ~torch.Tensor, advantages: ~torch.Tensor | None, returns: ~torch.Tensor | None, attention_masks: ~torch.Tensor, action_masks: ~torch.Tensor | None, prompt_length: int, logprobs: ~torch.Tensor | None, custom_fields: ~typing.List[str] = <factory>)[source]

Bases: object

A container for a batch of experiences, designed for high-performance communication.

Example

>>>             |<- prompt_length ->|               |
>>> tokens: ('P' represents prompt, 'O' represents output)
>>> exp1:       |........PPPPPPPPPPP|OOOOOOOOOO.....|
>>> exp2:       |......PPPPPPPPPPPPP|OOOOOOO........|
>>>
>>> attention_masks: ('.' represents False and '1' represents True)
>>> exp1:       |........11111111111|1111111111.....|
>>> exp2:       |......1111111111111|1111111........|
__init__(eids: ~typing.List[~trinity.common.experience.EID], tokens: ~torch.Tensor, rewards: ~torch.Tensor, advantages: ~torch.Tensor | None, returns: ~torch.Tensor | None, attention_masks: ~torch.Tensor, action_masks: ~torch.Tensor | None, prompt_length: int, logprobs: ~torch.Tensor | None, custom_fields: ~typing.List[str] = <factory>) None
eids: List[EID]
tokens: Tensor
rewards: Tensor
advantages: Tensor | None
returns: Tensor | None
attention_masks: Tensor
action_masks: Tensor | None
prompt_length: int
logprobs: Tensor | None
custom_fields: List[str]
property batch_size: int

Get the batch size.

classmethod gather_experiences(experiences: list[Experience], pad_token_id: int = 0, custom_fields: List[CustomField] | None = None) Experiences[source]

Gather a batch of experiences from a list of experiences.

This method will automatically pad the tokens and logprobs of input experiences to the same length.

Parameters:
  • experiences (list[Experience]) – A list of experiences to gather.

  • pad_token_id (int) – The token ID to use for padding. Default is 0.

  • custom_fields (Optional[List[CustomField]]) – Custom fields to include in the gathered experiences.

trinity.common.experience.empty_experiences(custom_fields: List[CustomField] | None) Experiences[source]
trinity.common.experience.gather_token_ids(experiences, max_prompt_length: int, max_response_length: int, pad_token_id: int) Tensor[source]
trinity.common.experience.gather_action_masks(experiences, max_response_length: int) Tensor[source]
trinity.common.experience.gather_attention_masks(experiences, max_prompt_length: int, max_response_length: int) Tensor[source]
trinity.common.experience.gather_logprobs(experiences, max_response_length: int) Tensor[source]
trinity.common.experience.gather_advantages(experiences, max_response_length: int) Tensor | None[source]
trinity.common.experience.gather_returns(experiences, max_response_length: int) Tensor | None[source]

trinity.common.verl_config module

class trinity.common.verl_config.Data(train_batch_size: int = 1024)[source]

Bases: object

train_batch_size: int = 1024
__init__(train_batch_size: int = 1024) None
class trinity.common.verl_config.FusedKernelOptions(impl_backend: str | None = None)[source]

Bases: object

impl_backend: str | None = None
__init__(impl_backend: str | None = None) None
class trinity.common.verl_config.ActorModel(path: str = '', external_lib: Optional[str] = None, override_config: Dict[str, Any] = <factory>, enable_gradient_checkpointing: bool = True, use_remove_padding: bool = False, use_fused_kernels: bool = False, fused_kernel_options: trinity.common.verl_config.FusedKernelOptions = <factory>, custom_chat_template: Optional[str] = None)[source]

Bases: object

path: str = ''
external_lib: str | None = None
override_config: Dict[str, Any]
enable_gradient_checkpointing: bool = True
use_remove_padding: bool = False
use_fused_kernels: bool = False
fused_kernel_options: FusedKernelOptions
custom_chat_template: str | None = None
__init__(path: str = '', external_lib: str | None = None, override_config: ~typing.Dict[str, ~typing.Any] = <factory>, enable_gradient_checkpointing: bool = True, use_remove_padding: bool = False, use_fused_kernels: bool = False, fused_kernel_options: ~trinity.common.verl_config.FusedKernelOptions = <factory>, custom_chat_template: str | None = None) None
class trinity.common.verl_config.Optim(lr: float = 1e-06, lr_warmup_steps: int = -1, lr_warmup_steps_ratio: float = 0.0, min_lr_ratio: Optional[float] = 0.0, warmup_style: str = 'constant', total_training_steps: int = -1, betas: List[float] = <factory>)[source]

Bases: object

lr: float = 1e-06
lr_warmup_steps: int = -1
lr_warmup_steps_ratio: float = 0.0
min_lr_ratio: float | None = 0.0
warmup_style: str = 'constant'
total_training_steps: int = -1
betas: List[float]
__init__(lr: float = 1e-06, lr_warmup_steps: int = -1, lr_warmup_steps_ratio: float = 0.0, min_lr_ratio: float | None = 0.0, warmup_style: str = 'constant', total_training_steps: int = -1, betas: ~typing.List[float] = <factory>) None
class trinity.common.verl_config.WrapPolicy(min_num_params: int = 0)[source]

Bases: object

min_num_params: int = 0
__init__(min_num_params: int = 0) None
class trinity.common.verl_config.FSDPConfig(wrap_policy: trinity.common.verl_config.WrapPolicy = <factory>, min_num_params: int = 0, param_offload: bool = False, optimizer_offload: bool = False, fsdp_size: int = -1, forward_prefetch: bool = False)[source]

Bases: object

wrap_policy: WrapPolicy
min_num_params: int = 0
param_offload: bool = False
optimizer_offload: bool = False
fsdp_size: int = -1
forward_prefetch: bool = False
__init__(wrap_policy: ~trinity.common.verl_config.WrapPolicy = <factory>, min_num_params: int = 0, param_offload: bool = False, optimizer_offload: bool = False, fsdp_size: int = -1, forward_prefetch: bool = False) None
class trinity.common.verl_config.Checkpoint(load_contents: List[str] = <factory>, save_contents: List[str] = <factory>)[source]

Bases: object

load_contents: List[str]
save_contents: List[str]
__init__(load_contents: ~typing.List[str] = <factory>, save_contents: ~typing.List[str] = <factory>) None
class trinity.common.verl_config.Actor(strategy: str = 'fsdp', ppo_mini_batch_size: int = 256, ppo_micro_batch_size: Optional[int] = None, ppo_micro_batch_size_per_gpu: int = 1, use_dynamic_bsz: bool = False, ppo_max_token_len_per_gpu: int = 16384, grad_clip: float = 1.0, ppo_epochs: int = 1, shuffle: bool = False, ulysses_sequence_parallel_size: int = 1, entropy_from_logits_with_chunking: bool = False, entropy_checkpointing: bool = False, checkpoint: trinity.common.verl_config.Checkpoint = <factory>, optim: trinity.common.verl_config.Optim = <factory>, fsdp_config: trinity.common.verl_config.FSDPConfig = <factory>, loss_agg_mode: str = 'token-mean', clip_ratio: float = 0.2, entropy_coeff: float = 0.001, use_kl_loss: bool = False, kl_loss_coef: float = 0.001, kl_loss_type: str = 'low_var_kl')[source]

Bases: object

strategy: str = 'fsdp'
ppo_mini_batch_size: int = 256
ppo_micro_batch_size: int | None = None
ppo_micro_batch_size_per_gpu: int = 1
use_dynamic_bsz: bool = False
ppo_max_token_len_per_gpu: int = 16384
grad_clip: float = 1.0
ppo_epochs: int = 1
shuffle: bool = False
ulysses_sequence_parallel_size: int = 1
entropy_from_logits_with_chunking: bool = False
entropy_checkpointing: bool = False
checkpoint: Checkpoint
optim: Optim
fsdp_config: FSDPConfig
loss_agg_mode: str = 'token-mean'
clip_ratio: float = 0.2
entropy_coeff: float = 0.001
use_kl_loss: bool = False
kl_loss_coef: float = 0.001
kl_loss_type: str = 'low_var_kl'
__init__(strategy: str = 'fsdp', ppo_mini_batch_size: int = 256, ppo_micro_batch_size: int | None = None, ppo_micro_batch_size_per_gpu: int = 1, use_dynamic_bsz: bool = False, ppo_max_token_len_per_gpu: int = 16384, grad_clip: float = 1.0, ppo_epochs: int = 1, shuffle: bool = False, ulysses_sequence_parallel_size: int = 1, entropy_from_logits_with_chunking: bool = False, entropy_checkpointing: bool = False, checkpoint: ~trinity.common.verl_config.Checkpoint = <factory>, optim: ~trinity.common.verl_config.Optim = <factory>, fsdp_config: ~trinity.common.verl_config.FSDPConfig = <factory>, loss_agg_mode: str = 'token-mean', clip_ratio: float = 0.2, entropy_coeff: float = 0.001, use_kl_loss: bool = False, kl_loss_coef: float = 0.001, kl_loss_type: str = 'low_var_kl') None
class trinity.common.verl_config.Ref(fsdp_config: trinity.common.verl_config.FSDPConfig = <factory>, log_prob_micro_batch_size: Optional[int] = None, log_prob_micro_batch_size_per_gpu: int = 1, log_prob_use_dynamic_bsz: bool = False, log_prob_max_token_len_per_gpu: int = 0, ulysses_sequence_parallel_size: int = 1, entropy_from_logits_with_chunking: bool = False, entropy_checkpointing: bool = False, checkpoint: trinity.common.verl_config.Checkpoint = <factory>)[source]

Bases: object

fsdp_config: FSDPConfig
log_prob_micro_batch_size: int | None = None
log_prob_micro_batch_size_per_gpu: int = 1
log_prob_use_dynamic_bsz: bool = False
log_prob_max_token_len_per_gpu: int = 0
ulysses_sequence_parallel_size: int = 1
entropy_from_logits_with_chunking: bool = False
entropy_checkpointing: bool = False
checkpoint: Checkpoint
__init__(fsdp_config: ~trinity.common.verl_config.FSDPConfig = <factory>, log_prob_micro_batch_size: int | None = None, log_prob_micro_batch_size_per_gpu: int = 1, log_prob_use_dynamic_bsz: bool = False, log_prob_max_token_len_per_gpu: int = 0, ulysses_sequence_parallel_size: int = 1, entropy_from_logits_with_chunking: bool = False, entropy_checkpointing: bool = False, checkpoint: ~trinity.common.verl_config.Checkpoint = <factory>) None
class trinity.common.verl_config.Rollout(val_kwargs: trinity.common.verl_config._ValKwargs = <factory>, multi_turn: trinity.common.verl_config._MultiTurn = <factory>, temperature: float = 1.0, n: int = 1, log_prob_micro_batch_size: Optional[int] = None, log_prob_micro_batch_size_per_gpu: int = 1)[source]

Bases: object

val_kwargs: _ValKwargs
multi_turn: _MultiTurn
temperature: float = 1.0
n: int = 1
log_prob_micro_batch_size: int | None = None
log_prob_micro_batch_size_per_gpu: int = 1
__init__(val_kwargs: ~trinity.common.verl_config._ValKwargs = <factory>, multi_turn: ~trinity.common.verl_config._MultiTurn = <factory>, temperature: float = 1.0, n: int = 1, log_prob_micro_batch_size: int | None = None, log_prob_micro_batch_size_per_gpu: int = 1) None
class trinity.common.verl_config.ActorRolloutRef(hybrid_engine: bool = True, model: trinity.common.verl_config.ActorModel = <factory>, actor: trinity.common.verl_config.Actor = <factory>, ref: trinity.common.verl_config.Ref = <factory>, rollout: trinity.common.verl_config.Rollout = <factory>, synchronizer: Optional[trinity.common.config.SynchronizerConfig] = None, explorer_name: str = 'explorer')[source]

Bases: object

hybrid_engine: bool = True
model: ActorModel
actor: Actor
ref: Ref
rollout: Rollout
synchronizer: SynchronizerConfig | None = None
explorer_name: str = 'explorer'
__init__(hybrid_engine: bool = True, model: ~trinity.common.verl_config.ActorModel = <factory>, actor: ~trinity.common.verl_config.Actor = <factory>, ref: ~trinity.common.verl_config.Ref = <factory>, rollout: ~trinity.common.verl_config.Rollout = <factory>, synchronizer: ~trinity.common.config.SynchronizerConfig | None = None, explorer_name: str = 'explorer') None
class trinity.common.verl_config.CriticModel(path: str = '', tokenizer_path: str = '', override_config: Dict[str, str] = <factory>, external_lib: Optional[str] = None, enable_gradient_checkpointing: bool = True, use_remove_padding: bool = False, fsdp_config: trinity.common.verl_config.FSDPConfig = <factory>)[source]

Bases: object

path: str = ''
tokenizer_path: str = ''
override_config: Dict[str, str]
external_lib: str | None = None
enable_gradient_checkpointing: bool = True
use_remove_padding: bool = False
fsdp_config: FSDPConfig
__init__(path: str = '', tokenizer_path: str = '', override_config: ~typing.Dict[str, str] = <factory>, external_lib: str | None = None, enable_gradient_checkpointing: bool = True, use_remove_padding: bool = False, fsdp_config: ~trinity.common.verl_config.FSDPConfig = <factory>) None
class trinity.common.verl_config.Critic(strategy: str = 'fsdp', optim: trinity.common.verl_config.Optim = <factory>, model: trinity.common.verl_config.CriticModel = <factory>, ppo_mini_batch_size: int = 0, ppo_micro_batch_size: Optional[int] = None, ppo_micro_batch_size_per_gpu: int = 1, forward_micro_batch_size: Optional[int] = None, forward_micro_batch_size_per_gpu: Optional[int] = None, use_dynamic_bsz: bool = False, ppo_max_token_len_per_gpu: int = 0, forward_max_token_len_per_gpu: int = 0, ulysses_sequence_parallel_size: int = 1, ppo_epochs: int = 0, shuffle: bool = False, grad_clip: float = 0.0, cliprange_value: float = 0.0, checkpoint: trinity.common.verl_config.Checkpoint = <factory>, rollout_n: int = 1, loss_agg_mode: str = 'token-mean')[source]

Bases: object

strategy: str = 'fsdp'
optim: Optim
model: CriticModel
ppo_mini_batch_size: int = 0
ppo_micro_batch_size: int | None = None
ppo_micro_batch_size_per_gpu: int = 1
forward_micro_batch_size: int | None = None
forward_micro_batch_size_per_gpu: int | None = None
use_dynamic_bsz: bool = False
ppo_max_token_len_per_gpu: int = 0
forward_max_token_len_per_gpu: int = 0
ulysses_sequence_parallel_size: int = 1
ppo_epochs: int = 0
shuffle: bool = False
grad_clip: float = 0.0
cliprange_value: float = 0.0
checkpoint: Checkpoint
rollout_n: int = 1
loss_agg_mode: str = 'token-mean'
__init__(strategy: str = 'fsdp', optim: ~trinity.common.verl_config.Optim = <factory>, model: ~trinity.common.verl_config.CriticModel = <factory>, ppo_mini_batch_size: int = 0, ppo_micro_batch_size: int | None = None, ppo_micro_batch_size_per_gpu: int = 1, forward_micro_batch_size: int | None = None, forward_micro_batch_size_per_gpu: int | None = None, use_dynamic_bsz: bool = False, ppo_max_token_len_per_gpu: int = 0, forward_max_token_len_per_gpu: int = 0, ulysses_sequence_parallel_size: int = 1, ppo_epochs: int = 0, shuffle: bool = False, grad_clip: float = 0.0, cliprange_value: float = 0.0, checkpoint: ~trinity.common.verl_config.Checkpoint = <factory>, rollout_n: int = 1, loss_agg_mode: str = 'token-mean') None
class trinity.common.verl_config.RewardModel(enable: bool = False, strategy: str = 'fsdp', model: trinity.common.verl_config._RewardModel = <factory>, micro_batch_size_per_gpu: int = 1, max_length: Optional[int] = None, ulysses_sequence_parallel_size: int = 1, use_dynamic_bsz: bool = False, forward_max_token_len_per_gpu: int = 0, reward_manager: str = 'naive')[source]

Bases: object

enable: bool = False
strategy: str = 'fsdp'
model: _RewardModel
micro_batch_size_per_gpu: int = 1
max_length: int | None = None
ulysses_sequence_parallel_size: int = 1
use_dynamic_bsz: bool = False
forward_max_token_len_per_gpu: int = 0
reward_manager: str = 'naive'
__init__(enable: bool = False, strategy: str = 'fsdp', model: ~trinity.common.verl_config._RewardModel = <factory>, micro_batch_size_per_gpu: int = 1, max_length: int | None = None, ulysses_sequence_parallel_size: int = 1, use_dynamic_bsz: bool = False, forward_max_token_len_per_gpu: int = 0, reward_manager: str = 'naive') None
class trinity.common.verl_config.CustomRewardFunction(path: str | None = None, name: str = 'compute_score')[source]

Bases: object

path: str | None = None
name: str = 'compute_score'
__init__(path: str | None = None, name: str = 'compute_score') None
class trinity.common.verl_config.KL_Ctrl(type: str = 'fixed', kl_coef: float = 0.001, horizon: float = 10000, target_kl: float = 0.1)[source]

Bases: object

type: str = 'fixed'
kl_coef: float = 0.001
horizon: float = 10000
target_kl: float = 0.1
__init__(type: str = 'fixed', kl_coef: float = 0.001, horizon: float = 10000, target_kl: float = 0.1) None
class trinity.common.verl_config.Algorithm(gamma: float = 1.0, lam: float = 1.0, adv_estimator: str = 'gae', norm_adv_by_std_in_grpo: bool = True, use_kl_in_reward: bool = False, kl_penalty: str = 'kl', kl_ctrl: trinity.common.verl_config.KL_Ctrl = <factory>)[source]

Bases: object

gamma: float = 1.0
lam: float = 1.0
adv_estimator: str = 'gae'
norm_adv_by_std_in_grpo: bool = True
use_kl_in_reward: bool = False
kl_penalty: str = 'kl'
kl_ctrl: KL_Ctrl
__init__(gamma: float = 1.0, lam: float = 1.0, adv_estimator: str = 'gae', norm_adv_by_std_in_grpo: bool = True, use_kl_in_reward: bool = False, kl_penalty: str = 'kl', kl_ctrl: ~trinity.common.verl_config.KL_Ctrl = <factory>) None
class trinity.common.verl_config.Trainer(balance_batch: bool = True, total_epochs: int = 30, total_training_steps: Optional[int] = None, project_name: str = '', group_name: str = '', experiment_name: str = '', logger: List[str] = <factory>, val_generations_to_log_to_wandb: int = 0, nnodes: int = 0, n_gpus_per_node: int = 0, save_freq: int = 0, resume_mode: str = 'auto', resume_from_path: str = '', test_freq: int = 0, critic_warmup: int = 0, default_hdfs_dir: Optional[str] = None, remove_previous_ckpt_in_save: bool = False, del_local_ckpt_after_load: bool = False, default_local_dir: str = '', val_before_train: bool = False, training_rollout_mode: str = 'parallel', enable_exp_buffer: bool = True, sync_freq: int = 0, sft_warmup_steps: int = 0, max_actor_ckpt_to_keep: Optional[int] = None, max_critic_ckpt_to_keep: Optional[int] = None)[source]

Bases: object

balance_batch: bool = True
total_epochs: int = 30
total_training_steps: int | None = None
project_name: str = ''
group_name: str = ''
experiment_name: str = ''
logger: List[str]
val_generations_to_log_to_wandb: int = 0
nnodes: int = 0
n_gpus_per_node: int = 0
save_freq: int = 0
resume_mode: str = 'auto'
resume_from_path: str = ''
test_freq: int = 0
critic_warmup: int = 0
default_hdfs_dir: str | None = None
remove_previous_ckpt_in_save: bool = False
del_local_ckpt_after_load: bool = False
default_local_dir: str = ''
val_before_train: bool = False
training_rollout_mode: str = 'parallel'
enable_exp_buffer: bool = True
sync_freq: int = 0
sft_warmup_steps: int = 0
max_actor_ckpt_to_keep: int | None = None
max_critic_ckpt_to_keep: int | None = None
__init__(balance_batch: bool = True, total_epochs: int = 30, total_training_steps: int | None = None, project_name: str = '', group_name: str = '', experiment_name: str = '', logger: ~typing.List[str] = <factory>, val_generations_to_log_to_wandb: int = 0, nnodes: int = 0, n_gpus_per_node: int = 0, save_freq: int = 0, resume_mode: str = 'auto', resume_from_path: str = '', test_freq: int = 0, critic_warmup: int = 0, default_hdfs_dir: str | None = None, remove_previous_ckpt_in_save: bool = False, del_local_ckpt_after_load: bool = False, default_local_dir: str = '', val_before_train: bool = False, training_rollout_mode: str = 'parallel', enable_exp_buffer: bool = True, sync_freq: int = 0, sft_warmup_steps: int = 0, max_actor_ckpt_to_keep: int | None = None, max_critic_ckpt_to_keep: int | None = None) None
class trinity.common.verl_config.veRLConfig(data: trinity.common.verl_config.Data = <factory>, actor_rollout_ref: trinity.common.verl_config.ActorRolloutRef = <factory>, critic: trinity.common.verl_config.Critic = <factory>, reward_model: trinity.common.verl_config.RewardModel = <factory>, custom_reward_function: trinity.common.verl_config.CustomRewardFunction = <factory>, algorithm: trinity.common.verl_config.Algorithm = <factory>, trainer: trinity.common.verl_config.Trainer = <factory>, buffer: trinity.common.config.BufferConfig = <factory>, synchronizer: Optional[trinity.common.config.SynchronizerConfig] = None, enable_preview: bool = True)[source]

Bases: object

data: Data
actor_rollout_ref: ActorRolloutRef
critic: Critic
reward_model: RewardModel
custom_reward_function: CustomRewardFunction
algorithm: Algorithm
trainer: Trainer
buffer: BufferConfig
synchronizer: SynchronizerConfig | None = None
enable_preview: bool = True
synchronize_config(config: Config) None[source]

Synchronize this veRL configuration with the given Config.

__init__(data: ~trinity.common.verl_config.Data = <factory>, actor_rollout_ref: ~trinity.common.verl_config.ActorRolloutRef = <factory>, critic: ~trinity.common.verl_config.Critic = <factory>, reward_model: ~trinity.common.verl_config.RewardModel = <factory>, custom_reward_function: ~trinity.common.verl_config.CustomRewardFunction = <factory>, algorithm: ~trinity.common.verl_config.Algorithm = <factory>, trainer: ~trinity.common.verl_config.Trainer = <factory>, buffer: ~trinity.common.config.BufferConfig = <factory>, synchronizer: ~trinity.common.config.SynchronizerConfig | None = None, enable_preview: bool = True) None
trinity.common.verl_config.load_config(config_path: str) veRLConfig[source]

Module contents