trinity.common

Subpackages

Submodules

trinity.common.config module

Configs for RFT.

class trinity.common.config.FormatConfig(prompt_type: PromptType = PromptType.MESSAGES, prompt_key: str = 'prompt', response_key: str = 'response', messages_key: str = 'message', chat_template: str = '', system_prompt: str | None = None, reply_prefix: str | None = None, reward_fn_key: str = '', workflow_key: str = '', solution_key: str = 'solution', reward_key: str = 'reward', chosen_key: str = 'chosen', rejected_key: str = 'rejected', label_key: str = '')[source]

Bases: object

Configuration for data formatting

prompt_type: PromptType = 'messages'

prompt_key: str = 'prompt'

response_key: str = 'response'

messages_key: str = 'message'

chat_template: str = ''

system_prompt: str | None = None

reply_prefix: str | None = None

reward_fn_key: str = ''

workflow_key: str = ''

solution_key: str = 'solution'

reward_key: str = 'reward'

chosen_key: str = 'chosen'

rejected_key: str = 'rejected'

label_key: str = ''

__init__(prompt_type: PromptType = PromptType.MESSAGES, prompt_key: str = 'prompt', response_key: str = 'response', messages_key: str = 'message', chat_template: str = '', system_prompt: str | None = None, reply_prefix: str | None = None, reward_fn_key: str = '', workflow_key: str = '', solution_key: str = 'solution', reward_key: str = 'reward', chosen_key: str = 'chosen', rejected_key: str = 'rejected', label_key: str = '') → None

class trinity.common.config.GenerationConfig(temperature: float = 1.0, top_p: float = 1.0, top_k: int = -1, logprobs: int = 0, n: int = 1)[source]

Bases: object

temperature: float = 1.0

top_p: float = 1.0

top_k: int = -1

logprobs: int = 0

n: int = 1

__init__(temperature: float = 1.0, top_p: float = 1.0, top_k: int = -1, logprobs: int = 0, n: int = 1) → None

class trinity.common.config.StorageConfig(name: str = '', storage_type: ~trinity.common.constants.StorageType = StorageType.FILE, path: str | None = None, repeat_times: int | None = None, raw: bool = False, split: str = 'train', subset_name: str | None = None, format: ~trinity.common.config.FormatConfig = <factory>, index: int = 0, wrap_in_ray: bool = True, capacity: int = 10000, max_read_timeout: float = 1800, use_priority_queue: bool = False, reuse_cooldown_time: float | None = None, replay_buffer_kwargs: dict = <factory>, default_workflow_type: str | None = None, default_eval_workflow_type: str | None = None, default_reward_fn_type: str | None = None, rollout_args: ~trinity.common.config.GenerationConfig = <factory>, workflow_args: dict = <factory>, reward_fn_args: dict = <factory>, enable_progress_bar: bool | None = False, ray_namespace: str | None = None, algorithm_type: str | None = None, total_epochs: int = 1, total_steps: int | None = None, task_type: ~trinity.common.constants.TaskType = TaskType.EXPLORE)[source]

Bases: object

Storage config.

name: str = ''

storage_type: StorageType = 'file'

path: str | None = None

repeat_times: int | None = None

raw: bool = False

split: str = 'train'

subset_name: str | None = None

format: FormatConfig

index: int = 0

wrap_in_ray: bool = True

capacity: int = 10000

max_read_timeout: float = 1800

use_priority_queue: bool = False

reuse_cooldown_time: float | None = None

replay_buffer_kwargs: dict

default_workflow_type: str | None = None

default_eval_workflow_type: str | None = None

default_reward_fn_type: str | None = None

rollout_args: GenerationConfig

workflow_args: dict

reward_fn_args: dict

enable_progress_bar: bool | None = False

ray_namespace: str | None = None

algorithm_type: str | None = None

total_epochs: int = 1

total_steps: int | None = None

task_type: TaskType = 0

__init__(name: str = '', storage_type: ~trinity.common.constants.StorageType = StorageType.FILE, path: str | None = None, repeat_times: int | None = None, raw: bool = False, split: str = 'train', subset_name: str | None = None, format: ~trinity.common.config.FormatConfig = <factory>, index: int = 0, wrap_in_ray: bool = True, capacity: int = 10000, max_read_timeout: float = 1800, use_priority_queue: bool = False, reuse_cooldown_time: float | None = None, replay_buffer_kwargs: dict = <factory>, default_workflow_type: str | None = None, default_eval_workflow_type: str | None = None, default_reward_fn_type: str | None = None, rollout_args: ~trinity.common.config.GenerationConfig = <factory>, workflow_args: dict = <factory>, reward_fn_args: dict = <factory>, enable_progress_bar: bool | None = False, ray_namespace: str | None = None, algorithm_type: str | None = None, total_epochs: int = 1, total_steps: int | None = None, task_type: ~trinity.common.constants.TaskType = TaskType.EXPLORE) → None

class trinity.common.config.RewardShapingConfig(stats_key: str = '', op_type: OpType = OpType.ADD, weight: float = 1.0)[source]

Bases: object

Config for reward shaping.

stats_key: str = ''

op_type: OpType = 'add'

weight: float = 1.0

__init__(stats_key: str = '', op_type: OpType = OpType.ADD, weight: float = 1.0) → None

class trinity.common.config.DataPipelineConfig(input_buffers: ~typing.List[~trinity.common.config.StorageConfig] = <factory>, output_buffer: ~trinity.common.config.StorageConfig = <factory>, format: ~trinity.common.config.FormatConfig = <factory>, dj_config_path: str | None = None, dj_process_desc: str | None = None, agent_model_name: str | None = None, clean_strategy: str = 'iterative', min_size_ratio: float | None = None, min_priority_score: float | None = 0.0, priority_weights: ~typing.Dict[str, float] | None = None, data_dist: str | None = 'gaussian', reward_shaping: ~typing.List[~trinity.common.config.RewardShapingConfig] | None = <factory>)[source]

Bases: object

Config for data pipeline.

input_buffers: List[StorageConfig]

output_buffer: StorageConfig

format: FormatConfig

dj_config_path: str | None = None

dj_process_desc: str | None = None

agent_model_name: str | None = None

clean_strategy: str = 'iterative'

min_size_ratio: float | None = None

min_priority_score: float | None = 0.0

priority_weights: Dict[str, float] | None = None

data_dist: str | None = 'gaussian'

reward_shaping: List[RewardShapingConfig] | None

__init__(input_buffers: ~typing.List[~trinity.common.config.StorageConfig] = <factory>, output_buffer: ~trinity.common.config.StorageConfig = <factory>, format: ~trinity.common.config.FormatConfig = <factory>, dj_config_path: str | None = None, dj_process_desc: str | None = None, agent_model_name: str | None = None, clean_strategy: str = 'iterative', min_size_ratio: float | None = None, min_priority_score: float | None = 0.0, priority_weights: ~typing.Dict[str, float] | None = None, data_dist: str | None = 'gaussian', reward_shaping: ~typing.List[~trinity.common.config.RewardShapingConfig] | None = <factory>) → None

class trinity.common.config.DataProcessorConfig(data_processor_url: str | None = None, task_pipeline: DataPipelineConfig | None = None, experience_pipeline: DataPipelineConfig | None = None)[source]

Bases: object

Data-Juicer config

data_processor_url: str | None = None

task_pipeline: DataPipelineConfig | None = None

experience_pipeline: DataPipelineConfig | None = None

__init__(data_processor_url: str | None = None, task_pipeline: DataPipelineConfig | None = None, experience_pipeline: DataPipelineConfig | None = None) → None

class trinity.common.config.ModelConfig(model_path: str = '', critic_model_path: str = '', max_model_len: int | None = None, max_prompt_tokens: int | None = None, max_response_tokens: int | None = None, custom_chat_template: str | None = None)[source]

Bases: object

model_path: str = ''

critic_model_path: str = ''

max_model_len: int | None = None

max_prompt_tokens: int | None = None

max_response_tokens: int | None = None

custom_chat_template: str | None = None

__init__(model_path: str = '', critic_model_path: str = '', max_model_len: int | None = None, max_prompt_tokens: int | None = None, max_response_tokens: int | None = None, custom_chat_template: str | None = None) → None

class trinity.common.config.InferenceModelConfig(model_path: str = '', engine_type: str = 'vllm_async', engine_num: int = 1, tensor_parallel_size: int = 1, use_v1: bool = True, enforce_eager: bool = True, enable_prefix_caching: bool = False, enable_chunked_prefill: bool = False, gpu_memory_utilization: float = 0.9, dtype: str = 'bfloat16', seed: int = 42, max_model_len: int | None = None, max_prompt_tokens: int | None = None, max_response_tokens: int | None = None, chat_template: str | None = None, enable_thinking: bool = False, enable_history: bool = False, enable_openai_api: bool = False, enable_auto_tool_choice: bool = False, tool_call_parser: str | None = None, reasoning_parser: str | None = None, bundle_indices: str = '')[source]

Bases: object

model_path: str = ''

engine_type: str = 'vllm_async'

engine_num: int = 1

tensor_parallel_size: int = 1

use_v1: bool = True

enforce_eager: bool = True

enable_prefix_caching: bool = False

enable_chunked_prefill: bool = False

gpu_memory_utilization: float = 0.9

dtype: str = 'bfloat16'

seed: int = 42

max_model_len: int | None = None

max_prompt_tokens: int | None = None

max_response_tokens: int | None = None

chat_template: str | None = None

enable_thinking: bool = False

enable_history: bool = False

enable_openai_api: bool = False

enable_auto_tool_choice: bool = False

tool_call_parser: str | None = None

reasoning_parser: str | None = None

bundle_indices: str = ''

__init__(model_path: str = '', engine_type: str = 'vllm_async', engine_num: int = 1, tensor_parallel_size: int = 1, use_v1: bool = True, enforce_eager: bool = True, enable_prefix_caching: bool = False, enable_chunked_prefill: bool = False, gpu_memory_utilization: float = 0.9, dtype: str = 'bfloat16', seed: int = 42, max_model_len: int | None = None, max_prompt_tokens: int | None = None, max_response_tokens: int | None = None, chat_template: str | None = None, enable_thinking: bool = False, enable_history: bool = False, enable_openai_api: bool = False, enable_auto_tool_choice: bool = False, tool_call_parser: str | None = None, reasoning_parser: str | None = None, bundle_indices: str = '') → None

class trinity.common.config.AlgorithmConfig(algorithm_type: str = 'ppo', repeat_times: int = 1, add_strategy: str | None = None, add_strategy_args: dict | None = None, sample_strategy: str | None = None, sample_strategy_args: dict | None = None, advantage_fn: str | None = None, advantage_fn_args: dict | None = None, kl_penalty_fn: str | None = None, kl_penalty_fn_args: dict | None = None, policy_loss_fn: str | None = None, policy_loss_fn_args: dict | None = None, kl_loss_fn: str | None = None, kl_loss_fn_args: dict | None = None, entropy_loss_fn: str | None = None, entropy_loss_fn_args: dict | None = None, use_token_level_loss: bool = True)[source]

Bases: object

Config for algorithm.

algorithm_type: str = 'ppo'

repeat_times: int = 1

add_strategy: str | None = None

add_strategy_args: dict | None = None

sample_strategy: str | None = None

sample_strategy_args: dict | None = None

advantage_fn: str | None = None

advantage_fn_args: dict | None = None

kl_penalty_fn: str | None = None

kl_penalty_fn_args: dict | None = None

policy_loss_fn: str | None = None

policy_loss_fn_args: dict | None = None

kl_loss_fn: str | None = None

kl_loss_fn_args: dict | None = None

entropy_loss_fn: str | None = None

entropy_loss_fn_args: dict | None = None

use_token_level_loss: bool = True

__init__(algorithm_type: str = 'ppo', repeat_times: int = 1, add_strategy: str | None = None, add_strategy_args: dict | None = None, sample_strategy: str | None = None, sample_strategy_args: dict | None = None, advantage_fn: str | None = None, advantage_fn_args: dict | None = None, kl_penalty_fn: str | None = None, kl_penalty_fn_args: dict | None = None, policy_loss_fn: str | None = None, policy_loss_fn_args: dict | None = None, kl_loss_fn: str | None = None, kl_loss_fn_args: dict | None = None, entropy_loss_fn: str | None = None, entropy_loss_fn_args: dict | None = None, use_token_level_loss: bool = True) → None

class trinity.common.config.ClusterConfig(node_num: int = 1, gpu_per_node: int = 8)[source]

Bases: object

Config for the cluster.

node_num: int = 1

gpu_per_node: int = 8

__init__(node_num: int = 1, gpu_per_node: int = 8) → None

class trinity.common.config.ExplorerInput(taskset: ~trinity.common.config.StorageConfig = <factory>, eval_tasksets: ~typing.List[~trinity.common.config.StorageConfig] = <factory>, default_workflow_type: str | None = None, default_eval_workflow_type: str | None = None, default_reward_fn_type: str | None = None, system_prompt: str | None = None, reply_prefix: str | None = None)[source]

Bases: object

Config for explorer input.

taskset: StorageConfig

eval_tasksets: List[StorageConfig]

default_workflow_type: str | None = None

default_eval_workflow_type: str | None = None

default_reward_fn_type: str | None = None

system_prompt: str | None = None

reply_prefix: str | None = None

__init__(taskset: ~trinity.common.config.StorageConfig = <factory>, eval_tasksets: ~typing.List[~trinity.common.config.StorageConfig] = <factory>, default_workflow_type: str | None = None, default_eval_workflow_type: str | None = None, default_reward_fn_type: str | None = None, system_prompt: str | None = None, reply_prefix: str | None = None) → None

class trinity.common.config.TrainerInput(experience_buffer: StorageConfig | None = None, sft_warmup_dataset: StorageConfig | None = None, read_experience_strategy: ReadStrategy | None = None, sft_warmup_steps: int = 0)[source]

Bases: object

Config for trainer input.

experience_buffer: StorageConfig | None = None

sft_warmup_dataset: StorageConfig | None = None

read_experience_strategy: ReadStrategy | None = None

sft_warmup_steps: int = 0

__init__(experience_buffer: StorageConfig | None = None, sft_warmup_dataset: StorageConfig | None = None, read_experience_strategy: ReadStrategy | None = None, sft_warmup_steps: int = 0) → None

class trinity.common.config.BufferConfig(batch_size: int = 1, train_batch_size: int = 0, total_epochs: int = 1, total_steps: int | None = None, explorer_input: ~trinity.common.config.ExplorerInput = <factory>, explorer_output: ~trinity.common.config.StorageConfig | None = None, trainer_input: ~trinity.common.config.TrainerInput = <factory>, max_retry_times: int = 3, max_retry_interval: int = 1, tokenizer_path: str | None = None, pad_token_id: int | None = None, cache_dir: str | None = None)[source]

Bases: object

Config for buffer.

batch_size: int = 1

train_batch_size: int = 0

total_epochs: int = 1

total_steps: int | None = None

explorer_input: ExplorerInput

explorer_output: StorageConfig | None = None

trainer_input: TrainerInput

max_retry_times: int = 3

max_retry_interval: int = 1

tokenizer_path: str | None = None

pad_token_id: int | None = None

cache_dir: str | None = None

__init__(batch_size: int = 1, train_batch_size: int = 0, total_epochs: int = 1, total_steps: int | None = None, explorer_input: ~trinity.common.config.ExplorerInput = <factory>, explorer_output: ~trinity.common.config.StorageConfig | None = None, trainer_input: ~trinity.common.config.TrainerInput = <factory>, max_retry_times: int = 3, max_retry_interval: int = 1, tokenizer_path: str | None = None, pad_token_id: int | None = None, cache_dir: str | None = None) → None

class trinity.common.config.ExplorerConfig(name: str = 'explorer', runner_per_model: int = 8, max_timeout: int = 1800, max_retry_times: int = 2, env_vars: dict = <factory>, max_repeat_times_per_runner: int | None = None, runner_num: int | None = None, rollout_model: ~trinity.common.config.InferenceModelConfig = <factory>, auxiliary_models: ~typing.List[~trinity.common.config.InferenceModelConfig] = <factory>, eval_interval: int = 100, eval_on_startup: bool = True, bench_on_latest_checkpoint: bool = False, collect_experiences: bool = False)[source]

Bases: object

Config for explorer.

name: str = 'explorer'

runner_per_model: int = 8

max_timeout: int = 1800

max_retry_times: int = 2

env_vars: dict

max_repeat_times_per_runner: int | None = None

runner_num: int | None = None

rollout_model: InferenceModelConfig

auxiliary_models: List[InferenceModelConfig]

eval_interval: int = 100

eval_on_startup: bool = True

bench_on_latest_checkpoint: bool = False

collect_experiences: bool = False

__init__(name: str = 'explorer', runner_per_model: int = 8, max_timeout: int = 1800, max_retry_times: int = 2, env_vars: dict = <factory>, max_repeat_times_per_runner: int | None = None, runner_num: int | None = None, rollout_model: ~trinity.common.config.InferenceModelConfig = <factory>, auxiliary_models: ~typing.List[~trinity.common.config.InferenceModelConfig] = <factory>, eval_interval: int = 100, eval_on_startup: bool = True, bench_on_latest_checkpoint: bool = False, collect_experiences: bool = False) → None

class trinity.common.config.TrainerConfig(name: str = 'trainer', trainer_type: str = 'verl', save_interval: int = 0, enable_preview: bool = True, actor_grad_clip: Optional[float] = None, trainer_config: Any = <factory>, trainer_config_path: str = '')[source]

Bases: object

name: str = 'trainer'

trainer_type: str = 'verl'

save_interval: int = 0

enable_preview: bool = True

actor_grad_clip: float | None = None

trainer_config: Any

trainer_config_path: str = ''

__init__(name: str = 'trainer', trainer_type: str = 'verl', save_interval: int = 0, enable_preview: bool = True, actor_grad_clip: float | None = None, trainer_config: ~typing.Any = <factory>, trainer_config_path: str = '') → None

class trinity.common.config.MonitorConfig(monitor_type: str = 'tensorboard', monitor_args: Dict | None = None, enable_ray_timeline: bool = False, cache_dir: str = '')[source]

Bases: object

monitor_type: str = 'tensorboard'

monitor_args: Dict | None = None

enable_ray_timeline: bool = False

cache_dir: str = ''

__init__(monitor_type: str = 'tensorboard', monitor_args: Dict | None = None, enable_ray_timeline: bool = False, cache_dir: str = '') → None

class trinity.common.config.SynchronizerConfig(sync_method: SyncMethod = SyncMethod.NCCL, sync_style: SyncStyle = SyncStyle.FIXED, sync_interval: int = 1, sync_offset: int = 0, sync_timeout: int = 3600, wait_for_checkpoint: bool = False, explorer_world_size: int | None = None, ray_namespace: str = '')[source]

Bases: object

Configs for model weight synchronization.

sync_method: SyncMethod = 'nccl'

sync_style: SyncStyle = 'fixed'

sync_interval: int = 1

sync_offset: int = 0

sync_timeout: int = 3600

wait_for_checkpoint: bool = False

explorer_world_size: int | None = None

ray_namespace: str = ''

__init__(sync_method: SyncMethod = SyncMethod.NCCL, sync_style: SyncStyle = SyncStyle.FIXED, sync_interval: int = 1, sync_offset: int = 0, sync_timeout: int = 3600, wait_for_checkpoint: bool = False, explorer_world_size: int | None = None, ray_namespace: str = '') → None

class trinity.common.config.Config(mode: str = 'both', project: str = 'Trinity-RFT', group: str = '', name: str = 'rft', checkpoint_root_dir: str = '', checkpoint_job_dir: str = '', ray_namespace: str = '', continue_from_checkpoint: bool = True, algorithm: ~trinity.common.config.AlgorithmConfig = <factory>, data_processor: ~trinity.common.config.DataProcessorConfig = <factory>, model: ~trinity.common.config.ModelConfig = <factory>, cluster: ~trinity.common.config.ClusterConfig = <factory>, buffer: ~trinity.common.config.BufferConfig = <factory>, explorer: ~trinity.common.config.ExplorerConfig = <factory>, trainer: ~trinity.common.config.TrainerConfig = <factory>, monitor: ~trinity.common.config.MonitorConfig = <factory>, synchronizer: ~trinity.common.config.SynchronizerConfig = <factory>)[source]

Bases: object

Global Configuration

mode: str = 'both'

project: str = 'Trinity-RFT'

group: str = ''

name: str = 'rft'

checkpoint_root_dir: str = ''

checkpoint_job_dir: str = ''

ray_namespace: str = ''

continue_from_checkpoint: bool = True

algorithm: AlgorithmConfig

data_processor: DataProcessorConfig

model: ModelConfig

cluster: ClusterConfig

buffer: BufferConfig

explorer: ExplorerConfig

trainer: TrainerConfig

monitor: MonitorConfig

synchronizer: SynchronizerConfig

save(config_path: str) → None[source]: Save config to file.

check_and_update() → None[source]: Check and update the config.

flatten() → Dict[str, Any][source]: Flatten the config into a single-level dict with dot-separated keys for nested fields.

__init__(mode: str = 'both', project: str = 'Trinity-RFT', group: str = '', name: str = 'rft', checkpoint_root_dir: str = '', checkpoint_job_dir: str = '', ray_namespace: str = '', continue_from_checkpoint: bool = True, algorithm: ~trinity.common.config.AlgorithmConfig = <factory>, data_processor: ~trinity.common.config.DataProcessorConfig = <factory>, model: ~trinity.common.config.ModelConfig = <factory>, cluster: ~trinity.common.config.ClusterConfig = <factory>, buffer: ~trinity.common.config.BufferConfig = <factory>, explorer: ~trinity.common.config.ExplorerConfig = <factory>, trainer: ~trinity.common.config.TrainerConfig = <factory>, monitor: ~trinity.common.config.MonitorConfig = <factory>, synchronizer: ~trinity.common.config.SynchronizerConfig = <factory>) → None

trinity.common.config.load_config(config_path: str) → Config[source]: Load the configuration from the given path.

trinity.common.constants module

Constants.

class trinity.common.constants.CaseInsensitiveEnumMeta(cls, bases, classdict, **kwds)[source]: Bases: EnumMeta

class trinity.common.constants.CaseInsensitiveEnum(value, *args, **kwargs)[source]

Bases: Enum

An enumeration.

class trinity.common.constants.PromptType(value, *args, **kwargs)[source]

Bases: CaseInsensitiveEnum

Prompt Type.

MESSAGES = 'messages'

CHATPAIR = 'chatpair'

PLAINTEXT = 'plaintext'

class trinity.common.constants.TaskType(value)[source]

Bases: Enum

Task Type.

EXPLORE = 0

EVAL = 1

class trinity.common.constants.ReadStrategy(value, *args, **kwargs)[source]

Bases: CaseInsensitiveEnum

Read Strategy.

DEFAULT = None

FIFO = 'fifo'

RANDOM = 'random'

LRU = 'lru'

LFU = 'lfu'

PRIORITY = 'priority'

class trinity.common.constants.StorageType(value, *args, **kwargs)[source]

Bases: CaseInsensitiveEnum

Storage Type.

SQL = 'sql'

QUEUE = 'queue'

FILE = 'file'

class trinity.common.constants.MonitorType(value, *args, **kwargs)[source]

Bases: CaseInsensitiveEnum

Monitor Type.

WANDB = 'wandb'

TENSORBOARD = 'tensorboard'

class trinity.common.constants.SyncMethodEnumMeta(cls, bases, classdict, **kwds)[source]: Bases: CaseInsensitiveEnumMeta

class trinity.common.constants.SyncMethod(value, *args, **kwargs)[source]

Bases: CaseInsensitiveEnum

Sync Method.

NCCL = 'nccl'

CHECKPOINT = 'checkpoint'

MEMORY = 'memory'

class trinity.common.constants.RunningStatus(value)[source]

Bases: Enum

Running status of explorer and trainer.

RUNNING = 'running'

REQUIRE_SYNC = 'require_sync'

WAITING_SYNC = 'waiting_sync'

STOPPED = 'stopped'

class trinity.common.constants.DataProcessorPipelineType(value)[source]

Bases: Enum

Data processor pipeline type.

EXPERIENCE = 'experience_pipeline'

TASK = 'task_pipeline'

class trinity.common.constants.OpType(value)[source]

Bases: Enum

Operator type for reward shaping.

ADD = 'add'

SUB = 'sub'

MUL = 'mul'

DIV = 'div'

class trinity.common.constants.SyncStyle(value, *args, **kwargs)[source]

Bases: CaseInsensitiveEnum

Sync Style.

FIXED = 'fixed'

DYNAMIC_BY_TRAINER = 'dynamic_by_trainer'

DYNAMIC_BY_EXPLORER = 'dynamic_by_explorer'

trinity.common.experience module

Experience Class.

class trinity.common.experience.EID(batch: int = 0, task: int = 0, run: int = 0, step: int = 0, suffix: str = <factory>)[source]

Bases: object

Experience ID class to uniquely identify an experience.

To enable the full functionality of experience grouping, users should manually set the run and step fields in custom workflows.

batch: int = 0

task: int = 0

run: int = 0

step: int = 0

suffix: str

property uid: str: A unique identifier for the experience.

property sid: str

Step ID of the experience.

For example, experiences generated by all runs of the same task at the same step will have the same sid.

property rid: str

Run ID of the experience.

For example, experiences generated by one run of a task at all steps will have the same rid.

property tid: str

Task ID for the experience.

For example, experiences generated by all runs of the same task in GRPO-like algorithms will have the same tid.

to_dict() → dict[source]: Convert the EID to a dictionary.

__init__(batch: int = 0, task: int = 0, run: int = 0, step: int = 0, suffix: str = <factory>) → None

class trinity.common.experience.ExperienceType(value)[source]

Bases: Enum

Enum for experience types.

SINGLE_TURN = 'single_turn'

MULTI_TURN = 'multi_turn'

DPO = 'dpo'

class trinity.common.experience.CustomField(source_field: str, destination_field: str, data_type: dtype)[source]

Bases: object

Custom field for Experiences.

This is used to store additional information into the Experiences class.

source_field: str

destination_field: str

data_type: dtype

__init__(source_field: str, destination_field: str, data_type: dtype) → None

class trinity.common.experience.Experience(*, eid=None, tokens, logprobs=None, reward=None, advantages=None, returns=None, info=None, metrics=None, prompt_length=1, response_text=None, prompt_text=None, action_mask=None, messages=None, chosen=None, rejected=None, chosen_text=None, rejected_text=None)[source]

Bases: object

__init__(*, eid=None, tokens, logprobs=None, reward=None, advantages=None, returns=None, info=None, metrics=None, prompt_length=1, response_text=None, prompt_text=None, action_mask=None, messages=None, chosen=None, rejected=None, chosen_text=None, rejected_text=None)[source]

eid: EID

reward: float | None = None

advantages: Tensor | None = None

returns: Tensor | None = None

experience_type: ExperienceType = 'single_turn'

info: dict

metrics: dict[str, float]

prompt_length: int = 1

response_text: str | None = None

prompt_text: str | None = None

messages: List[dict] | None = None

chosen_text: str | None = None

rejected_text: str | None = None

tokens: Tensor | None = None

logprobs: Tensor | None = None

action_mask: Tensor | None = None

chosen: Tensor | None = None

rejected: Tensor | None = None

serialize() → bytes[source]: Serialize the experience to bytes.

classmethod deserialize(data: bytes) → Experience[source]

to_dict() → dict[source]: Convert the experience to a dictionary.

classmethod gather(experiences: List[Experience], pad_token_id: int = 0, custom_fields: List[CustomField] | None = None) → Experiences[source]

trinity.common.experience.split_dpo_experience_to_single_turn(experiences: List[Experience]) → List[Experience][source]

class trinity.common.experience.Experiences(eids: ~typing.List[~trinity.common.experience.EID], tokens: ~torch.Tensor, rewards: ~torch.Tensor, advantages: ~torch.Tensor | None, returns: ~torch.Tensor | None, attention_masks: ~torch.Tensor, action_masks: ~torch.Tensor | None, prompt_length: int, logprobs: ~torch.Tensor | None, custom_fields: ~typing.List[str] = <factory>)[source]

Bases: object

A container for a batch of experiences, used for high-performance communication.

Example

>>>             |<- prompt_length ->|               |
>>> tokens: ('P' represents prompt, 'O' represents output)
>>> exp1:       |........PPPPPPPPPPP|OOOOOOOOOO.....|
>>> exp2:       |......PPPPPPPPPPPPP|OOOOOOO........|
>>>
>>> attention_masks: ('.' represents False and '1' represents True)
>>> exp1:       |........11111111111|1111111111.....|
>>> exp2:       |......1111111111111|1111111........|

__init__(eids: ~typing.List[~trinity.common.experience.EID], tokens: ~torch.Tensor, rewards: ~torch.Tensor, advantages: ~torch.Tensor | None, returns: ~torch.Tensor | None, attention_masks: ~torch.Tensor, action_masks: ~torch.Tensor | None, prompt_length: int, logprobs: ~torch.Tensor | None, custom_fields: ~typing.List[str] = <factory>) → None

eids: List[EID]

tokens: Tensor

rewards: Tensor

advantages: Tensor | None

returns: Tensor | None

attention_masks: Tensor

action_masks: Tensor | None

prompt_length: int

logprobs: Tensor | None

custom_fields: List[str]

property batch_size: int: Get the batch size.

classmethod gather_experiences(experiences: list[Experience], pad_token_id: int = 0, custom_fields: List[CustomField] | None = None) → Experiences[source]

Gather a batch of experiences from a list of experiences.

This method will automatically pad the tokens and logprobs of input experiences to the same length.

Parameters:

experiences (list[Experience]) – A list of experiences to gather.
pad_token_id (int) – The token ID to use for padding. Default is 0.
custom_fields (Optional[List[CustomField]]) – Custom fields to include in the gathered experiences.

trinity.common.experience.empty_experiences(custom_fields: List[CustomField] | None) → Experiences[source]

trinity.common.experience.gather_token_ids(experiences, max_prompt_length: int, max_response_length: int, pad_token_id: int) → Tensor[source]

trinity.common.experience.gather_action_masks(experiences, max_response_length: int) → Tensor[source]

trinity.common.experience.gather_attention_masks(experiences, max_prompt_length: int, max_response_length: int) → Tensor[source]

trinity.common.experience.gather_logprobs(experiences, max_response_length: int) → Tensor[source]

trinity.common.experience.gather_advantages(experiences, max_response_length: int) → Tensor | None[source]

trinity.common.experience.gather_returns(experiences, max_response_length: int) → Tensor | None[source]

trinity.common.verl_config module

class trinity.common.verl_config.Data(train_batch_size: int = 1024)[source]

Bases: object

train_batch_size: int = 1024

__init__(train_batch_size: int = 1024) → None

class trinity.common.verl_config.FusedKernelOptions(impl_backend: str | None = None)[source]

Bases: object

impl_backend: str | None = None

__init__(impl_backend: str | None = None) → None

class trinity.common.verl_config.ActorModel(path: str = '', external_lib: Optional[str] = None, override_config: Dict[str, Any] = <factory>, enable_gradient_checkpointing: bool = True, use_remove_padding: bool = False, use_fused_kernels: bool = False, fused_kernel_options: trinity.common.verl_config.FusedKernelOptions = <factory>, custom_chat_template: Optional[str] = None)[source]

Bases: object

path: str = ''

external_lib: str | None = None

override_config: Dict[str, Any]

enable_gradient_checkpointing: bool = True

use_remove_padding: bool = False

use_fused_kernels: bool = False

fused_kernel_options: FusedKernelOptions

custom_chat_template: str | None = None

__init__(path: str = '', external_lib: str | None = None, override_config: ~typing.Dict[str, ~typing.Any] = <factory>, enable_gradient_checkpointing: bool = True, use_remove_padding: bool = False, use_fused_kernels: bool = False, fused_kernel_options: ~trinity.common.verl_config.FusedKernelOptions = <factory>, custom_chat_template: str | None = None) → None

class trinity.common.verl_config.Optim(lr: float = 1e-06, lr_warmup_steps: int = -1, lr_warmup_steps_ratio: float = 0.0, min_lr_ratio: Optional[float] = 0.0, warmup_style: str = 'constant', total_training_steps: int = -1, betas: List[float] = <factory>)[source]

Bases: object

lr: float = 1e-06

lr_warmup_steps: int = -1

lr_warmup_steps_ratio: float = 0.0

min_lr_ratio: float | None = 0.0

warmup_style: str = 'constant'

total_training_steps: int = -1

betas: List[float]

__init__(lr: float = 1e-06, lr_warmup_steps: int = -1, lr_warmup_steps_ratio: float = 0.0, min_lr_ratio: float | None = 0.0, warmup_style: str = 'constant', total_training_steps: int = -1, betas: ~typing.List[float] = <factory>) → None

class trinity.common.verl_config.WrapPolicy(min_num_params: int = 0)[source]

Bases: object

min_num_params: int = 0

__init__(min_num_params: int = 0) → None

class trinity.common.verl_config.FSDPConfig(wrap_policy: trinity.common.verl_config.WrapPolicy = <factory>, min_num_params: int = 0, param_offload: bool = False, optimizer_offload: bool = False, fsdp_size: int = -1, forward_prefetch: bool = False)[source]

Bases: object

wrap_policy: WrapPolicy

min_num_params: int = 0

param_offload: bool = False

optimizer_offload: bool = False

fsdp_size: int = -1

forward_prefetch: bool = False

__init__(wrap_policy: ~trinity.common.verl_config.WrapPolicy = <factory>, min_num_params: int = 0, param_offload: bool = False, optimizer_offload: bool = False, fsdp_size: int = -1, forward_prefetch: bool = False) → None

class trinity.common.verl_config.Checkpoint(load_contents: List[str] = <factory>, save_contents: List[str] = <factory>)[source]

Bases: object

load_contents: List[str]

save_contents: List[str]

__init__(load_contents: ~typing.List[str] = <factory>, save_contents: ~typing.List[str] = <factory>) → None

class trinity.common.verl_config.Actor(strategy: str = 'fsdp', ppo_mini_batch_size: int = 256, ppo_micro_batch_size: Optional[int] = None, ppo_micro_batch_size_per_gpu: int = 1, use_dynamic_bsz: bool = False, ppo_max_token_len_per_gpu: int = 16384, grad_clip: float = 1.0, ppo_epochs: int = 1, shuffle: bool = False, ulysses_sequence_parallel_size: int = 1, entropy_from_logits_with_chunking: bool = False, entropy_checkpointing: bool = False, checkpoint: trinity.common.verl_config.Checkpoint = <factory>, optim: trinity.common.verl_config.Optim = <factory>, fsdp_config: trinity.common.verl_config.FSDPConfig = <factory>, loss_agg_mode: str = 'token-mean', clip_ratio: float = 0.2, entropy_coeff: float = 0.001, use_kl_loss: bool = False, kl_loss_coef: float = 0.001, kl_loss_type: str = 'low_var_kl')[source]

Bases: object

strategy: str = 'fsdp'

ppo_mini_batch_size: int = 256

ppo_micro_batch_size: int | None = None

ppo_micro_batch_size_per_gpu: int = 1

use_dynamic_bsz: bool = False

ppo_max_token_len_per_gpu: int = 16384

grad_clip: float = 1.0

ppo_epochs: int = 1

shuffle: bool = False

ulysses_sequence_parallel_size: int = 1

entropy_from_logits_with_chunking: bool = False

entropy_checkpointing: bool = False

checkpoint: Checkpoint

optim: Optim

fsdp_config: FSDPConfig

loss_agg_mode: str = 'token-mean'

clip_ratio: float = 0.2

entropy_coeff: float = 0.001

use_kl_loss: bool = False

kl_loss_coef: float = 0.001

kl_loss_type: str = 'low_var_kl'

__init__(strategy: str = 'fsdp', ppo_mini_batch_size: int = 256, ppo_micro_batch_size: int | None = None, ppo_micro_batch_size_per_gpu: int = 1, use_dynamic_bsz: bool = False, ppo_max_token_len_per_gpu: int = 16384, grad_clip: float = 1.0, ppo_epochs: int = 1, shuffle: bool = False, ulysses_sequence_parallel_size: int = 1, entropy_from_logits_with_chunking: bool = False, entropy_checkpointing: bool = False, checkpoint: ~trinity.common.verl_config.Checkpoint = <factory>, optim: ~trinity.common.verl_config.Optim = <factory>, fsdp_config: ~trinity.common.verl_config.FSDPConfig = <factory>, loss_agg_mode: str = 'token-mean', clip_ratio: float = 0.2, entropy_coeff: float = 0.001, use_kl_loss: bool = False, kl_loss_coef: float = 0.001, kl_loss_type: str = 'low_var_kl') → None

class trinity.common.verl_config.Ref(fsdp_config: trinity.common.verl_config.FSDPConfig = <factory>, log_prob_micro_batch_size: Optional[int] = None, log_prob_micro_batch_size_per_gpu: int = 1, log_prob_use_dynamic_bsz: bool = False, log_prob_max_token_len_per_gpu: int = 0, ulysses_sequence_parallel_size: int = 1, entropy_from_logits_with_chunking: bool = False, entropy_checkpointing: bool = False, checkpoint: trinity.common.verl_config.Checkpoint = <factory>)[source]

Bases: object

fsdp_config: FSDPConfig

log_prob_micro_batch_size: int | None = None

log_prob_micro_batch_size_per_gpu: int = 1

log_prob_use_dynamic_bsz: bool = False

log_prob_max_token_len_per_gpu: int = 0

ulysses_sequence_parallel_size: int = 1

entropy_from_logits_with_chunking: bool = False

entropy_checkpointing: bool = False

checkpoint: Checkpoint

__init__(fsdp_config: ~trinity.common.verl_config.FSDPConfig = <factory>, log_prob_micro_batch_size: int | None = None, log_prob_micro_batch_size_per_gpu: int = 1, log_prob_use_dynamic_bsz: bool = False, log_prob_max_token_len_per_gpu: int = 0, ulysses_sequence_parallel_size: int = 1, entropy_from_logits_with_chunking: bool = False, entropy_checkpointing: bool = False, checkpoint: ~trinity.common.verl_config.Checkpoint = <factory>) → None

class trinity.common.verl_config.Rollout(val_kwargs: trinity.common.verl_config._ValKwargs = <factory>, multi_turn: trinity.common.verl_config._MultiTurn = <factory>, temperature: float = 1.0, n: int = 1, log_prob_micro_batch_size: Optional[int] = None, log_prob_micro_batch_size_per_gpu: int = 1)[source]

Bases: object

val_kwargs: _ValKwargs

multi_turn: _MultiTurn

temperature: float = 1.0

n: int = 1

log_prob_micro_batch_size: int | None = None

log_prob_micro_batch_size_per_gpu: int = 1

__init__(val_kwargs: ~trinity.common.verl_config._ValKwargs = <factory>, multi_turn: ~trinity.common.verl_config._MultiTurn = <factory>, temperature: float = 1.0, n: int = 1, log_prob_micro_batch_size: int | None = None, log_prob_micro_batch_size_per_gpu: int = 1) → None

class trinity.common.verl_config.ActorRolloutRef(hybrid_engine: bool = True, model: trinity.common.verl_config.ActorModel = <factory>, actor: trinity.common.verl_config.Actor = <factory>, ref: trinity.common.verl_config.Ref = <factory>, rollout: trinity.common.verl_config.Rollout = <factory>, synchronizer: Optional[trinity.common.config.SynchronizerConfig] = None, explorer_name: str = 'explorer')[source]

Bases: object

hybrid_engine: bool = True

model: ActorModel

actor: Actor

ref: Ref

rollout: Rollout

synchronizer: SynchronizerConfig | None = None

explorer_name: str = 'explorer'

__init__(hybrid_engine: bool = True, model: ~trinity.common.verl_config.ActorModel = <factory>, actor: ~trinity.common.verl_config.Actor = <factory>, ref: ~trinity.common.verl_config.Ref = <factory>, rollout: ~trinity.common.verl_config.Rollout = <factory>, synchronizer: ~trinity.common.config.SynchronizerConfig | None = None, explorer_name: str = 'explorer') → None

class trinity.common.verl_config.CriticModel(path: str = '', tokenizer_path: str = '', override_config: Dict[str, str] = <factory>, external_lib: Optional[str] = None, enable_gradient_checkpointing: bool = True, use_remove_padding: bool = False, fsdp_config: trinity.common.verl_config.FSDPConfig = <factory>)[source]

Bases: object

path: str = ''

tokenizer_path: str = ''

override_config: Dict[str, str]

external_lib: str | None = None

enable_gradient_checkpointing: bool = True

use_remove_padding: bool = False

fsdp_config: FSDPConfig

__init__(path: str = '', tokenizer_path: str = '', override_config: ~typing.Dict[str, str] = <factory>, external_lib: str | None = None, enable_gradient_checkpointing: bool = True, use_remove_padding: bool = False, fsdp_config: ~trinity.common.verl_config.FSDPConfig = <factory>) → None

class trinity.common.verl_config.Critic(strategy: str = 'fsdp', optim: trinity.common.verl_config.Optim = <factory>, model: trinity.common.verl_config.CriticModel = <factory>, ppo_mini_batch_size: int = 0, ppo_micro_batch_size: Optional[int] = None, ppo_micro_batch_size_per_gpu: int = 1, forward_micro_batch_size: Optional[int] = None, forward_micro_batch_size_per_gpu: Optional[int] = None, use_dynamic_bsz: bool = False, ppo_max_token_len_per_gpu: int = 0, forward_max_token_len_per_gpu: int = 0, ulysses_sequence_parallel_size: int = 1, ppo_epochs: int = 0, shuffle: bool = False, grad_clip: float = 0.0, cliprange_value: float = 0.0, checkpoint: trinity.common.verl_config.Checkpoint = <factory>, rollout_n: int = 1, loss_agg_mode: str = 'token-mean')[source]

Bases: object

strategy: str = 'fsdp'

optim: Optim

model: CriticModel

ppo_mini_batch_size: int = 0

ppo_micro_batch_size: int | None = None

ppo_micro_batch_size_per_gpu: int = 1

forward_micro_batch_size: int | None = None

forward_micro_batch_size_per_gpu: int | None = None

use_dynamic_bsz: bool = False

ppo_max_token_len_per_gpu: int = 0

forward_max_token_len_per_gpu: int = 0

ulysses_sequence_parallel_size: int = 1

ppo_epochs: int = 0

shuffle: bool = False

grad_clip: float = 0.0

cliprange_value: float = 0.0

checkpoint: Checkpoint

rollout_n: int = 1

loss_agg_mode: str = 'token-mean'

__init__(strategy: str = 'fsdp', optim: ~trinity.common.verl_config.Optim = <factory>, model: ~trinity.common.verl_config.CriticModel = <factory>, ppo_mini_batch_size: int = 0, ppo_micro_batch_size: int | None = None, ppo_micro_batch_size_per_gpu: int = 1, forward_micro_batch_size: int | None = None, forward_micro_batch_size_per_gpu: int | None = None, use_dynamic_bsz: bool = False, ppo_max_token_len_per_gpu: int = 0, forward_max_token_len_per_gpu: int = 0, ulysses_sequence_parallel_size: int = 1, ppo_epochs: int = 0, shuffle: bool = False, grad_clip: float = 0.0, cliprange_value: float = 0.0, checkpoint: ~trinity.common.verl_config.Checkpoint = <factory>, rollout_n: int = 1, loss_agg_mode: str = 'token-mean') → None

class trinity.common.verl_config.RewardModel(enable: bool = False, strategy: str = 'fsdp', model: trinity.common.verl_config._RewardModel = <factory>, micro_batch_size_per_gpu: int = 1, max_length: Optional[int] = None, ulysses_sequence_parallel_size: int = 1, use_dynamic_bsz: bool = False, forward_max_token_len_per_gpu: int = 0, reward_manager: str = 'naive')[source]

Bases: object

enable: bool = False

strategy: str = 'fsdp'

model: _RewardModel

micro_batch_size_per_gpu: int = 1

max_length: int | None = None

ulysses_sequence_parallel_size: int = 1

use_dynamic_bsz: bool = False

forward_max_token_len_per_gpu: int = 0

reward_manager: str = 'naive'

__init__(enable: bool = False, strategy: str = 'fsdp', model: ~trinity.common.verl_config._RewardModel = <factory>, micro_batch_size_per_gpu: int = 1, max_length: int | None = None, ulysses_sequence_parallel_size: int = 1, use_dynamic_bsz: bool = False, forward_max_token_len_per_gpu: int = 0, reward_manager: str = 'naive') → None

class trinity.common.verl_config.CustomRewardFunction(path: str | None = None, name: str = 'compute_score')[source]

Bases: object

path: str | None = None

name: str = 'compute_score'

__init__(path: str | None = None, name: str = 'compute_score') → None

class trinity.common.verl_config.KL_Ctrl(type: str = 'fixed', kl_coef: float = 0.001, horizon: float = 10000, target_kl: float = 0.1)[source]

Bases: object

type: str = 'fixed'

kl_coef: float = 0.001

horizon: float = 10000

target_kl: float = 0.1

__init__(type: str = 'fixed', kl_coef: float = 0.001, horizon: float = 10000, target_kl: float = 0.1) → None

class trinity.common.verl_config.Algorithm(gamma: float = 1.0, lam: float = 1.0, adv_estimator: str = 'gae', norm_adv_by_std_in_grpo: bool = True, use_kl_in_reward: bool = False, kl_penalty: str = 'kl', kl_ctrl: trinity.common.verl_config.KL_Ctrl = <factory>)[source]

Bases: object

gamma: float = 1.0

lam: float = 1.0

adv_estimator: str = 'gae'

norm_adv_by_std_in_grpo: bool = True

use_kl_in_reward: bool = False

kl_penalty: str = 'kl'

kl_ctrl: KL_Ctrl

__init__(gamma: float = 1.0, lam: float = 1.0, adv_estimator: str = 'gae', norm_adv_by_std_in_grpo: bool = True, use_kl_in_reward: bool = False, kl_penalty: str = 'kl', kl_ctrl: ~trinity.common.verl_config.KL_Ctrl = <factory>) → None

class trinity.common.verl_config.Trainer(balance_batch: bool = True, total_epochs: int = 30, total_training_steps: Optional[int] = None, project_name: str = '', group_name: str = '', experiment_name: str = '', logger: List[str] = <factory>, val_generations_to_log_to_wandb: int = 0, nnodes: int = 0, n_gpus_per_node: int = 0, save_freq: int = 0, resume_mode: str = 'auto', resume_from_path: str = '', test_freq: int = 0, critic_warmup: int = 0, default_hdfs_dir: Optional[str] = None, remove_previous_ckpt_in_save: bool = False, del_local_ckpt_after_load: bool = False, default_local_dir: str = '', val_before_train: bool = False, training_rollout_mode: str = 'parallel', enable_exp_buffer: bool = True, sync_freq: int = 0, sft_warmup_steps: int = 0, max_actor_ckpt_to_keep: Optional[int] = None, max_critic_ckpt_to_keep: Optional[int] = None)[source]

Bases: object

balance_batch: bool = True

total_epochs: int = 30

total_training_steps: int | None = None

project_name: str = ''

group_name: str = ''

experiment_name: str = ''

logger: List[str]

val_generations_to_log_to_wandb: int = 0

nnodes: int = 0

n_gpus_per_node: int = 0

save_freq: int = 0

resume_mode: str = 'auto'

resume_from_path: str = ''

test_freq: int = 0

critic_warmup: int = 0

default_hdfs_dir: str | None = None

remove_previous_ckpt_in_save: bool = False

del_local_ckpt_after_load: bool = False

default_local_dir: str = ''

val_before_train: bool = False

training_rollout_mode: str = 'parallel'

enable_exp_buffer: bool = True

sync_freq: int = 0

sft_warmup_steps: int = 0

max_actor_ckpt_to_keep: int | None = None

max_critic_ckpt_to_keep: int | None = None

__init__(balance_batch: bool = True, total_epochs: int = 30, total_training_steps: int | None = None, project_name: str = '', group_name: str = '', experiment_name: str = '', logger: ~typing.List[str] = <factory>, val_generations_to_log_to_wandb: int = 0, nnodes: int = 0, n_gpus_per_node: int = 0, save_freq: int = 0, resume_mode: str = 'auto', resume_from_path: str = '', test_freq: int = 0, critic_warmup: int = 0, default_hdfs_dir: str | None = None, remove_previous_ckpt_in_save: bool = False, del_local_ckpt_after_load: bool = False, default_local_dir: str = '', val_before_train: bool = False, training_rollout_mode: str = 'parallel', enable_exp_buffer: bool = True, sync_freq: int = 0, sft_warmup_steps: int = 0, max_actor_ckpt_to_keep: int | None = None, max_critic_ckpt_to_keep: int | None = None) → None

class trinity.common.verl_config.veRLConfig(data: trinity.common.verl_config.Data = <factory>, actor_rollout_ref: trinity.common.verl_config.ActorRolloutRef = <factory>, critic: trinity.common.verl_config.Critic = <factory>, reward_model: trinity.common.verl_config.RewardModel = <factory>, custom_reward_function: trinity.common.verl_config.CustomRewardFunction = <factory>, algorithm: trinity.common.verl_config.Algorithm = <factory>, trainer: trinity.common.verl_config.Trainer = <factory>, buffer: trinity.common.config.BufferConfig = <factory>, synchronizer: Optional[trinity.common.config.SynchronizerConfig] = None, enable_preview: bool = True)[source]

Bases: object

data: Data

actor_rollout_ref: ActorRolloutRef

critic: Critic

reward_model: RewardModel

custom_reward_function: CustomRewardFunction

algorithm: Algorithm

trainer: Trainer

buffer: BufferConfig

synchronizer: SynchronizerConfig | None = None

enable_preview: bool = True

synchronize_config(config: Config) → None[source]

Synchronize config.

__init__(data: ~trinity.common.verl_config.Data = <factory>, actor_rollout_ref: ~trinity.common.verl_config.ActorRolloutRef = <factory>, critic: ~trinity.common.verl_config.Critic = <factory>, reward_model: ~trinity.common.verl_config.RewardModel = <factory>, custom_reward_function: ~trinity.common.verl_config.CustomRewardFunction = <factory>, algorithm: ~trinity.common.verl_config.Algorithm = <factory>, trainer: ~trinity.common.verl_config.Trainer = <factory>, buffer: ~trinity.common.config.BufferConfig = <factory>, synchronizer: ~trinity.common.config.SynchronizerConfig | None = None, enable_preview: bool = True) → None

trinity.common.verl_config.load_config(config_path: str) → veRLConfig[source]

trinity.common

Subpackages

Submodules

trinity.common.config module

trinity.common.constants module

trinity.common.experience module

trinity.common.verl_config module

Module contents