trinity.common

Subpackages

Submodules

trinity.common.config module

Configs for RFT.

class trinity.common.config.FormatConfig(prompt_type: PromptType = PromptType.MESSAGES, prompt_key: str = 'prompt', response_key: str = 'response', messages_key: str = 'message', chat_template: str = '', system_prompt: str | None = None, reply_prefix: str | None = None, reward_fn_key: str = '', workflow_key: str = '', solution_key: str = '', reward_key: str = '', chosen_key: str = 'chosen', rejected_key: str = 'rejected', label_key: str = '')[source]

Bases: object

Configuration for data formatting.

prompt_type: PromptType = 'messages'
prompt_key: str = 'prompt'
response_key: str = 'response'
messages_key: str = 'message'
chat_template: str = ''
system_prompt: str | None = None
reply_prefix: str | None = None
reward_fn_key: str = ''
workflow_key: str = ''
solution_key: str = ''
reward_key: str = ''
chosen_key: str = 'chosen'
rejected_key: str = 'rejected'
label_key: str = ''
__init__(prompt_type: PromptType = PromptType.MESSAGES, prompt_key: str = 'prompt', response_key: str = 'response', messages_key: str = 'message', chat_template: str = '', system_prompt: str | None = None, reply_prefix: str | None = None, reward_fn_key: str = '', workflow_key: str = '', solution_key: str = '', reward_key: str = '', chosen_key: str = 'chosen', rejected_key: str = 'rejected', label_key: str = '') None
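
Example (a minimal sketch; the messages key and system prompt below are illustrative values, not defaults):

>>> from trinity.common.config import FormatConfig
>>> from trinity.common.constants import PromptType
>>> fmt = FormatConfig(
...     prompt_type=PromptType.MESSAGES,
...     messages_key="conversations",
...     system_prompt="You are a helpful assistant.",
... )
>>> fmt.messages_key
'conversations'
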
class trinity.common.config.GenerationConfig(temperature: float = 1.0, top_p: float = 1.0, top_k: int = -1, logprobs: int = 0, n: int = 1)[source]

Bases: object

temperature: float = 1.0
top_p: float = 1.0
top_k: int = -1
logprobs: int = 0
n: int = 1
__init__(temperature: float = 1.0, top_p: float = 1.0, top_k: int = -1, logprobs: int = 0, n: int = 1) None
class trinity.common.config.StorageConfig(name: str = '', storage_type: ~trinity.common.constants.StorageType = StorageType.FILE, path: str | None = None, split: str = 'train', subset_name: str | None = None, format: ~trinity.common.config.FormatConfig = <factory>, index: int = 0, default_workflow_type: str | None = None, default_reward_fn_type: str | None = None, rollout_args: ~trinity.common.config.GenerationConfig = <factory>, algorithm_type: ~trinity.common.constants.AlgorithmType | None = None, total_epochs: int = 1, task_type: ~trinity.common.constants.TaskType = TaskType.EXPLORE)[source]

Bases: object

Storage config.

name: str = ''
storage_type: StorageType = 'file'
path: str | None = None
split: str = 'train'
subset_name: str | None = None
format: FormatConfig
index: int = 0
default_workflow_type: str | None = None
default_reward_fn_type: str | None = None
rollout_args: GenerationConfig
algorithm_type: AlgorithmType | None = None
total_epochs: int = 1
task_type: TaskType = 0
__init__(name: str = '', storage_type: ~trinity.common.constants.StorageType = StorageType.FILE, path: str | None = None, split: str = 'train', subset_name: str | None = None, format: ~trinity.common.config.FormatConfig = <factory>, index: int = 0, default_workflow_type: str | None = None, default_reward_fn_type: str | None = None, rollout_args: ~trinity.common.config.GenerationConfig = <factory>, algorithm_type: ~trinity.common.constants.AlgorithmType | None = None, total_epochs: int = 1, task_type: ~trinity.common.constants.TaskType = TaskType.EXPLORE) None
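
Example (a sketch with illustrative values; the dataset name and path are hypothetical):

>>> from trinity.common.config import FormatConfig, GenerationConfig, StorageConfig
>>> from trinity.common.constants import StorageType
>>> taskset = StorageConfig(
...     name="gsm8k",
...     storage_type=StorageType.FILE,
...     path="/path/to/gsm8k",
...     split="train",
...     format=FormatConfig(prompt_key="question", response_key="answer"),
...     rollout_args=GenerationConfig(temperature=1.0, n=8),
... )
>>> taskset.rollout_args.n
8
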
class trinity.common.config.DataProcessorConfig(data_workflow_url: str | None = None, source_data_path: str = '', format: ~trinity.common.config.FormatConfig = <factory>, load_kwargs: ~typing.Dict[str, ~typing.Any] = <factory>, dj_config_path: str | None = None, dj_process_desc: str | None = None, agent_model_name: str | None = None, agent_model_config: ~typing.Dict[str, ~typing.Any] | None = None, clean_strategy: str = 'iterative', min_size_ratio: float | None = None, min_priority_score: float | None = 0.0, priority_weights: ~typing.Dict[str, float] | None = None, data_dist: str | None = 'gaussian', db_url: str = '', max_retry_times: int = 3, max_retry_interval: int = 1)[source]

Bases: object

Data-Juicer data processor config.

data_workflow_url: str | None = None
source_data_path: str = ''
format: FormatConfig
load_kwargs: Dict[str, Any]
dj_config_path: str | None = None
dj_process_desc: str | None = None
agent_model_name: str | None = None
agent_model_config: Dict[str, Any] | None = None
clean_strategy: str = 'iterative'
min_size_ratio: float | None = None
min_priority_score: float | None = 0.0
priority_weights: Dict[str, float] | None = None
data_dist: str | None = 'gaussian'
db_url: str = ''
max_retry_times: int = 3
max_retry_interval: int = 1
__init__(data_workflow_url: str | None = None, source_data_path: str = '', format: ~trinity.common.config.FormatConfig = <factory>, load_kwargs: ~typing.Dict[str, ~typing.Any] = <factory>, dj_config_path: str | None = None, dj_process_desc: str | None = None, agent_model_name: str | None = None, agent_model_config: ~typing.Dict[str, ~typing.Any] | None = None, clean_strategy: str = 'iterative', min_size_ratio: float | None = None, min_priority_score: float | None = 0.0, priority_weights: ~typing.Dict[str, float] | None = None, data_dist: str | None = 'gaussian', db_url: str = '', max_retry_times: int = 3, max_retry_interval: int = 1) None
class trinity.common.config.ModelConfig(model_path: str = '', critic_model_path: str = '', max_prompt_tokens: int | None = None, max_response_tokens: int | None = None)[source]

Bases: object

model_path: str = ''
critic_model_path: str = ''
max_prompt_tokens: int | None = None
max_response_tokens: int | None = None
__init__(model_path: str = '', critic_model_path: str = '', max_prompt_tokens: int | None = None, max_response_tokens: int | None = None) None
class trinity.common.config.InferenceModelConfig(model_path: str = '', engine_type: str = 'vllm_async', engine_num: int = 1, tensor_parallel_size: int = 1, use_v1: bool = True, enforce_eager: bool = True, enable_prefix_caching: bool = False, enable_chunked_prefill: bool = False, gpu_memory_utilization: float = 0.9, dtype: str = 'bfloat16', seed: int = 42, max_prompt_tokens: int | None = None, max_response_tokens: int | None = None, chat_template: str | None = None, enable_thinking: bool = False, enable_openai_api: bool = False, bundle_indices: str = '')[source]

Bases: object

model_path: str = ''
engine_type: str = 'vllm_async'
engine_num: int = 1
tensor_parallel_size: int = 1
use_v1: bool = True
enforce_eager: bool = True
enable_prefix_caching: bool = False
enable_chunked_prefill: bool = False
gpu_memory_utilization: float = 0.9
dtype: str = 'bfloat16'
seed: int = 42
max_prompt_tokens: int | None = None
max_response_tokens: int | None = None
chat_template: str | None = None
enable_thinking: bool = False
enable_openai_api: bool = False
bundle_indices: str = ''
__init__(model_path: str = '', engine_type: str = 'vllm_async', engine_num: int = 1, tensor_parallel_size: int = 1, use_v1: bool = True, enforce_eager: bool = True, enable_prefix_caching: bool = False, enable_chunked_prefill: bool = False, gpu_memory_utilization: float = 0.9, dtype: str = 'bfloat16', seed: int = 42, max_prompt_tokens: int | None = None, max_response_tokens: int | None = None, chat_template: str | None = None, enable_thinking: bool = False, enable_openai_api: bool = False, bundle_indices: str = '') None
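
Example (a sketch; the model path is hypothetical, and the last line simply multiplies the two parallelism fields):

>>> from trinity.common.config import InferenceModelConfig, ModelConfig
>>> model = ModelConfig(model_path="/path/to/Qwen2.5-7B-Instruct", max_response_tokens=2048)
>>> rollout_model = InferenceModelConfig(
...     model_path=model.model_path,
...     engine_type="vllm_async",
...     engine_num=2,
...     tensor_parallel_size=2,
...     gpu_memory_utilization=0.85,
... )
>>> rollout_model.engine_num * rollout_model.tensor_parallel_size
4
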
class trinity.common.config.AlgorithmConfig(algorithm_type: AlgorithmType = AlgorithmType.PPO, repeat_times: int = 1, gamma: float | None = None, lam: float | None = None)[source]

Bases: object

Config for algorithm.

algorithm_type: AlgorithmType = 'ppo'
repeat_times: int = 1
gamma: float | None = None
lam: float | None = None
__init__(algorithm_type: AlgorithmType = AlgorithmType.PPO, repeat_times: int = 1, gamma: float | None = None, lam: float | None = None) None
class trinity.common.config.ClusterConfig(node_num: int = 1, gpu_per_node: int = 8)[source]

Bases: object

Config for the cluster.

node_num: int = 1
gpu_per_node: int = 8
__init__(node_num: int = 1, gpu_per_node: int = 8) None
class trinity.common.config.ExplorerInput(taskset: ~trinity.common.config.StorageConfig = <factory>, eval_tasksets: ~typing.List[~trinity.common.config.StorageConfig] = <factory>, default_workflow_type: str | None = None, default_reward_fn_type: str | None = None, system_prompt: str | None = None, reply_prefix: str | None = None)[source]

Bases: object

Config for explorer input.

taskset: StorageConfig
eval_tasksets: List[StorageConfig]
default_workflow_type: str | None = None
default_reward_fn_type: str | None = None
system_prompt: str | None = None
reply_prefix: str | None = None
__init__(taskset: ~trinity.common.config.StorageConfig = <factory>, eval_tasksets: ~typing.List[~trinity.common.config.StorageConfig] = <factory>, default_workflow_type: str | None = None, default_reward_fn_type: str | None = None, system_prompt: str | None = None, reply_prefix: str | None = None) None
class trinity.common.config.TrainerInput(experience_buffer: StorageConfig | None = None, sft_warmup_dataset: StorageConfig | None = None, read_experience_strategy: ReadStrategy | None = None, sft_warmup_steps: int = 0)[source]

Bases: object

Config for trainer input.

experience_buffer: StorageConfig | None = None
sft_warmup_dataset: StorageConfig | None = None
read_experience_strategy: ReadStrategy | None = None
sft_warmup_steps: int = 0
__init__(experience_buffer: StorageConfig | None = None, sft_warmup_dataset: StorageConfig | None = None, read_experience_strategy: ReadStrategy | None = None, sft_warmup_steps: int = 0) None
class trinity.common.config.BufferConfig(batch_size: int = 1, total_epochs: int = 1, explorer_input: ~trinity.common.config.ExplorerInput = <factory>, explorer_output: ~trinity.common.config.StorageConfig | None = None, trainer_input: ~trinity.common.config.TrainerInput = <factory>, max_retry_times: int = 3, max_retry_interval: int = 1, read_batch_size: int = 1, tokenizer_path: str | None = None, pad_token_id: int | None = None)[source]

Bases: object

Config for buffer.

batch_size: int = 1
total_epochs: int = 1
explorer_input: ExplorerInput
explorer_output: StorageConfig | None = None
trainer_input: TrainerInput
max_retry_times: int = 3
max_retry_interval: int = 1
read_batch_size: int = 1
tokenizer_path: str | None = None
pad_token_id: int | None = None
__init__(batch_size: int = 1, total_epochs: int = 1, explorer_input: ~trinity.common.config.ExplorerInput = <factory>, explorer_output: ~trinity.common.config.StorageConfig | None = None, trainer_input: ~trinity.common.config.TrainerInput = <factory>, max_retry_times: int = 3, max_retry_interval: int = 1, read_batch_size: int = 1, tokenizer_path: str | None = None, pad_token_id: int | None = None) None
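
Example (a sketch; the names, path, and workflow type are hypothetical):

>>> from trinity.common.config import (
...     BufferConfig, ExplorerInput, StorageConfig, TrainerInput,
... )
>>> from trinity.common.constants import StorageType
>>> buffer = BufferConfig(
...     batch_size=32,
...     explorer_input=ExplorerInput(
...         taskset=StorageConfig(name="train_tasks", path="/path/to/tasks"),
...         default_workflow_type="math_workflow",
...     ),
...     trainer_input=TrainerInput(
...         experience_buffer=StorageConfig(name="exp_buffer", storage_type=StorageType.QUEUE),
...     ),
... )
>>> buffer.explorer_input.taskset.name
'train_tasks'
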
class trinity.common.config.ExplorerConfig(runner_num: int = 1, max_timeout: int = 900, max_retry_times: int = 2, rollout_model: ~trinity.common.config.InferenceModelConfig = <factory>, auxiliary_models: ~typing.List[~trinity.common.config.InferenceModelConfig] = <factory>, eval_interval: int = 100, eval_on_latest_checkpoint: bool = False)[source]

Bases: object

Config for explorer.

runner_num: int = 1
max_timeout: int = 900
max_retry_times: int = 2
rollout_model: InferenceModelConfig
auxiliary_models: List[InferenceModelConfig]
eval_interval: int = 100
eval_on_latest_checkpoint: bool = False
__init__(runner_num: int = 1, max_timeout: int = 900, max_retry_times: int = 2, rollout_model: ~trinity.common.config.InferenceModelConfig = <factory>, auxiliary_models: ~typing.List[~trinity.common.config.InferenceModelConfig] = <factory>, eval_interval: int = 100, eval_on_latest_checkpoint: bool = False) None
class trinity.common.config.TrainerConfig(trainer_type: str = 'verl', save_interval: int = 0, enable_preview: bool = True, actor_use_kl_loss: Optional[bool] = None, actor_kl_loss_coef: Optional[float] = None, actor_entropy_coef: Optional[float] = None, actor_grad_clip: Optional[float] = None, actor_clip_ratio: Optional[float] = None, trainer_config: Any = <factory>, trainer_config_path: str = '')[source]

Bases: object

trainer_type: str = 'verl'
save_interval: int = 0
enable_preview: bool = True
actor_use_kl_loss: bool | None = None
actor_kl_loss_coef: float | None = None
actor_entropy_coef: float | None = None
actor_grad_clip: float | None = None
actor_clip_ratio: float | None = None
trainer_config: Any
trainer_config_path: str = ''
__init__(trainer_type: str = 'verl', save_interval: int = 0, enable_preview: bool = True, actor_use_kl_loss: bool | None = None, actor_kl_loss_coef: float | None = None, actor_entropy_coef: float | None = None, actor_grad_clip: float | None = None, actor_clip_ratio: float | None = None, trainer_config: ~typing.Any = <factory>, trainer_config_path: str = '') None
class trinity.common.config.MonitorConfig(monitor_type: str = 'tensorboard', monitor_args: Dict = <factory>, cache_dir: str = '')[source]

Bases: object

monitor_type: str = 'tensorboard'
monitor_args: Dict
cache_dir: str = ''
__init__(monitor_type: str = 'tensorboard', monitor_args: ~typing.Dict = <factory>, cache_dir: str = '') None
class trinity.common.config.SynchronizerConfig(sync_method: SyncMethod = SyncMethod.NCCL, sync_interval: int = 1, sync_timeout: int = 1200, wait_for_checkpoint: bool = False, explorer_world_size: int | None = None)[source]

Bases: object

Configs for model weight synchronization.

sync_method: SyncMethod = 'nccl'
sync_interval: int = 1
sync_timeout: int = 1200
wait_for_checkpoint: bool = False
explorer_world_size: int | None = None
__init__(sync_method: SyncMethod = SyncMethod.NCCL, sync_interval: int = 1, sync_timeout: int = 1200, wait_for_checkpoint: bool = False, explorer_world_size: int | None = None) None
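
Example (a sketch; that CHECKPOINT syncs weights through saved checkpoints rather than an NCCL broadcast is inferred from the enum names):

>>> from trinity.common.config import SynchronizerConfig
>>> from trinity.common.constants import SyncMethod
>>> sync = SynchronizerConfig(
...     sync_method=SyncMethod.CHECKPOINT,
...     sync_interval=10,
...     wait_for_checkpoint=True,
... )
>>> sync.sync_interval
10
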
class trinity.common.config.Config(mode: str = 'both', project: str = 'Trinity-RFT', name: str = 'rft', checkpoint_root_dir: str = '', checkpoint_job_dir: str = '', algorithm: ~trinity.common.config.AlgorithmConfig = <factory>, data_processor: ~trinity.common.config.DataProcessorConfig = <factory>, model: ~trinity.common.config.ModelConfig = <factory>, cluster: ~trinity.common.config.ClusterConfig = <factory>, buffer: ~trinity.common.config.BufferConfig = <factory>, explorer: ~trinity.common.config.ExplorerConfig = <factory>, trainer: ~trinity.common.config.TrainerConfig = <factory>, monitor: ~trinity.common.config.MonitorConfig = <factory>, synchronizer: ~trinity.common.config.SynchronizerConfig = <factory>)[source]

Bases: object

Global configuration.

mode: str = 'both'
project: str = 'Trinity-RFT'
name: str = 'rft'
checkpoint_root_dir: str = ''
checkpoint_job_dir: str = ''
algorithm: AlgorithmConfig
data_processor: DataProcessorConfig
model: ModelConfig
cluster: ClusterConfig
buffer: BufferConfig
explorer: ExplorerConfig
trainer: TrainerConfig
monitor: MonitorConfig
synchronizer: SynchronizerConfig
save(config_path: str) None[source]

Save the config to a file.

check_and_update() None[source]

Check and update the config.

__init__(mode: str = 'both', project: str = 'Trinity-RFT', name: str = 'rft', checkpoint_root_dir: str = '', checkpoint_job_dir: str = '', algorithm: ~trinity.common.config.AlgorithmConfig = <factory>, data_processor: ~trinity.common.config.DataProcessorConfig = <factory>, model: ~trinity.common.config.ModelConfig = <factory>, cluster: ~trinity.common.config.ClusterConfig = <factory>, buffer: ~trinity.common.config.BufferConfig = <factory>, explorer: ~trinity.common.config.ExplorerConfig = <factory>, trainer: ~trinity.common.config.TrainerConfig = <factory>, monitor: ~trinity.common.config.MonitorConfig = <factory>, synchronizer: ~trinity.common.config.SynchronizerConfig = <factory>) None
trinity.common.config.load_config(config_path: str) Config[source]

Load the configuration from the given path.
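
Example of the typical load, check, and save round trip (paths are hypothetical):

>>> from trinity.common.config import load_config
>>> config = load_config("examples/my_experiment.yaml")
>>> config.check_and_update()
>>> config.save("/tmp/resolved_config.yaml")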

trinity.common.constants module

Constants.

class trinity.common.constants.CaseInsensitiveEnumMeta(cls, bases, classdict, **kwds)[source]

Bases: EnumMeta

class trinity.common.constants.CaseInsensitiveEnum(value, *args, **kwargs)[source]

Bases: Enum

An enumeration.
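
Example (a sketch of the intended behaviour, assuming the metaclass makes value lookup case-insensitive, as the name suggests):

>>> from trinity.common.constants import StorageType
>>> StorageType("FILE") is StorageType.FILE
True
>>> StorageType("file") is StorageType.FILE
True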

class trinity.common.constants.PromptType(value, *args, **kwargs)[source]

Bases: CaseInsensitiveEnum

Prompt Type.

MESSAGES = 'messages'
CHATPAIR = 'chatpair'
PLAINTEXT = 'plaintext'
class trinity.common.constants.TaskType(value)[source]

Bases: Enum

Task Type.

EXPLORE = 0
EVAL = 1
class trinity.common.constants.ReadStrategy(value, *args, **kwargs)[source]

Bases: CaseInsensitiveEnum

Read (pop) strategy.

DEFAULT = None
FIFO = 'fifo'
RANDOM = 'random'
LRU = 'lru'
LFU = 'lfu'
PRIORITY = 'priority'
class trinity.common.constants.StorageType(value, *args, **kwargs)[source]

Bases: CaseInsensitiveEnum

Storage Type.

SQL = 'sql'
QUEUE = 'queue'
FILE = 'file'
class trinity.common.constants.AlgorithmType(value, *args, **kwargs)[source]

Bases: CaseInsensitiveEnum

Algorithm Type.

SFT = 'sft'
PPO = 'ppo'
GRPO = 'grpo'
OPMD = 'opmd'
PAIRWISE_OPMD = 'pairwise_opmd'
DPO = 'dpo'
is_rft() bool[source]

Check if the algorithm is RFT.

is_sft() bool[source]

Check if the algorithm is SFT.

is_dpo() bool[source]

Check if the algorithm is DPO.
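
Example of branching on the algorithm kind (the expected return values below are inferred from the method names and should be treated as an assumption):

>>> from trinity.common.constants import AlgorithmType
>>> algo = AlgorithmType.GRPO
>>> algo.is_sft()
False
>>> algo.is_rft()
True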

class trinity.common.constants.MonitorType(value, *args, **kwargs)[source]

Bases: CaseInsensitiveEnum

Monitor Type.

WANDB = 'wandb'
TENSORBOARD = 'tensorboard'
class trinity.common.constants.SyncMethodEnumMeta(cls, bases, classdict, **kwds)[source]

Bases: CaseInsensitiveEnumMeta

class trinity.common.constants.SyncMethod(value, *args, **kwargs)[source]

Bases: CaseInsensitiveEnum

Sync Method.

NCCL = 'nccl'
CHECKPOINT = 'checkpoint'

trinity.common.experience module

Experience Class.

class trinity.common.experience.Experience(tokens: Tensor, prompt_length: int, logprobs: Tensor | None = None, reward: float | None = None, prompt_text: str | None = None, response_text: str | None = None, action_mask: Tensor | None = None, chosen: Tensor | None = None, rejected: Tensor | None = None, info: dict | None = None, metrics: dict[str, float] | None = None, run_id: str = '')[source]

Bases: object

A single experience.

tokens: Tensor
prompt_length: int
logprobs: Tensor | None = None
reward: float | None = None
prompt_text: str | None = None
response_text: str | None = None
action_mask: Tensor | None = None
chosen: Tensor | None = None
rejected: Tensor | None = None
info: dict | None = None
metrics: dict[str, float] | None = None
run_id: str = ''
serialize() bytes[source]

Serialize the experience to bytes.

static deserialize(data: bytes) Experience[source]

Deserialize the experience from bytes.

__init__(tokens: Tensor, prompt_length: int, logprobs: Tensor | None = None, reward: float | None = None, prompt_text: str | None = None, response_text: str | None = None, action_mask: Tensor | None = None, chosen: Tensor | None = None, rejected: Tensor | None = None, info: dict | None = None, metrics: dict[str, float] | None = None, run_id: str = '') None
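
Example (a sketch, assuming serialize/deserialize round-trip the fields; token ids and texts are made up):

>>> import torch
>>> from trinity.common.experience import Experience
>>> exp = Experience(
...     tokens=torch.tensor([1, 2, 3, 4, 5, 6]),  # prompt tokens followed by response tokens
...     prompt_length=4,                          # the first 4 tokens belong to the prompt
...     reward=1.0,
...     prompt_text="1 + 1 =",
...     response_text=" 2",
... )
>>> restored = Experience.deserialize(exp.serialize())
>>> restored.prompt_length
4
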
class trinity.common.experience.Experiences(tokens: Tensor, rewards: Tensor, attention_masks: Tensor, action_masks: Tensor | None, prompt_length: int, logprobs: Tensor | None, run_ids: List[str])[source]

Bases: object

A container for a batch of experiences, designed for high-performance communication.

Example

>>>             |<- prompt_length ->|               |
>>> tokens: ('P' represents prompt, 'O' represents output)
>>> exp1:       |........PPPPPPPPPPP|OOOOOOOOOO.....|
>>> exp2:       |......PPPPPPPPPPPPP|OOOOOOO........|
>>>
>>> attention_masks: ('.' represents False and '1' represents True)
>>> exp1:       |........11111111111|1111111111.....|
>>> exp2:       |......1111111111111|1111111........|
tokens: Tensor
rewards: Tensor
attention_masks: Tensor
action_masks: Tensor | None
prompt_length: int
logprobs: Tensor | None
run_ids: List[str]
property batch_size: int

Get the batch size.

classmethod gather_experiences(experiences: list[Experience], pad_token_id: int = 0) Experiences[source]

Gather a batch of experiences from a list of experiences.

This method will automatically pad the tokens and logprobs of input experiences to the same length.
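
Example (a sketch; per the diagram above, prompts are right-aligned to a shared prompt length and responses are left-aligned, with pad_token_id filling the rest):

>>> import torch
>>> from trinity.common.experience import Experience, Experiences
>>> exps = [
...     Experience(tokens=torch.tensor([11, 12, 13, 14, 15]), prompt_length=3, reward=1.0),
...     Experience(tokens=torch.tensor([11, 12, 16, 17]), prompt_length=2, reward=0.0),
... ]
>>> batch = Experiences.gather_experiences(exps, pad_token_id=0)
>>> batch.batch_size
2
>>> batch.tokens.shape[0]
2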

classmethod gather_dpo_experiences(experiences: list[Experience], pad_token_id: int = 0) Experiences[source]

Gather a batch of DPO experiences from a list of experiences.

Reference: https://github.com/huggingface/trl/blob/main/trl/trainer/dpo_trainer.py#L849

Note: We arrange inputs in the order (chosen, rejected, chosen, rejected, …) to ensure that each (chosen, rejected) pair is not split by subsequent operations.

Parameters:
  • experiences (list[Experience]) – each experience provides “prompt” (token ids of the prompt), “chosen” (token ids of the chosen response), and “rejected” (token ids of the rejected response).

  • pad_token_id (int) – The pad token id.

Returns:

  • “tokens”: Concatenated chosen and rejected completion input IDs of shape (2 * batch_size, max_completion_length).

  • “attention_masks”: Concatenated chosen and rejected attention masks of shape (2 * batch_size, max_completion_length).

Return type:

Experiences

__init__(tokens: Tensor, rewards: Tensor, attention_masks: Tensor, action_masks: Tensor | None, prompt_length: int, logprobs: Tensor | None, run_ids: List[str]) None

trinity.common.schema module

Schema for different types of data.

class trinity.common.schema.RftDatasetModel(**kwargs)[source]

Bases: Base

SQLAlchemy model for RftDataset.

id
consumed_cnt
last_modified_date
from_id
from_model
from_recipe
prompt
response
solution
reward
chosen
rejected
label
quality_score
quality_score_detail
difficulty_score
difficulty_score_detail
diversity_score
diversity_score_detail
priority
reward_fn
workflow
to_dict() dict[source]
__init__(**kwargs)

A simple constructor that allows initialization from kwargs.

Sets attributes on the constructed instance using the names and values in kwargs.

Only keys that are present as attributes of the instance’s class are allowed. These could be, for example, any mapped columns or relationships.

class trinity.common.schema.TaskModel(**kwargs)[source]

Bases: Base

SQLAlchemy model for Task.

id
task_desc
workflow_type
reward_type
__init__(**kwargs)

A simple constructor that allows initialization from kwargs.

Sets attributes on the constructed instance using the names and values in kwargs.

Only keys that are present as attributes of the instance’s class are allowed. These could be, for example, any mapped columns or relationships.

class trinity.common.schema.ExperienceModel(**kwargs)[source]

Bases: Base

SQLAlchemy model for Experience.

id
serialized_exp
prompt
response
reward
consumed
priority
to_experience() Experience[source]

Load the experience from the database.

static from_experience(experience: Experience)[source]

Save the experience to the database.
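
Example (a sketch, assuming from_experience builds an ORM row that can be added to a SQLAlchemy session and later read back with to_experience):

>>> import torch
>>> from trinity.common.experience import Experience
>>> from trinity.common.schema import ExperienceModel
>>> exp = Experience(tokens=torch.tensor([1, 2, 3, 4]), prompt_length=2, reward=0.5)
>>> row = ExperienceModel.from_experience(exp)
>>> restored = row.to_experience()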

__init__(**kwargs)

A simple constructor that allows initialization from kwargs.

Sets attributes on the constructed instance using the names and values in kwargs.

Only keys that are present as attributes of the instance’s class are allowed. These could be, for example, any mapped columns or relationships.

class trinity.common.schema.SFTDataModel(**kwargs)[source]

Bases: Base

SQLAlchemy model for SFT data.

id
serialized_exp
messages
consumed
to_experience() Experience[source]

Load the experience from the database.

classmethod from_messages(messages: list[dict], tokenizer: Any, chat_template: str | None = None) SFTDataModel[source]

Convert a list of messages into a single instance of SFT data.
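
Example (a sketch; the tokenizer checkpoint is illustrative, and any tokenizer that provides a chat template should work):

>>> from transformers import AutoTokenizer
>>> from trinity.common.schema import SFTDataModel
>>> tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-7B-Instruct")
>>> messages = [
...     {"role": "system", "content": "You are a helpful assistant."},
...     {"role": "user", "content": "What is 1 + 1?"},
...     {"role": "assistant", "content": "2"},
... ]
>>> row = SFTDataModel.from_messages(messages, tokenizer=tokenizer)
>>> exp = row.to_experience()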

__init__(**kwargs)

A simple constructor that allows initialization from kwargs.

Sets attributes on the constructed instance using the names and values in kwargs.

Only keys that are present as attributes of the instance’s class are allowed. These could be, for example, any mapped columns or relationships.

class trinity.common.schema.DPODataModel(**kwargs)[source]

Bases: Base

SQLAlchemy model for DPO data.

id
serialized_exp
chosen
rejected
consumed
to_experience() Experience[source]

Load the experience from the database.

__init__(**kwargs)

A simple constructor that allows initialization from kwargs.

Sets attributes on the constructed instance using the names and values in kwargs.

Only keys that are present as attributes of the instance’s class are allowed. These could be, for example, any mapped columns or relationships.

trinity.common.verl_config module

class trinity.common.verl_config.Data(train_batch_size: int = 1024)[source]

Bases: object

train_batch_size: int = 1024
__init__(train_batch_size: int = 1024) None
class trinity.common.verl_config.ActorModel(path: str = '', external_lib: Optional[str] = None, override_config: Dict[str, Any] = <factory>, enable_gradient_checkpointing: bool = True, use_remove_padding: bool = False)[source]

Bases: object

path: str = ''
external_lib: str | None = None
override_config: Dict[str, Any]
enable_gradient_checkpointing: bool = True
use_remove_padding: bool = False
__init__(path: str = '', external_lib: str | None = None, override_config: ~typing.Dict[str, ~typing.Any] = <factory>, enable_gradient_checkpointing: bool = True, use_remove_padding: bool = False) None
class trinity.common.verl_config.Optim(lr: float = 1e-06, lr_warmup_steps: int = -1, lr_warmup_steps_ratio: float = 0.0, min_lr_ratio: float | None = 0.0, warmup_style: str = 'constant', total_training_steps: int = -1, beta1: float = 0.9, beta2: float = 0.999)[source]

Bases: object

lr: float = 1e-06
lr_warmup_steps: int = -1
lr_warmup_steps_ratio: float = 0.0
min_lr_ratio: float | None = 0.0
warmup_style: str = 'constant'
total_training_steps: int = -1
beta1: float = 0.9
beta2: float = 0.999
__init__(lr: float = 1e-06, lr_warmup_steps: int = -1, lr_warmup_steps_ratio: float = 0.0, min_lr_ratio: float | None = 0.0, warmup_style: str = 'constant', total_training_steps: int = -1, beta1: float = 0.9, beta2: float = 0.999) None
class trinity.common.verl_config.WrapPolicy(min_num_params: int = 0)[source]

Bases: object

min_num_params: int = 0
__init__(min_num_params: int = 0) None
class trinity.common.verl_config.FSDPConfig(wrap_policy: trinity.common.verl_config.WrapPolicy = <factory>, min_num_params: int = 0, param_offload: bool = False, optimizer_offload: bool = False, fsdp_size: int = -1)[source]

Bases: object

wrap_policy: WrapPolicy
min_num_params: int = 0
param_offload: bool = False
optimizer_offload: bool = False
fsdp_size: int = -1
__init__(wrap_policy: ~trinity.common.verl_config.WrapPolicy = <factory>, min_num_params: int = 0, param_offload: bool = False, optimizer_offload: bool = False, fsdp_size: int = -1) None
class trinity.common.verl_config.Checkpoint(contents: List[str] = <factory>)[source]

Bases: object

contents: List[str]
__init__(contents: ~typing.List[str] = <factory>) None
class trinity.common.verl_config.Actor(strategy: str = 'fsdp', ppo_mini_batch_size: int = 256, ppo_micro_batch_size: Optional[int] = None, ppo_micro_batch_size_per_gpu: int = 1, use_dynamic_bsz: bool = False, ppo_max_token_len_per_gpu: int = 16384, grad_clip: float = 1.0, clip_ratio: float = 0.2, entropy_coeff: float = 0.001, use_kl_loss: bool = False, kl_loss_coef: float = 0.001, kl_loss_type: str = 'low_var_kl', ppo_epochs: int = 1, shuffle: bool = False, ulysses_sequence_parallel_size: int = 1, checkpoint: trinity.common.verl_config.Checkpoint = <factory>, optim: trinity.common.verl_config.Optim = <factory>, fsdp_config: trinity.common.verl_config.FSDPConfig = <factory>, algorithm_type: trinity.common.constants.AlgorithmType = <AlgorithmType.PPO: 'ppo'>, tau: float = 0.001, opmd_baseline: str = 'mean', use_uid: bool = False)[source]

Bases: object

strategy: str = 'fsdp'
ppo_mini_batch_size: int = 256
ppo_micro_batch_size: int | None = None
ppo_micro_batch_size_per_gpu: int = 1
use_dynamic_bsz: bool = False
ppo_max_token_len_per_gpu: int = 16384
grad_clip: float = 1.0
clip_ratio: float = 0.2
entropy_coeff: float = 0.001
use_kl_loss: bool = False
kl_loss_coef: float = 0.001
kl_loss_type: str = 'low_var_kl'
ppo_epochs: int = 1
shuffle: bool = False
ulysses_sequence_parallel_size: int = 1
checkpoint: Checkpoint
optim: Optim
fsdp_config: FSDPConfig
algorithm_type: AlgorithmType = 'ppo'
tau: float = 0.001
opmd_baseline: str = 'mean'
use_uid: bool = False
__init__(strategy: str = 'fsdp', ppo_mini_batch_size: int = 256, ppo_micro_batch_size: int | None = None, ppo_micro_batch_size_per_gpu: int = 1, use_dynamic_bsz: bool = False, ppo_max_token_len_per_gpu: int = 16384, grad_clip: float = 1.0, clip_ratio: float = 0.2, entropy_coeff: float = 0.001, use_kl_loss: bool = False, kl_loss_coef: float = 0.001, kl_loss_type: str = 'low_var_kl', ppo_epochs: int = 1, shuffle: bool = False, ulysses_sequence_parallel_size: int = 1, checkpoint: ~trinity.common.verl_config.Checkpoint = <factory>, optim: ~trinity.common.verl_config.Optim = <factory>, fsdp_config: ~trinity.common.verl_config.FSDPConfig = <factory>, algorithm_type: ~trinity.common.constants.AlgorithmType = AlgorithmType.PPO, tau: float = 0.001, opmd_baseline: str = 'mean', use_uid: bool = False) None
class trinity.common.verl_config.Ref(fsdp_config: trinity.common.verl_config.FSDPConfig = <factory>, log_prob_micro_batch_size: Optional[int] = None, log_prob_micro_batch_size_per_gpu: int = 1, log_prob_use_dynamic_bsz: bool = False, log_prob_max_token_len_per_gpu: int = 0, ulysses_sequence_parallel_size: int = 1)[source]

Bases: object

fsdp_config: FSDPConfig
log_prob_micro_batch_size: int | None = None
log_prob_micro_batch_size_per_gpu: int = 1
log_prob_use_dynamic_bsz: bool = False
log_prob_max_token_len_per_gpu: int = 0
ulysses_sequence_parallel_size: int = 1
__init__(fsdp_config: ~trinity.common.verl_config.FSDPConfig = <factory>, log_prob_micro_batch_size: int | None = None, log_prob_micro_batch_size_per_gpu: int = 1, log_prob_use_dynamic_bsz: bool = False, log_prob_max_token_len_per_gpu: int = 0, ulysses_sequence_parallel_size: int = 1) None
class trinity.common.verl_config.Rollout(temperature: float = 1.0, n: int = 1)[source]

Bases: object

temperature: float = 1.0
n: int = 1
__init__(temperature: float = 1.0, n: int = 1) None
class trinity.common.verl_config.ActorRolloutRef(hybrid_engine: bool = True, model: trinity.common.verl_config.ActorModel = <factory>, actor: trinity.common.verl_config.Actor = <factory>, ref: trinity.common.verl_config.Ref = <factory>, rollout: trinity.common.verl_config.Rollout = <factory>, synchronizer: Optional[trinity.common.config.SynchronizerConfig] = None)[source]

Bases: object

hybrid_engine: bool = True
model: ActorModel
actor: Actor
ref: Ref
rollout: Rollout
synchronizer: SynchronizerConfig | None = None
__init__(hybrid_engine: bool = True, model: ~trinity.common.verl_config.ActorModel = <factory>, actor: ~trinity.common.verl_config.Actor = <factory>, ref: ~trinity.common.verl_config.Ref = <factory>, rollout: ~trinity.common.verl_config.Rollout = <factory>, synchronizer: ~trinity.common.config.SynchronizerConfig | None = None) None
class trinity.common.verl_config.CriticModel(path: str = '', tokenizer_path: str = '', override_config: Dict[str, str] = <factory>, external_lib: Optional[str] = None, enable_gradient_checkpointing: bool = True, use_remove_padding: bool = False, fsdp_config: trinity.common.verl_config.FSDPConfig = <factory>)[source]

Bases: object

path: str = ''
tokenizer_path: str = ''
override_config: Dict[str, str]
external_lib: str | None = None
enable_gradient_checkpointing: bool = True
use_remove_padding: bool = False
fsdp_config: FSDPConfig
__init__(path: str = '', tokenizer_path: str = '', override_config: ~typing.Dict[str, str] = <factory>, external_lib: str | None = None, enable_gradient_checkpointing: bool = True, use_remove_padding: bool = False, fsdp_config: ~trinity.common.verl_config.FSDPConfig = <factory>) None
class trinity.common.verl_config.Critic(strategy: str = 'fsdp', optim: trinity.common.verl_config.Optim = <factory>, model: trinity.common.verl_config.CriticModel = <factory>, ppo_mini_batch_size: int = 0, ppo_micro_batch_size: Optional[int] = None, ppo_micro_batch_size_per_gpu: int = 1, forward_micro_batch_size: Optional[int] = None, forward_micro_batch_size_per_gpu: Optional[int] = None, use_dynamic_bsz: bool = False, ppo_max_token_len_per_gpu: int = 0, forward_max_token_len_per_gpu: int = 0, ulysses_sequence_parallel_size: int = 1, ppo_epochs: int = 0, shuffle: bool = False, grad_clip: float = 0.0, cliprange_value: float = 0.0, checkpoint: trinity.common.verl_config.Checkpoint = <factory>, rollout_n: int = 1)[source]

Bases: object

strategy: str = 'fsdp'
optim: Optim
model: CriticModel
ppo_mini_batch_size: int = 0
ppo_micro_batch_size: int | None = None
ppo_micro_batch_size_per_gpu: int = 1
forward_micro_batch_size: int | None = None
forward_micro_batch_size_per_gpu: int | None = None
use_dynamic_bsz: bool = False
ppo_max_token_len_per_gpu: int = 0
forward_max_token_len_per_gpu: int = 0
ulysses_sequence_parallel_size: int = 1
ppo_epochs: int = 0
shuffle: bool = False
grad_clip: float = 0.0
cliprange_value: float = 0.0
checkpoint: Checkpoint
rollout_n: int = 1
__init__(strategy: str = 'fsdp', optim: ~trinity.common.verl_config.Optim = <factory>, model: ~trinity.common.verl_config.CriticModel = <factory>, ppo_mini_batch_size: int = 0, ppo_micro_batch_size: int | None = None, ppo_micro_batch_size_per_gpu: int = 1, forward_micro_batch_size: int | None = None, forward_micro_batch_size_per_gpu: int | None = None, use_dynamic_bsz: bool = False, ppo_max_token_len_per_gpu: int = 0, forward_max_token_len_per_gpu: int = 0, ulysses_sequence_parallel_size: int = 1, ppo_epochs: int = 0, shuffle: bool = False, grad_clip: float = 0.0, cliprange_value: float = 0.0, checkpoint: ~trinity.common.verl_config.Checkpoint = <factory>, rollout_n: int = 1) None
class trinity.common.verl_config.RewardModel(enable: bool = False, strategy: str = 'fsdp', model: trinity.common.verl_config._RewardModel = <factory>, micro_batch_size_per_gpu: int = 1, max_length: Optional[int] = None, ulysses_sequence_parallel_size: int = 1, use_dynamic_bsz: bool = False, forward_max_token_len_per_gpu: int = 0, reward_manager: str = 'naive')[source]

Bases: object

enable: bool = False
strategy: str = 'fsdp'
model: _RewardModel
micro_batch_size_per_gpu: int = 1
max_length: int | None = None
ulysses_sequence_parallel_size: int = 1
use_dynamic_bsz: bool = False
forward_max_token_len_per_gpu: int = 0
reward_manager: str = 'naive'
__init__(enable: bool = False, strategy: str = 'fsdp', model: ~trinity.common.verl_config._RewardModel = <factory>, micro_batch_size_per_gpu: int = 1, max_length: int | None = None, ulysses_sequence_parallel_size: int = 1, use_dynamic_bsz: bool = False, forward_max_token_len_per_gpu: int = 0, reward_manager: str = 'naive') None
class trinity.common.verl_config.CustomRewardFunction(path: str | None = None, name: str = 'compute_score')[source]

Bases: object

path: str | None = None
name: str = 'compute_score'
__init__(path: str | None = None, name: str = 'compute_score') None
class trinity.common.verl_config.KL_Ctrl(type: str = 'fixed', kl_coef: float = 0.001, horizon: float = 10000, target_kl: float = 0.1)[source]

Bases: object

type: str = 'fixed'
kl_coef: float = 0.001
horizon: float = 10000
target_kl: float = 0.1
__init__(type: str = 'fixed', kl_coef: float = 0.001, horizon: float = 10000, target_kl: float = 0.1) None
class trinity.common.verl_config.Algorithm(gamma: float = 1.0, lam: float = 1.0, adv_estimator: str = 'gae', norm_adv_by_std_in_grpo: bool = True, use_kl_in_reward: bool = False, kl_penalty: str = 'kl', kl_ctrl: trinity.common.verl_config.KL_Ctrl = <factory>)[source]

Bases: object

gamma: float = 1.0
lam: float = 1.0
adv_estimator: str = 'gae'
norm_adv_by_std_in_grpo: bool = True
use_kl_in_reward: bool = False
kl_penalty: str = 'kl'
kl_ctrl: KL_Ctrl
__init__(gamma: float = 1.0, lam: float = 1.0, adv_estimator: str = 'gae', norm_adv_by_std_in_grpo: bool = True, use_kl_in_reward: bool = False, kl_penalty: str = 'kl', kl_ctrl: ~trinity.common.verl_config.KL_Ctrl = <factory>) None
class trinity.common.verl_config.Trainer(balance_batch: bool = True, total_epochs: int = 30, total_training_steps: Optional[int] = None, project_name: str = '', experiment_name: str = '', logger: List[str] = <factory>, val_generations_to_log_to_wandb: int = 0, nnodes: int = 0, n_gpus_per_node: int = 0, save_freq: int = 0, resume_mode: str = 'auto', resume_from_path: str = '', test_freq: int = 0, critic_warmup: int = 0, default_hdfs_dir: Optional[str] = None, remove_previous_ckpt_in_save: bool = False, del_local_ckpt_after_load: bool = False, default_local_dir: str = '', val_before_train: bool = False, training_rollout_mode: str = 'parallel', enable_exp_buffer: bool = True, sync_freq: int = 0, sft_warmup_steps: int = 0, max_actor_ckpt_to_keep: Optional[int] = None, max_critic_ckpt_to_keep: Optional[int] = None)[source]

Bases: object

balance_batch: bool = True
total_epochs: int = 30
total_training_steps: int | None = None
project_name: str = ''
experiment_name: str = ''
logger: List[str]
val_generations_to_log_to_wandb: int = 0
nnodes: int = 0
n_gpus_per_node: int = 0
save_freq: int = 0
resume_mode: str = 'auto'
resume_from_path: str = ''
test_freq: int = 0
critic_warmup: int = 0
default_hdfs_dir: str | None = None
remove_previous_ckpt_in_save: bool = False
del_local_ckpt_after_load: bool = False
default_local_dir: str = ''
val_before_train: bool = False
training_rollout_mode: str = 'parallel'
enable_exp_buffer: bool = True
sync_freq: int = 0
sft_warmup_steps: int = 0
max_actor_ckpt_to_keep: int | None = None
max_critic_ckpt_to_keep: int | None = None
__init__(balance_batch: bool = True, total_epochs: int = 30, total_training_steps: int | None = None, project_name: str = '', experiment_name: str = '', logger: ~typing.List[str] = <factory>, val_generations_to_log_to_wandb: int = 0, nnodes: int = 0, n_gpus_per_node: int = 0, save_freq: int = 0, resume_mode: str = 'auto', resume_from_path: str = '', test_freq: int = 0, critic_warmup: int = 0, default_hdfs_dir: str | None = None, remove_previous_ckpt_in_save: bool = False, del_local_ckpt_after_load: bool = False, default_local_dir: str = '', val_before_train: bool = False, training_rollout_mode: str = 'parallel', enable_exp_buffer: bool = True, sync_freq: int = 0, sft_warmup_steps: int = 0, max_actor_ckpt_to_keep: int | None = None, max_critic_ckpt_to_keep: int | None = None) None
class trinity.common.verl_config.veRLConfig(data: trinity.common.verl_config.Data = <factory>, actor_rollout_ref: trinity.common.verl_config.ActorRolloutRef = <factory>, critic: trinity.common.verl_config.Critic = <factory>, reward_model: trinity.common.verl_config.RewardModel = <factory>, custom_reward_function: trinity.common.verl_config.CustomRewardFunction = <factory>, algorithm: trinity.common.verl_config.Algorithm = <factory>, trainer: trinity.common.verl_config.Trainer = <factory>, buffer: trinity.common.config.BufferConfig = <factory>, synchronizer: Optional[trinity.common.config.SynchronizerConfig] = None, enable_preview: bool = True)[source]

Bases: object

data: Data
actor_rollout_ref: ActorRolloutRef
critic: Critic
reward_model: RewardModel
custom_reward_function: CustomRewardFunction
algorithm: Algorithm
trainer: Trainer
buffer: BufferConfig
synchronizer: SynchronizerConfig | None = None
enable_preview: bool = True
synchronize_config(config: Config) None[source]

Synchronize this veRL config with the given global Config.

__init__(data: ~trinity.common.verl_config.Data = <factory>, actor_rollout_ref: ~trinity.common.verl_config.ActorRolloutRef = <factory>, critic: ~trinity.common.verl_config.Critic = <factory>, reward_model: ~trinity.common.verl_config.RewardModel = <factory>, custom_reward_function: ~trinity.common.verl_config.CustomRewardFunction = <factory>, algorithm: ~trinity.common.verl_config.Algorithm = <factory>, trainer: ~trinity.common.verl_config.Trainer = <factory>, buffer: ~trinity.common.config.BufferConfig = <factory>, synchronizer: ~trinity.common.config.SynchronizerConfig | None = None, enable_preview: bool = True) None
trinity.common.verl_config.load_config(config_path: str) veRLConfig[source]
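
Example (a sketch; the paths are hypothetical, and synchronize_config is assumed to copy the shared fields from the global Config into this veRL config):

>>> from trinity.common.config import load_config as load_trinity_config
>>> from trinity.common.verl_config import load_config as load_verl_config
>>> trinity_cfg = load_trinity_config("examples/my_experiment.yaml")
>>> verl_cfg = load_verl_config(trinity_cfg.trainer.trainer_config_path)
>>> verl_cfg.synchronize_config(trinity_cfg)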

Module contents