trinity.common
Subpackages
- trinity.common.models
- Submodules
- trinity.common.models.model module
InferenceModel
InferenceModel.generate()
InferenceModel.chat()
InferenceModel.logprobs()
InferenceModel.convert_messages_to_experience()
InferenceModel.generate_async()
InferenceModel.chat_async()
InferenceModel.logprobs_async()
InferenceModel.convert_messages_to_experience_async()
InferenceModel.get_ckp_version()
InferenceModel.get_available_address()
ModelWrapper
- trinity.common.models.openai_api module
- trinity.common.models.utils module
- trinity.common.models.vllm_async_model module
vLLMAysncRolloutModel
vLLMAysncRolloutModel.__init__()
vLLMAysncRolloutModel.chat_async()
vLLMAysncRolloutModel.generate_async()
vLLMAysncRolloutModel.logprobs_async()
vLLMAysncRolloutModel.convert_messages_to_experience_async()
vLLMAysncRolloutModel.shutdown()
vLLMAysncRolloutModel.sync_model()
vLLMAysncRolloutModel.init_process_group()
vLLMAysncRolloutModel.update_weight()
vLLMAysncRolloutModel.run_api_server()
vLLMAysncRolloutModel.has_api_server()
vLLMAysncRolloutModel.api_server_ready()
vLLMAysncRolloutModel.reset_prefix_cache()
vLLMAysncRolloutModel.get_ckp_version()
vLLMAysncRolloutModel.sleep()
vLLMAysncRolloutModel.wake_up()
- trinity.common.models.vllm_model module
vLLMRolloutModel
vLLMRolloutModel.__init__()
vLLMRolloutModel.init_process_group()
vLLMRolloutModel.update_weight()
vLLMRolloutModel.reset_prefix_cache()
vLLMRolloutModel.sleep()
vLLMRolloutModel.wake_up()
vLLMRolloutModel.generate()
vLLMRolloutModel.chat()
vLLMRolloutModel.logprobs()
vLLMRolloutModel.convert_messages_to_experience()
vLLMRolloutModel.has_api_server()
vLLMRolloutModel.sync_model()
vLLMRolloutModel.get_ckp_version()
- trinity.common.models.vllm_worker module
- Module contents
- trinity.common.rewards
- Submodules
- trinity.common.rewards.accuracy_reward module
- trinity.common.rewards.agents_reward module
- trinity.common.rewards.base module
- trinity.common.rewards.composite_reward module
- trinity.common.rewards.format_reward module
- trinity.common.rewards.human_reward module
- trinity.common.rewards.reward_fn module
- trinity.common.rewards.tool_reward module
- Module contents
- trinity.common.workflows
Submodules
trinity.common.config module
Configs for RFT.
- class trinity.common.config.FormatConfig(prompt_type: PromptType = PromptType.MESSAGES, prompt_key: str = 'prompt', response_key: str = 'response', messages_key: str = 'message', chat_template: str = '', system_prompt: str | None = None, reply_prefix: str | None = None, reward_fn_key: str = '', workflow_key: str = '', solution_key: str = '', reward_key: str = '', chosen_key: str = 'chosen', rejected_key: str = 'rejected', label_key: str = '')[source]
Bases:
object
Configuration for data formatting
- prompt_type: PromptType = 'messages'
- prompt_key: str = 'prompt'
- response_key: str = 'response'
- messages_key: str = 'message'
- chat_template: str = ''
- system_prompt: str | None = None
- reply_prefix: str | None = None
- reward_fn_key: str = ''
- workflow_key: str = ''
- solution_key: str = ''
- reward_key: str = ''
- chosen_key: str = 'chosen'
- rejected_key: str = 'rejected'
- label_key: str = ''
- __init__(prompt_type: PromptType = PromptType.MESSAGES, prompt_key: str = 'prompt', response_key: str = 'response', messages_key: str = 'message', chat_template: str = '', system_prompt: str | None = None, reply_prefix: str | None = None, reward_fn_key: str = '', workflow_key: str = '', solution_key: str = '', reward_key: str = '', chosen_key: str = 'chosen', rejected_key: str = 'rejected', label_key: str = '') None
- class trinity.common.config.GenerationConfig(temperature: float = 1.0, top_p: float = 1.0, top_k: int = -1, logprobs: int = 0, n: int = 1)[source]
Bases:
object
- temperature: float = 1.0
- top_p: float = 1.0
- top_k: int = -1
- logprobs: int = 0
- n: int = 1
- __init__(temperature: float = 1.0, top_p: float = 1.0, top_k: int = -1, logprobs: int = 0, n: int = 1) None
- class trinity.common.config.StorageConfig(name: str = '', storage_type: ~trinity.common.constants.StorageType = StorageType.FILE, path: str | None = None, split: str = 'train', subset_name: str | None = None, format: ~trinity.common.config.FormatConfig = <factory>, index: int = 0, default_workflow_type: str | None = None, default_reward_fn_type: str | None = None, rollout_args: ~trinity.common.config.GenerationConfig = <factory>, algorithm_type: ~trinity.common.constants.AlgorithmType | None = None, total_epochs: int = 1, task_type: ~trinity.common.constants.TaskType = TaskType.EXPLORE)[source]
Bases:
object
Storage config.
- name: str = ''
- storage_type: StorageType = 'file'
- path: str | None = None
- split: str = 'train'
- subset_name: str | None = None
- format: FormatConfig
- index: int = 0
- default_workflow_type: str | None = None
- default_reward_fn_type: str | None = None
- rollout_args: GenerationConfig
- algorithm_type: AlgorithmType | None = None
- total_epochs: int = 1
- __init__(name: str = '', storage_type: ~trinity.common.constants.StorageType = StorageType.FILE, path: str | None = None, split: str = 'train', subset_name: str | None = None, format: ~trinity.common.config.FormatConfig = <factory>, index: int = 0, default_workflow_type: str | None = None, default_reward_fn_type: str | None = None, rollout_args: ~trinity.common.config.GenerationConfig = <factory>, algorithm_type: ~trinity.common.constants.AlgorithmType | None = None, total_epochs: int = 1, task_type: ~trinity.common.constants.TaskType = TaskType.EXPLORE) None
- class trinity.common.config.DataProcessorConfig(data_workflow_url: str | None = None, source_data_path: str = '', format: ~trinity.common.config.FormatConfig = <factory>, load_kwargs: ~typing.Dict[str, ~typing.Any] = <factory>, dj_config_path: str | None = None, dj_process_desc: str | None = None, agent_model_name: str | None = None, agent_model_config: ~typing.Dict[str, ~typing.Any] | None = None, clean_strategy: str = 'iterative', min_size_ratio: float | None = None, min_priority_score: float | None = 0.0, priority_weights: ~typing.Dict[str, float] | None = None, data_dist: str | None = 'gaussian', db_url: str = '', max_retry_times: int = 3, max_retry_interval: int = 1)[source]
Bases:
object
Data-Juicer config
- data_workflow_url: str | None = None
- source_data_path: str = ''
- format: FormatConfig
- load_kwargs: Dict[str, Any]
- dj_config_path: str | None = None
- dj_process_desc: str | None = None
- agent_model_name: str | None = None
- agent_model_config: Dict[str, Any] | None = None
- clean_strategy: str = 'iterative'
- min_size_ratio: float | None = None
- min_priority_score: float | None = 0.0
- priority_weights: Dict[str, float] | None = None
- data_dist: str | None = 'gaussian'
- db_url: str = ''
- max_retry_times: int = 3
- max_retry_interval: int = 1
- __init__(data_workflow_url: str | None = None, source_data_path: str = '', format: ~trinity.common.config.FormatConfig = <factory>, load_kwargs: ~typing.Dict[str, ~typing.Any] = <factory>, dj_config_path: str | None = None, dj_process_desc: str | None = None, agent_model_name: str | None = None, agent_model_config: ~typing.Dict[str, ~typing.Any] | None = None, clean_strategy: str = 'iterative', min_size_ratio: float | None = None, min_priority_score: float | None = 0.0, priority_weights: ~typing.Dict[str, float] | None = None, data_dist: str | None = 'gaussian', db_url: str = '', max_retry_times: int = 3, max_retry_interval: int = 1) None
- class trinity.common.config.ModelConfig(model_path: str = '', critic_model_path: str = '', max_prompt_tokens: int | None = None, max_response_tokens: int | None = None)[source]
Bases:
object
- model_path: str = ''
- critic_model_path: str = ''
- max_prompt_tokens: int | None = None
- max_response_tokens: int | None = None
- __init__(model_path: str = '', critic_model_path: str = '', max_prompt_tokens: int | None = None, max_response_tokens: int | None = None) None
- class trinity.common.config.InferenceModelConfig(model_path: str = '', engine_type: str = 'vllm_async', engine_num: int = 1, tensor_parallel_size: int = 1, use_v1: bool = True, enforce_eager: bool = True, enable_prefix_caching: bool = False, enable_chunked_prefill: bool = False, gpu_memory_utilization: float = 0.9, dtype: str = 'bfloat16', seed: int = 42, max_prompt_tokens: int | None = None, max_response_tokens: int | None = None, chat_template: str | None = None, enable_thinking: bool = False, enable_openai_api: bool = False, bundle_indices: str = '')[source]
Bases:
object
- model_path: str = ''
- engine_type: str = 'vllm_async'
- engine_num: int = 1
- tensor_parallel_size: int = 1
- use_v1: bool = True
- enforce_eager: bool = True
- enable_prefix_caching: bool = False
- enable_chunked_prefill: bool = False
- gpu_memory_utilization: float = 0.9
- dtype: str = 'bfloat16'
- seed: int = 42
- max_prompt_tokens: int | None = None
- max_response_tokens: int | None = None
- chat_template: str | None = None
- enable_thinking: bool = False
- enable_openai_api: bool = False
- bundle_indices: str = ''
- __init__(model_path: str = '', engine_type: str = 'vllm_async', engine_num: int = 1, tensor_parallel_size: int = 1, use_v1: bool = True, enforce_eager: bool = True, enable_prefix_caching: bool = False, enable_chunked_prefill: bool = False, gpu_memory_utilization: float = 0.9, dtype: str = 'bfloat16', seed: int = 42, max_prompt_tokens: int | None = None, max_response_tokens: int | None = None, chat_template: str | None = None, enable_thinking: bool = False, enable_openai_api: bool = False, bundle_indices: str = '') None
- class trinity.common.config.AlgorithmConfig(algorithm_type: AlgorithmType = AlgorithmType.PPO, repeat_times: int = 1, gamma: float | None = None, lam: float | None = None)[source]
Bases:
object
Config for algorithm.
- algorithm_type: AlgorithmType = 'ppo'
- repeat_times: int = 1
- gamma: float | None = None
- lam: float | None = None
- __init__(algorithm_type: AlgorithmType = AlgorithmType.PPO, repeat_times: int = 1, gamma: float | None = None, lam: float | None = None) None
- class trinity.common.config.ClusterConfig(node_num: int = 1, gpu_per_node: int = 8)[source]
Bases:
object
Config for the cluster.
- node_num: int = 1
- gpu_per_node: int = 8
- __init__(node_num: int = 1, gpu_per_node: int = 8) None
- class trinity.common.config.ExplorerInput(taskset: ~trinity.common.config.StorageConfig = <factory>, eval_tasksets: ~typing.List[~trinity.common.config.StorageConfig] = <factory>, default_workflow_type: str | None = None, default_reward_fn_type: str | None = None, system_prompt: str | None = None, reply_prefix: str | None = None)[source]
Bases:
object
Config for explorer input.
- taskset: StorageConfig
- eval_tasksets: List[StorageConfig]
- default_workflow_type: str | None = None
- default_reward_fn_type: str | None = None
- system_prompt: str | None = None
- reply_prefix: str | None = None
- __init__(taskset: ~trinity.common.config.StorageConfig = <factory>, eval_tasksets: ~typing.List[~trinity.common.config.StorageConfig] = <factory>, default_workflow_type: str | None = None, default_reward_fn_type: str | None = None, system_prompt: str | None = None, reply_prefix: str | None = None) None
- class trinity.common.config.TrainerInput(experience_buffer: StorageConfig | None = None, sft_warmup_dataset: StorageConfig | None = None, read_experience_strategy: ReadStrategy | None = None, sft_warmup_steps: int = 0)[source]
Bases:
object
Config for trainer input.
- experience_buffer: StorageConfig | None = None
- sft_warmup_dataset: StorageConfig | None = None
- read_experience_strategy: ReadStrategy | None = None
- sft_warmup_steps: int = 0
- __init__(experience_buffer: StorageConfig | None = None, sft_warmup_dataset: StorageConfig | None = None, read_experience_strategy: ReadStrategy | None = None, sft_warmup_steps: int = 0) None
- class trinity.common.config.BufferConfig(batch_size: int = 1, total_epochs: int = 1, explorer_input: ~trinity.common.config.ExplorerInput = <factory>, explorer_output: ~trinity.common.config.StorageConfig | None = None, trainer_input: ~trinity.common.config.TrainerInput = <factory>, max_retry_times: int = 3, max_retry_interval: int = 1, read_batch_size: int = 1, tokenizer_path: str | None = None, pad_token_id: int | None = None)[source]
Bases:
object
Config for buffer.
- batch_size: int = 1
- total_epochs: int = 1
- explorer_input: ExplorerInput
- explorer_output: StorageConfig | None = None
- trainer_input: TrainerInput
- max_retry_times: int = 3
- max_retry_interval: int = 1
- read_batch_size: int = 1
- tokenizer_path: str | None = None
- pad_token_id: int | None = None
- __init__(batch_size: int = 1, total_epochs: int = 1, explorer_input: ~trinity.common.config.ExplorerInput = <factory>, explorer_output: ~trinity.common.config.StorageConfig | None = None, trainer_input: ~trinity.common.config.TrainerInput = <factory>, max_retry_times: int = 3, max_retry_interval: int = 1, read_batch_size: int = 1, tokenizer_path: str | None = None, pad_token_id: int | None = None) None
- class trinity.common.config.ExplorerConfig(runner_num: int = 1, max_timeout: int = 900, max_retry_times: int = 2, rollout_model: ~trinity.common.config.InferenceModelConfig = <factory>, auxiliary_models: ~typing.List[~trinity.common.config.InferenceModelConfig] = <factory>, eval_interval: int = 100, eval_on_latest_checkpoint: bool = False)[source]
Bases:
object
Config for explorer.
- runner_num: int = 1
- max_timeout: int = 900
- max_retry_times: int = 2
- rollout_model: InferenceModelConfig
- auxiliary_models: List[InferenceModelConfig]
- eval_interval: int = 100
- eval_on_latest_checkpoint: bool = False
- __init__(runner_num: int = 1, max_timeout: int = 900, max_retry_times: int = 2, rollout_model: ~trinity.common.config.InferenceModelConfig = <factory>, auxiliary_models: ~typing.List[~trinity.common.config.InferenceModelConfig] = <factory>, eval_interval: int = 100, eval_on_latest_checkpoint: bool = False) None
- class trinity.common.config.TrainerConfig(trainer_type: str = 'verl', save_interval: int = 0, enable_preview: bool = True, actor_use_kl_loss: Optional[bool] = None, actor_kl_loss_coef: Optional[float] = None, actor_entropy_coef: Optional[float] = None, actor_grad_clip: Optional[float] = None, actor_clip_ratio: Optional[float] = None, trainer_config: Any = <factory>, trainer_config_path: str = '')[source]
Bases:
object
- trainer_type: str = 'verl'
- save_interval: int = 0
- enable_preview: bool = True
- actor_use_kl_loss: bool | None = None
- actor_kl_loss_coef: float | None = None
- actor_entropy_coef: float | None = None
- actor_grad_clip: float | None = None
- actor_clip_ratio: float | None = None
- trainer_config: Any
- trainer_config_path: str = ''
- __init__(trainer_type: str = 'verl', save_interval: int = 0, enable_preview: bool = True, actor_use_kl_loss: bool | None = None, actor_kl_loss_coef: float | None = None, actor_entropy_coef: float | None = None, actor_grad_clip: float | None = None, actor_clip_ratio: float | None = None, trainer_config: ~typing.Any = <factory>, trainer_config_path: str = '') None
- class trinity.common.config.MonitorConfig(monitor_type: str = 'tensorboard', monitor_args: Dict = <factory>, cache_dir: str = '')[source]
Bases:
object
- monitor_type: str = 'tensorboard'
- monitor_args: Dict
- cache_dir: str = ''
- __init__(monitor_type: str = 'tensorboard', monitor_args: ~typing.Dict = <factory>, cache_dir: str = '') None
- class trinity.common.config.SynchronizerConfig(sync_method: SyncMethod = SyncMethod.NCCL, sync_interval: int = 1, sync_timeout: int = 1200, wait_for_checkpoint: bool = False, explorer_world_size: int | None = None)[source]
Bases:
object
Configs for model weight synchronization
- sync_method: SyncMethod = 'nccl'
- sync_interval: int = 1
- sync_timeout: int = 1200
- wait_for_checkpoint: bool = False
- explorer_world_size: int | None = None
- __init__(sync_method: SyncMethod = SyncMethod.NCCL, sync_interval: int = 1, sync_timeout: int = 1200, wait_for_checkpoint: bool = False, explorer_world_size: int | None = None) None
- class trinity.common.config.Config(mode: str = 'both', project: str = 'Trinity-RFT', name: str = 'rft', checkpoint_root_dir: str = '', checkpoint_job_dir: str = '', algorithm: ~trinity.common.config.AlgorithmConfig = <factory>, data_processor: ~trinity.common.config.DataProcessorConfig = <factory>, model: ~trinity.common.config.ModelConfig = <factory>, cluster: ~trinity.common.config.ClusterConfig = <factory>, buffer: ~trinity.common.config.BufferConfig = <factory>, explorer: ~trinity.common.config.ExplorerConfig = <factory>, trainer: ~trinity.common.config.TrainerConfig = <factory>, monitor: ~trinity.common.config.MonitorConfig = <factory>, synchronizer: ~trinity.common.config.SynchronizerConfig = <factory>)[source]
Bases:
object
Global Configuration
- mode: str = 'both'
- project: str = 'Trinity-RFT'
- name: str = 'rft'
- checkpoint_root_dir: str = ''
- checkpoint_job_dir: str = ''
- algorithm: AlgorithmConfig
- data_processor: DataProcessorConfig
- model: ModelConfig
- cluster: ClusterConfig
- buffer: BufferConfig
- explorer: ExplorerConfig
- trainer: TrainerConfig
- monitor: MonitorConfig
- synchronizer: SynchronizerConfig
- __init__(mode: str = 'both', project: str = 'Trinity-RFT', name: str = 'rft', checkpoint_root_dir: str = '', checkpoint_job_dir: str = '', algorithm: ~trinity.common.config.AlgorithmConfig = <factory>, data_processor: ~trinity.common.config.DataProcessorConfig = <factory>, model: ~trinity.common.config.ModelConfig = <factory>, cluster: ~trinity.common.config.ClusterConfig = <factory>, buffer: ~trinity.common.config.BufferConfig = <factory>, explorer: ~trinity.common.config.ExplorerConfig = <factory>, trainer: ~trinity.common.config.TrainerConfig = <factory>, monitor: ~trinity.common.config.MonitorConfig = <factory>, synchronizer: ~trinity.common.config.SynchronizerConfig = <factory>) None
trinity.common.constants module
Constants.
- class trinity.common.constants.CaseInsensitiveEnumMeta(cls, bases, classdict, **kwds)[source]
Bases:
EnumMeta
- class trinity.common.constants.CaseInsensitiveEnum(value, *args, **kwargs)[source]
Bases:
Enum
An enumeration.
- class trinity.common.constants.PromptType(value, *args, **kwargs)[source]
Bases:
CaseInsensitiveEnum
Prompt Type.
- MESSAGES = 'messages'
- CHATPAIR = 'chatpair'
- PLAINTEXT = 'plaintext'
- class trinity.common.constants.TaskType(value)[source]
Bases:
Enum
Task Type.
- EXPLORE = 0
- EVAL = 1
- class trinity.common.constants.ReadStrategy(value, *args, **kwargs)[source]
Bases:
CaseInsensitiveEnum
Pop Strategy.
- DEFAULT = None
- FIFO = 'fifo'
- RANDOM = 'random'
- LRU = 'lru'
- LFU = 'lfu'
- PRIORITY = 'priority'
- class trinity.common.constants.StorageType(value, *args, **kwargs)[source]
Bases:
CaseInsensitiveEnum
Storage Type.
- SQL = 'sql'
- QUEUE = 'queue'
- FILE = 'file'
- class trinity.common.constants.AlgorithmType(value, *args, **kwargs)[source]
Bases:
CaseInsensitiveEnum
Algorithm Type.
- SFT = 'sft'
- PPO = 'ppo'
- GRPO = 'grpo'
- OPMD = 'opmd'
- PAIRWISE_OPMD = 'pairwise_opmd'
- DPO = 'dpo'
- class trinity.common.constants.MonitorType(value, *args, **kwargs)[source]
Bases:
CaseInsensitiveEnum
Monitor Type.
- WANDB = 'wandb'
- TENSORBOARD = 'tensorboard'
- class trinity.common.constants.SyncMethodEnumMeta(cls, bases, classdict, **kwds)[source]
Bases:
CaseInsensitiveEnumMeta
- class trinity.common.constants.SyncMethod(value, *args, **kwargs)[source]
Bases:
CaseInsensitiveEnum
Sync Method.
- NCCL = 'nccl'
- CHECKPOINT = 'checkpoint'
trinity.common.experience module
Experience Class.
- class trinity.common.experience.Experience(tokens: Tensor, prompt_length: int, logprobs: Tensor | None = None, reward: float | None = None, prompt_text: str | None = None, response_text: str | None = None, action_mask: Tensor | None = None, chosen: Tensor | None = None, rejected: Tensor | None = None, info: dict | None = None, metrics: dict[str, float] | None = None, run_id: str = '')[source]
Bases:
object
A single experience.
- tokens: Tensor
- prompt_length: int
- logprobs: Tensor | None = None
- reward: float | None = None
- prompt_text: str | None = None
- response_text: str | None = None
- action_mask: Tensor | None = None
- chosen: Tensor | None = None
- rejected: Tensor | None = None
- info: dict | None = None
- metrics: dict[str, float] | None = None
- run_id: str = ''
- static deserialize(data: bytes) Experience [source]
Deserialize the experience from bytes.
- __init__(tokens: Tensor, prompt_length: int, logprobs: Tensor | None = None, reward: float | None = None, prompt_text: str | None = None, response_text: str | None = None, action_mask: Tensor | None = None, chosen: Tensor | None = None, rejected: Tensor | None = None, info: dict | None = None, metrics: dict[str, float] | None = None, run_id: str = '') None
- class trinity.common.experience.Experiences(tokens: Tensor, rewards: Tensor, attention_masks: Tensor, action_masks: Tensor | None, prompt_length: int, logprobs: Tensor | None, run_ids: List[str])[source]
Bases:
object
A container for a batch of experiences, for high performance communication usage.
Example
>>>             |<- prompt_length ->|               |
>>> tokens: ('P' represents prompt, 'O' represents output)
>>> exp1:       |........PPPPPPPPPPP|OOOOOOOOOO.....|
>>> exp2:       |......PPPPPPPPPPPPP|OOOOOOO........|
>>>
>>> attention_masks: ('.' represents False and '1' represents True)
>>> exp1:       |........11111111111|1111111111.....|
>>> exp2:       |......1111111111111|1111111........|
- tokens: Tensor
- rewards: Tensor
- attention_masks: Tensor
- action_masks: Tensor | None
- prompt_length: int
- logprobs: Tensor | None
- run_ids: List[str]
- property batch_size: int
Get the batch size.
- classmethod gather_experiences(experiences: list[Experience], pad_token_id: int = 0) Experiences [source]
Gather a batch of experiences from a list of experiences.
This method will automatically pad the tokens and logprobs of input experiences to the same length.
- classmethod gather_dpo_experiences(experiences: list[Experience], pad_token_id: int = 0) Experiences [source]
Gather a batch of dpo experiences from a list of experiences.
Reference: https://github.com/huggingface/trl/blob/main/trl/trainer/dpo_trainer.py#L849
- Note: We arrange inputs in the order of (chosen, rejected, chosen, rejected, …)
to ensure that each pair of (chosen, rejected) is not split by subsequent operations.
- Parameters:
experiences – (list[Experience]) “prompt”: token ids of the prompt; “chosen”: token ids of the chosen response; “rejected”: token ids of the rejected response.
pad_token_id – (int) The pad token id.
- Returns:
“tokens”: Concatenated chosen and rejected completion input IDs of shape (2 * batch_size, max_completion_length).
“attention_masks”: Concatenated chosen and rejected attention masks of shape (2 * batch_size, max_completion_length).
- Return type:
Experiences
- __init__(tokens: Tensor, rewards: Tensor, attention_masks: Tensor, action_masks: Tensor | None, prompt_length: int, logprobs: Tensor | None, run_ids: List[str]) None
trinity.common.schema module
Schema for different types of data.
- class trinity.common.schema.RftDatasetModel(**kwargs)[source]
Bases:
Base
SQLAlchemy model for RftDataset.
- id
- consumed_cnt
- last_modified_date
- from_id
- from_model
- from_recipe
- prompt
- response
- solution
- reward
- chosen
- rejected
- label
- quality_score
- quality_score_detail
- difficulty_score
- difficulty_score_detail
- diversity_score
- diversity_score_detail
- priority
- reward_fn
- workflow
- __init__(**kwargs)
A simple constructor that allows initialization from kwargs.
Sets attributes on the constructed instance using the names and values in kwargs.
Only keys that are present as attributes of the instance’s class are allowed. These could be, for example, any mapped columns or relationships.
- class trinity.common.schema.TaskModel(**kwargs)[source]
Bases:
Base
SQLAlchemy model for Task.
- id
- task_desc
- workflow_type
- reward_type
- __init__(**kwargs)
A simple constructor that allows initialization from kwargs.
Sets attributes on the constructed instance using the names and values in kwargs.
Only keys that are present as attributes of the instance’s class are allowed. These could be, for example, any mapped columns or relationships.
- class trinity.common.schema.ExperienceModel(**kwargs)[source]
Bases:
Base
SQLAlchemy model for Experience.
- id
- serialized_exp
- prompt
- response
- reward
- consumed
- priority
- to_experience() Experience [source]
Load the experience from the database.
- static from_experience(experience: Experience)[source]
Save the experience to database.
- __init__(**kwargs)
A simple constructor that allows initialization from kwargs.
Sets attributes on the constructed instance using the names and values in kwargs.
Only keys that are present as attributes of the instance’s class are allowed. These could be, for example, any mapped columns or relationships.
- class trinity.common.schema.SFTDataModel(**kwargs)[source]
Bases:
Base
SQLAlchemy model for SFT data.
- id
- serialized_exp
- messages
- consumed
- to_experience() Experience [source]
Load the experience from the database.
- classmethod from_messages(messages: list[dict], tokenizer: Any, chat_template: str | None = None) SFTDataModel [source]
Convert a list of messages into a single instance of SFT data.
- __init__(**kwargs)
A simple constructor that allows initialization from kwargs.
Sets attributes on the constructed instance using the names and values in kwargs.
Only keys that are present as attributes of the instance’s class are allowed. These could be, for example, any mapped columns or relationships.
- class trinity.common.schema.DPODataModel(**kwargs)[source]
Bases:
Base
SQLAlchemy model for DPO data.
- id
- serialized_exp
- chosen
- rejected
- consumed
- to_experience() Experience [source]
Load the experience from the database.
- __init__(**kwargs)
A simple constructor that allows initialization from kwargs.
Sets attributes on the constructed instance using the names and values in kwargs.
Only keys that are present as attributes of the instance’s class are allowed. These could be, for example, any mapped columns or relationships.
trinity.common.verl_config module
- class trinity.common.verl_config.Data(train_batch_size: int = 1024)[source]
Bases:
object
- train_batch_size: int = 1024
- __init__(train_batch_size: int = 1024) None
- class trinity.common.verl_config.ActorModel(path: str = '', external_lib: Optional[str] = None, override_config: Dict[str, Any] = <factory>, enable_gradient_checkpointing: bool = True, use_remove_padding: bool = False)[source]
Bases:
object
- path: str = ''
- external_lib: str | None = None
- override_config: Dict[str, Any]
- enable_gradient_checkpointing: bool = True
- use_remove_padding: bool = False
- __init__(path: str = '', external_lib: str | None = None, override_config: ~typing.Dict[str, ~typing.Any] = <factory>, enable_gradient_checkpointing: bool = True, use_remove_padding: bool = False) None
- class trinity.common.verl_config.Optim(lr: float = 1e-06, lr_warmup_steps: int = -1, lr_warmup_steps_ratio: float = 0.0, min_lr_ratio: float | None = 0.0, warmup_style: str = 'constant', total_training_steps: int = -1, beta1: float = 0.9, beta2: float = 0.999)[source]
Bases:
object
- lr: float = 1e-06
- lr_warmup_steps: int = -1
- lr_warmup_steps_ratio: float = 0.0
- min_lr_ratio: float | None = 0.0
- warmup_style: str = 'constant'
- total_training_steps: int = -1
- beta1: float = 0.9
- beta2: float = 0.999
- __init__(lr: float = 1e-06, lr_warmup_steps: int = -1, lr_warmup_steps_ratio: float = 0.0, min_lr_ratio: float | None = 0.0, warmup_style: str = 'constant', total_training_steps: int = -1, beta1: float = 0.9, beta2: float = 0.999) None
- class trinity.common.verl_config.WrapPolicy(min_num_params: int = 0)[source]
Bases:
object
- min_num_params: int = 0
- __init__(min_num_params: int = 0) None
- class trinity.common.verl_config.FSDPConfig(wrap_policy: trinity.common.verl_config.WrapPolicy = <factory>, min_num_params: int = 0, param_offload: bool = False, optimizer_offload: bool = False, fsdp_size: int = -1)[source]
Bases:
object
- wrap_policy: WrapPolicy
- min_num_params: int = 0
- param_offload: bool = False
- optimizer_offload: bool = False
- fsdp_size: int = -1
- __init__(wrap_policy: ~trinity.common.verl_config.WrapPolicy = <factory>, min_num_params: int = 0, param_offload: bool = False, optimizer_offload: bool = False, fsdp_size: int = -1) None
- class trinity.common.verl_config.Checkpoint(contents: List[str] = <factory>)[source]
Bases:
object
- contents: List[str]
- __init__(contents: ~typing.List[str] = <factory>) None
- class trinity.common.verl_config.Actor(strategy: str = 'fsdp', ppo_mini_batch_size: int = 256, ppo_micro_batch_size: Optional[int] = None, ppo_micro_batch_size_per_gpu: int = 1, use_dynamic_bsz: bool = False, ppo_max_token_len_per_gpu: int = 16384, grad_clip: float = 1.0, clip_ratio: float = 0.2, entropy_coeff: float = 0.001, use_kl_loss: bool = False, kl_loss_coef: float = 0.001, kl_loss_type: str = 'low_var_kl', ppo_epochs: int = 1, shuffle: bool = False, ulysses_sequence_parallel_size: int = 1, checkpoint: trinity.common.verl_config.Checkpoint = <factory>, optim: trinity.common.verl_config.Optim = <factory>, fsdp_config: trinity.common.verl_config.FSDPConfig = <factory>, algorithm_type: trinity.common.constants.AlgorithmType = <AlgorithmType.PPO: 'ppo'>, tau: float = 0.001, opmd_baseline: str = 'mean', use_uid: bool = False)[source]
Bases:
object
- strategy: str = 'fsdp'
- ppo_mini_batch_size: int = 256
- ppo_micro_batch_size: int | None = None
- ppo_micro_batch_size_per_gpu: int = 1
- use_dynamic_bsz: bool = False
- ppo_max_token_len_per_gpu: int = 16384
- grad_clip: float = 1.0
- clip_ratio: float = 0.2
- entropy_coeff: float = 0.001
- use_kl_loss: bool = False
- kl_loss_coef: float = 0.001
- kl_loss_type: str = 'low_var_kl'
- ppo_epochs: int = 1
- shuffle: bool = False
- ulysses_sequence_parallel_size: int = 1
- checkpoint: Checkpoint
- fsdp_config: FSDPConfig
- algorithm_type: AlgorithmType = 'ppo'
- tau: float = 0.001
- opmd_baseline: str = 'mean'
- use_uid: bool = False
- __init__(strategy: str = 'fsdp', ppo_mini_batch_size: int = 256, ppo_micro_batch_size: int | None = None, ppo_micro_batch_size_per_gpu: int = 1, use_dynamic_bsz: bool = False, ppo_max_token_len_per_gpu: int = 16384, grad_clip: float = 1.0, clip_ratio: float = 0.2, entropy_coeff: float = 0.001, use_kl_loss: bool = False, kl_loss_coef: float = 0.001, kl_loss_type: str = 'low_var_kl', ppo_epochs: int = 1, shuffle: bool = False, ulysses_sequence_parallel_size: int = 1, checkpoint: ~trinity.common.verl_config.Checkpoint = <factory>, optim: ~trinity.common.verl_config.Optim = <factory>, fsdp_config: ~trinity.common.verl_config.FSDPConfig = <factory>, algorithm_type: ~trinity.common.constants.AlgorithmType = AlgorithmType.PPO, tau: float = 0.001, opmd_baseline: str = 'mean', use_uid: bool = False) None
- class trinity.common.verl_config.Ref(fsdp_config: trinity.common.verl_config.FSDPConfig = <factory>, log_prob_micro_batch_size: Optional[int] = None, log_prob_micro_batch_size_per_gpu: int = 1, log_prob_use_dynamic_bsz: bool = False, log_prob_max_token_len_per_gpu: int = 0, ulysses_sequence_parallel_size: int = 1)[source]
Bases:
object
- fsdp_config: FSDPConfig
- log_prob_micro_batch_size: int | None = None
- log_prob_micro_batch_size_per_gpu: int = 1
- log_prob_use_dynamic_bsz: bool = False
- log_prob_max_token_len_per_gpu: int = 0
- ulysses_sequence_parallel_size: int = 1
- __init__(fsdp_config: ~trinity.common.verl_config.FSDPConfig = <factory>, log_prob_micro_batch_size: int | None = None, log_prob_micro_batch_size_per_gpu: int = 1, log_prob_use_dynamic_bsz: bool = False, log_prob_max_token_len_per_gpu: int = 0, ulysses_sequence_parallel_size: int = 1) None
- class trinity.common.verl_config.Rollout(temperature: float = 1.0, n: int = 1)[source]
Bases:
object
- temperature: float = 1.0
- n: int = 1
- __init__(temperature: float = 1.0, n: int = 1) None
- class trinity.common.verl_config.ActorRolloutRef(hybrid_engine: bool = True, model: trinity.common.verl_config.ActorModel = <factory>, actor: trinity.common.verl_config.Actor = <factory>, ref: trinity.common.verl_config.Ref = <factory>, rollout: trinity.common.verl_config.Rollout = <factory>, synchronizer: Optional[trinity.common.config.SynchronizerConfig] = None)[source]
Bases: object
- hybrid_engine: bool = True
- model: ActorModel
- actor: Actor
- ref: Ref
- rollout: Rollout
- synchronizer: SynchronizerConfig | None = None
- __init__(hybrid_engine: bool = True, model: ~trinity.common.verl_config.ActorModel = <factory>, actor: ~trinity.common.verl_config.Actor = <factory>, ref: ~trinity.common.verl_config.Ref = <factory>, rollout: ~trinity.common.verl_config.Rollout = <factory>, synchronizer: ~trinity.common.config.SynchronizerConfig | None = None) None
- class trinity.common.verl_config.CriticModel(path: str = '', tokenizer_path: str = '', override_config: Dict[str, str] = <factory>, external_lib: Optional[str] = None, enable_gradient_checkpointing: bool = True, use_remove_padding: bool = False, fsdp_config: trinity.common.verl_config.FSDPConfig = <factory>)[source]
Bases: object
- path: str = ''
- tokenizer_path: str = ''
- override_config: Dict[str, str]
- external_lib: str | None = None
- enable_gradient_checkpointing: bool = True
- use_remove_padding: bool = False
- fsdp_config: FSDPConfig
- __init__(path: str = '', tokenizer_path: str = '', override_config: ~typing.Dict[str, str] = <factory>, external_lib: str | None = None, enable_gradient_checkpointing: bool = True, use_remove_padding: bool = False, fsdp_config: ~trinity.common.verl_config.FSDPConfig = <factory>) None
- class trinity.common.verl_config.Critic(strategy: str = 'fsdp', optim: trinity.common.verl_config.Optim = <factory>, model: trinity.common.verl_config.CriticModel = <factory>, ppo_mini_batch_size: int = 0, ppo_micro_batch_size: Optional[int] = None, ppo_micro_batch_size_per_gpu: int = 1, forward_micro_batch_size: Optional[int] = None, forward_micro_batch_size_per_gpu: Optional[int] = None, use_dynamic_bsz: bool = False, ppo_max_token_len_per_gpu: int = 0, forward_max_token_len_per_gpu: int = 0, ulysses_sequence_parallel_size: int = 1, ppo_epochs: int = 0, shuffle: bool = False, grad_clip: float = 0.0, cliprange_value: float = 0.0, checkpoint: trinity.common.verl_config.Checkpoint = <factory>, rollout_n: int = 1)[source]
Bases: object
- strategy: str = 'fsdp'
- optim: Optim
- model: CriticModel
- ppo_mini_batch_size: int = 0
- ppo_micro_batch_size: int | None = None
- ppo_micro_batch_size_per_gpu: int = 1
- forward_micro_batch_size: int | None = None
- forward_micro_batch_size_per_gpu: int | None = None
- use_dynamic_bsz: bool = False
- ppo_max_token_len_per_gpu: int = 0
- forward_max_token_len_per_gpu: int = 0
- ulysses_sequence_parallel_size: int = 1
- ppo_epochs: int = 0
- shuffle: bool = False
- grad_clip: float = 0.0
- cliprange_value: float = 0.0
- checkpoint: Checkpoint
- rollout_n: int = 1
- __init__(strategy: str = 'fsdp', optim: ~trinity.common.verl_config.Optim = <factory>, model: ~trinity.common.verl_config.CriticModel = <factory>, ppo_mini_batch_size: int = 0, ppo_micro_batch_size: int | None = None, ppo_micro_batch_size_per_gpu: int = 1, forward_micro_batch_size: int | None = None, forward_micro_batch_size_per_gpu: int | None = None, use_dynamic_bsz: bool = False, ppo_max_token_len_per_gpu: int = 0, forward_max_token_len_per_gpu: int = 0, ulysses_sequence_parallel_size: int = 1, ppo_epochs: int = 0, shuffle: bool = False, grad_clip: float = 0.0, cliprange_value: float = 0.0, checkpoint: ~trinity.common.verl_config.Checkpoint = <factory>, rollout_n: int = 1) None
- class trinity.common.verl_config.RewardModel(enable: bool = False, strategy: str = 'fsdp', model: trinity.common.verl_config._RewardModel = <factory>, micro_batch_size_per_gpu: int = 1, max_length: Optional[int] = None, ulysses_sequence_parallel_size: int = 1, use_dynamic_bsz: bool = False, forward_max_token_len_per_gpu: int = 0, reward_manager: str = 'naive')[source]
Bases: object
- enable: bool = False
- strategy: str = 'fsdp'
- model: _RewardModel
- micro_batch_size_per_gpu: int = 1
- max_length: int | None = None
- ulysses_sequence_parallel_size: int = 1
- use_dynamic_bsz: bool = False
- forward_max_token_len_per_gpu: int = 0
- reward_manager: str = 'naive'
- __init__(enable: bool = False, strategy: str = 'fsdp', model: ~trinity.common.verl_config._RewardModel = <factory>, micro_batch_size_per_gpu: int = 1, max_length: int | None = None, ulysses_sequence_parallel_size: int = 1, use_dynamic_bsz: bool = False, forward_max_token_len_per_gpu: int = 0, reward_manager: str = 'naive') None
- class trinity.common.verl_config.CustomRewardFunction(path: str | None = None, name: str = 'compute_score')[source]
Bases: object
- path: str | None = None
- name: str = 'compute_score'
- __init__(path: str | None = None, name: str = 'compute_score') None
- class trinity.common.verl_config.KL_Ctrl(type: str = 'fixed', kl_coef: float = 0.001, horizon: float = 10000, target_kl: float = 0.1)[source]
Bases: object
- type: str = 'fixed'
- kl_coef: float = 0.001
- horizon: float = 10000
- target_kl: float = 0.1
- __init__(type: str = 'fixed', kl_coef: float = 0.001, horizon: float = 10000, target_kl: float = 0.1) None
- class trinity.common.verl_config.Algorithm(gamma: float = 1.0, lam: float = 1.0, adv_estimator: str = 'gae', norm_adv_by_std_in_grpo: bool = True, use_kl_in_reward: bool = False, kl_penalty: str = 'kl', kl_ctrl: trinity.common.verl_config.KL_Ctrl = <factory>)[source]
Bases: object
- gamma: float = 1.0
- lam: float = 1.0
- adv_estimator: str = 'gae'
- norm_adv_by_std_in_grpo: bool = True
- use_kl_in_reward: bool = False
- kl_penalty: str = 'kl'
- kl_ctrl: KL_Ctrl
- __init__(gamma: float = 1.0, lam: float = 1.0, adv_estimator: str = 'gae', norm_adv_by_std_in_grpo: bool = True, use_kl_in_reward: bool = False, kl_penalty: str = 'kl', kl_ctrl: ~trinity.common.verl_config.KL_Ctrl = <factory>) None
- class trinity.common.verl_config.Trainer(balance_batch: bool = True, total_epochs: int = 30, total_training_steps: Optional[int] = None, project_name: str = '', experiment_name: str = '', logger: List[str] = <factory>, val_generations_to_log_to_wandb: int = 0, nnodes: int = 0, n_gpus_per_node: int = 0, save_freq: int = 0, resume_mode: str = 'auto', resume_from_path: str = '', test_freq: int = 0, critic_warmup: int = 0, default_hdfs_dir: Optional[str] = None, remove_previous_ckpt_in_save: bool = False, del_local_ckpt_after_load: bool = False, default_local_dir: str = '', val_before_train: bool = False, training_rollout_mode: str = 'parallel', enable_exp_buffer: bool = True, sync_freq: int = 0, sft_warmup_steps: int = 0, max_actor_ckpt_to_keep: Optional[int] = None, max_critic_ckpt_to_keep: Optional[int] = None)[source]
Bases: object
- balance_batch: bool = True
- total_epochs: int = 30
- total_training_steps: int | None = None
- project_name: str = ''
- experiment_name: str = ''
- logger: List[str]
- val_generations_to_log_to_wandb: int = 0
- nnodes: int = 0
- n_gpus_per_node: int = 0
- save_freq: int = 0
- resume_mode: str = 'auto'
- resume_from_path: str = ''
- test_freq: int = 0
- critic_warmup: int = 0
- default_hdfs_dir: str | None = None
- remove_previous_ckpt_in_save: bool = False
- del_local_ckpt_after_load: bool = False
- default_local_dir: str = ''
- val_before_train: bool = False
- training_rollout_mode: str = 'parallel'
- enable_exp_buffer: bool = True
- sync_freq: int = 0
- sft_warmup_steps: int = 0
- max_actor_ckpt_to_keep: int | None = None
- max_critic_ckpt_to_keep: int | None = None
- __init__(balance_batch: bool = True, total_epochs: int = 30, total_training_steps: int | None = None, project_name: str = '', experiment_name: str = '', logger: ~typing.List[str] = <factory>, val_generations_to_log_to_wandb: int = 0, nnodes: int = 0, n_gpus_per_node: int = 0, save_freq: int = 0, resume_mode: str = 'auto', resume_from_path: str = '', test_freq: int = 0, critic_warmup: int = 0, default_hdfs_dir: str | None = None, remove_previous_ckpt_in_save: bool = False, del_local_ckpt_after_load: bool = False, default_local_dir: str = '', val_before_train: bool = False, training_rollout_mode: str = 'parallel', enable_exp_buffer: bool = True, sync_freq: int = 0, sft_warmup_steps: int = 0, max_actor_ckpt_to_keep: int | None = None, max_critic_ckpt_to_keep: int | None = None) None
- class trinity.common.verl_config.veRLConfig(data: trinity.common.verl_config.Data = <factory>, actor_rollout_ref: trinity.common.verl_config.ActorRolloutRef = <factory>, critic: trinity.common.verl_config.Critic = <factory>, reward_model: trinity.common.verl_config.RewardModel = <factory>, custom_reward_function: trinity.common.verl_config.CustomRewardFunction = <factory>, algorithm: trinity.common.verl_config.Algorithm = <factory>, trainer: trinity.common.verl_config.Trainer = <factory>, buffer: trinity.common.config.BufferConfig = <factory>, synchronizer: Optional[trinity.common.config.SynchronizerConfig] = None, enable_preview: bool = True)[source]
Bases: object
- data: Data
- actor_rollout_ref: ActorRolloutRef
- critic: Critic
- reward_model: RewardModel
- custom_reward_function: CustomRewardFunction
- algorithm: Algorithm
- trainer: Trainer
- buffer: BufferConfig
- synchronizer: SynchronizerConfig | None = None
- enable_preview: bool = True
- __init__(data: ~trinity.common.verl_config.Data = <factory>, actor_rollout_ref: ~trinity.common.verl_config.ActorRolloutRef = <factory>, critic: ~trinity.common.verl_config.Critic = <factory>, reward_model: ~trinity.common.verl_config.RewardModel = <factory>, custom_reward_function: ~trinity.common.verl_config.CustomRewardFunction = <factory>, algorithm: ~trinity.common.verl_config.Algorithm = <factory>, trainer: ~trinity.common.verl_config.Trainer = <factory>, buffer: ~trinity.common.config.BufferConfig = <factory>, synchronizer: ~trinity.common.config.SynchronizerConfig | None = None, enable_preview: bool = True) None
- trinity.common.verl_config.load_config(config_path: str) -> veRLConfig [source]