trinity.algorithm.algorithm module#
Algorithm classes.
- class trinity.algorithm.algorithm.ConstantMeta(name, bases, namespace, **kwargs)[source]#
Bases:
ABCMeta
- class trinity.algorithm.algorithm.AlgorithmType[source]#
Bases:
ABC
- use_critic: bool#
- use_reference: bool#
- compute_advantage_in_trainer: bool#
- can_balance_batch: bool#
- schema: str#
- class trinity.algorithm.algorithm.SFTAlgorithm[source]#
Bases:
AlgorithmType
SFT algorithm.
- use_critic: bool = False#
- use_reference: bool = False#
- compute_advantage_in_trainer: bool = False#
- can_balance_batch: bool = True#
- schema: str = 'sft'#
- class trinity.algorithm.algorithm.PPOAlgorithm[source]#
Bases:
AlgorithmType
PPO algorithm.
- use_critic: bool = True#
- use_reference: bool = True#
- compute_advantage_in_trainer: bool = True#
- can_balance_batch: bool = True#
- schema: str = 'experience'#
- class trinity.algorithm.algorithm.GRPOAlgorithm[source]#
Bases:
AlgorithmType
GRPO algorithm.
- use_critic: bool = False#
- use_reference: bool = True#
- compute_advantage_in_trainer: bool = False#
- can_balance_batch: bool = True#
- schema: str = 'experience'#
- class trinity.algorithm.algorithm.OPMDAlgorithm[source]#
Bases:
AlgorithmType
OPMD algorithm.
- use_critic: bool = False#
- use_reference: bool = True#
- compute_advantage_in_trainer: bool = False#
- can_balance_batch: bool = True#
- schema: str = 'experience'#
- class trinity.algorithm.algorithm.AsymREAlgorithm[source]#
Bases:
AlgorithmType
AsymRE algorithm.
- use_critic: bool = False#
- use_reference: bool = False#
- compute_advantage_in_trainer: bool = False#
- can_balance_batch: bool = True#
- schema: str = 'experience'#
- class trinity.algorithm.algorithm.DPOAlgorithm[source]#
Bases:
AlgorithmType
DPO algorithm.
- use_critic: bool = False#
- use_reference: bool = True#
- compute_advantage_in_trainer: bool = False#
- can_balance_batch: bool = False#
- schema: str = 'dpo'#
- class trinity.algorithm.algorithm.TOPRAlgorithm[source]#
Bases:
AlgorithmType
TOPR algorithm. See https://arxiv.org/pdf/2503.14286v1
- use_critic: bool = False#
- use_reference: bool = True#
- compute_advantage_in_trainer: bool = False#
- can_balance_batch: bool = True#
- schema: str = 'experience'#
- class trinity.algorithm.algorithm.CISPOAlgorithm[source]#
Bases:
AlgorithmType
CISPO algorithm. See https://arxiv.org/abs/2506.13585
- use_critic: bool = False#
- use_reference: bool = True#
- compute_advantage_in_trainer: bool = False#
- can_balance_batch: bool = True#
- schema: str = 'experience'#
- class trinity.algorithm.algorithm.MIXAlgorithm[source]#
Bases:
AlgorithmType
MIX algorithm.
- use_critic: bool = False#
- use_reference: bool = True#
- compute_advantage_in_trainer: bool = False#
- use_rollout: bool = True#
- can_balance_batch: bool = True#
- schema: str = 'experience'#
- class trinity.algorithm.algorithm.MIXCHORDAlgorithm[source]#
Bases:
AlgorithmType
MIX-CHORD algorithm.
- use_critic: bool = False#
- use_reference: bool = True#
- compute_advantage_in_trainer: bool = False#
- use_rollout: bool = True#
- can_balance_batch: bool = True#
- schema: str = 'experience'#
- class trinity.algorithm.algorithm.RAFTAlgorithm[source]#
Bases:
AlgorithmType
RAFT algorithm. This algorithm is conceptually similar to Supervised Fine-Tuning (SFT), but it is designed to work with the experience schema produced by rollouts.
- use_critic: bool = False#
- use_reference: bool = False#
- compute_advantage_in_trainer: bool = False#
- can_balance_batch: bool = True#
- schema: str = 'experience'#