trinity.algorithm.algorithm module#
Algorithm classes.
- class trinity.algorithm.algorithm.ConstantMeta(name, bases, namespace, **kwargs)[source]#
Bases:
ABCMeta
- class trinity.algorithm.algorithm.AlgorithmType[source]#
Bases:
ABC
- use_critic: bool#
- use_reference: bool#
- compute_advantage_in_trainer: bool#
- can_balance_batch: bool#
- schema: str#
- class trinity.algorithm.algorithm.SFTAlgorithm[source]#
Bases:
AlgorithmType
SFT Algorithm.
- use_critic: bool = False#
- use_reference: bool = False#
- compute_advantage_in_trainer: bool = False#
- can_balance_batch: bool = True#
- schema: str = 'sft'#
- class trinity.algorithm.algorithm.PPOAlgorithm[source]#
Bases:
AlgorithmType
PPO Algorithm.
- use_critic: bool = True#
- use_reference: bool = True#
- compute_advantage_in_trainer: bool = True#
- can_balance_batch: bool = True#
- schema: str = 'experience'#
- class trinity.algorithm.algorithm.GRPOAlgorithm[source]#
Bases:
AlgorithmType
GRPO algorithm.
- use_critic: bool = False#
- use_reference: bool = True#
- compute_advantage_in_trainer: bool = False#
- can_balance_batch: bool = True#
- schema: str = 'experience'#
- class trinity.algorithm.algorithm.OPMDAlgorithm[source]#
Bases:
AlgorithmType
OPMD algorithm.
- use_critic: bool = False#
- use_reference: bool = True#
- compute_advantage_in_trainer: bool = False#
- can_balance_batch: bool = True#
- schema: str = 'experience'#
- class trinity.algorithm.algorithm.AsymREAlgorithm[source]#
Bases:
AlgorithmType
AsymRE algorithm.
- use_critic: bool = False#
- use_reference: bool = False#
- compute_advantage_in_trainer: bool = False#
- can_balance_batch: bool = True#
- schema: str = 'experience'#
- class trinity.algorithm.algorithm.DPOAlgorithm[source]#
Bases:
AlgorithmType
DPO algorithm.
- use_critic: bool = False#
- use_reference: bool = True#
- compute_advantage_in_trainer: bool = False#
- can_balance_batch: bool = False#
- schema: str = 'dpo'#
- class trinity.algorithm.algorithm.TOPRAlgorithm[source]#
Bases:
AlgorithmType
TOPR algorithm. See https://arxiv.org/pdf/2503.14286v1
- use_critic: bool = False#
- use_reference: bool = True#
- compute_advantage_in_trainer: bool = False#
- can_balance_batch: bool = True#
- schema: str = 'experience'#
- class trinity.algorithm.algorithm.CISPOAlgorithm[source]#
Bases:
AlgorithmType
CISPO algorithm. See https://arxiv.org/abs/2506.13585
- use_critic: bool = False#
- use_reference: bool = True#
- compute_advantage_in_trainer: bool = False#
- can_balance_batch: bool = True#
- schema: str = 'experience'#
- class trinity.algorithm.algorithm.MIXAlgorithm[source]#
Bases:
AlgorithmType
MIX algorithm.
- use_critic: bool = False#
- use_reference: bool = True#
- compute_advantage_in_trainer: bool = False#
- use_rollout: bool = True#
- can_balance_batch: bool = True#
- schema: str = 'experience'#
- class trinity.algorithm.algorithm.MIXCHORDAlgorithm[source]#
Bases:
AlgorithmType
MIXCHORD algorithm.
- use_critic: bool = False#
- use_reference: bool = True#
- compute_advantage_in_trainer: bool = False#
- use_rollout: bool = True#
- can_balance_batch: bool = True#
- schema: str = 'experience'#
- class trinity.algorithm.algorithm.RAFTAlgorithm[source]#
Bases:
AlgorithmType
RAFT Algorithm. This algorithm is conceptually similar to Supervised Fine-Tuning (SFT) but is designed to work with the experience schema produced by rollouts.
- use_critic: bool = False#
- use_reference: bool = False#
- compute_advantage_in_trainer: bool = False#
- can_balance_batch: bool = True#
- schema: str = 'experience'#
- class trinity.algorithm.algorithm.sPPOAlgorithm[source]#
Bases:
AlgorithmType
sPPO Algorithm.
- use_critic: bool = False#
- use_reference: bool = False#
- compute_advantage_in_trainer: bool = False#
- can_balance_batch: bool = True#
- schema: str = 'experience'#
- class trinity.algorithm.algorithm.RECAlgorithm[source]#
Bases:
AlgorithmType
REC Algorithm.
- use_critic: bool = False#
- use_reference: bool = True#
- compute_advantage_in_trainer: bool = False#
- can_balance_batch: bool = True#
- schema: str = 'experience'#