trinity.algorithm.algorithm module

trinity.algorithm.algorithm module#

Algorithm classes.

class trinity.algorithm.algorithm.ConstantMeta(name, bases, namespace, **kwargs)[source]#

Bases: ABCMeta

class trinity.algorithm.algorithm.AlgorithmType[source]#

Bases: ABC

use_critic: bool#

use_reference: bool#

compute_advantage_in_trainer: bool#

can_balance_batch: bool#

schema: str#

abstract classmethod default_config() → Dict[source]#

classmethod name() → str[source]#

classmethod check_config(config: Config) → None[source]#

class trinity.algorithm.algorithm.SFTAlgorithm[source]#

Bases: AlgorithmType

SFT Algorithm.

use_critic: bool = False#

use_reference: bool = False#

compute_advantage_in_trainer: bool = False#

can_balance_batch: bool = True#

schema: str = 'sft'#

classmethod default_config() → Dict[source]#

class trinity.algorithm.algorithm.PPOAlgorithm[source]#

Bases: AlgorithmType

PPO Algorithm.

use_critic: bool = True#

use_reference: bool = True#

compute_advantage_in_trainer: bool = True#

can_balance_batch: bool = True#

schema: str = 'experience'#

classmethod default_config() → Dict[source]#

class trinity.algorithm.algorithm.GRPOAlgorithm[source]#

Bases: AlgorithmType

GRPO algorithm.

use_critic: bool = False#

use_reference: bool = True#

compute_advantage_in_trainer: bool = False#

can_balance_batch: bool = True#

schema: str = 'experience'#

classmethod default_config() → Dict[source]#

class trinity.algorithm.algorithm.OPMDAlgorithm[source]#

Bases: AlgorithmType

OPMD algorithm.

use_critic: bool = False#

use_reference: bool = True#

compute_advantage_in_trainer: bool = False#

can_balance_batch: bool = True#

schema: str = 'experience'#

classmethod default_config() → Dict[source]#

class trinity.algorithm.algorithm.AsymREAlgorithm[source]#

Bases: AlgorithmType

AsymRE algorithm.

use_critic: bool = False#

use_reference: bool = False#

compute_advantage_in_trainer: bool = False#

can_balance_batch: bool = True#

schema: str = 'experience'#

classmethod default_config() → Dict[source]#

class trinity.algorithm.algorithm.DPOAlgorithm[source]#

Bases: AlgorithmType

DPO algorithm.

use_critic: bool = False#

use_reference: bool = True#

compute_advantage_in_trainer: bool = False#

can_balance_batch: bool = False#

schema: str = 'dpo'#

classmethod default_config() → Dict[source]#

classmethod check_config(config: Config) → None[source]#

class trinity.algorithm.algorithm.TOPRAlgorithm[source]#

Bases: AlgorithmType

TOPR algorithm. See https://arxiv.org/pdf/2503.14286v1

use_critic: bool = False#

use_reference: bool = True#

compute_advantage_in_trainer: bool = False#

can_balance_batch: bool = True#

schema: str = 'experience'#

classmethod default_config() → Dict[source]#

class trinity.algorithm.algorithm.CISPOAlgorithm[source]#

Bases: AlgorithmType

CISPO algorithm. See https://arxiv.org/abs/2506.13585

use_critic: bool = False#

use_reference: bool = True#

compute_advantage_in_trainer: bool = False#

can_balance_batch: bool = True#

schema: str = 'experience'#

classmethod default_config() → Dict[source]#

class trinity.algorithm.algorithm.MIXAlgorithm[source]#

Bases: AlgorithmType

MIX algorithm.

use_critic: bool = False#

use_reference: bool = True#

compute_advantage_in_trainer: bool = False#

use_rollout: bool = True#

can_balance_batch: bool = True#

schema: str = 'experience'#

classmethod default_config() → Dict[source]#

class trinity.algorithm.algorithm.MIXCHORDAlgorithm[source]#

Bases: AlgorithmType

MIXCHORD algorithm.

use_critic: bool = False#

use_reference: bool = True#

compute_advantage_in_trainer: bool = False#

use_rollout: bool = True#

can_balance_batch: bool = True#

schema: str = 'experience'#

classmethod default_config() → Dict[source]#

class trinity.algorithm.algorithm.RAFTAlgorithm[source]#

Bases: AlgorithmType

RAFT Algorithm. This algorithm is conceptually similar to Supervised Fine-Tuning (SFT) but is designed to work with experience schema from rollouts.

use_critic: bool = False#

use_reference: bool = False#

compute_advantage_in_trainer: bool = False#

can_balance_batch: bool = True#

schema: str = 'experience'#

classmethod default_config() → Dict[source]#

class trinity.algorithm.algorithm.sPPOAlgorithm[source]#

Bases: AlgorithmType

sPPO Algorithm.

use_critic: bool = False#

use_reference: bool = False#

compute_advantage_in_trainer: bool = False#

can_balance_batch: bool = True#

schema: str = 'experience'#

classmethod default_config() → Dict[source]#

class trinity.algorithm.algorithm.RECAlgorithm[source]#

Bases: AlgorithmType

REC Algorithm.

use_critic: bool = False#

use_reference: bool = True#

compute_advantage_in_trainer: bool = False#

can_balance_batch: bool = True#

schema: str = 'experience'#

classmethod default_config() → Dict[source]#

class trinity.algorithm.algorithm.MultiStepGRPOAlgorithm[source]#

Bases: AlgorithmType

Multi-Step GRPO Algorithm.

use_critic: bool = False#

use_reference: bool = True#

compute_advantage_in_trainer: bool = False#

can_balance_batch: bool = True#

schema: str = 'experience'#

classmethod default_config() → Dict[source]#

trinity.algorithm.algorithm module

Contents

trinity.algorithm.algorithm module#