trinity.algorithm.algorithm module

Contents

trinity.algorithm.algorithm module#

Algorithm classes.

class trinity.algorithm.algorithm.ConstantMeta(name, bases, namespace, **kwargs)[source]#

Bases: ABCMeta

class trinity.algorithm.algorithm.AlgorithmType[source]#

Bases: ABC

use_critic: bool#
use_reference: bool#
compute_advantage_in_trainer: bool#
can_balance_batch: bool#
schema: str#
abstract classmethod default_config() → Dict[source]#
classmethod name() → str[source]#
classmethod check_config(config: Config) → None[source]#
class trinity.algorithm.algorithm.SFTAlgorithm[source]#

Bases: AlgorithmType

SFT Algorithm.

use_critic: bool = False#
use_reference: bool = False#
compute_advantage_in_trainer: bool = False#
can_balance_batch: bool = True#
schema: str = 'sft'#
classmethod default_config() → Dict[source]#
class trinity.algorithm.algorithm.PPOAlgorithm[source]#

Bases: AlgorithmType

PPO Algorithm.

use_critic: bool = True#
use_reference: bool = True#
compute_advantage_in_trainer: bool = True#
can_balance_batch: bool = True#
schema: str = 'experience'#
classmethod default_config() → Dict[source]#
class trinity.algorithm.algorithm.GRPOAlgorithm[source]#

Bases: AlgorithmType

GRPO algorithm.

use_critic: bool = False#
use_reference: bool = True#
compute_advantage_in_trainer: bool = False#
can_balance_batch: bool = True#
schema: str = 'experience'#
classmethod default_config() → Dict[source]#
class trinity.algorithm.algorithm.OPMDAlgorithm[source]#

Bases: AlgorithmType

OPMD algorithm.

use_critic: bool = False#
use_reference: bool = True#
compute_advantage_in_trainer: bool = False#
can_balance_batch: bool = True#
schema: str = 'experience'#
classmethod default_config() → Dict[source]#
class trinity.algorithm.algorithm.AsymREAlgorithm[source]#

Bases: AlgorithmType

AsymRE algorithm.

use_critic: bool = False#
use_reference: bool = False#
compute_advantage_in_trainer: bool = False#
can_balance_batch: bool = True#
schema: str = 'experience'#
classmethod default_config() → Dict[source]#
class trinity.algorithm.algorithm.DPOAlgorithm[source]#

Bases: AlgorithmType

DPO algorithm.

use_critic: bool = False#
use_reference: bool = True#
compute_advantage_in_trainer: bool = False#
can_balance_batch: bool = False#
schema: str = 'dpo'#
classmethod default_config() → Dict[source]#
classmethod check_config(config: Config) → None[source]#
class trinity.algorithm.algorithm.TOPRAlgorithm[source]#

Bases: AlgorithmType

TOPR algorithm. See https://arxiv.org/pdf/2503.14286v1

use_critic: bool = False#
use_reference: bool = True#
compute_advantage_in_trainer: bool = False#
can_balance_batch: bool = True#
schema: str = 'experience'#
classmethod default_config() → Dict[source]#
class trinity.algorithm.algorithm.CISPOAlgorithm[source]#

Bases: AlgorithmType

CISPO algorithm. See https://arxiv.org/abs/2506.13585

use_critic: bool = False#
use_reference: bool = True#
compute_advantage_in_trainer: bool = False#
can_balance_batch: bool = True#
schema: str = 'experience'#
classmethod default_config() → Dict[source]#
class trinity.algorithm.algorithm.MIXAlgorithm[source]#

Bases: AlgorithmType

MIX algorithm.

use_critic: bool = False#
use_reference: bool = True#
compute_advantage_in_trainer: bool = False#
use_rollout: bool = True#
can_balance_batch: bool = True#
schema: str = 'experience'#
classmethod default_config() → Dict[source]#
class trinity.algorithm.algorithm.MIXCHORDAlgorithm[source]#

Bases: AlgorithmType

MIX-CHORD algorithm.

use_critic: bool = False#
use_reference: bool = True#
compute_advantage_in_trainer: bool = False#
use_rollout: bool = True#
can_balance_batch: bool = True#
schema: str = 'experience'#
classmethod default_config() → Dict[source]#
class trinity.algorithm.algorithm.RAFTAlgorithm[source]#

Bases: AlgorithmType

RAFT Algorithm. This algorithm is conceptually similar to Supervised Fine-Tuning (SFT) but is designed to work with the experience schema from rollouts.

use_critic: bool = False#
use_reference: bool = False#
compute_advantage_in_trainer: bool = False#
can_balance_batch: bool = True#
schema: str = 'experience'#
classmethod default_config() → Dict[source]#
class trinity.algorithm.algorithm.sPPOAlgorithm[source]#

Bases: AlgorithmType

sPPO Algorithm.

use_critic: bool = False#
use_reference: bool = False#
compute_advantage_in_trainer: bool = False#
can_balance_batch: bool = True#
schema: str = 'experience'#
classmethod default_config() → Dict[source]#