trinity.algorithm.algorithm module

Algorithm classes.

class trinity.algorithm.algorithm.ConstantMeta(name, bases, namespace, **kwargs)[source]

Bases: ABCMeta

class trinity.algorithm.algorithm.AlgorithmType[source]

Bases: ABC

use_critic: bool

use_reference: bool

compute_advantage_in_trainer: bool

can_balance_batch: bool

schema: type

abstract classmethod default_config() → Dict[source]

classmethod name() → str[source]

classmethod check_config(config: Config) → None[source]

class trinity.algorithm.algorithm.SFTAlgorithm[source]

Bases: AlgorithmType

SFT algorithm.

use_critic: bool = False

use_reference: bool = False

compute_advantage_in_trainer: bool = False

can_balance_batch: bool = True

schema: alias of SFTDataModel

classmethod default_config() → Dict[source]

class trinity.algorithm.algorithm.PPOAlgorithm[source]

Bases: AlgorithmType

PPO algorithm.

use_critic: bool = True

use_reference: bool = True

compute_advantage_in_trainer: bool = True

can_balance_batch: bool = True

schema: alias of ExperienceModel

classmethod default_config() → Dict[source]

class trinity.algorithm.algorithm.GRPOAlgorithm[source]

Bases: AlgorithmType

GRPO algorithm.

use_critic: bool = False

use_reference: bool = True

compute_advantage_in_trainer: bool = False

can_balance_batch: bool = True

schema: alias of ExperienceModel

classmethod default_config() → Dict[source]

class trinity.algorithm.algorithm.OPMDAlgorithm[source]

Bases: AlgorithmType

OPMD algorithm.

use_critic: bool = False

use_reference: bool = True

compute_advantage_in_trainer: bool = False

can_balance_batch: bool = True

schema: alias of ExperienceModel

classmethod default_config() → Dict[source]

class trinity.algorithm.algorithm.DPOAlgorithm[source]

Bases: AlgorithmType

DPO algorithm.

use_critic: bool = False

use_reference: bool = True

compute_advantage_in_trainer: bool = False

can_balance_batch: bool = False

schema: alias of DPODataModel

classmethod default_config() → Dict[source]

classmethod check_config(config: Config) → None[source]

class trinity.algorithm.algorithm.MIXAlgorithm[source]

Bases: AlgorithmType

MIX algorithm.

use_critic: bool = False

use_reference: bool = True

compute_advantage_in_trainer: bool = False

use_rollout: bool = True

can_balance_batch: bool = True

schema: alias of ExperienceModel

classmethod default_config() → Dict[source]

class trinity.algorithm.algorithm.MIXCHORDAlgorithm[source]

Bases: AlgorithmType

MIX-CHORD algorithm.

use_critic: bool = False

use_reference: bool = True

compute_advantage_in_trainer: bool = False

use_rollout: bool = True

can_balance_batch: bool = True

schema: alias of ExperienceModel

classmethod default_config() → Dict[source]

class trinity.algorithm.algorithm.RAFTAlgorithm[source]

Bases: AlgorithmType

RAFT algorithm. This algorithm is conceptually similar to Supervised Fine-Tuning (SFT), but it is designed to work with the ExperienceModel schema produced by rollouts.

use_critic: bool = False

use_reference: bool = False

compute_advantage_in_trainer: bool = False

can_balance_batch: bool = True

schema: alias of ExperienceModel

classmethod default_config() → Dict[source]