trinity.algorithm.algorithm module

Algorithm classes.

class trinity.algorithm.algorithm.ConstantMeta(name, bases, namespace, **kwargs)[source]

Bases: ABCMeta

class trinity.algorithm.algorithm.AlgorithmType[source]

Bases: ABC

use_critic: bool
use_reference: bool
compute_advantage_in_trainer: bool
can_balance_batch: bool
schema: type
abstract classmethod default_config() → Dict[source]
classmethod name() → str[source]
classmethod check_config(config: Config) → None[source]
class trinity.algorithm.algorithm.SFTAlgorithm[source]

Bases: AlgorithmType

SFT Algorithm.

use_critic: bool = False
use_reference: bool = False
compute_advantage_in_trainer: bool = False
can_balance_batch: bool = True
schema

alias of SFTDataModel

classmethod default_config() → Dict[source]
class trinity.algorithm.algorithm.PPOAlgorithm[source]

Bases: AlgorithmType

PPO Algorithm.

use_critic: bool = True
use_reference: bool = True
compute_advantage_in_trainer: bool = True
can_balance_batch: bool = True
schema

alias of ExperienceModel

classmethod default_config() → Dict[source]
class trinity.algorithm.algorithm.GRPOAlgorithm[source]

Bases: AlgorithmType

GRPO algorithm.

use_critic: bool = False
use_reference: bool = True
compute_advantage_in_trainer: bool = False
can_balance_batch: bool = True
schema

alias of ExperienceModel

classmethod default_config() → Dict[source]
class trinity.algorithm.algorithm.OPMDAlgorithm[source]

Bases: AlgorithmType

OPMD algorithm.

use_critic: bool = False
use_reference: bool = True
compute_advantage_in_trainer: bool = False
can_balance_batch: bool = True
schema

alias of ExperienceModel

classmethod default_config() → Dict[source]
class trinity.algorithm.algorithm.DPOAlgorithm[source]

Bases: AlgorithmType

DPO algorithm.

use_critic: bool = False
use_reference: bool = True
compute_advantage_in_trainer: bool = False
can_balance_batch: bool = False
schema

alias of DPODataModel

classmethod default_config() → Dict[source]
classmethod check_config(config: Config) → None[source]
class trinity.algorithm.algorithm.MIXAlgorithm[source]

Bases: AlgorithmType

MIX algorithm.

use_critic: bool = False
use_reference: bool = True
compute_advantage_in_trainer: bool = False
use_rollout: bool = True
can_balance_batch: bool = True
schema

alias of ExperienceModel

classmethod default_config() → Dict[source]
class trinity.algorithm.algorithm.MIXCHORDAlgorithm[source]

Bases: AlgorithmType

MIXCHORD algorithm.

use_critic: bool = False
use_reference: bool = True
compute_advantage_in_trainer: bool = False
use_rollout: bool = True
can_balance_batch: bool = True
schema

alias of ExperienceModel

classmethod default_config() → Dict[source]
class trinity.algorithm.algorithm.RAFTAlgorithm[source]

Bases: AlgorithmType

RAFT Algorithm. This algorithm is conceptually similar to Supervised Fine-Tuning (SFT) but is designed to work with the ExperienceModel schema from rollouts.

use_critic: bool = False
use_reference: bool = False
compute_advantage_in_trainer: bool = False
can_balance_batch: bool = True
schema

alias of ExperienceModel

classmethod default_config() → Dict[source]