trinity.trainer.verl.fsdp_workers module
The main entry point to run the PPO algorithm.
Modified from volcengine/verl
-
class trinity.trainer.verl.fsdp_workers.ActorRolloutRefWorker(*args, **kwargs)[source]
Bases: Worker
This worker can be instantiated as a standalone actor, a standalone rollout, a standalone reference policy,
or a hybrid engine, depending on config.rollout.
-
__init__(config: DictConfig, role: str)[source]
Initialize the worker with environment settings and device configuration.
- Parameters:
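cuda_visible_devices (str, optional) – CUDA visible devices configuration. Defaults to None. Note: this parameter is not part of the signature shown above (config, role); the entry appears to be inherited from the base Worker docstring.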
-
init_model()[source]
-
setup_weight_sync_group()[source]
-
sync_weight()[source]
-
upload_state_dict(trainer_step: int)[source]
-
set_algorithm(algo_config: AlgorithmConfig)[source]
-
update_actor(data: DataProto)[source]
-
compute_log_prob(data: DataProto)[source]
-
compute_ref_log_prob(data: DataProto)[source]
-
save_checkpoint(local_path, hdfs_path=None, global_step=0, max_ckpt_to_keep=None, model_state_dict_only=False)[source]
-
load_checkpoint(local_path, hdfs_path=None, del_local_after_load=False)[source]
-
clear_optimizer_state()[source]
-
wait_on_save_thread() → None[source]
-
class trinity.trainer.verl.fsdp_workers.CriticWorker(*args, **kwargs)[source]
Bases: Worker
-
__init__(config)[source]
Initialize the worker with environment settings and device configuration.
- Parameters:
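cuda_visible_devices (str, optional) – CUDA visible devices configuration. Defaults to None. Note: this parameter is not part of the signature shown above (config); the entry appears to be inherited from the base Worker docstring.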
-
init_model()[source]
-
compute_values(data: DataProto)[source]
-
update_critic(data: DataProto)[source]
-
save_checkpoint(local_path, hdfs_path=None, global_step=0, max_ckpt_to_keep=None)[source]
-
load_checkpoint(local_path, hdfs_path=None, del_local_after_load=True)[source]
-
clear_optimizer_state()[source]
-
wait_on_save_thread() → None[source]