trinity.algorithm.sample_strategy.sample_strategy module#

class trinity.algorithm.sample_strategy.sample_strategy.SampleStrategy(buffer_config: BufferConfig, **kwargs)[源代码]#

基类:ABC

__init__(buffer_config: BufferConfig, **kwargs) None[源代码]#
set_model_version_metric(exp_list: List[Experience], metrics: Dict)[源代码]#
abstractmethod async sample(step: int) Tuple[List[Experience], Dict, List][源代码]#

Sample data from buffer.

参数:

step (int) -- The current step number.

返回:

A tuple of three items: the sampled experiences (List[Experience]), the metrics for logging (Dict), and the representative data for logging (List).

返回类型:

Tuple[List[Experience], Dict, List]

abstractmethod classmethod default_args() dict[源代码]#

Get the default arguments of the sample strategy.

abstractmethod state_dict() dict[源代码]#

Get the state dict of the sample strategy.

abstractmethod load_state_dict(state_dict: dict) None[源代码]#

Load the state dict of the sample strategy.

class trinity.algorithm.sample_strategy.sample_strategy.DefaultSampleStrategy(buffer_config: BufferConfig, **kwargs)[源代码]#

基类:SampleStrategy

__init__(buffer_config: BufferConfig, **kwargs)[源代码]#
async sample(step: int, **kwargs) Tuple[List[Experience], Dict, List][源代码]#

Sample data from buffer.

参数:

step (int) -- The current step number.

返回:

A tuple of three items: the sampled experiences (List[Experience]), the metrics for logging (Dict), and the representative data for logging (List).

返回类型:

Tuple[List[Experience], Dict, List]

classmethod default_args() dict[源代码]#

Get the default arguments of the sample strategy.

state_dict() dict[源代码]#

Get the state dict of the sample strategy.

load_state_dict(state_dict: dict) None[源代码]#

Load the state dict of the sample strategy.

class trinity.algorithm.sample_strategy.sample_strategy.StalenessControlSampleStrategy(buffer_config: BufferConfig, **kwargs)[源代码]#

基类:DefaultSampleStrategy

__init__(buffer_config: BufferConfig, **kwargs)[源代码]#
async sample(step: int, **kwargs) Tuple[List[Experience], Dict, List][源代码]#

Sample data from buffer.

参数:

step (int) -- The current step number.

返回:

A tuple of three items: the sampled experiences (List[Experience]), the metrics for logging (Dict), and the representative data for logging (List).

返回类型:

Tuple[List[Experience], Dict, List]

class trinity.algorithm.sample_strategy.sample_strategy.WarmupSampleStrategy(buffer_config: BufferConfig, **kwargs)[源代码]#

基类:DefaultSampleStrategy

The warmup sample strategy. Deprecated: this class is kept for backward compatibility only. Please use DefaultSampleStrategy instead.

__init__(buffer_config: BufferConfig, **kwargs)[源代码]#