trinity.buffer.schema.formatter module#

class trinity.buffer.schema.formatter.ExperienceFormatter[source]#

Bases: ABC

abstract format(sample: Dict) → Experience[source]#

Format a raw sample dict into an experience.

class trinity.buffer.schema.formatter.TaskFormatter(config: StorageConfig)[source]#

Bases: object

Formatter for task data.

Example Input:

{
    "input": "Hello",
    "output": "Hi"
}
__init__(config: StorageConfig)[source]#
format(sample: Dict) → Task[source]#

Format a raw sample dict into a Task.

class trinity.buffer.schema.formatter.SFTFormatter(tokenizer, format_config: FormatConfig)[source]#

Bases: ExperienceFormatter

Formatter for SFT data, supporting both message list and plaintext formats.

Uses format_config.prompt_type to distinguish between ‘messages’ and ‘plaintext’.

Example input of MESSAGES:

{
    "messages": [
        {"role": "user", "content": "Hello, how are you?"},
        {"role": "assistant", "content": "I'm fine, thank you!"}
    ]
}

Example input of PLAINTEXT:

{
    "system_prompt_key": "system",
    "prompt_key": "prompt",
    "response_key": "response",
}
__init__(tokenizer, format_config: FormatConfig)[source]#
format(sample: Dict) → Experience[source]#

Format a raw sample dict into an experience.

class trinity.buffer.schema.formatter.DPOFormatter(tokenizer, format_config: FormatConfig)[source]#

Bases: ExperienceFormatter

Formatter for DPO data, supporting both plaintext and message list formats.

Example Input for PLAINTEXT:

{
    "prompt": "What is your name?",
    "chosen": "My name is Assistant.",
    "rejected": "I don't have a name."
}

Example Input for MESSAGES:

{
    "messages": [
        {"role": "user", "content": "What is your name?"},
    ],
    "chosen": [
        {"role": "assistant", "content": "My name is Assistant."},
    ],
    "rejected": [
        {"role": "assistant", "content": "I don't have a name."}
    ]
}
__init__(tokenizer, format_config: FormatConfig)[source]#
format(sample: Dict) → Experience[source]#

Format a raw sample dict into an experience.