helpfulness

`BaseHelpfulnessListWiseReward`

Bases: BaseListWisePrincipleReward

The assistant aims to provide helpful and informative responses to users, responding to their queries with relevant and accurate information.

Source code in rm_gallery/gallery/rm/alignment/base.py

# Stored in RewardRegistry under the key "base_helpfulness_listwise"; the
# decorator hands the class back unchanged.
@RewardRegistry.register("base_helpfulness_listwise")
class BaseHelpfulnessListWiseReward(BaseListWisePrincipleReward):
    """The assistant aims to provide helpful and informative responses to users, responding to their queries with relevant and accurate information."""

    # NOTE: RewardRegistry.list() prints the class docstring above in its
    # "Scenario" column, so editing that text changes the listing output.
    # The DEFAULT_HELPFULNESS_* names are defined outside this block
    # (presumably module-level constants — confirm); they only supply defaults.
    # The task-specific helpfulness rewards subclass this class and re-declare
    # these same four fields with their own defaults.
    name: str = Field(default="base_helpfulness_listwise")
    desc: str = Field(default=DEFAULT_HELPFULNESS_DESC)
    scenario: str = Field(
        default=DEFAULT_HELPFULNESS_SCENARIO, description="assistant scenario"
    )
    principles: List[str] = Field(default=DEFAULT_HELPFULNESS_PRINCIPLES)

`BrainstormingListWiseReward`

Bases: BaseHelpfulnessListWiseReward

Brainstorming: Generating text to come up with new ideas or solutions, with an emphasis on creativity and driving thinking.

Source code in rm_gallery/gallery/rm/alignment/helpfulness/brainstorming.py

# Stored in RewardRegistry under the key "brainstorming_listwise_reward"; the
# decorator hands the class back unchanged.
@RewardRegistry.register("brainstorming_listwise_reward")
class BrainstormingListWiseReward(BaseHelpfulnessListWiseReward):
    """Brainstorming: Generating text to come up with new ideas or solutions, with an emphasis on creativity and driving thinking."""

    # NOTE: RewardRegistry.list() prints the class docstring above in its
    # "Scenario" column, so editing that text changes the listing output.
    # The fields below re-declare the base-class fields with brainstorming
    # defaults. SCENARIO, PRINCIPLES and DESC are defined outside this block
    # (presumably constants of brainstorming.py — confirm).
    name: str = Field(default="brainstorming_listwise_reward")
    scenario: str = Field(default=SCENARIO, description="assistant scenario")
    principles: List[str] = Field(default=PRINCIPLES)
    desc: str = Field(default=DESC)

`ChatListWiseReward`

Bases: BaseHelpfulnessListWiseReward

Chat: Simulates human conversation and communicates a variety of topics through text understanding and generation, emphasizing coherence and natural flow of interaction.

Source code in rm_gallery/gallery/rm/alignment/helpfulness/chat.py

# Stored in RewardRegistry under the key "chat_listwise_reward"; the decorator
# hands the class back unchanged.
@RewardRegistry.register("chat_listwise_reward")
class ChatListWiseReward(BaseHelpfulnessListWiseReward):
    """Chat: Simulates human conversation and communicates a variety of topics through text understanding and generation, emphasizing coherence and natural flow of interaction."""

    # NOTE: RewardRegistry.list() prints the class docstring above in its
    # "Scenario" column, so editing that text changes the listing output.
    # The fields below re-declare the base-class fields with chat defaults.
    # SCENARIO, PRINCIPLES and DESC are defined outside this block
    # (presumably constants of chat.py — confirm).
    name: str = Field(default="chat_listwise_reward", description="reward name")
    scenario: str = Field(default=SCENARIO, description="assistant scenario")
    principles: List[str] = Field(default=PRINCIPLES)
    desc: str = Field(default=DESC)

`ClassificationListWiseReward`

Bases: BaseHelpfulnessListWiseReward

Classification: Entails assigning predefined categories or labels to text based on its content.

Source code in rm_gallery/gallery/rm/alignment/helpfulness/classification.py

# Stored in RewardRegistry under the key "classification_listwise_reward"; the
# decorator hands the class back unchanged.
@RewardRegistry.register("classification_listwise_reward")
class ClassificationListWiseReward(BaseHelpfulnessListWiseReward):
    """Classification: Entails assigning predefined categories or labels to text based on its content."""

    # NOTE: RewardRegistry.list() prints the class docstring above in its
    # "Scenario" column, so editing that text changes the listing output.
    # The fields below re-declare the base-class fields with classification
    # defaults. SCENARIO, PRINCIPLES and DESC are defined outside this block
    # (presumably constants of classification.py — confirm).
    name: str = Field(
        default="classification_listwise_reward", description="reward name"
    )
    scenario: str = Field(default=SCENARIO, description="assistant scenario")
    principles: List[str] = Field(default=PRINCIPLES)
    desc: str = Field(default=DESC)

`ClosedQAListWiseReward`

Bases: BaseHelpfulnessListWiseReward

Closed QA: Search for direct answers to specific questions in given text sources (i.e. given context, given options).

Source code in rm_gallery/gallery/rm/alignment/helpfulness/closed_qa.py

# Stored in RewardRegistry under the key "closed_qa_listwise_reward"; the
# decorator hands the class back unchanged.
@RewardRegistry.register("closed_qa_listwise_reward")
class ClosedQAListWiseReward(BaseHelpfulnessListWiseReward):
    """Closed QA: Search for direct answers to specific questions in given text sources (i.e. given context, given options)."""

    # NOTE: RewardRegistry.list() prints the class docstring above in its
    # "Scenario" column, so editing that text changes the listing output.
    # The fields below re-declare the base-class fields with closed-QA
    # defaults. SCENARIO, PRINCIPLES and DESC are defined outside this block
    # (presumably constants of closed_qa.py — confirm).
    name: str = Field(default="closed_qa_listwise_reward", description="reward name")
    scenario: str = Field(default=SCENARIO, description="assistant scenario")
    principles: List[str] = Field(default=PRINCIPLES)
    desc: str = Field(default=DESC)

`CodeListWiseReward`

Bases: BaseHelpfulnessListWiseReward

Code: Involves generating, understanding, or modifying programming language code within text.

Source code in rm_gallery/gallery/rm/alignment/helpfulness/code.py

# Stored in RewardRegistry under the key "code_listwise_reward"; the decorator
# hands the class back unchanged.
@RewardRegistry.register("code_listwise_reward")
class CodeListWiseReward(BaseHelpfulnessListWiseReward):
    """Code: Involves generating, understanding, or modifying programming language code within text."""

    # NOTE: RewardRegistry.list() prints the class docstring above in its
    # "Scenario" column, so editing that text changes the listing output.
    # The fields below re-declare the base-class fields with code-task
    # defaults. SCENARIO, PRINCIPLES and DESC are defined outside this block
    # (presumably constants of code.py — confirm).
    name: str = Field(default="code_listwise_reward")
    scenario: str = Field(default=SCENARIO, description="assistant scenario")
    principles: List[str] = Field(default=PRINCIPLES)
    desc: str = Field(default=DESC)

`FocusListWiseReward`

Bases: BaseHelpfulnessListWiseReward

Focus: Detects high-quality, on-topic answers to general user queries

Source code in rm_gallery/gallery/rm/alignment/helpfulness/focus.py

# Stored in RewardRegistry under the key "focus_listwise_reward"; the decorator
# hands the class back unchanged.
@RewardRegistry.register("focus_listwise_reward")
class FocusListWiseReward(BaseHelpfulnessListWiseReward):
    """Focus: Detects high-quality, on-topic answers to general user queries"""

    # NOTE: RewardRegistry.list() prints the class docstring above in its
    # "Scenario" column, so editing that text changes the listing output.
    # The fields below re-declare the base-class fields with focus defaults.
    # DESC, SCENARIO and PRINCIPLES are defined outside this block
    # (presumably constants of focus.py — confirm).
    name: str = Field(default="focus_listwise_reward")
    desc: str = Field(default=DESC)
    scenario: str = Field(default=SCENARIO, description="assistant scenario")
    principles: List[str] = Field(default=PRINCIPLES)

`GenerationListWiseReward`

Bases: BaseHelpfulnessListWiseReward

Generation: Creating new textual content, from articles to stories, with an emphasis on originality and creativity.

Source code in rm_gallery/gallery/rm/alignment/helpfulness/generation.py

# Stored in RewardRegistry under the key "generation_listwise_reward"; the
# decorator hands the class back unchanged.
@RewardRegistry.register("generation_listwise_reward")
class GenerationListWiseReward(BaseHelpfulnessListWiseReward):
    """Generation: Creating new textual content, from articles to stories, with an emphasis on originality and creativity."""

    # NOTE: RewardRegistry.list() prints the class docstring above in its
    # "Scenario" column, so editing that text changes the listing output.
    # The fields below re-declare the base-class fields with generation
    # defaults. SCENARIO, PRINCIPLES and DESC are defined outside this block
    # (presumably constants of generation.py — confirm).
    name: str = Field(default="generation_listwise_reward", description="reward name")
    scenario: str = Field(default=SCENARIO, description="assistant scenario")
    principles: List[str] = Field(default=PRINCIPLES)
    desc: str = Field(default=DESC)

`MathListWiseReward`

Bases: BaseHelpfulnessListWiseReward

Math: Solves math problems posed as open-ended human prompts, ranging from middle school physics and geometry to college-level chemistry, calculus, combinatorics, and more.

Source code in rm_gallery/gallery/rm/alignment/helpfulness/math.py

# Stored in RewardRegistry under the key "math_listwise_reward"; the decorator
# hands the class back unchanged.
@RewardRegistry.register("math_listwise_reward")
class MathListWiseReward(BaseHelpfulnessListWiseReward):
    """Math: Solves problems at math, on open-ended human prompts ranging from middle school physics and geometry to college-level chemistry, calculus, combinatorics, and more."""

    # NOTE: RewardRegistry.list() prints the class docstring above in its
    # "Scenario" column, so editing that text changes the listing output.
    # The fields below re-declare the base-class fields with math defaults.
    # DESC, SCENARIO and PRINCIPLES are defined outside this block
    # (presumably constants of math.py — confirm).
    name: str = Field(default="math_listwise_reward")
    desc: str = Field(default=DESC)
    scenario: str = Field(default=SCENARIO, description="assistant scenario")
    principles: List[str] = Field(default=PRINCIPLES)

`OpenQAListWiseReward`

Bases: BaseHelpfulnessListWiseReward

Open QA: Search for answers across a wide range of text sources. The challenge is to process large amounts of information and understand complex questions.

Source code in rm_gallery/gallery/rm/alignment/helpfulness/open_qa.py

# Stored in RewardRegistry under the key "open_qa_listwise_reward"; the
# decorator hands the class back unchanged.
@RewardRegistry.register("open_qa_listwise_reward")
class OpenQAListWiseReward(BaseHelpfulnessListWiseReward):
    """Open QA: Search for answers across a wide range of text sources. The challenge is to process large amounts of information and understand complex questions."""

    # NOTE: RewardRegistry.list() prints the class docstring above in its
    # "Scenario" column, so editing that text changes the listing output.
    # The fields below re-declare the base-class fields with open-QA defaults.
    # SCENARIO, PRINCIPLES and DESC are defined outside this block
    # (presumably constants of open_qa.py — confirm).
    name: str = Field(default="open_qa_listwise_reward")
    scenario: str = Field(default=SCENARIO, description="assistant scenario")
    principles: List[str] = Field(default=PRINCIPLES)
    desc: str = Field(default=DESC)

`PreciseIFListWiseReward`

Bases: BaseHelpfulnessListWiseReward

Precise Instruction Following: Follows precise instructions, such as ‘Answer without the letter u’.

Source code in rm_gallery/gallery/rm/alignment/helpfulness/precise_if.py

# Stored in RewardRegistry under the key "precise_if_listwise_reward"; the
# decorator hands the class back unchanged.
@RewardRegistry.register("precise_if_listwise_reward")
class PreciseIFListWiseReward(BaseHelpfulnessListWiseReward):
    """Precise Instruction Following : Follows precise instructions, such as ‘Answer without the letter u’."""

    # NOTE: RewardRegistry.list() prints the class docstring above in its
    # "Scenario" column, so editing that text changes the listing output.
    # The fields below re-declare the base-class fields with
    # instruction-following defaults. DESC, SCENARIO and PRINCIPLES are defined
    # outside this block (presumably constants of precise_if.py — confirm).
    name: str = Field(default="precise_if_listwise_reward")
    desc: str = Field(default=DESC)
    scenario: str = Field(default=SCENARIO, description="assistant scenario")
    principles: List[str] = Field(default=PRINCIPLES)

`ReasoningListWiseReward`

Bases: BaseHelpfulnessListWiseReward

Reasoning: Involves processing and analyzing text to draw inferences, make predictions, or solve problems, requiring an understanding of underlying concepts and relationships within the text.

Source code in rm_gallery/gallery/rm/alignment/helpfulness/reasoning.py

# Stored in RewardRegistry under the key "reasoning_listwise_reward"; the
# decorator hands the class back unchanged.
@RewardRegistry.register("reasoning_listwise_reward")
class ReasoningListWiseReward(BaseHelpfulnessListWiseReward):
    """Reasoning: Involves processing and analyzing text to draw inferences, make predictions, or solve problems, requiring an understanding of underlying concepts and relationships within the text."""

    # NOTE: RewardRegistry.list() prints the class docstring above in its
    # "Scenario" column, so editing that text changes the listing output.
    # The fields below re-declare the base-class fields with reasoning
    # defaults. SCENARIO, PRINCIPLES and DESC are defined outside this block
    # (presumably constants of reasoning.py — confirm).
    name: str = Field(default="reasoning_listwise_reward", description="reward name")
    scenario: str = Field(default=SCENARIO, description="assistant scenario")
    principles: List[str] = Field(default=PRINCIPLES)
    desc: str = Field(default=DESC)

`RewardRegistry`

A registry management system for reward modules that maps module names to their corresponding implementation classes.

This class provides a centralized repository for registering and retrieving reward modules by string identifiers. Modules can be registered using decorators and later accessed by their string identifiers.

Attributes:

Name	Type	Description
`_registry`	`Dict[str, Type[BaseReward]]`	Internal dictionary storing the mapping between reward module names and their classes.

Source code in rm_gallery/core/reward/registry.py

class RewardRegistry:
    """A registry management system for reward modules that maps module names to their corresponding implementation classes.

    Classes are added through the ``register`` decorator factory, looked up by
    the same string identifier through ``get``, and rendered as a text table
    through ``list``.

    Attributes:
        _registry: Internal dictionary storing the mapping between reward module names and their classes.
    """

    # Dictionary mapping reward module names to their corresponding classes.
    # It is a class attribute, so every caller shares the same mapping.
    _registry: Dict[str, Type[BaseReward]] = {}

    @classmethod
    def register(cls, name: str):
        """Create a decorator to register a reward module class with a specified identifier.

        The decorator stores the class and returns it unchanged, so the decorated
        class keeps its original identity. Registering a second class under an
        existing name replaces the earlier entry.

        Args:
            name: Unique string identifier for the reward module

        Returns:
            A decorator function that registers the module when applied to a class
        """

        def _register(module: Type[BaseReward]):
            """Internal registration function that stores the module in the registry.

            Args:
                module: The BaseReward subclass to be registered

            Returns:
                The original module class (unchanged)
            """
            cls._registry[name] = module
            return module

        return _register

    @classmethod
    def get(cls, name: str) -> Type[BaseReward] | None:
        """Retrieve a registered reward module class by its identifier.

        Args:
            name: String identifier of the reward module to retrieve

        Returns:
            The BaseReward subclass registered under ``name``. ``None`` is never
            returned; the optional return annotation is kept only for backward
            compatibility.

        Raises:
            AssertionError: If no module is registered under ``name``.
        """
        # Raise explicitly instead of using ``assert``: assert statements are
        # stripped under ``python -O``, which previously turned a missing name
        # into a silent ``None``. Exception type and message are unchanged.
        if name not in cls._registry:
            raise AssertionError(f"Reward module '{name}' not found")
        return cls._registry[name]

    @classmethod
    def list(cls) -> str:
        """Render all registered reward modules as a text table.

        Returns:
            A grid-formatted table with one row per registered module and the
            columns ``Name`` (registry key), ``Class`` (class name) and
            ``Scenario`` (the stripped class docstring, or an empty string when
            the class has no docstring). With an empty registry the table
            contains only the header row.
        """
        columns = ["Name", "Class", "Scenario"]
        rows = [
            {
                "Name": name,
                "Class": module.__name__,
                # __doc__ is None for classes declared without a docstring.
                "Scenario": (module.__doc__ or "").strip(),
            }
            for name, module in cls._registry.items()
        ]

        if not rows:
            # Nothing registered yet: emit a header-only table rather than failing.
            return tabulate([], headers=columns, tablefmt="grid")

        info_df = pd.DataFrame(rows, columns=columns)
        return tabulate(
            info_df,
            headers="keys",
            tablefmt="grid",
            # One extra width entry for the index column that tabulate adds
            # in front of the DataFrame columns.
            maxcolwidths=[50] * (len(info_df.columns) + 1),
        )

`get(name)` `classmethod`

Retrieve a registered reward module class by its identifier.

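Provides access to registered modules by name. Note that, despite the optional return type, the implementation checks that `name` is registered, so a missing entry triggers an `AssertionError` instead of silently returning `None`.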

Parameters:

Name	Type	Description	Default
`name`	`str`	String identifier of the reward module to retrieve	required

Returns:

Type	Description
`Type[BaseReward] \| None`	The BaseReward subclass registered under `name`; an unregistered name triggers an `AssertionError` rather than a `None` result

Source code in rm_gallery/core/reward/registry.py

@classmethod
def get(cls, name: str) -> Type[BaseReward] | None:
    """Retrieve a registered reward module class by its identifier.

    Args:
        name: String identifier of the reward module to retrieve

    Returns:
        The BaseReward subclass registered under ``name``. ``None`` is never
        returned; the optional return annotation is kept only for backward
        compatibility.

    Raises:
        AssertionError: If no module is registered under ``name``.
    """
    # Raise explicitly instead of using ``assert``: assert statements are
    # stripped under ``python -O``, which previously turned a missing name
    # into a silent ``None``. Exception type and message are unchanged.
    if name not in cls._registry:
        raise AssertionError(f"Reward module '{name}' not found")
    return cls._registry[name]

`list()` `classmethod`

Returns:

Type	Description
`str`	A grid-formatted text table of all registered reward modules, with the columns Name, Class and Scenario

Source code in rm_gallery/core/reward/registry.py

@classmethod
def list(cls) -> str:
    """Render all registered reward modules as a text table.

    Returns:
        A grid-formatted table with one row per registered module and the
        columns ``Name`` (registry key), ``Class`` (class name) and
        ``Scenario`` (the stripped class docstring, or an empty string when
        the class has no docstring). With an empty registry the table
        contains only the header row.
    """
    columns = ["Name", "Class", "Scenario"]
    rows = [
        {
            "Name": name,
            "Class": module.__name__,
            # __doc__ is None for classes declared without a docstring.
            "Scenario": (module.__doc__ or "").strip(),
        }
        for name, module in cls._registry.items()
    ]

    if not rows:
        # Nothing registered yet: emit a header-only table rather than failing.
        return tabulate([], headers=columns, tablefmt="grid")

    info_df = pd.DataFrame(rows, columns=columns)
    return tabulate(
        info_df,
        headers="keys",
        tablefmt="grid",
        # One extra width entry for the index column that tabulate adds
        # in front of the DataFrame columns.
        maxcolwidths=[50] * (len(info_df.columns) + 1),
    )

`register(name)` `classmethod`

Create a decorator to register a reward module class with a specified identifier.

The decorator pattern allows classes to be registered while maintaining their original identity.

Parameters:

Name	Type	Description	Default
`name`	`str`	Unique string identifier for the reward module	required
`module`		The BaseReward subclass to be registered (received by the returned decorator, not passed to `register` itself)	required

Returns:

Type	Description
	A decorator function that registers the module when applied to a class

Source code in rm_gallery/core/reward/registry.py

@classmethod
def register(cls, name: str):
    """Build a class decorator that files a reward module under ``name``.

    The returned decorator writes the decorated class into the registry and
    hands the very same class back, so decorating does not alter it. An
    existing entry with the same name is replaced.

    Args:
        name: Unique string identifier for the reward module

    Returns:
        A decorator function that registers the module when applied to a class
    """

    def _decorator(module: Type[BaseReward]):
        """Record ``module`` in the registry under the enclosing ``name``.

        Args:
            module: The BaseReward subclass to be registered

        Returns:
            The same class object that was passed in
        """
        registry = cls._registry
        registry[name] = module
        return module

    return _decorator

`RewriteListWiseReward`

Bases: BaseHelpfulnessListWiseReward

Rewrite: the assistant aims to modify existing text to alter its style while preserving the original information and intent.

Source code in rm_gallery/gallery/rm/alignment/helpfulness/rewrite.py

# Stored in RewardRegistry under the key "rewrite_listwise_reward"; the
# decorator hands the class back unchanged.
@RewardRegistry.register("rewrite_listwise_reward")
class RewriteListWiseReward(BaseHelpfulnessListWiseReward):
    """Rewrite: the assistant aims to modifies existing text to alter its style while preserving the original information and intent."""

    # NOTE: RewardRegistry.list() prints the class docstring above in its
    # "Scenario" column, so editing that text changes the listing output.
    # The fields below re-declare the base-class fields with rewrite defaults.
    # SCENARIO, PRINCIPLES and DESC are defined outside this block
    # (presumably constants of rewrite.py — confirm).
    name: str = Field(default="rewrite_listwise_reward", description="reward name")
    scenario: str = Field(default=SCENARIO, description="assistant scenario")
    principles: List[str] = Field(default=PRINCIPLES)
    desc: str = Field(default=DESC)

`RolePlayingListWiseReward`

Bases: BaseHelpfulnessListWiseReward

Role Playing: Entails adopting specific characters or personas within text-based scenarios, engaging in dialogues or actions that reflect the assigned roles.

Source code in rm_gallery/gallery/rm/alignment/helpfulness/role_playing.py

# Stored in RewardRegistry under the key "role_playing_listwise_reward"; the
# decorator hands the class back unchanged.
@RewardRegistry.register("role_playing_listwise_reward")
class RolePlayingListWiseReward(BaseHelpfulnessListWiseReward):
    """Role Playing: Entails adopting specific characters or personas within text-based scenarios, engaging in dialogues or actions that reflect the assigned roles."""

    # NOTE: RewardRegistry.list() prints the class docstring above in its
    # "Scenario" column, so editing that text changes the listing output.
    # The fields below re-declare the base-class fields with role-playing
    # defaults. SCENARIO, PRINCIPLES and DESC are defined outside this block
    # (presumably constants of role_playing.py — confirm).
    name: str = Field(default="role_playing_listwise_reward")
    scenario: str = Field(default=SCENARIO, description="assistant scenario")
    principles: List[str] = Field(default=PRINCIPLES)
    desc: str = Field(default=DESC, description="task description")

`SummarizationListWiseReward`

Bases: BaseHelpfulnessListWiseReward

Summarization: The text is compressed into a short form, retaining the main information, which is divided into extraction (directly selected from the original text) and production (rewriting the information).

Source code in rm_gallery/gallery/rm/alignment/helpfulness/summarization.py

# Stored in RewardRegistry under the key "summarization_listwise_reward"; the
# decorator hands the class back unchanged.
@RewardRegistry.register("summarization_listwise_reward")
class SummarizationListWiseReward(BaseHelpfulnessListWiseReward):
    """Summarization: The text is compressed into a short form, retaining the main information, which is divided into extraction (directly selected from the original text) and production (rewriting the information)."""

    # NOTE: RewardRegistry.list() prints the class docstring above in its
    # "Scenario" column, so editing that text changes the listing output.
    # The fields below re-declare the base-class fields with summarization
    # defaults. SCENARIO, PRINCIPLES and DESC are defined outside this block
    # (presumably constants of summarization.py — confirm).
    name: str = Field(
        default="summarization_listwise_reward", description="reward name"
    )
    scenario: str = Field(default=SCENARIO, description="assistant scenario")
    principles: List[str] = Field(default=PRINCIPLES)
    desc: str = Field(default=DESC, description="task description")

`TranslationListWiseReward`

Bases: BaseHelpfulnessListWiseReward

Translation: Converting text from one language to another.

Source code in rm_gallery/gallery/rm/alignment/helpfulness/translation.py

# Stored in RewardRegistry under the key "translation_listwise_reward"; the
# decorator hands the class back unchanged.
@RewardRegistry.register("translation_listwise_reward")
class TranslationListWiseReward(BaseHelpfulnessListWiseReward):
    """Translation: Converting text from one language to another."""

    # NOTE: RewardRegistry.list() prints the class docstring above in its
    # "Scenario" column, so editing that text changes the listing output.
    # The fields below re-declare the base-class fields with translation
    # defaults. SCENARIO, PRINCIPLES and DESC are defined outside this block
    # (presumably constants of translation.py — confirm).
    name: str = Field(default="translation_listwise_reward", description="reward name")
    scenario: str = Field(default=SCENARIO, description="assistant scenario")
    principles: List[str] = Field(default=PRINCIPLES)
    desc: str = Field(default=DESC, description="task description")

helpfulness

BaseHelpfulnessListWiseReward

BrainstormingListWiseReward

ChatListWiseReward

ClassificationListWiseReward

ClosedQAListWiseReward

CodeListWiseReward

FocusListWiseReward

GenerationListWiseReward

MathListWiseReward

OpenQAListWiseReward

PreciseIFListWiseReward

ReasoningListWiseReward

RewardRegistry

get(name) classmethod

list() classmethod

register(name) classmethod

RewriteListWiseReward

RolePlayingListWiseReward

SummarizationListWiseReward

TranslationListWiseReward

`BaseHelpfulnessListWiseReward`

`BrainstormingListWiseReward`

`ChatListWiseReward`

`ClassificationListWiseReward`

`ClosedQAListWiseReward`

`CodeListWiseReward`

`FocusListWiseReward`

`GenerationListWiseReward`

`MathListWiseReward`

`OpenQAListWiseReward`

`PreciseIFListWiseReward`

`ReasoningListWiseReward`

`RewardRegistry`

`get(name)` `classmethod`

`list()` `classmethod`

`register(name)` `classmethod`

`RewriteListWiseReward`

`RolePlayingListWiseReward`

`SummarizationListWiseReward`

`TranslationListWiseReward`