BasePointWiseReward

Bases: BaseReward

Point-wise reward module for individual response evaluation.

Evaluates each response independently without considering relative ranking.

Source code in rm_gallery/core/reward/base.py
class BasePointWiseReward(BaseReward):
    """
    Point-wise reward module for individual response evaluation.

    Evaluates each response independently without considering relative ranking.
    """

    @abstractmethod
    def _evaluate(
        self, sample: DataSample, **kwargs
    ) -> RewardResult[RewardDimensionWithScore]:
        """
        Processes a single response to generate reward metrics.

        Parameters:
            sample (DataSample): Single-response data sample
            **kwargs: Evaluation parameters

        Returns:
            RewardResult[RewardDimensionWithScore]: Response-specific reward metrics
        """
        ...

    def _parallel(
        self,
        func: Callable,
        sample: DataSample,
        thread_pool: ThreadPoolExecutor | None = None,
        **kwargs,
    ) -> DataSample:
        """
        Processes responses in a data sample using parallel or sequential execution.

        This method applies the provided function to each response in the sample,
        either in parallel using a thread pool or sequentially. Results are merged
        back into the corresponding response objects.

        Parameters:
            func (Callable): Function to apply to each response. Should accept a
                DataSample and return an object with 'details' and 'extra_data' attributes.
            sample (DataSample): Input sample containing multiple responses to process
            thread_pool (ThreadPoolExecutor | None): Optional thread pool for parallel execution
            **kwargs: Additional arguments passed to func

        Returns:
            DataSample: Modified copy of input sample with reward metrics updated in each response

        The method creates a deep copy of the input sample to avoid modifying original data.
        When using a thread pool, it submits tasks for each response and waits for completion
        before merging results. Response objects are updated with both reward details and
        additional metadata from processing results.
        """
        sample = sample.model_copy(deep=True)
        futures = []
        for i, output in enumerate(sample.output):
            # Create sub-sample for individual response processing
            subsample = DataSample(
                unique_id=sample.unique_id, input=sample.input, output=[output]
            )

            if thread_pool:
                futures.append(
                    (
                        i,
                        thread_pool.submit(
                            func, sample=subsample, thread_pool=thread_pool, **kwargs
                        ),
                    )
                )
            else:
                result = func(
                    sample=subsample,
                    thread_pool=thread_pool,
                    **kwargs,
                )
                output.answer.reward.details += result.details
                output.answer.additional_kwargs[self.name] = result.extra_data

        # Process parallel execution results
        if thread_pool:
            wait([future[-1] for future in futures], return_when=ALL_COMPLETED)
            # Merge results back into sample outputs
            for i, future in futures:
                result = future.result()
                output = sample.output[i]
                output.answer.reward.details += result.details
                output.answer.additional_kwargs[self.name] = result.extra_data

        for output in sample.output:
            if len(output.answer.reward.details) > 0:
                output.answer.reward.score = sum(
                    r.score for r in output.answer.reward.details
                ) / len(output.answer.reward.details)

        return sample
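
A minimal sketch of a custom point-wise reward. ResponseLengthReward is a hypothetical example, not part of the library, and the import paths are inferred from the "Source code in ..." notes on this page:

from rm_gallery.core.data.schema import DataSample
from rm_gallery.core.reward.base import BasePointWiseReward
from rm_gallery.core.reward.registry import RewardRegistry
from rm_gallery.core.reward.schema import RewardDimensionWithScore, RewardResult


@RewardRegistry.register("response_length")
class ResponseLengthReward(BasePointWiseReward):
    """Toy reward: score each response by its length, capped at 200 characters."""

    name: str = "response_length"

    def _evaluate(
        self, sample: DataSample, **kwargs
    ) -> RewardResult[RewardDimensionWithScore]:
        # _parallel wraps each response in a single-output sub-sample,
        # so _evaluate only ever inspects sample.output[0]
        content = sample.output[0].answer.content
        score = min(len(content), 200) / 200.0
        return RewardResult(
            name=self.name,
            details=[
                RewardDimensionWithScore(
                    name=self.name,
                    score=score,
                    reason=f"Response length: {len(content)} characters",
                )
            ],
            extra_data={"length": len(content)},
        )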

CodeExecutionReward

Bases: BasePointWiseReward

Executes code against test cases and evaluates correctness based on test case results.

This reward model evaluates code by executing it against test cases, using a testing framework that supports both call-based and standard-input evaluation methods.

Source code in rm_gallery/gallery/rm/code/code.py
@RewardRegistry.register("code_execution")
class CodeExecutionReward(BasePointWiseReward):
    """
    Executes code against test cases and evaluates correctness based on test case results.

    This reward model evaluates code by executing it against test cases, using a testing
    framework that supports both call-based and standard-input evaluation methods.
    """

    name: str = Field(default="code_execution", description="Code execution reward")
    continuous: bool = Field(
        default=True, description="Use continuous scoring (partial credit)"
    )
    timeout: int = Field(
        default=10, description="Timeout in seconds for code execution"
    )
    test_framework_available: bool = Field(
        default=True, description="Whether testing framework is available"
    )
    compute_score: Optional[Any] = Field(
        default=None, description="Compute score function"
    )

    def __init__(self, **data):
        super().__init__(**data)
        try:
            from rm_gallery.gallery.rm.code.prime_code import compute_score

            self.compute_score = compute_score
            self.test_framework_available = True
        except ImportError:
            print(
                "Warning: Code testing framework not available. Please ensure rm_gallery.gallery.rm.code.prime_code is properly installed."
            )
            self.test_framework_available = False

    def _extract_code(self, content: str) -> str:
        """
        Extract code from content

        Args:
            content: Text content that may contain code blocks

        Returns:
            Extracted code
        """
        # Try to find Python code in various formats
        code_match = re.search(r"```python\n(.*?)\n```", content, re.DOTALL)
        if code_match:
            return code_match.group(1)

        # Try other formats
        code_match = re.search(r"```\n(.*?)\n```", content, re.DOTALL)
        if code_match:
            return code_match.group(1)

        # If no code block markers, assume the entire content is code
        return content

    def _evaluate(
        self, sample: DataSample, **kwargs
    ) -> RewardResult[RewardDimensionWithScore]:
        """
        Evaluate code against test cases

        Args:
            sample: Data sample containing code content and test cases

        Returns:
            RewardResult: Reward result containing evaluation score
        """
        # Extract code from response
        content = sample.output[0].answer.content
        extracted_code = self._extract_code(content)

        # Default values
        score = 0.0
        reason = "No evaluation performed"
        extra_data = {"extracted_code": extracted_code}

        # Check if testing framework is available
        if not self.test_framework_available:
            reason = "Code testing framework not available"
            extra_data["error"] = reason
        else:
            # Get test cases from sample metadata or label
            test_cases = None
            if sample.metadata and "inputs_outputs" in sample.metadata:
                test_cases = sample.metadata["inputs_outputs"]
            elif (
                sample.output[0].answer.label
                and "inputs_outputs" in sample.output[0].answer.label
            ):
                test_cases = sample.output[0].answer.label["inputs_outputs"]

            if not test_cases:
                reason = "No test cases available for evaluation"
            elif not extracted_code:
                score = 0.0
                reason = "No valid code extracted from response"
                extra_data["test_cases"] = test_cases
            else:
                # Convert test cases to string if needed
                if isinstance(test_cases, dict):
                    test_cases_str = json.dumps(test_cases)
                else:
                    test_cases_str = test_cases

                # Evaluate code using testing framework
                try:
                    success, metadata = self.compute_score(
                        completion=extracted_code,
                        test_cases=test_cases_str,
                        continuous=self.continuous,
                    )

                    # Determine score based on success rate
                    if isinstance(success, bool):
                        pass_rate = 1.0 if success else 0.0
                    else:
                        pass_rate = float(success)

                    # Score is always between 0 and 1
                    score = pass_rate

                    # Generate reason based on results
                    if pass_rate == 1.0:
                        reason = "All test cases passed successfully"
                    elif pass_rate == 0.0:
                        reason = "No test cases passed"
                    else:
                        reason = f"Partial success: {pass_rate * 100:.1f}% of test cases passed"

                    # Include metadata in extra_data
                    extra_data = {
                        "extracted_code": extracted_code,
                        "test_cases": test_cases,
                        "pass_rate": pass_rate,
                    }

                except Exception as e:
                    error_traceback = traceback.format_exc()
                    score = 0.0
                    reason = f"Evaluation error: {str(e)}"
                    extra_data = {
                        "extracted_code": extracted_code,
                        "test_cases": test_cases,
                        "error": str(e),
                        "traceback": error_traceback,
                    }

        # Single return statement at the end of the function
        return RewardResult(
            name=self.name,
            details=[
                RewardDimensionWithScore(
                    name=self.name,
                    score=score,
                    reason=reason,
                )
            ],
            extra_data=extra_data,
        )
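
A usage sketch. The nested dict shapes for ChatMessage and DataOutput are assumptions based on the attribute accesses shown above, and the exact test-case schema depends on the prime_code framework; adapt both to your installation:

from rm_gallery.core.data.schema import DataSample
from rm_gallery.core.reward.registry import RewardRegistry

sample = DataSample.model_validate(
    {
        "unique_id": "demo-exec-1",
        "input": [{"role": "user", "content": "Write add(a, b)."}],
        "output": [
            {"answer": {"content": "```python\ndef add(a, b):\n    return a + b\n```"}}
        ],
        # Test cases are read from sample.metadata["inputs_outputs"] first,
        # falling back to output[0].answer.label["inputs_outputs"]
        "metadata": {
            "inputs_outputs": {"fn_name": "add", "inputs": [[1, 2]], "outputs": [3]}
        },
    }
)

reward = RewardRegistry.get("code_execution")(timeout=5, continuous=True)
result = reward._evaluate(sample)  # the hook shown above; the public entry point may differ
print(result.details[0].score, result.details[0].reason)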

CodeStyleReward

Bases: BasePointWiseReward

Basic code style checking including indentation consistency and naming conventions.

Source code in rm_gallery/gallery/rm/code/code.py
@RewardRegistry.register("code_style")
class CodeStyleReward(BasePointWiseReward):
    """Basic code style checking including indentation consistency and naming conventions."""

    name: str = Field(default="code_style", description="Code style reward")

    def _check_indentation(self, code: str) -> tuple[bool, str]:
        """Check indentation consistency"""
        lines = code.split("\n")
        indent_type = None  # 'spaces' or 'tabs'

        for line in lines:
            if line.strip():  # Non-empty line
                leading = len(line) - len(line.lstrip())
                if leading > 0:
                    if line.startswith(" "):
                        if indent_type is None:
                            indent_type = "spaces"
                        elif indent_type != "spaces":
                            return False, "Mixed indentation types (spaces and tabs)"
                    elif line.startswith("\t"):
                        if indent_type is None:
                            indent_type = "tabs"
                        elif indent_type != "tabs":
                            return False, "Mixed indentation types (spaces and tabs)"

        return True, "Consistent indentation"

    def _check_naming(self, code: str) -> tuple[float, str]:
        """Check naming conventions"""
        # Simple naming check
        function_pattern = r"def\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*\("
        variable_pattern = r"([a-zA-Z_][a-zA-Z0-9_]*)\s*="

        functions = re.findall(function_pattern, code)
        variables = re.findall(variable_pattern, code)

        total_names = len(functions) + len(variables)
        if total_names == 0:
            return 1.0, "No names to check"

        good_names = 0

        # Check function names (should be snake_case)
        for func in functions:
            if re.match(r"^[a-z_][a-z0-9_]*$", func):
                good_names += 1

        # Check variable names (should be snake_case)
        for var in variables:
            if re.match(r"^[a-z_][a-z0-9_]*$", var):
                good_names += 1

        score = good_names / total_names
        return (
            score,
            f"Naming convention: {good_names}/{total_names} names follow snake_case",
        )

    def _evaluate(
        self, sample: DataSample, **kwargs
    ) -> RewardResult[RewardDimensionWithScore]:
        """
        Check code style

        Args:
            sample: Data sample containing code

        Returns:
            RewardResult: Reward result containing code style score
        """
        content = sample.output[0].answer.content

        # Extract code blocks
        code_pattern = r"```(?:python)?\n(.*?)\n```"
        code_blocks = re.findall(code_pattern, content, re.DOTALL)

        if not code_blocks:
            return RewardResult(
                name=self.name,
                details=[
                    RewardDimensionWithScore(
                        name=self.name,
                        score=0.0,
                        reason="No code blocks found to check style",
                    )
                ],
                extra_data={"code_blocks": []},
            )

        total_score = 0.0
        details = []

        for i, code in enumerate(code_blocks):
            block_score = 0.0

            # Check indentation
            indent_ok, indent_msg = self._check_indentation(code)
            if indent_ok:
                block_score += 0.5
            details.append(f"Block {i}: {indent_msg}")

            # Check naming
            naming_score, naming_msg = self._check_naming(code)
            block_score += naming_score * 0.5
            details.append(f"Block {i}: {naming_msg}")

            total_score += block_score

        # Average score
        average_score = total_score / len(code_blocks)

        return RewardResult(
            name=self.name,
            details=[
                RewardDimensionWithScore(
                    name=self.name,
                    score=average_score,
                    reason=f"Code style score: {average_score:.3f}; "
                    + "; ".join(details),
                )
            ],
            extra_data={
                "average_score": average_score,
                "code_blocks_count": len(code_blocks),
                "details": details,
            },
        )
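
A usage sketch (module path and dict shapes assumed, as elsewhere on this page). Each block contributes up to 0.5 for consistent indentation plus up to 0.5 weighted by the share of snake_case names:

from rm_gallery.core.data.schema import DataSample
from rm_gallery.gallery.rm.code.code import CodeStyleReward

sample = DataSample.model_validate(
    {
        "unique_id": "demo-style-1",
        "input": [{"role": "user", "content": "Show me some code."}],
        "output": [
            {
                "answer": {
                    "content": "```python\ndef my_func():\n    my_var = 1\n    return my_var\n```"
                }
            }
        ],
    }
)

result = CodeStyleReward()._evaluate(sample)
print(result.details[0].score)  # 1.0 here: consistent indentation, all names snake_case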

DataSample

Bases: BaseModel

Complete data sample structure for reward modeling training and evaluation.

Represents a single interaction with input context, multiple possible outputs, and associated metadata for comprehensive reward model training.

Attributes:

    unique_id (str): Unique identifier for tracking and deduplication
    input (List[ChatMessage]): Conversation context as list of chat messages
    output (List[DataOutput]): List of possible responses with evaluations
    task_category (Optional[str]): Optional categorization for task-specific analysis
    source (Optional[str]): Origin dataset or system that generated this sample
    created_at (datetime): Timestamp for temporal tracking
    metadata (Optional[Dict]): Additional context and debugging information

Source code in rm_gallery/core/data/schema.py
class DataSample(BaseModel):
    """
    Complete data sample structure for reward modeling training and evaluation.

    Represents a single interaction with input context, multiple possible outputs,
    and associated metadata for comprehensive reward model training.

    Attributes:
        unique_id: Unique identifier for tracking and deduplication
        input: Conversation context as list of chat messages
        output: List of possible responses with evaluations
        task_category: Optional categorization for task-specific analysis
        source: Origin dataset or system that generated this sample
        created_at: Timestamp for temporal tracking
        metadata: Additional context and debugging information
    """

    unique_id: str = Field(..., description="Unique identifier for the data")
    input: List[ChatMessage] = Field(default_factory=list, description="input")
    output: List[DataOutput] = Field(default_factory=list, description="output")
    task_category: Optional[str] = Field(default=None, description="task category")
    source: Optional[str] = Field(default=None, description="source")
    created_at: datetime = Field(default_factory=datetime.now, description="createdAt")
    metadata: Optional[Dict] = Field(default=None, description="metadata")

    def update(self, sample: "DataSample") -> "DataSample":
        """
        Merge another sample's data into this sample for combining evaluations.

        Updates additional_kwargs and reward details from the source sample
        while preserving the original structure.

        Args:
            sample: Source sample to merge data from

        Returns:
            Self with updated data for method chaining
        """
        self.input[-1].additional_kwargs.update(sample.input[-1].additional_kwargs)
        for i, output in enumerate(self.output):
            output.answer.additional_kwargs.update(
                sample.output[i].answer.additional_kwargs
            )
            output.answer.reward.details.extend(sample.output[i].answer.reward.details)

            if output.steps:
                for j, step in enumerate(output.steps):
                    step.additional_kwargs.update(
                        sample.output[i].steps[j].additional_kwargs
                    )
                    step.reward.details.extend(sample.output[i].steps[j].reward.details)
        return self

    class Config:
        arbitrary_types_allowed = True
        json_encoders = {datetime: lambda v: v.isoformat()}
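
A construction sketch (the nested shapes for ChatMessage and DataOutput are assumptions based on the attribute accesses shown on this page):

from rm_gallery.core.data.schema import DataSample

sample = DataSample.model_validate(
    {
        "unique_id": "demo-1",
        "input": [{"role": "user", "content": "Hello"}],
        "output": [{"answer": {"content": "Hi there!"}}],
        "task_category": "chat",
        "source": "manual",
    }
)
print(sample.unique_id, len(sample.output))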

update(sample)

Merge another sample's data into this sample for combining evaluations.

Updates additional_kwargs and reward details from the source sample while preserving the original structure.

Parameters:

    sample (DataSample): Source sample to merge data from. Required.

Returns:

    DataSample: Self with updated data for method chaining

Source code in rm_gallery/core/data/schema.py
def update(self, sample: "DataSample") -> "DataSample":
    """
    Merge another sample's data into this sample for combining evaluations.

    Updates additional_kwargs and reward details from the source sample
    while preserving the original structure.

    Args:
        sample: Source sample to merge data from

    Returns:
        Self with updated data for method chaining
    """
    self.input[-1].additional_kwargs.update(sample.input[-1].additional_kwargs)
    for i, output in enumerate(self.output):
        output.answer.additional_kwargs.update(
            sample.output[i].answer.additional_kwargs
        )
        output.answer.reward.details.extend(sample.output[i].answer.reward.details)

        if output.steps:
            for j, step in enumerate(output.steps):
                step.additional_kwargs.update(
                    sample.output[i].steps[j].additional_kwargs
                )
                step.reward.details.extend(sample.output[i].steps[j].reward.details)
    return self

PatchSimilarityReward

Bases: BasePointWiseReward

Calculate similarity between generated patch and oracle patch using difflib.SequenceMatcher.

This reward measures how similar the generated patch is to the reference patch, providing a similarity score and detailed diff information.

Source code in rm_gallery/gallery/rm/code/code.py
@RewardRegistry.register("code_patch_similarity")
class PatchSimilarityReward(BasePointWiseReward):
    """
    Calculate similarity between generated patch and oracle patch using difflib.SequenceMatcher.

    This reward measures how similar the generated patch is to the reference patch,
    providing a similarity score and detailed diff information.
    """

    name: str = Field(default="patch_similarity", description="Patch similarity reward")

    def _evaluate(
        self, sample: DataSample, **kwargs
    ) -> RewardResult[RewardDimensionWithScore]:
        """
        Calculate patch similarity.

        Args:
            sample: Data sample containing generated patch

        Returns:
            RewardResult: Reward result containing similarity score
        """
        generated = sample.output[0].answer.content.strip()
        reference = sample.output[0].answer.label.get("reference", "").strip()

        # Use SequenceMatcher to calculate similarity
        matcher = difflib.SequenceMatcher(None, generated, reference)
        similarity = matcher.ratio()

        # Get detailed diff information
        opcodes = list(matcher.get_opcodes())

        return RewardResult(
            name=self.name,
            details=[
                RewardDimensionWithScore(
                    name=self.name,
                    score=similarity,
                    reason=f"Patch similarity: {similarity:.3f} based on sequence matching",
                )
            ],
            extra_data={
                "similarity": similarity,
                "generated": generated,
                "reference": reference,
                "opcodes": opcodes,
            },
        )
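
The similarity measure is plain difflib, so its behavior is easy to probe in isolation (inside the reward, the reference patch is read from output[0].answer.label["reference"]):

import difflib

generated = "def add(a, b):\n    return a + b"
reference = "def add(x, y):\n    return x + y"

matcher = difflib.SequenceMatcher(None, generated, reference)
print(round(matcher.ratio(), 3))  # similarity in [0, 1]
for tag, i1, i2, j1, j2 in matcher.get_opcodes():
    print(tag, repr(generated[i1:i2]), "->", repr(reference[j1:j2]))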

RewardDimensionWithScore

Bases: RewardDimension

Pointwise/Stepwise reward dimension with a numerical score.

Attributes:

    score (float): Numerical value representing the reward magnitude

Source code in rm_gallery/core/reward/schema.py
class RewardDimensionWithScore(RewardDimension):
    """
    Pointwise/Stepwise reward dimension with a numerical score.

    Attributes:
        score (float): Numerical value representing the reward magnitude
    """

    score: float = Field(default=..., description="score")
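
Constructor calls elsewhere on this page also pass a reason, inherited from RewardDimension. A small example:

from rm_gallery.core.reward.schema import RewardDimensionWithScore

dim = RewardDimensionWithScore(
    name="syntax_check",
    score=0.75,
    reason="3/4 code blocks parsed without errors",
)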

RewardRegistry

A registry management system for reward modules that maps module names to their corresponding implementation classes.

This class provides a centralized repository for reward modules. Modules are registered via a decorator and later retrieved by their string identifiers.

Attributes:

    _registry (Dict[str, Type[BaseReward]]): Internal dictionary storing the mapping between reward module names and their classes.

Source code in rm_gallery/core/reward/registry.py
class RewardRegistry:
    """A registry management system for reward modules that maps module names to their corresponding implementation classes.

    This class provides a centralized repository for reward modules.
    Modules are registered via a decorator and later retrieved by their string identifiers.

    Attributes:
        _registry: Internal dictionary storing the mapping between reward module names and their classes.
    """

    # Dictionary mapping reward module names to their corresponding classes
    _registry: Dict[str, Type[BaseReward]] = {}

    @classmethod
    def register(cls, name: str):
        """Create a decorator to register a reward module class with a specified identifier.

        The decorator pattern allows classes to be registered while maintaining their original identity.

        Args:
            name: Unique string identifier for the reward module

        Returns:
            A decorator function that registers the module when applied to a class
        """

        def _register(module: Type[BaseReward]):
            """Internal registration function that stores the module in the registry.

            Args:
                module: The BaseReward subclass to be registered

            Returns:
                The original module class (unchanged)
            """
            cls._registry[name] = module
            return module

        return _register

    @classmethod
    def get(cls, name: str) -> Type[BaseReward] | None:
        """Retrieve a registered reward module class by its identifier.

        Provides safe access to registered modules without raising errors for missing entries.

        Args:
            name: String identifier of the reward module to retrieve

        Returns:
            The corresponding BaseReward subclass if found, None otherwise
        """
        return cls._registry.get(name, None)

    @classmethod
    def list(cls) -> str:
        """
        Returns:
            A list of all registered reward modules
        """
        info = []
        for name, module in cls._registry.items():
            info.append(
                pd.Series(
                    {
                        "Name": name,
                        "Class": module.__name__,
                        "Scenario": module.__doc__.strip(),
                    }
                )
            )

        info_df = pd.concat(info, axis=1).T
        info_str = tabulate(
            info_df,
            headers="keys",
            tablefmt="grid",
            maxcolwidths=[50] * (len(info_df.columns) + 1),
        )
        return info_str
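
Registration is a side effect of importing the module that defines the decorated class, so gallery rewards must be imported before lookup. A minimal sketch (the gallery import path is assumed):

from rm_gallery.core.reward.registry import RewardRegistry
import rm_gallery.gallery.rm.code.code  # noqa: F401  -- runs the @register decorators

reward_cls = RewardRegistry.get("code_syntax_check")
reward = reward_cls()          # SyntaxCheckReward instance
print(RewardRegistry.list())   # grid table with Name / Class / Scenario columns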

get(name) classmethod

Retrieve a registered reward module class by its identifier.

Provides safe access to registered modules without raising errors for missing entries.

Parameters:

    name (str): String identifier of the reward module to retrieve. Required.

Returns:

    Type[BaseReward] | None: The corresponding BaseReward subclass if found, None otherwise

Source code in rm_gallery/core/reward/registry.py
@classmethod
def get(cls, name: str) -> Type[BaseReward] | None:
    """Retrieve a registered reward module class by its identifier.

    Provides safe access to registered modules without raising errors for missing entries.

    Args:
        name: String identifier of the reward module to retrieve

    Returns:
        The corresponding BaseReward subclass if found, None otherwise
    """
    return cls._registry.get(name, None)

list() classmethod

Returns:

    str: A formatted grid table describing all registered reward modules

Source code in rm_gallery/core/reward/registry.py
@classmethod
def list(cls) -> str:
    """
    Returns:
        A list of all registered reward modules
    """
    info = []
    for name, module in cls._registry.items():
        info.append(
            pd.Series(
                {
                    "Name": name,
                    "Class": module.__name__,
                    "Scenario": module.__doc__.strip(),
                }
            )
        )

    info_df = pd.concat(info, axis=1).T
    info_str = tabulate(
        info_df,
        headers="keys",
        tablefmt="grid",
        maxcolwidths=[50] * (len(info_df.columns) + 1),
    )
    return info_str

register(name) classmethod

Create a decorator to register a reward module class with a specified identifier.

The decorator pattern allows classes to be registered while maintaining their original identity.

Parameters:

    name (str): Unique string identifier for the reward module. Required.

Returns:

    A decorator function that registers the module when applied to a class

Source code in rm_gallery/core/reward/registry.py
@classmethod
def register(cls, name: str):
    """Create a decorator to register a reward module class with a specified identifier.

    The decorator pattern allows classes to be registered while maintaining their original identity.

    Args:
        name: Unique string identifier for the reward module

    Returns:
        A decorator function that registers the module when applied to a class
    """

    def _register(module: Type[BaseReward]):
        """Internal registration function that stores the module in the registry.

        Args:
            module: The BaseReward subclass to be registered

        Returns:
            The original module class (unchanged)
        """
        cls._registry[name] = module
        return module

    return _register

RewardResult

Bases: BaseModel, Generic[T]

Container for reward calculation results with generic type support.

Attributes:

    name (str): Identifier of the reward module that generated this result
    details (List[T]): Collection of detailed reward information items
    extra_data (dict): Additional metadata or context information

Source code in rm_gallery/core/reward/schema.py
class RewardResult(BaseModel, Generic[T]):
    """
    Container for reward calculation results with generic type support.

    Attributes:
        name (str): Identifier of the reward module that generated this result
        details (List[T]): Collection of detailed reward information items
        extra_data (dict): Additional metadata or context information
    """

    name: str = Field(default=..., description="reward module name")
    details: List[T] = Field(default_factory=list, description="reward details")
    extra_data: dict = Field(default_factory=dict, description="extra data")
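
Every _evaluate on this page returns one of these; BasePointWiseReward._parallel then appends details to each response's reward and averages their scores. A small illustration:

from rm_gallery.core.reward.schema import RewardDimensionWithScore, RewardResult

result = RewardResult(
    name="code_style",
    details=[
        RewardDimensionWithScore(name="code_style", score=0.9, reason="clean style"),
    ],
    extra_data={"code_blocks_count": 1},
)

# Mirrors the aggregation in BasePointWiseReward._parallel
score = sum(d.score for d in result.details) / len(result.details)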

SyntaxCheckReward

Bases: BasePointWiseReward

Check code syntax using Abstract Syntax Tree to validate Python code blocks.

Source code in rm_gallery/gallery/rm/code/code.py
@RewardRegistry.register("code_syntax_check")
class SyntaxCheckReward(BasePointWiseReward):
    """Check code syntax using Abstract Syntax Tree to validate Python code blocks."""

    name: str = Field(default="syntax_check", description="Syntax check reward")

    def _evaluate(
        self, sample: DataSample, **kwargs
    ) -> RewardResult[RewardDimensionWithScore]:
        """
        Check code syntax

        Args:
            sample: Data sample containing code content

        Returns:
            RewardResult: Reward result containing syntax check results
        """
        content = sample.output[0].answer.content

        # Extract code blocks
        code_pattern = r"```(?:python)?\n(.*?)\n```"
        code_blocks = re.findall(code_pattern, content, re.DOTALL)

        if not code_blocks:
            # No code blocks, return neutral score
            return RewardResult(
                name=self.name,
                details=[
                    RewardDimensionWithScore(
                        name=self.name,
                        score=0.0,
                        reason="No code blocks found to check",
                    )
                ],
                extra_data={"code_blocks": [], "syntax_errors": []},
            )

        syntax_errors = []
        valid_blocks = 0

        for i, code in enumerate(code_blocks):
            try:
                ast.parse(code.strip())
                valid_blocks += 1
            except SyntaxError as e:
                syntax_errors.append(
                    {"block": i, "error": str(e), "line": e.lineno, "offset": e.offset}
                )

        # Calculate score: ratio of valid code blocks
        score = valid_blocks / len(code_blocks) if code_blocks else 0.0

        # Apply a flat penalty when syntax errors exist, clamping at 0 so the
        # score stays in [0, 1]
        if syntax_errors:
            score = max(0.0, score - 0.5)

        return RewardResult(
            name=self.name,
            details=[
                RewardDimensionWithScore(
                    name=self.name,
                    score=score,
                    reason=f"Syntax check: {valid_blocks}/{len(code_blocks)} blocks valid, {len(syntax_errors)} errors",
                )
            ],
            extra_data={
                "code_blocks": code_blocks,
                "valid_blocks": valid_blocks,
                "total_blocks": len(code_blocks),
                "syntax_errors": syntax_errors,
            },
        )
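
A usage sketch with one valid and one broken block (dict shapes and module path assumed, as elsewhere on this page):

from rm_gallery.core.data.schema import DataSample
from rm_gallery.gallery.rm.code.code import SyntaxCheckReward

content = (
    "```python\nprint('ok')\n```\n\n"
    "```python\ndef broken(:\n    pass\n```"
)
sample = DataSample.model_validate(
    {
        "unique_id": "demo-syntax-1",
        "input": [{"role": "user", "content": "Write some code."}],
        "output": [{"answer": {"content": content}}],
    }
)

result = SyntaxCheckReward()._evaluate(sample)
print(result.details[0].reason)  # Syntax check: 1/2 blocks valid, 1 errors
print(result.details[0].score)   # 0.5 ratio - 0.5 penalty, clamped to 0.0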