memoryscope.core.utils.tool_functions 源代码

import hashlib
import random
import re
import time
from copy import deepcopy
from importlib import import_module
from typing import List

import numpy as np
import pyfiglet
from termcolor import colored

from memoryscope.enumeration.message_role_enum import MessageRoleEnum
from memoryscope.scheme.message import Message

ALL_COLORS = ["red", "green", "yellow", "blue", "magenta", "cyan", "light_grey", "light_red", "light_green",
              "light_yellow", "light_blue", "light_magenta", "light_cyan", "white"]



[文档]
def underscore_to_camelcase(name: str, is_first_title: bool = True) -> str:
    """
    Converts an underscore_notation string to CamelCase.

    Args:
        name (str): The underscore_notation string to be converted.
        is_first_title (bool): Title the first word or not. Defaults to True

    Returns:
        str: A CamelCase formatted string.
    """
    name_split = name.split("_")
    if is_first_title:
        return "".join(x.title() for x in name_split)
    else:
        return name_split[0] + ''.join(x.title() for x in name_split[1:])




[文档]
def camelcase_to_underscore(name: str) -> str:
    """
    Converts a CamelCase string to underscore_notation.

    Args:
        name (str): The CamelCase formatted string to be converted.

    Returns:
        str: A converted string in underscore_notation.
    """
    return re.sub(r'(?<!^)(?=[A-Z])', '_', name).lower()




[文档]
def init_instance_by_config(config: dict, default_class_dir: str = "memoryscope", **kwargs):
    """
    Initialize an instance of a class specified in the configuration dictionary.

    This function dynamically imports a class from a module path, allowing for
    user-defined classes or default paths. It supports adding a suffix to the
    class name, merging additional keyword arguments with the config, and handling
    nested module paths.

    Args:
        config (dict): A dictionary containing the configuration, including
                       the 'class' key that specifies the class's module path.
        default_class_dir (str, optional): The default module path prefix
                                            to use if not explicitly defined in
                                            'config'. Defaults to "memory_scope".
        **kwargs: Additional keyword arguments to pass to the class constructor.

    Returns:
        instance: An instance initialized with the provided config and kwargs.
    """

    config_copy = deepcopy(config)
    origin_class_path: str = config_copy.pop("class")
    if not origin_class_path:
        raise RuntimeError("empty class path!")
    user_defined: bool = config_copy.pop("user_defined", False)

    class_path_list = []
    if not user_defined and default_class_dir and not origin_class_path.startswith(default_class_dir):
        class_path_list.append(default_class_dir)

    class_path_split = origin_class_path.split(".")
    class_file_name: str = class_path_split[-1]

    class_name = underscore_to_camelcase(class_file_name)
    if class_name == class_file_name:
        class_path_list.extend(class_path_split[-1:])
    else:
        class_path_list.extend(class_path_split)

    module = import_module(".".join(class_path_list))
    config_copy.update(kwargs)
    return getattr(module, class_name)(**config_copy)




[文档]
def prompt_to_msg(system_prompt: str,
                  few_shot: str,
                  user_query: str,
                  concat_system_prompt: bool = True) -> List[Message]:
    """
    Converts input strings into a structured list of message objects suitable for AI interactions.

    Args:
        system_prompt (str): The system-level instruction or context.
        few_shot (str): An example or demonstration input, often used for illustrating expected behavior.
        user_query (str): The actual user query or prompt to be processed.
        concat_system_prompt(bool): Concat system prompt again or not in the user message.
            A simple method to improve the effectiveness for some LLMs. Defaults to True.

    Returns:
        List[Message]: A list of Message objects, each representing a part of the conversation setup.
    """

    system_message = Message(role=MessageRoleEnum.SYSTEM.value, content=system_prompt.strip())
    if concat_system_prompt:
        user_content_list = [system_prompt, few_shot, user_query]
    else:
        user_content_list = [few_shot, user_query]
    user_message = Message(role=MessageRoleEnum.USER.value, content="\n".join([x.strip() for x in user_content_list]))
    return [system_message, user_message]




[文档]
def char_logo(words: str, seed: int = time.time_ns(), color=None):
    """
    Render the context of logo with colors

    Args:
        words: The context of logo.
        seed: The random seed which generates colors if there is no specific color. Defaults to the current timestamp.
        color: The specific color. Defaults to None.

    Returns:
        A rendered logo
    """
    font = pyfiglet.Figlet()
    rendered_text = font.renderText(words)
    colored_lines = []
    all_colors = ALL_COLORS.copy()
    random.seed = seed
    for line in rendered_text.splitlines():
        line_color = color
        if line_color is None:
            random.shuffle(all_colors)
            line_color = all_colors[0]
        colored_line = ""
        for char in line:
            colored_char = colored(char, line_color, attrs=['bold'])
            colored_line += colored_char
        colored_lines.append(colored_line)
    return colored_lines




[文档]
def md5_hash(input_string: str) -> str:
    """
    Computes a MD5 hash of the given input string.

    Args:
        input_string (str): The string for which the MD5 hash needs to be computed.

    Returns:
        str: A hexadecimal MD5 hash representation.
    """
    m = hashlib.md5()
    m.update(input_string.encode('utf-8'))
    return m.hexdigest()




[文档]
def contains_keyword(text, keywords) -> bool:
    """
    Checks if the given text contains any of the specified keywords, ignoring case.

    Args:
        text (str): The text to search within.
        keywords (List[str]): A list of keywords to look for in the text.

    Returns:
        bool: True if any keyword is found in the text, False otherwise.
    """
    escaped_keywords = map(re.escape, keywords)
    pattern = re.compile('|'.join(escaped_keywords), re.IGNORECASE)
    return pattern.search(text) is not None




[文档]
def cosine_similarity(query: List[float], documents: List[List[float]]):
    query = np.array(query)
    documents = np.array(documents)

    query_norm = np.linalg.norm(query)
    if query_norm == 0:
        raise ValueError("Query vector norm is zero, which will result in a division by zero")

    documents_norm = np.linalg.norm(documents, axis=1)
    if np.any(documents_norm == 0):
        raise ValueError("One of the document vectors has zero norm, which will result in a division by zero")

    dot_product = np.dot(documents, query)

    cosine_similarities = dot_product / (query_norm * documents_norm)
    return cosine_similarities.tolist()




[文档]
def cosine_similarity_matrix(query: List[List[float]]):
    query = np.array(query)

    documents_norm = np.linalg.norm(query, axis=1)
    if np.any(documents_norm == 0):
        raise ValueError("One of the document vectors has zero norm, which will result in a division by zero")
    
    n_query = query.shape[0]
    query_expanded = np.expand_dims(query, axis=0)
    query_triplicated = np.repeat(query_expanded, repeats=n_query, axis=0)
    query_transpose = query_triplicated.swapaxes(0, 1)

    q = np.expand_dims(documents_norm, axis=0)
    norm_dot = q.transpose() * q
    dot_product = (query_triplicated*query_transpose).sum(-1) / norm_dot

    return dot_product