Source code for memoryscope.core.utils.datetime_handler

import datetime
import re
from typing import List

from memoryscope.constants.language_constants import WEEKDAYS, DATATIME_WORD_LIST, MONTH_DICT
from memoryscope.enumeration.language_enum import LanguageEnum



[docs]
class DatetimeHandler(object):
    """
    Handles operations related to datetime such as parsing, extraction, and formatting,
    with support for both Chinese and English contexts including weekday names and
    specialized text parsing for date components.
    """



[docs]
    def __init__(self, dt: datetime.datetime | str | int | float = None):
        """
        Initialize the DatetimeHandler instance with a datetime object, string, integer, or float representation
        of a timestamp. If no argument is provided, the current time is used.

        Args:
            dt (datetime.datetime | str | int | float, optional):
                The datetime to be handled. Can be a datetime object, a timestamp string, or a numeric timestamp.
                Defaults to None, which sets the instance to the current datetime.

        Attributes:
            self._dt (datetime.datetime): The internal datetime representation of the input.
            self._dt_info_dict (dict | None): A dictionary containing parsed datetime information, defaults to None.
        """
        if isinstance(dt, str | int | float):
            if isinstance(dt, str):
                dt = float(dt)
            self._dt: datetime.datetime = datetime.datetime.fromtimestamp(dt)
        elif isinstance(dt, datetime.datetime):
            self._dt: datetime.datetime = dt
        else:
            self._dt: datetime.datetime = datetime.datetime.now()

        self._dt_info_dict: dict | None = None


    def _parse_dt_info(self, language: LanguageEnum):
        """
        Parses the datetime object (_dt) into a dictionary containing detailed date and time components,
        including language-specific weekday representation.

        Returns:
            dict: A dictionary with keys representing date and time parts such as 'year', 'month',
                  'day', 'hour', 'minute', 'second', 'week', and 'weekday' with respective values.
                  The 'weekday' value is translated based on the current language context.
        """
        return {
            "year": self._dt.year,
            "month": MONTH_DICT[language][self._dt.month - 1],
            "day": self._dt.day,
            "hour": self._dt.hour,
            "minute": self._dt.minute,
            "second": self._dt.second,
            "week": self._dt.isocalendar().week,
            "weekday": WEEKDAYS[language][self._dt.isocalendar().weekday - 1],
        }


[docs]
    def get_dt_info_dict(self, language: LanguageEnum):
        """
        Property method to get the dictionary containing parsed datetime information.
        If None, initialize using `_parse_dt_info`.

        Returns:
            dict: A dictionary with parsed datetime information.
        """
        if self._dt_info_dict is None:
            self._dt_info_dict = self._parse_dt_info(language=language)
        return self._dt_info_dict



[docs]
    @classmethod
    def extract_date_parts_cn(cls, input_string: str) -> dict:
        """
        Extracts various components of a date (year, month, day, etc.) from an input string based on Chinese formats.

        This method identifies year, month, day, weekday, and hour components within the input
        string based on predefined patterns. It supports relative terms like '每' (every) and
        translates weekday names into numeric representations.

        Args:
            input_string (str): The Chinese text containing date and time information.

        Returns:
            dict: A dictionary with keys 'year', 'month', 'day', 'weekday', and 'hour',
                  each holding the corresponding extracted value. If a component is not found,
                  it will not be included in the dictionary. For relative terms like '每' (every),
                  the value is set to -1.

        """
        # Extending our pattern to handle every/每 as a possible value.
        patterns = {
            "year": r"(\d+|每)年",
            "month": r"(\d+|每)月",
            "day": r"(\d+|每)日",
            "weekday": r"周([一二三四五六日])",
            "hour": r"(\d+)点"
        }
        weekday_dict = {"一": 1, "二": 2, "三": 3, "四": 4, "五": 5, "六": 6, "日": 7}
        extracted_data = {}

        # Search for patterns in the input string and populate the dictionary
        for key, pattern in patterns.items():
            match = re.search(pattern, input_string)
            if match:  # If there is a match, include it in the output dictionary
                if match.group(1) == "每":
                    extracted_data[key] = -1
                elif match.group(1) in weekday_dict.keys():
                    extracted_data[key] = weekday_dict[match.group(1)]
                else:
                    extracted_data[key] = int(match.group(1))
        return extracted_data



[docs]
    @classmethod
    def extract_date_parts_en(cls, input_string: str) -> dict:
        """
        Extracts various components of a date (year, month, day, etc.) from an input string based on English formats.

        This method employs regex patterns to identify and parse different date and time elements within the provided
        text. It supports extraction of year, month name, day, 12-hour and 24-hour time formats, and weekdays.

        Args:
            input_string (str): The English text containing date and time information.

        Returns:
            dict: A dictionary containing the extracted date parts with default values of -1 where components are not
            found. Keys include 'year', 'month', 'day', 'hour', 'minute', 'second', and 'weekday'.
        """
        date_info = {
            "year": -1,
            "month": -1,
            "day": -1,
            "hour": -1,
            "minute": -1,
            "second": -1,
            "weekday": -1
        }

        # Patterns to extract the parts of the date/time
        patterns = {
            "year": r"\b(\d{4})\b",
            "month": r"\b(January|February|March|April|May|June|July|August|September|October|November|December)\b",
            "day_month_year": r"\b(?P<month>January|February|March|April|May|June|July|August|September|October"
                              r"|November|December) (?P<day>\d{1,2}),? (?P<year>\d{4})\b",
            "day_month": r"\b(?P<month>January|February|March|April|May|June|July|August|September|October|November"
                         r"|December) (?P<day>\d{1,2})\b",
            "hour_12": r"\b(\d{1,2})\s*(AM|PM|am|pm)\b",
            "hour_24": r"\b(\d{1,2}):(\d{2}):(\d{2})\b"
        }

        month_mapping = {
            "January": 1, "February": 2, "March": 3, "April": 4, "May": 5, "June": 6, "July": 7, "August": 8,
            "September": 9, "October": 10, "November": 11, "December": 12
        }

        weekday_mapping = {
            "Monday": 1, "Tuesday": 2, "Wednesday": 3, "Thursday": 4, "Friday": 5, "Saturday": 6, "Sunday": 7
        }

        # Attempt to match full date (day month year)
        day_month_year_match = re.search(patterns["day_month_year"], input_string)
        if day_month_year_match:
            date_info["year"] = int(day_month_year_match.group("year"))
            date_info["month"] = month_mapping[day_month_year_match.group("month")]
            date_info["day"] = int(day_month_year_match.group("day"))

        # If year wasn't found, try matching day and month without year
        elif date_info["year"] == -1:
            day_month_match = re.search(patterns["day_month"], input_string)
            if day_month_match:
                date_info["month"] = month_mapping[day_month_match.group("month")]
                date_info["day"] = int(day_month_match.group("day"))

        # Extract year if not already found
        if date_info["year"] == -1:
            year_match = re.search(patterns["year"], input_string)
            if year_match:
                date_info["year"] = int(year_match.group(0))

        # Extract month if not already found
        if date_info["month"] == -1:
            month_match = re.search(patterns["month"], input_string)
            if month_match:
                date_info["month"] = month_mapping[month_match.group(0)]

        # Extract 12-hour format time
        hour_12_match = re.search(patterns["hour_12"], input_string)
        if hour_12_match:
            hour, period = int(hour_12_match.group(1)), hour_12_match.group(2).lower()
            if period == 'pm' and hour != 12:
                hour += 12
            elif period == 'am' and hour == 12:
                hour = 0
            date_info["hour"] = hour

        # Identify weekday
        for week_day, value in weekday_mapping.items():
            if week_day in input_string:
                date_info["weekday"] = value
                break

        return date_info



[docs]
    @classmethod
    def extract_date_parts(cls, input_string: str, language: LanguageEnum) -> dict:
        """
        Extracts various date components from the input string based on the current language context.

        This method dynamically selects a language-specific function to parse the input string and extract
        date parts such as year, month, day, etc. If the function for current language context does not exist,
        a warning is logged and an empty dictionary is returned.

        Args:
            input_string (str): The string containing date information to be parsed.
            language (str): current language.

        Returns:
            dict: A dictionary containing extracted date components, or an empty dictionary if parsing fails.
        """
        func_name = f"extract_date_parts_{language.value}"
        if not hasattr(cls, func_name):
            # cls.logger.warning(f"language={language.value} needs to complete extract_date_parts func!")
            return {}
        return getattr(cls, func_name)(input_string=input_string)



[docs]
    @classmethod
    def has_time_word_cn(cls, query: str, datetime_word_list: List[str]) -> bool:
        """
        Check if the input query contains any datetime-related words based on the cn language context.

        Args:
            query (str): The input string to check for datetime-related words.
            datetime_word_list (list[str]): datetime keywords

        Returns:
            bool: True if the query contains at least one datetime-related word, False otherwise.
        """
        contain_datetime = False
        # TODO use re
        for datetime_word in datetime_word_list:
            if datetime_word in query:
                contain_datetime = True
                break
        return contain_datetime



[docs]
    @classmethod
    def has_time_word_en(cls, query: str, datetime_word_list: List[str]) -> bool:
        """
        Check if the input query contains any datetime-related words based on the en language context.

        Args:
            query (str): The input string to check for datetime-related words.
            datetime_word_list (list[str]): datetime keywords

        Returns:
            bool: True if the query contains at least one datetime-related word, False otherwise.
        """
        contain_datetime = False
        for datetime_word in datetime_word_list:
            datetime_word = datetime_word.lower()
            # TODO fix strip
            if datetime_word in [x.strip().lower().strip(",").strip(".").strip("?").strip(":")
                                 for x in query.split(" ")]:
                contain_datetime = True
                break
        return contain_datetime



[docs]
    @classmethod
    def has_time_word(cls, query: str, language: LanguageEnum) -> bool:
        func_name = f"has_time_word_{language.value}"
        if not hasattr(cls, func_name):
            # cls.logger.warning(f"language={language.value} needs to complete has_time_word function!")
            return False

        if language not in DATATIME_WORD_LIST:
            # cls.logger.warning(f"language={language.value} is missing in DATATIME_WORD_LIST!")
            return False

        datetime_word_list = DATATIME_WORD_LIST[language]
        return getattr(cls, func_name)(query=query, datetime_word_list=datetime_word_list)



[docs]
    def datetime_format(self, dt_format: str = "%Y%m%d") -> str:
        """
        Format the stored datetime object into a string based on the provided format.

        Args:
            dt_format (str, optional): The datetime format string. Defaults to "%Y%m%d".

        Returns:
            str: A formatted datetime string.
        """
        return self._dt.strftime(dt_format)



[docs]
    def string_format(self, string_format: str, language: LanguageEnum) -> str:
        """
        Format the datetime information stored in the instance using a custom string format.

        Args:
            string_format (str): A format string where placeholders are keys from `dt_info_dict`.
            language (str): current language.

        Returns:
            str: A formatted datetime string.
        """
        return string_format.format(**self.get_dt_info_dict(language=language))


    @property
    def timestamp(self) -> int:
        """
        Get the timestamp representation of the stored datetime.

        Returns:
            int: A timestamp value.
        """
        return int(self._dt.timestamp())