Skip to content

file

read_dataset(file_path)

Reads dataset from a file based on its extension.

Parameters:

Name Type Description Default
file_path str

Path to the dataset file.

required

Returns:

Name Type Description
Any

Dataset content parsed according to the file format.

Raises:

Type Description
ValueError

If the file format is not supported.

Source code in rm_gallery/core/utils/file.py
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
def read_dataset(file_path: str):
    """
    Reads a dataset from a file, choosing the parser from the file extension.

    The extension is matched case-insensitively, and both ``.yaml`` and
    ``.yml`` are treated as YAML.

    Args:
        file_path (str): Path to the dataset file.

    Returns:
        Any: Whatever the format-specific reader (``read_json``,
            ``read_jsonl`` or ``read_yaml``) returns for the file.

    Raises:
        ValueError: If the file extension is not one of the supported formats.
    """
    # Only the extension drives the dispatch; the stem is not needed.
    suffix = os.path.splitext(file_path)[1]
    normalized = suffix.lower()
    if normalized == ".json":
        return read_json(file_path)
    if normalized == ".jsonl":
        return read_jsonl(file_path)
    if normalized in (".yaml", ".yml"):
        return read_yaml(file_path)
    # Report the extension exactly as it was given so the message mirrors the input.
    raise ValueError(f"Unsupported file format: {suffix}")

read_json(file_path)

Reads JSON data from the specified file path.

Parameters:

Name Type Description Default
file_path str

Path to the JSON file.

required

Returns:

Name Type Description
Any

Parsed JSON data.

Raises:

Type Description
FileNotFoundError

If the file does not exist or is not a file.

Source code in rm_gallery/core/utils/file.py
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
def read_json(file_path):
    """
    Loads and parses the JSON document stored at ``file_path``.

    Args:
        file_path (str): Location of the JSON file on disk.

    Returns:
        Any: The Python object produced by decoding the file's JSON content.

    Raises:
        FileNotFoundError: If the path is missing or does not refer to a regular file.
    """
    # isfile() is already False for a missing path, so one check covers both cases.
    if not os.path.isfile(file_path):
        raise FileNotFoundError(f"File {file_path} does not exist or is not a file.")

    with open(file_path, mode="r", encoding="utf-8") as handle:
        return json.load(handle)

read_jsonl(file_path)

Loads all records from a JSON Lines (JSONL) file.

Parameters:

Name Type Description Default
file_path str

Path to the JSONL file.

required

Returns:

Type Description

List[Dict]: List of JSON objects read from the file.

Raises:

Type Description
FileNotFoundError

If the file does not exist or is not a file.

Source code in rm_gallery/core/utils/file.py
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
def read_jsonl(file_path):
    """
    Loads every record from a JSON Lines file into a list.

    Args:
        file_path (str): Location of the JSONL file on disk.

    Returns:
        List[Dict]: The decoded objects, in the order the reader yields them.

    Raises:
        FileNotFoundError: If the path is missing or does not refer to a regular file.
    """
    # isfile() is already False for a missing path, so one check covers both cases.
    if not os.path.isfile(file_path):
        raise FileNotFoundError(f"File {file_path} does not exist or is not a file.")

    with jsonlines.open(file_path, mode="r") as reader:
        # Drain the reader while the file is still open.
        return list(reader)

read_yaml(file_path)

Reads a YAML file and returns its content.

Parameters:

Name Type Description Default
file_path str

The path to the YAML file.

required

Returns:

Name Type Description
dict

The content of the YAML file as a dictionary. Returns None if the file is not found or parsing fails; in both cases an error message is printed instead of an exception being raised.

Raises:

Type Description
FileNotFoundError

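If the file does not exist. Note: the implementation shown below catches this error, prints a message, and returns None rather than propagating it.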

YAMLError

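If there is an error parsing the YAML file. Note: the implementation shown below catches this error, prints a message, and returns None rather than propagating it.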

Source code in rm_gallery/core/utils/file.py
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
def read_yaml(file_path):
    """
    Reads a YAML file and returns its parsed content.

    This is a best-effort reader: a missing file or a YAML parsing error is
    reported with ``print`` and signalled by a ``None`` return value; neither
    error is propagated to the caller.

    Args:
        file_path (str): The path to the YAML file.

    Returns:
        Any: The value produced by ``yaml.safe_load`` for the file (a dict for
            a mapping document), or None if the file is not found or parsing fails.
    """
    try:
        # Decode explicitly as UTF-8 so the result does not depend on the
        # platform's default text encoding.
        with open(file_path, "r", encoding="utf-8") as file:
            return yaml.safe_load(file)
    except FileNotFoundError:
        print(f"Error: File '{file_path}' not found.")
    except yaml.YAMLError as exc:
        print(f"Error parsing YAML file: {exc}")
    # Reached only after one of the handled errors above.
    return None

split_samples(samples, ratio=0.1)

Splits a list of samples into training and testing sets.

Parameters:

Name Type Description Default
samples List[Union[dict, DataSample]]

List of samples to split.

required
ratio float

Proportion of the dataset to include in the train split. Defaults to 0.1.

0.1

Returns:

Type Description

Tuple[List[Union[dict, DataSample]], List[Union[dict, DataSample]]]: Train and test splits.

Source code in rm_gallery/core/utils/file.py
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
def split_samples(samples: List[dict | DataSample], ratio: float = 0.1):
    """
    Randomly partitions samples into a train part and a test part.

    The input list is deep-copied first, so neither the caller's list nor its
    elements are touched by the shuffle.

    Args:
        samples (List[Union[dict, DataSample]]): Samples to partition.
        ratio (float, optional): Fraction of the samples placed in the train part. Defaults to 0.1.

    Returns:
        Tuple[List[Union[dict, DataSample]], List[Union[dict, DataSample]]]: The train part followed by the test part.
    """
    shuffled = deepcopy(samples)
    random.shuffle(shuffled)
    # Index of the first test sample; int() truncates toward zero.
    cut = int(len(shuffled) * ratio)
    return shuffled[:cut], shuffled[cut:]

write_json(data, file_path, ensure_ascii=False, indent=4)

Writes data to a JSON file.

Parameters:

Name Type Description Default
data Any

Data to be written to the JSON file.

required
file_path str

Path to the output JSON file.

required
ensure_ascii bool

Whether to ensure ASCII encoding. Defaults to False.

False
indent int

Indentation level for pretty-printing. Defaults to 4.

4
Source code in rm_gallery/core/utils/file.py
34
35
36
37
38
39
40
41
42
43
44
45
def write_json(data, file_path, ensure_ascii=False, indent=4):
    """
    Serializes ``data`` as JSON into the file at ``file_path``.

    The file is opened for writing as UTF-8 text.

    Args:
        data (Any): Object to serialize.
        file_path (str): Destination path of the JSON file.
        ensure_ascii (bool, optional): Passed through to ``json.dump``. Defaults to False.
        indent (int, optional): Passed through to ``json.dump`` as the pretty-printing indent. Defaults to 4.
    """
    with open(file_path, mode="w", encoding="utf-8") as sink:
        json.dump(
            data,
            sink,
            indent=indent,
            ensure_ascii=ensure_ascii,
        )

write_jsonl(file_path, data)

Writes data to a JSON Lines (JSONL) file, one record per item.

Parameters:

Name Type Description Default
file_path str

Path to the output JSONL file.

required
data List[Dict]

Data to be written to the JSONL file.

required
Source code in rm_gallery/core/utils/file.py
71
72
73
74
75
76
77
78
79
80
81
def write_jsonl(file_path, data):
    """
    Writes each item of ``data`` as one record of a JSON Lines file.

    Args:
        file_path (str): Destination path of the JSONL file.
        data (List[Dict]): Items to write, one record per item, in order.
    """
    with jsonlines.open(file_path, mode="w") as sink:
        # write_all() writes the items one after another, like a per-item write() loop.
        sink.write_all(data)

write_raw_content(file_path, data, auto_create_dir=True, mode='w')

Writes raw text data to a file, optionally creating the directory path.

Parameters:

Name Type Description Default
file_path str

Path to the output file.

required
data List[str]

List of strings to be written line by line.

required
auto_create_dir bool

Whether to automatically create the directory if it doesn't exist. Defaults to True.

True
Source code in rm_gallery/core/utils/file.py
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
def write_raw_content(file_path, data, auto_create_dir=True, mode="w"):
    """
    Writes raw text data to a file, optionally creating the directory path.

    Each item of ``data`` is written followed by a newline character.

    Args:
        file_path (str): Path to the output file.
        data (List[str]): List of strings to be written line by line.
        auto_create_dir (bool, optional): Whether to automatically create the parent directory if it doesn't exist. Defaults to True.
        mode (str, optional): Mode passed to ``open`` (for example "w" to overwrite or "a" to append). Defaults to "w".
    """
    dir_path = os.path.dirname(file_path)
    # dirname() is "" for a bare file name; there is nothing to create in that
    # case, and os.makedirs("") would raise. exist_ok avoids failing when the
    # directory already exists or appears between a check and the creation.
    if auto_create_dir and dir_path:
        os.makedirs(dir_path, exist_ok=True)
    with open(file_path, mode) as f:
        for line in data:
            f.write(line)
            f.write("\n")