# Copyright 2024 ByteDance and/or its affiliates. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import gzip import json import pickle from pathlib import Path from typing import Any, Union import pandas as pd from protenix.utils.torch_utils import map_values_to_list PANDAS_NA_VALUES = [ "", "#N/A", "#N/A N/A", "#NA", "-1.#IND", "-1.#QNAN", "-NaN", "-nan", "1.#IND", "1.#QNAN", "", "N/A", # "NA", "NULL", "NaN", "n/a", "nan", "null", ] def read_indices_csv(csv: Union[str, Path]) -> pd.DataFrame: """ Read a csv file without the content changing. Args: csv (Union[str, Path]): A csv file path. Returns: pd.DataFrame : A pandas DataFrame. """ df = pd.read_csv(csv, na_values=PANDAS_NA_VALUES, keep_default_na=False, dtype=str) return df def load_gzip_pickle(pkl: Union[str, Path]) -> Any: """ Load a gzip pickle file. Args: pkl (Union[str, Path]): A gzip pickle file path. Returns: Any: The loaded data. """ with gzip.open(pkl, "rb") as f: data = pickle.load(f) return data def dump_gzip_pickle(data: Any, pkl: Union[str, Path]): """ Dump a gzip pickle file. Args: data (Any): The data to be dumped. pkl (Union[str, Path]): A gzip pickle file path. """ with gzip.open(pkl, "wb") as f: pickle.dump(data, f) def save_json(data: dict, output_fpath: Union[str, Path], indent: int = 4): """ Save a dictionary to a JSON file. Args: data (dict): The dictionary to be saved. output_fpath (Union[str, Path]): The output file path. indent (int, optional): The indentation level for the JSON file. Defaults to 4. """ data_json = data.copy() data_json = map_values_to_list(data_json) with open(output_fpath, "w") as f: if indent is not None: json.dump(data_json, f, indent=indent) else: json.dump(data_json, f)