Spaces:

peft-internal-testing
/

PEFT-method-comparison

Running

File size: 5,378 Bytes

a76607e

# Copyright 2025-present the HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Data processing used for analyzing and presenting the results"""

import json
import os

import pandas as pd


def preprocess(rows, task_name: str, print_fn=print):
    """Flatten raw result entries into one flat record per successful run.

    Args:
        rows: Sized collection of result dicts, each providing the "run_info",
            "train_info" and "meta_info" entries.
        task_name: Value stored under "task_name" in every returned record.
        print_fn: Callable that receives the summary message when at least one
            entry was skipped.

    Returns:
        A list of dicts, one for each entry whose train status is "success".
    """
    records = []
    num_skipped = 0
    for entry in rows:
        run_info, train_info, meta_info = entry["run_info"], entry["train_info"], entry["meta_info"]
        peft_config = run_info["peft_config"]
        # entries with a falsy PEFT config are labelled as full fine-tuning
        peft_type = peft_config["peft_type"] if peft_config else "full-finetuning"

        if train_info["status"] != "success":
            num_skipped += 1
            continue

        # only the last reported metrics entry is used
        last_metrics = train_info["metrics"][-1]

        # pick the fields that make most sense; insertion order defines the column order downstream
        record = {
            "task_name": task_name,
            "experiment_name": run_info["experiment_name"],
            "model_id": run_info["train_config"]["model_id"],
            "train_config": run_info["train_config"],
            "peft_type": peft_type,
            "peft_config": peft_config,
        }
        for key in ("cuda_memory_reserved_avg", "cuda_memory_max", "cuda_memory_reserved_99th"):
            record[key] = train_info[key]
        record["total_time"] = run_info["total_time"]
        for key in ("train_time", "file_size"):
            record[key] = train_info[key]
        metric_fields = (
            ("test_accuracy", "test accuracy"),
            ("train_loss", "train loss"),
            ("train_samples", "train samples"),
            ("train_total_tokens", "train total tokens"),
        )
        for target, source in metric_fields:
            record[target] = last_metrics[source]

        package_info = meta_info["package_info"]
        record["peft_version"] = package_info["peft-version"]
        record["peft_branch"] = run_info["peft_branch"]
        version_fields = (
            ("transformers_version", "transformers-version"),
            ("datasets_version", "datasets-version"),
            ("torch_version", "torch-version"),
            ("bitsandbytes_version", "bitsandbytes-version"),
        )
        for target, source in version_fields:
            record[target] = package_info[source]
        record["package_info"] = package_info
        record["system_info"] = meta_info["system_info"]
        record["created_at"] = run_info["created_at"]

        records.append(record)

    if num_skipped:
        print_fn(f"Skipped {num_skipped} of {len(rows)} entries because the train status != success")

    return records


def load_jsons(path):
    """Load every ``*.json`` file located directly inside ``path``.

    Args:
        path: Directory to scan. Subdirectories are not searched.

    Returns:
        A list with the parsed content of each JSON file, ordered by file name.
    """
    results = []
    # os.listdir returns entries in arbitrary order; sort so the result is reproducible
    for fn in sorted(os.listdir(path)):
        if not fn.endswith(".json"):
            continue
        # decode explicitly as UTF-8 instead of relying on the platform's locale default
        with open(os.path.join(path, fn), encoding="utf-8") as f:
            results.append(json.load(f))
    return results


def load_df(path, task_name, print_fn=print):
    """Load the JSON results found in ``path`` into a typed, de-duplicated DataFrame.

    Args:
        path: Directory containing the result JSON files (passed to ``load_jsons``).
        task_name: Task name forwarded to ``preprocess``.
        print_fn: Callable forwarded to ``preprocess`` for its status message.

    Returns:
        A ``pandas.DataFrame`` with one row per result record, sorted by
        ``created_at``, with the most relevant columns placed first. The frame
        is empty (but has all columns) when there are no records.
    """
    jsons = load_jsons(path)
    preprocessed = preprocess(jsons, task_name=task_name, print_fn=print_fn)
    dtype_dict = {
        "task_name": "string",
        "experiment_name": "string",
        "model_id": "string",
        "train_config": "string",
        "peft_type": "string",
        "peft_config": "string",
        "cuda_memory_reserved_avg": int,
        "cuda_memory_max": int,
        "cuda_memory_reserved_99th": int,
        "total_time": float,
        "train_time": float,
        "file_size": int,
        "test_accuracy": float,
        "train_loss": float,
        "train_samples": int,
        "train_total_tokens": int,
        "peft_version": "string",
        "peft_branch": "string",
        "transformers_version": "string",
        "datasets_version": "string",
        "torch_version": "string",
        "bitsandbytes_version": "string",
        "package_info": "string",
        "system_info": "string",
        "created_at": "string",
    }
    # Name the columns explicitly: without records the frame would otherwise have no columns
    # and the astype call below would fail on the missing keys.
    df = pd.DataFrame(preprocessed, columns=list(dtype_dict))
    df = df.astype(dtype_dict)
    df["created_at"] = pd.to_datetime(df["created_at"])
    # round training time to nearest second
    df["train_time"] = df["train_time"].round().astype(int)
    df["total_time"] = df["total_time"].round().astype(int)

    # reorder columns for better viewing, pinned_columns arg in Gradio seems not to work correctly
    important_columns = [
        "experiment_name",
        "peft_type",
        "total_time",
        "train_time",
        "test_accuracy",
        "train_loss",
        "cuda_memory_max",
        "cuda_memory_reserved_99th",
        "cuda_memory_reserved_avg",
        "file_size",
        "created_at",
        "task_name",
    ]
    other_columns = [col for col in df if col not in important_columns]
    df = df[important_columns + other_columns]

    # Drop rows that are identical on the key columns, keeping the last one after sorting by time.
    # NOTE(review): "created_at" is part of the key, so runs of the same experiment with different
    # timestamps are all kept. If only the most recent run per experiment should survive, remove
    # "created_at" from the key -- confirm the intended behavior before changing it.
    columns = ["experiment_name", "model_id", "peft_type", "created_at"]
    df = df.sort_values("created_at").drop_duplicates(columns, keep="last")
    return df