|
import os.path |
|
import time |
|
from pathlib import Path |
|
from typing import Callable, Optional, Tuple |
|
|
|
import pandas as pd |
|
from datasets import Dataset |
|
from optimum.onnxruntime import ( |
|
ORTModelForSequenceClassification, |
|
ORTOptimizer, |
|
ORTQuantizer, |
|
) |
|
from optimum.onnxruntime.configuration import ( |
|
AutoCalibrationConfig, |
|
AutoOptimizationConfig, |
|
AutoQuantizationConfig, |
|
) |
|
from optimum.pipelines import pipeline as opt_pipeline |
|
from pynvml import nvmlDeviceGetHandleByIndex, nvmlDeviceGetMemoryInfo, nvmlInit |
|
from sklearn.metrics import roc_auc_score |
|
from transformers import AutoTokenizer, PreTrainedModel, PreTrainedTokenizer, pipeline |
|
from transformers.pipelines.base import KeyDataset |
|
|
|
from detoxify.detoxify import load_checkpoint |
|
|
|
|
|
def get_gpu_utilization() -> int:
    """Return the memory currently reported as used on GPU 0.

    NVML is initialised on every call, device index 0 is queried, and the
    ``used`` figure is floor-divided by 1024**2 (i.e. converted to MiB,
    assuming NVML reports bytes).
    """
    nvmlInit()
    memory = nvmlDeviceGetMemoryInfo(nvmlDeviceGetHandleByIndex(0))
    divisor = 1024**2
    return memory.used // divisor
|
|
|
|
|
def load_data(base_path: Path, nrows: Optional[int] = None) -> pd.DataFrame:
    """Load the test set and attach its labels as a ``"label"`` column.

    Args:
        base_path: Directory containing ``test.csv`` and ``test_labels.csv``.
        nrows: If given, only this many leading rows are read from each file.

    Returns:
        The contents of ``test.csv`` (indexed by its first column) with the
        labels frame assigned to the new ``"label"`` column.
    """
    # Both files are read with identical options so their indices line up.
    read_options = {"index_col": 0, "nrows": nrows}
    labels_df = pd.read_csv(base_path / "test_labels.csv", **read_options)
    test_df = pd.read_csv(base_path / "test.csv", **read_options)

    test_df["label"] = labels_df
    return test_df
|
|
|
|
|
def get_toxicity(result):
    """Return the ``"score"`` of the first entry labelled ``"toxicity"``.

    ``result`` is an iterable of mappings with ``"label"`` and ``"score"``
    keys. An ``IndexError`` is raised when no entry carries that label.
    """
    toxicity_entries = [entry for entry in result if entry["label"] == "toxicity"]
    return toxicity_entries[0]["score"]
|
|
|
|
|
def evaluate_devices(data_path: Path, evaluate_model_fn: Callable, **kwargs):
    """Benchmark one evaluation function on CPU and on ``cuda:0``.

    The CPU run uses only the first 1000 rows of the data; the GPU run uses
    the full data set. Extra keyword arguments are forwarded to
    ``evaluate_model_fn`` on both runs.

    Returns:
        A dict with the GPU run's ``"scores"`` and ``"gpu_memory_mb"`` plus
        the samples-per-second throughput measured on each device.
    """
    cpu_sample = load_data(data_path, nrows=1000)
    cpu_report = evaluate_model_fn("cpu", cpu_sample, **kwargs)

    full_set = load_data(data_path)
    gpu_report = evaluate_model_fn("cuda:0", full_set, **kwargs)

    cpu_throughput = len(cpu_sample) / cpu_report["time_seconds"]
    gpu_throughput = len(full_set) / gpu_report["time_seconds"]

    return {
        "scores": gpu_report["scores"],
        "samples_per_second_cpu": cpu_throughput,
        "samples_per_second_gpu": gpu_throughput,
        "gpu_memory_mb": gpu_report["gpu_memory_mb"],
    }
|
|
|
|
|
def evaluate_pipeline(
    pipe,
    df: pd.DataFrame,
    batch_size: int = 4,
    languages=("it", "fr", "ru", "pt", "es", "tr"),
) -> dict:
    """Run ``pipe`` over ``df`` and report ROC-AUC, wall time and GPU memory.

    Args:
        pipe: A text-classification pipeline callable. Each of its results
            must be an iterable of ``{"label", "score"}`` mappings that
            includes a ``"toxicity"`` entry (see ``get_toxicity``).
        df: Frame with a ``"content"`` column (model input), a ``"label"``
            column (ground truth) and a ``"lang"`` column (language code).
        batch_size: Batch size handed to the pipeline.
        languages: Language codes for which a separate ROC-AUC is computed.

    Returns:
        A dict with ``"scores"`` (ROC-AUC under ``"all"`` and under each
        language code), ``"time_seconds"`` and ``"gpu_memory_mb"``.

    Raises:
        IndexError: If a pipeline result has no ``"toxicity"`` entry.
        ValueError: Propagated from ``roc_auc_score`` (presumably when a
            subset has no rows or a single class -- confirm against sklearn).
    """
    # Dataset conversion is preparation, not inference: keep it untimed.
    inputs = KeyDataset(Dataset.from_pandas(df), "content")

    # perf_counter is monotonic, unlike time.time(), so the interval cannot be
    # distorted by system clock adjustments. The timed region covers both the
    # pipeline call and the consumption of its results, so the measurement is
    # right whether the pipeline yields lazily or computes eagerly.
    started = time.perf_counter()
    results = pipe(
        inputs,
        top_k=None,
        batch_size=batch_size,
        padding="longest",
        truncation=True,
    )
    toxicity_pred = pd.Series(map(get_toxicity, results), index=df.index)
    elapsed = time.perf_counter() - started

    scores = {"all": roc_auc_score(df.label, toxicity_pred)}
    for lang in languages:
        idx = df.lang == lang
        scores[lang] = roc_auc_score(df.label[idx], toxicity_pred[idx])

    return {
        "scores": scores,
        "time_seconds": elapsed,
        # Read after inference, and on every run regardless of the device used.
        "gpu_memory_mb": get_gpu_utilization(),
    }
|
|
|
|
|
def load_original_model(device: str) -> Tuple[PreTrainedModel, PreTrainedTokenizer]:
    """Load the multilingual checkpoint and set its label mappings.

    The checkpoint's class names are extended with a fixed list of identity
    classes, and the combined list is written to ``model.config.id2label``
    and ``model.config.label2id``.

    Args:
        device: Device string forwarded to ``load_checkpoint``.

    Returns:
        The ``(model, tokenizer)`` pair.
    """
    model, tokenizer, class_names = load_checkpoint(
        model_type="multilingual",
        device=device,
    )

    identity_classes = [
        "male",
        "female",
        "homosexual_gay_or_lesbian",
        "christian",
        "jewish",
        "muslim",
        "black",
        "white",
        "psychiatric_or_mental_illness",
    ]
    all_labels = class_names + identity_classes

    model.config.id2label = dict(enumerate(all_labels))
    model.config.label2id = {label: index for index, label in enumerate(all_labels)}

    return model, tokenizer
|
|
|
|
|
def evaluate_original_model(device: str, test_df: pd.DataFrame):
    """Evaluate the original model on ``test_df`` using ``device``.

    Builds a sigmoid-activated text-classification pipeline around the
    loaded model and tokenizer and returns what ``evaluate_pipeline`` reports.
    """
    original_model, original_tokenizer = load_original_model(device)

    pipeline_options = {
        "model": original_model,
        "task": "text-classification",
        "tokenizer": original_tokenizer,
        "function_to_apply": "sigmoid",
        "device": device,
    }
    classifier = pipeline(**pipeline_options)

    return evaluate_pipeline(classifier, test_df)
|
|
|
|
|
def save_original_model(base_path: Path = Path(".")):
    """Save the original model as a pipeline under ``base_path``.

    The model is loaded on CPU, wrapped in a sigmoid-activated
    text-classification pipeline, and written with ``save_pretrained``.
    """
    original_model, original_tokenizer = load_original_model("cpu")

    pipeline_options = {
        "model": original_model,
        "task": "text-classification",
        "tokenizer": original_tokenizer,
        "function_to_apply": "sigmoid",
    }
    classifier = pipeline(**pipeline_options)
    classifier.save_pretrained(base_path)
|
|
|
|
|
def evaluate_ort_model(device: str, test_df: pd.DataFrame, base_path: Path = Path(".")):
    """Evaluate the model under ``base_path`` through ONNX Runtime.

    The model is loaded with ``export=True`` and paired with the tokenizer
    from the same directory; both are wrapped in an ``"ort"``-accelerated
    text-classification pipeline that is passed to ``evaluate_pipeline``.
    """
    onnx_model = ORTModelForSequenceClassification.from_pretrained(base_path, export=True)
    onnx_tokenizer = AutoTokenizer.from_pretrained(base_path, device=device)

    pipeline_options = {
        "model": onnx_model,
        "task": "text-classification",
        "tokenizer": onnx_tokenizer,
        "function_to_apply": "sigmoid",
        "device": device,
        "accelerator": "ort",
    }
    ort_pipe = opt_pipeline(**pipeline_options)

    return evaluate_pipeline(ort_pipe, test_df)
|
|
|
|
|
def evaluate_ort_optimize_model(
    device: str,
    test_df: pd.DataFrame,
    base_path: Path = Path("."),
    overwrite: bool = False,
):
    """Evaluate the ONNX Runtime graph-optimized model on ``test_df``.

    ``model_optimized.onnx`` is created in ``base_path`` when it is missing
    (or when ``overwrite`` is set) and is reused otherwise.

    Args:
        device: Device string handed to the tokenizer loader and the pipeline.
        test_df: Evaluation data; see ``evaluate_pipeline`` for the columns used.
        base_path: Directory holding the saved model; the optimized file is
            written here too.
        overwrite: Regenerate the optimized model even if the file already
            exists. Mirrors the flag of ``evaluate_ort_quantize_model`` so a
            stale cached file can be refreshed.

    Returns:
        The result dict produced by ``evaluate_pipeline``.
    """
    tokenizer = AutoTokenizer.from_pretrained(base_path, device=device)

    optimized_name = "model_optimized.onnx"
    if overwrite or not (base_path / optimized_name).exists():
        model = ORTModelForSequenceClassification.from_pretrained(
            base_path, export=True
        )

        # NOTE(review): the O4 preset is believed to be fp16/GPU-oriented, yet
        # the resulting file is also used for the CPU run -- confirm intended.
        oconfig = AutoOptimizationConfig.O4()
        optimizer = ORTOptimizer.from_pretrained(model)
        optimizer.optimize(
            save_dir=base_path,
            optimization_config=oconfig,
        )

    model = ORTModelForSequenceClassification.from_pretrained(
        base_path, file_name=optimized_name
    )
    pipe = opt_pipeline(
        model=model,
        task="text-classification",
        function_to_apply="sigmoid",
        device=device,
        accelerator="ort",
        tokenizer=tokenizer,
    )

    return evaluate_pipeline(pipe, test_df)
|
|
|
|
|
def evaluate_ort_quantize_model(
    device: str,
    test_df: pd.DataFrame,
    base_path: Path = Path("."),
    overwrite: bool = False,
):
    """Evaluate the statically quantized ONNX Runtime model on ``test_df``.

    ``model_quantized.onnx`` is created in ``base_path`` when it is missing
    (or when ``overwrite`` is set) and is reused otherwise. Calibration
    ranges are computed with a min-max calibrator over ``test_df`` itself,
    i.e. over the same data that is evaluated afterwards.

    Args:
        device: Device string handed to the tokenizer loader and the pipeline.
        test_df: Evaluation (and calibration) data; must have a ``"content"``
            column plus the columns ``evaluate_pipeline`` reads.
        base_path: Directory holding the saved model; the quantized file is
            written here too.
        overwrite: Regenerate the quantized model even if the file exists.

    Returns:
        The result dict produced by ``evaluate_pipeline``.
    """
    tokenizer = AutoTokenizer.from_pretrained(base_path, device=device)

    quantized_name = "model_quantized.onnx"
    if overwrite or not (base_path / quantized_name).exists():
        model = ORTModelForSequenceClassification.from_pretrained(
            base_path, export=True
        )
        qconfig = AutoQuantizationConfig.avx2(is_static=True, per_channel=False)
        quantizer = ORTQuantizer.from_pretrained(model)

        def preprocess_fn(ex):
            # Truncate exactly as the evaluation pipeline does, so calibration
            # never feeds the model a sequence longer than it accepts.
            return tokenizer(ex["content"], truncation=True)

        calibration_dataset = (
            Dataset.from_pandas(test_df)
            .map(preprocess_fn)
            .select_columns(["input_ids", "attention_mask"])
        )
        calibration_config = AutoCalibrationConfig.minmax(calibration_dataset)
        ranges = quantizer.fit(
            dataset=calibration_dataset,
            calibration_config=calibration_config,
            operators_to_quantize=qconfig.operators_to_quantize,
        )

        quantizer.quantize(
            save_dir=base_path,
            quantization_config=qconfig,
            calibration_tensors_range=ranges,
        )

    # The stray ``foo="bar"`` keyword that used to be passed here was a
    # leftover with no meaning to the loader and has been dropped.
    model = ORTModelForSequenceClassification.from_pretrained(
        base_path,
        file_name=quantized_name,
    )
    pipe = opt_pipeline(
        model=model,
        task="text-classification",
        function_to_apply="sigmoid",
        device=device,
        accelerator="ort",
        tokenizer=tokenizer,
    )

    return evaluate_pipeline(pipe, test_df)
|
|
|
|
|
if __name__ == "__main__":
    # Command-line entry point: benchmark one model variant on CPU and GPU
    # and print the resulting report.
    import argparse

    cli = argparse.ArgumentParser()
    cli.add_argument(
        "data_path",
        type=str,
        help="Path to jigsaw multilingual toxic comment data. "
        'For example: "jigsaw_data/jigsaw-multilingual-toxic-comment-classification"',
    )
    cli.add_argument(
        "--models_path",
        type=str,
        default=".",
        help="Path to model weights directory (root of the repo)",
    )
    cli.add_argument(
        "model",
        type=str,
        help="Model to evaluate (original, ort, optimized, quantized).",
    )

    args = cli.parse_args()

    data = Path(args.data_path)
    models_p = Path(args.models_path)

    # Variants that load their weights from ``models_p``; the original model
    # is handled separately because its evaluator takes no ``base_path``.
    ort_evaluators = {
        "ort": evaluate_ort_model,
        "optimized": evaluate_ort_optimize_model,
        "quantized": evaluate_ort_quantize_model,
    }

    if args.model == "original":
        report = evaluate_devices(data, evaluate_original_model)
    elif args.model in ort_evaluators:
        report = evaluate_devices(data, ort_evaluators[args.model], base_path=models_p)
    else:
        raise ValueError(f"Invalid model received: {args.model!r}")

    print(report)
|
|