# Evaluation script: loads configuration from environment variables, loads a
# (optionally adapter-augmented, optionally 4-bit) model and a translation
# dataset, then sweeps repetition penalties and reports metrics per step.
import os
import sys
import torch
from dotenv import find_dotenv, load_dotenv
# Locate a .env file (falling back to the checked-in example) and load it
# without overriding variables already present in the process environment.
found_dotenv = find_dotenv(".env")
if not found_dotenv:  # find_dotenv returns "" when nothing is found
    found_dotenv = find_dotenv(".env.example")
print(f"loading env vars from: {found_dotenv}")
load_dotenv(found_dotenv, override=False)

# Make the project root (the directory containing the .env file) importable
# so the llm_toolkit package below can be resolved.
path = os.path.dirname(found_dotenv)
print(f"Adding {path} to sys.path")
sys.path.append(path)
from llm_toolkit.llm_utils import *
from llm_toolkit.translation_utils import *
# Pick the compute device via the project helper and cache CUDA availability
# for the GPU memory reports below.
device = check_gpu()  # NOTE(review): assumed to return a torch device/device string — confirm in llm_toolkit.llm_utils
is_cuda = torch.cuda.is_available()
# Runtime configuration, all sourced from the environment (possibly populated
# by the .env file loaded above). Boolean flags are the literal string "true";
# numeric settings fall back to defaults when unset.
model_name = os.environ.get("MODEL_NAME")
adapter_name_or_path = os.environ.get("ADAPTER_NAME_OR_PATH")
load_in_4bit = os.environ.get("LOAD_IN_4BIT") == "true"
data_path = os.environ.get("DATA_PATH")
results_path = os.environ.get("RESULTS_PATH")
batch_size = int(os.environ.get("BATCH_SIZE", 1))
use_english_datasets = os.environ.get("USE_ENGLISH_DATASETS") == "true"
max_new_tokens = int(os.environ.get("MAX_NEW_TOKENS", 2048))
# Echo the effective configuration (space-separated, one line) to the run log.
run_config = (
    model_name,
    adapter_name_or_path,
    load_in_4bit,
    data_path,
    results_path,
    use_english_datasets,
    max_new_tokens,
    batch_size,
)
print(*run_config)
if is_cuda:
    # Flush cached CUDA allocations so the baseline reservation figure is honest.
    torch.cuda.empty_cache()
    props = torch.cuda.get_device_properties(0)
    reserved_gb = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
    total_gb = round(props.total_memory / 1024 / 1024 / 1024, 3)
    print(f"(0) GPU = {props.name}. Max memory = {total_gb} GB.")
    print(f"{reserved_gb} GB of memory reserved.")
    torch.cuda.empty_cache()
# Load the base model and tokenizer, optionally 4-bit quantized and with the
# configured adapter applied (adapter_name_or_path may be None).
model, tokenizer = load_model(
    model_name,
    adapter_name_or_path=adapter_name_or_path,
    load_in_4bit=load_in_4bit,
)
if is_cuda:
    # Snapshot GPU memory again now that the model is loaded, to show its footprint.
    props = torch.cuda.get_device_properties(0)
    reserved_gb = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
    total_gb = round(props.total_memory / 1024 / 1024 / 1024, 3)
    print(f"(2) GPU = {props.name}. Max memory = {total_gb} GB.")
    print(f"{reserved_gb} GB of memory reserved.")
datasets = load_translation_dataset(data_path, tokenizer)

# Optional CLI argument: evaluate only the first N test entries (N > 0).
if len(sys.argv) > 1:
    limit = int(sys.argv[1])
    if limit > 0:
        print(f"--- evaluating {limit} entries")
        datasets["test"] = datasets["test"].select(range(limit))

print_row_details(datasets["test"].to_pandas(), indices=[0, -1])
def on_repetition_penalty_step_completed(model_name, predictions):
    """Persist predictions and print translation metrics for one penalty step.

    Invoked by evaluate_model_with_repetition_penalty after each repetition
    penalty value; reads the module-level `datasets` and `results_path`.
    """
    test_split = datasets["test"]
    save_results(model_name, results_path, test_split, predictions)
    metrics = calc_metrics(test_split["english"], predictions, debug=True)
    print(f"{model_name} metrics: {metrics}")
# Tag the reported model name with the adapter's final path component so
# results from different adapters are distinguishable.
if adapter_name_or_path is not None:
    model_name = model_name + "/" + adapter_name_or_path.split("/")[-1]

# Sweep repetition penalties 1.0 → 1.3 in steps of 0.02, saving results and
# printing metrics after every step via the callback above.
evaluate_model_with_repetition_penalty(
    model,
    tokenizer,
    model_name,
    datasets["test"],
    on_repetition_penalty_step_completed,
    start_repetition_penalty=1.0,
    end_repetition_penalty=1.3,
    step_repetition_penalty=0.02,
    batch_size=batch_size,
    max_new_tokens=max_new_tokens,
    device=device,
)
if is_cuda:
    # Final memory report after the evaluation sweep completes.
    props = torch.cuda.get_device_properties(0)
    reserved_gb = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
    total_gb = round(props.total_memory / 1024 / 1024 / 1024, 3)
    print(f"(3) GPU = {props.name}. Max memory = {total_gb} GB.")
    print(f"{reserved_gb} GB of memory reserved.")