# testtrain / app.py
# Hugging Face file-viewer header: uploaded by cheberle, commit "f" (b7b3996), 1.63 kB.
import gradio as gr
import pandas as pd
from datasets import Dataset
from transformers import AutoTokenizer, TrainingArguments, Trainer, AutoModelForCausalLM
import torch
import os
# Force CPU: expose no CUDA devices to this process by blanking the
# device list in the environment.
os.environ.update({"CUDA_VISIBLE_DEVICES": ""})
def train_model(file, hf_token):
    """Smoke-test the CPU fine-tuning setup for an uploaded CSV.

    Reads the CSV into a DataFrame, loads the ``facebook/opt-125m`` tokenizer
    and model, wraps the DataFrame in a ``datasets.Dataset`` and constructs a
    ``Trainer``. No training step is executed; the function only verifies
    that every piece can be built.

    Args:
        file: The upload handed over by the Gradio ``File`` component. May be
            a filesystem path (``str`` / ``os.PathLike``) or an object that
            exposes the path through a ``.name`` attribute.
        hf_token: Hugging Face token entered in the UI. Currently unused by
            this setup check.

    Returns:
        A status string: ``"Setup successful! ..."`` on success, otherwise a
        message starting with ``"Error: "``. Exceptions are reported in the
        returned text rather than raised, so the UI always gets a string.
    """
    if file is None:
        # Nothing was uploaded; say so instead of surfacing an AttributeError.
        return "Error: no CSV file was uploaded"

    try:
        # Accept both a plain path and a file-like wrapper carrying `.name`.
        if isinstance(file, (str, os.PathLike)):
            csv_path = file
        else:
            csv_path = file.name

        # Basic data loading test
        df = pd.read_csv(csv_path)
        print(f"Loaded CSV with {len(df)} rows")

        # Load tokenizer and model
        model_name = "facebook/opt-125m"
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            low_cpu_mem_usage=True,
            torch_dtype=torch.float32,
        )

        # Basic dataset creation
        dataset = Dataset.from_pandas(df)

        # `use_cpu=True` alone selects the CPU; the deprecated `no_cuda`
        # flag it replaces is no longer passed alongside it.
        args = TrainingArguments(
            output_dir="./results",
            per_device_train_batch_size=1,
            num_train_epochs=1,
            local_rank=-1,
            use_cpu=True,
        )

        # Constructed only to confirm the pieces fit together; never trained.
        # NOTE(review): newer transformers releases may prefer
        # `processing_class` over `tokenizer` - confirm against the pinned
        # version before changing.
        Trainer(
            model=model,
            args=args,
            train_dataset=dataset,
            tokenizer=tokenizer,
        )

        return f"Setup successful! Loaded {len(df)} rows"
    except Exception as e:
        # UI boundary: report any failure as text instead of crashing the app.
        return f"Error: {str(e)}\nType: {type(e)}"
# Gradio front end: a CSV upload and a masked token field are passed to
# train_model, and the status string it returns is shown as plain text.
demo = gr.Interface(
    title="Product Classifier Training (CPU)",
    fn=train_model,
    inputs=[
        gr.File(label="Upload CSV file"),
        gr.Textbox(type="password", label="HF Token"),
    ],
    outputs="text",
)
# Launch the interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    demo.launch(debug=True)