# Hugging Face Spaces page header (not part of the script): Runtime error
import inspect

import pandas as pd
import torch
from datasets import Dataset
from transformers import (
    RobertaForSequenceClassification,
    RobertaTokenizer,
    Trainer,
    TrainingArguments,
)
# Load the dataset: read the preprocessed CSV into a DataFrame.
DATA_PATH = "processed_step3.csv"
df = pd.read_csv(DATA_PATH)
# Prepare the dataset for Hugging Face
def preprocess_data(row):
    """Build the model-facing fields for a single dataset row.

    Args:
        row: Mapping for one example; must provide the ``"full_text"`` and
            ``"narratives"`` keys.

    Returns:
        A dict with ``"text"`` (copied from ``row["full_text"]``) and
        ``"labels"`` (copied from ``row["narratives"]``). Other keys in
        ``row`` are not included in the result.

    Raises:
        KeyError: If ``row`` lacks ``"full_text"`` or ``"narratives"``.
    """
    return {"text": row["full_text"], "labels": row["narratives"]}
# Create a Dataset object
# The sequence-classification head is sized by the number of distinct
# "narratives" values and expects integer class ids in [0, num_labels).
# Map every distinct narrative to its rank among the sorted unique values so
# the ids are deterministic; labels that are already 0..n-1 map to themselves.
label2id = {
    label: idx for idx, label in enumerate(sorted(set(df["narratives"])))
}
hf_dataset = Dataset.from_pandas(df).map(preprocess_data)
# Second pass: replace the raw label copied by preprocess_data with its id.
hf_dataset = hf_dataset.map(lambda row: {"labels": label2id[row["labels"]]})
# Load pre-trained tokenizer and model
# Both come from the same "roberta-base" checkpoint; the classification head
# gets one output per distinct value in the "narratives" column.
num_narratives = len(set(df["narratives"]))
tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
model = RobertaForSequenceClassification.from_pretrained(
    "roberta-base",
    num_labels=num_narratives,
)
# Tokenize the data
def tokenize_function(examples):
    """Tokenize the ``"text"`` field of ``examples`` with the module tokenizer.

    Args:
        examples: Mapping with a ``"text"`` entry. The script applies this
            function through ``Dataset.map(..., batched=True)``, so ``"text"``
            is presumably a list of strings here.

    Returns:
        Whatever the tokenizer call returns for the given text, padded with
        ``padding="max_length"`` and truncated with ``truncation=True`` so
        every example ends up with the same length.
    """
    return tokenizer(examples["text"], padding="max_length", truncation=True)
# Run the tokenizer over the dataset in batches; the tokenizer outputs are
# added as new columns alongside the existing ones.
hf_dataset = hf_dataset.map(function=tokenize_function, batched=True)
# Set Hugging Face TrainingArguments
# Newer transformers releases renamed `evaluation_strategy` to
# `eval_strategy`. Pick whichever keyword the installed version accepts so
# construction does not fail on an unknown argument.
_eval_strategy_arg = (
    "eval_strategy"
    if "eval_strategy" in inspect.signature(TrainingArguments).parameters
    else "evaluation_strategy"
)
training_args = TrainingArguments(
    output_dir="./results",
    # Evaluation and checkpointing use the same "epoch" schedule, which
    # load_best_model_at_end=True requires.
    save_strategy="epoch",
    per_device_train_batch_size=8,
    num_train_epochs=3,
    load_best_model_at_end=True,
    logging_dir="./logs",
    logging_steps=10,
    push_to_hub=True,  # Push to Hugging Face Model Hub
    hub_model_id="eerrffuunn/semeval-task",
    **{_eval_strategy_arg: "epoch"},
)
# Trainer for training the model
# `hf_dataset` is a single Dataset (built with Dataset.from_pandas), not a
# DatasetDict, so it has no "train"/"validation" splits to index. Carve out a
# held-out evaluation split here; the fixed seed keeps the split reproducible.
dataset_splits = hf_dataset.train_test_split(test_size=0.2, seed=42)
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset_splits["train"],
    # train_test_split names the held-out part "test"; it is used for the
    # per-epoch evaluation.
    eval_dataset=dataset_splits["test"],
    tokenizer=tokenizer,
)
# Train the model
trainer.train()
# Save the model and tokenizer into the same output directory
MODEL_DIR = "semeval_model"
trainer.save_model(MODEL_DIR)
tokenizer.save_pretrained(MODEL_DIR)