|
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments |
|
import pandas as pd |
|
from datasets import Dataset |
|
from transformers import BertTokenizerFast |
|
|
|
|
|
# Tiny intent-classification corpus: four Spanish fast-food questions,
# one per label. The label index is the question's position in the list
# (0=burgers, 1=combos, 2=soft drinks, 3=how to order).
_questions = [
    "¿Qué hamburguesas tienen?",
    "¿Qué combos con papas tienen?",
    "¿Qué tipos de refrescos tienen?",
    "¿Cómo hago una orden?",
]
data = [{"text": text, "label": idx} for idx, text in enumerate(_questions)]

# Materialise the records as a DataFrame with columns ["text", "label"].
df = pd.DataFrame(data)
|
dataset = Dataset.from_pandas(df) |
|
|
|
|
|
|
|
|
|
tokenizer = BertTokenizerFast.from_pretrained("Sebastian2903/SMARTORDERIA") |
|
|
|
|
|
def preprocess_function(examples):
    """Tokenize a batch of examples for sequence classification.

    Args:
        examples: Batch mapping containing a ``'text'`` list of raw strings.

    Returns:
        The tokenizer's batch encoding (``input_ids``, ``attention_mask``, ...),
        truncated to the model maximum length and padded within the batch.
    """
    texts = examples['text']
    return tokenizer(texts, truncation=True, padding=True)
|
|
|
tokenized_dataset = dataset.map(preprocess_function, batched=True) |
|
|
|
|
|
# Hold out 10% of the rows for evaluation. A fixed seed makes the split
# reproducible across runs — the original unseeded call shuffled
# differently every execution, so train/eval membership was nondeterministic.
# NOTE: with only 4 examples, test_size=0.1 still yields one held-out row
# (datasets rounds the test fraction up, like scikit-learn).
train_test_split = tokenized_dataset.train_test_split(test_size=0.1, seed=42)

train_dataset = train_test_split['train']  # ~90% of the rows

eval_dataset = train_test_split['test']    # held-out rows for per-epoch eval
|
|
|
|
|
model = AutoModelForSequenceClassification.from_pretrained("Sebastian2903/SMARTORDERIA", num_labels=4) |
|
|
|
|
|
# Hyper-parameters for the fine-tuning run: 3 epochs, per-device batch
# size 16 for both train and eval, evaluation at the end of every epoch,
# weight decay 0.01, outputs (checkpoints/logs) under ./results.
_training_config = {
    "output_dir": "./results",
    "evaluation_strategy": "epoch",
    "per_device_train_batch_size": 16,
    "per_device_eval_batch_size": 16,
    "num_train_epochs": 3,
    "weight_decay": 0.01,
}
training_args = TrainingArguments(**_training_config)
|
|
|
# Wire the model, hyper-parameters and datasets into the Trainer.
# Passing the tokenizer as well means (a) it is saved alongside every
# checkpoint written to output_dir and (b) the Trainer's default collator
# can pad batches dynamically rather than relying on the statically
# padded columns produced at map() time.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    tokenizer=tokenizer,
)
|
|
|
|
|
trainer.train() |
|
|
|
|
|
# Persist the fine-tuned weights and tokenizer.
# NOTE(review): this path looks like a Hub repo id, but save_pretrained
# only writes to a LOCAL directory named "Sebastian2903/SMARTORDERIA" —
# use push_to_hub / trainer.push_to_hub if uploading was the intent.
model.save_pretrained("Sebastian2903/SMARTORDERIA")

tokenizer.save_pretrained("Sebastian2903/SMARTORDERIA")
|
|