|
from huggingface_hub import from_pretrained_fastai |
|
import gradio as gr |
|
|
|
|
|
import torch |
|
from transformers import pipeline |
|
from transformers import Seq2SeqTrainer, AutoModelForSeq2SeqLM, Seq2SeqTrainingArguments, DataCollatorForSeq2Seq |
|
from transformers import AutoTokenizer |
|
|
|
|
|
|
|
# Hugging Face Hub id of the model this app serves (the name suggests an
# mBART checkpoint for Spanish -> Quechua translation).
repo_id = "islasher/mbart-spanishToQuechua"

# Spanish-named alias of the same Hub id; bound to ``repo_id`` so the two
# names cannot drift apart.
nombre_modelo = repo_id
|
|
|
|
|
|
|
# Base checkpoint from which the tokenizer and seq2seq weights are loaded.
model_checkpoint = "facebook/mbart-large-50"

# The tokenizer is also used by compute_metrics to decode token ids.
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)

# NOTE(review): nothing visible in this file runs inference with `model`
# (the Gradio UI uses its own pipeline); it is kept because other code may
# rely on the name, but loading it costs startup time and memory -- confirm
# whether it is still needed.
model = AutoModelForSeq2SeqLM.from_pretrained(model_checkpoint)

from transformers import DataCollatorForSeq2Seq

# Passing the model lets the collator prepare decoder_input_ids from the
# labels when the model supports it (see the DataCollatorForSeq2Seq docs).
data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)
|
|
|
|
|
|
|
import numpy as np |
|
|
|
import evaluate |
|
|
|
# SacreBLEU scorer; compute_metrics calls metric.compute() on decoded text.
metric = evaluate.load("sacrebleu")
|
|
|
def postprocess_text(preds, labels):
    """Normalize decoded predictions and references for metric scoring.

    Args:
        preds: Iterable of decoded prediction strings.
        labels: Iterable of decoded reference strings.

    Returns:
        A ``(preds, labels)`` tuple where ``preds`` is a list of stripped
        strings and ``labels`` is a list of single-element lists, each
        holding one stripped reference string.
    """
    preds = [pred.strip() for pred in preds]
    # Each reference is wrapped in its own list: one reference per prediction.
    labels = [[label.strip()] for label in labels]

    return preds, labels
|
|
|
def compute_metrics(eval_preds):
    """Compute BLEU and mean generated length for an evaluation batch.

    Args:
        eval_preds: A ``(predictions, labels)`` pair of token-id arrays. If
            ``predictions`` is a tuple, only its first element is used.

    Returns:
        A dict with ``"bleu"`` (the metric's ``score``) and ``"gen_len"``
        (mean number of non-pad tokens per prediction), both rounded to
        four decimals.
    """
    preds, labels = eval_preds
    if isinstance(preds, tuple):
        preds = preds[0]

    pad_id = tokenizer.pad_token_id

    # -100 is a padding/ignore sentinel that the tokenizer cannot decode.
    # Replace it in the predictions as well as the labels, so decoding does
    # not fail and the sentinel is not counted as a generated token below.
    preds = np.where(preds != -100, preds, pad_id)
    labels = np.where(labels != -100, labels, pad_id)

    decoded_preds = tokenizer.batch_decode(preds, skip_special_tokens=True)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)

    decoded_preds, decoded_labels = postprocess_text(decoded_preds, decoded_labels)

    scores = metric.compute(predictions=decoded_preds, references=decoded_labels)
    result = {"bleu": scores["score"]}

    # Generated length = number of non-pad token ids in each prediction row.
    prediction_lens = [np.count_nonzero(pred != pad_id) for pred in preds]
    result["gen_len"] = np.mean(prediction_lens)

    return {k: round(v, 4) for k, v in result.items()}
|
|
|
|
|
|
|
from transformers import pipeline |
|
# Text-to-text generation pipeline backed by the Hub checkpoint named below.
neutralizer = pipeline('text2text-generation', model='islasher/mbart-spanishToQuechua')


def translate(text):
    """Run the pipeline on ``text`` and return only the generated string.

    The pipeline returns a list of dicts (``[{"generated_text": ...}]``);
    handing it to Gradio directly would show that raw structure in the
    output textbox, so the text is unwrapped here.

    Args:
        text: User-supplied input string from the Gradio textbox.

    Returns:
        The generated text, or an empty string for empty/blank input.
    """
    if not text or not text.strip():
        return ""
    outputs = neutralizer(text)
    return outputs[0]["generated_text"]


# Launch stays at module level and without a public share link, as before.
gr.Interface(fn=translate, inputs="text", outputs="text").launch(share=False)
|
|