import gradio as gr
import numpy as np
import evaluate
from transformers import (
    AutoModelForSeq2SeqLM,
    AutoTokenizer,
    DataCollatorForSeq2Seq,
    pipeline,
)
# Hub repo with the fine-tuned Spanish -> Quechua model
repo_id = "islasher/mbart-spanishToQuechua"

# Load the tokenizer and model. The tokenizer comes from the base mBART
# checkpoint the fine-tune started from; it is reused by the evaluation
# helpers below (inference itself goes through the fine-tuned pipeline).
model_checkpoint = "facebook/mbart-large-50"
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
model = AutoModelForSeq2SeqLM.from_pretrained(model_checkpoint)
data_collator = DataCollatorForSeq2Seq(tokenizer)  # pads each batch of tokenized examples
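# Illustrative sketch (commented out, not part of the app): the collator pads
# "input_ids" with the pad token and "labels" with -100 so the loss ignores
# padded positions. The sentences and labels below are made-up placeholders.
# feats = [tokenizer(t) for t in ["hola", "buenos días"]]
# for f, lab in zip(feats, ["example label 1", "example label 2"]):
#     f["labels"] = tokenizer(lab)["input_ids"]
# batch = data_collator(feats)  # PyTorch tensors, padded to the longest example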
# sacreBLEU: corpus-level BLEU, the usual metric for machine translation
metric = evaluate.load("sacrebleu")
def postprocess_text(preds, labels):
    # sacreBLEU expects one stripped string per prediction and a *list* of
    # reference strings per example
    preds = [pred.strip() for pred in preds]
    labels = [[label.strip()] for label in labels]
    return preds, labels
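# For example, postprocess_text(["hola "], [" mundo"]) returns
# (["hola"], [["mundo"]]).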
def compute_metrics(eval_preds):
    preds, labels = eval_preds
    if isinstance(preds, tuple):
        preds = preds[0]
    decoded_preds = tokenizer.batch_decode(preds, skip_special_tokens=True)
    # -100 marks ignored label positions; swap in the pad token id so they decode cleanly
    labels = np.where(labels != -100, labels, tokenizer.pad_token_id)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)
    decoded_preds, decoded_labels = postprocess_text(decoded_preds, decoded_labels)
    result = metric.compute(predictions=decoded_preds, references=decoded_labels)
    result = {"bleu": result["score"]}
    # Mean generated length in non-padding tokens
    prediction_lens = [np.count_nonzero(pred != tokenizer.pad_token_id) for pred in preds]
    result["gen_len"] = np.mean(prediction_lens)
    return {k: round(v, 4) for k, v in result.items()}
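# Hedged sketch (commented out): how data_collator and compute_metrics would
# plug into a Seq2SeqTrainer, matching the training imports this file
# originally carried. train_ds / eval_ds are hypothetical tokenized datasets.
# from transformers import Seq2SeqTrainer, Seq2SeqTrainingArguments
# args = Seq2SeqTrainingArguments(
#     output_dir="mbart-spanishToQuechua",
#     predict_with_generate=True,  # decode with generate() so BLEU is meaningful
# )
# trainer = Seq2SeqTrainer(
#     model=model,
#     args=args,
#     train_dataset=train_ds,
#     eval_dataset=eval_ds,
#     data_collator=data_collator,
#     tokenizer=tokenizer,
#     compute_metrics=compute_metrics,
# )
# trainer.train()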
# Inference goes through the fine-tuned model via a text2text-generation pipeline
neutralizer = pipeline("text2text-generation", model=repo_id)
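# Example call (illustrative): the pipeline returns one dict per input,
# e.g. neutralizer("Buenos días") -> [{"generated_text": "..."}]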
# Alternative to the pipeline: call the model directly. Note that translation
# requires model.generate() (which runs the decoder), not a plain forward pass.
# def predict(frase):
#     inputs = tokenizer(frase, return_tensors="pt")
#     outputs = model.generate(**inputs)
#     return tokenizer.decode(outputs[0], skip_special_tokens=True)

# The pipeline returns a list of dicts, so unwrap the generated text for Gradio
def translate(frase):
    return neutralizer(frase)[0]["generated_text"]

# Create the interface and launch it
gr.Interface(fn=translate, inputs="text", outputs="text").launch(share=False)