"""Gradio demo that serves the `islasher/mbart-spanishToQuechua` pipeline.

The script loads a `text2text-generation` pipeline for the fine-tuned
checkpoint (Spanish -> Quechua translation, judging by the repository name)
and exposes it through a minimal text-in / text-out Gradio interface.

It also keeps the tokenizer, base model, data collator and sacreBLEU metric
helpers that accompany the checkpoint.
"""
import evaluate
import gradio as gr
import numpy as np
import torch
from huggingface_hub import from_pretrained_fastai
from transformers import (
    AutoModelForSeq2SeqLM,
    AutoTokenizer,
    DataCollatorForSeq2Seq,
    Seq2SeqTrainer,
    Seq2SeqTrainingArguments,
    pipeline,
)

# Hub repository of the fine-tuned model served by the demo.
repo_id = "islasher/mbart-spanishToQuechua"
nombre_modelo = 'islasher/mbart-spanishToQuechua'

# Load the tokenizer and model.
# NOTE(review): the tokenizer and model come from the base checkpoint, not
# from `nombre_modelo` -- the original had the fine-tuned tokenizer load
# commented out. Confirm this is intentional.
model_checkpoint = "facebook/mbart-large-50"
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
# NOTE(review): `model` and `data_collator` are not used by the demo below;
# they look like leftovers from the training script -- confirm before removing.
model = AutoModelForSeq2SeqLM.from_pretrained(model_checkpoint)

# Collator used to prepare the data.
data_collator = DataCollatorForSeq2Seq(tokenizer)

metric = evaluate.load("sacrebleu")


def postprocess_text(preds, labels):
    """Strip surrounding whitespace from decoded predictions and labels.

    Args:
        preds: Decoded prediction strings.
        labels: Decoded reference strings.

    Returns:
        A ``(preds, labels)`` tuple where ``preds`` is a list of stripped
        strings and ``labels`` is a list of single-element lists, i.e. one
        reference list per prediction, which is the form later handed to
        ``metric.compute``.
    """
    preds = [pred.strip() for pred in preds]
    labels = [[label.strip()] for label in labels]
    return preds, labels


def compute_metrics(eval_preds):
    """Compute the BLEU score and mean generated length for an eval batch.

    Args:
        eval_preds: A ``(predictions, labels)`` pair of token-id arrays. If
            ``predictions`` is a tuple, only its first element is used.

    Returns:
        A dict with keys ``"bleu"`` (the metric's ``"score"``) and
        ``"gen_len"`` (mean count of non-pad tokens per prediction), both
        rounded to 4 decimals.
    """
    preds, labels = eval_preds
    if isinstance(preds, tuple):
        preds = preds[0]
    decoded_preds = tokenizer.batch_decode(preds, skip_special_tokens=True)

    # -100 marks label positions to ignore; swap in the pad token id so the
    # tokenizer can decode the labels.
    labels = np.where(labels != -100, labels, tokenizer.pad_token_id)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)

    decoded_preds, decoded_labels = postprocess_text(decoded_preds, decoded_labels)

    result = metric.compute(predictions=decoded_preds, references=decoded_labels)
    result = {"bleu": result["score"]}

    prediction_lens = [
        np.count_nonzero(pred != tokenizer.pad_token_id) for pred in preds
    ]
    result["gen_len"] = np.mean(prediction_lens)
    return {k: round(v, 4) for k, v in result.items()}


neutralizer = pipeline('text2text-generation', model=repo_id)


def predict(frase):
    """Run the pipeline on one sentence and return only the generated text.

    The pipeline returns a list with one dict per input, each holding the
    output under ``"generated_text"``. Handing the pipeline object directly
    to Gradio would render that whole structure in the text box, so the
    string is unwrapped here.

    Args:
        frase: Input sentence typed by the user.

    Returns:
        The generated text, or an empty string for empty/blank input.
    """
    if not frase or not frase.strip():
        return ""
    outputs = neutralizer(frase)
    return outputs[0]["generated_text"]


# Create the interface and launch it.
demo = gr.Interface(fn=predict, inputs="text", outputs="text")
demo.launch(share=False)