"""Gradio app that runs text through a fine-tuned ByT5 model.

The UI labels describe the task as turning Spanish Golden Age text into
modern Spanish. Text can be typed into a textbox or uploaded as a ``.txt``
file; the result is shown as HTML and offered as a downloadable text file.
"""
import html
import random

import gradio as gr
from transformers import AutoTokenizer, pipeline, T5ForConditionalGeneration

model_name = "versae/byt5-base-finetuned-modernisa"  # "versae/modernisa-pre"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)
# Built once at import time: constructing the pipeline on every request
# repeats setup work that does not depend on the request.
text2text_generator = pipeline(
    "text2text-generation", model=model, tokenizer=tokenizer
)


def modernisa(lines=None, file_obj=None):
    """Run the model over each non-blank input line.

    Args:
        lines: Text from the textbox; one model call is made per non-blank
            line. Ignored when a valid ``.txt`` upload is supplied.
        file_obj: Optional uploaded file object exposing a ``name`` path.
            It is only used when that path ends with ``.txt``; any other
            upload is ignored and ``lines`` is used instead.

    Returns:
        A ``(html_text, output_path)`` tuple: the generated lines escaped
        and joined with ``<br/>`` for the HTML output component, and the
        path of the plain-text file holding the same lines joined with
        newlines.
    """
    is_file_valid = bool(file_obj) and file_obj.name.endswith(".txt")
    if is_file_valid:
        # Explicit UTF-8 so accented characters do not depend on the
        # process locale. NOTE(review): uploads in another encoding will
        # raise UnicodeDecodeError; confirm that is acceptable.
        with open(file_obj.name, encoding="utf-8") as file:
            lines = file.read()

    generated_text = []
    if lines:
        non_blank = [
            line for line in lines.strip().split("\n") if line.strip()
        ]
        for line in non_blank:
            # One call per line, as before, so each line is generated
            # independently with its own max_length budget.
            outputs = text2text_generator([line], max_length=150)
            generated_text.extend(
                output["generated_text"] for output in outputs
            )

    if is_file_valid:
        output_file = f"{file_obj.name.rsplit('.', 1)[0]}_modernized.txt"
    else:
        output_file = "modernized.txt"
    with open(output_file, "w", encoding="utf-8") as output:
        output.write("\n".join(generated_text))

    # The first output is rendered as HTML, where bare newlines collapse,
    # so lines are separated with <br/>. The text is escaped because it is
    # derived from user input and must not be interpreted as markup.
    rendered = "<br/>".join(html.escape(text) for text in generated_text)
    return rendered, output_file


# NOTE(review): the sample is a single line here; confirm whether the verse
# was meant to contain line breaks (the function splits its input on "\n").
samples = [
    (
        "Otra vez, Don Iuan, me dad, y otras mil vezes los braços. "
        "Otra, y otras mil sean lazos de nuestra antigua amistad. "
        "Como venis? Yo me siento tan alegre, tan vfano, tan venturoso, "
        "tan vano, que no podrà el pensamiento encareceros jamàs "
        "las venturas que posseo, porque el pensamiento creo"
    ),
]

gr.Interface(
    fn=modernisa,
    inputs=[
        gr.inputs.Textbox(
            lines=12,
            label="Enter Spanish Golden Age text",
            default=random.choice(samples),
        ),
        gr.inputs.File(
            file_count="single",
            label="Or upload a plain text file (.txt)",
            type="file",
            optional=True,
        ),
    ],
    outputs=[
        gr.outputs.HTML(label="Modern Spanish"),
        gr.outputs.File(label="Download file"),
    ],
).launch(inline=False)