|
import random |
|
import gradio as gr |
|
from transformers import AutoTokenizer, pipeline, T5ForConditionalGeneration |
|
|
|
# Checkpoint identifier handed to both ``from_pretrained`` calls below.
model_name = "versae/byt5-base-finetuned-modernisa"

# Tokenizer and model are loaded once at import time; ``modernisa`` reuses
# these module-level objects on every call instead of reloading them.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)
|
|
|
def modernisa(lines=None, file_obj=None):
    """Modernize text line by line and save the result to a text file.

    The text comes from ``file_obj`` when its name ends in ``.txt``;
    otherwise ``lines`` is used. Every non-blank line is sent on its own
    through a ``text2text-generation`` pipeline built from the module-level
    ``model`` and ``tokenizer`` (generation capped at ``max_length=150``).

    Args:
        lines: Text to modernize, one unit per line. Ignored when a valid
            ``.txt`` upload is supplied.
        file_obj: Optional uploaded file object; only its ``name`` attribute
            (a filesystem path) is read. Uploads whose name does not end in
            ``.txt`` are ignored.

    Returns:
        A ``(html, output_file)`` tuple. ``html`` holds the generated lines,
        HTML-escaped and joined with ``<br/>``. ``output_file`` is the path of
        a UTF-8 text file containing the same lines joined with newlines:
        ``<upload stem>_modernized.txt`` next to a valid upload, otherwise
        ``modernized.txt`` relative to the working directory.
    """
    # Imported locally so the function does not depend on a file-level import.
    import html

    is_file_valid = bool(file_obj) and file_obj.name.endswith(".txt")
    if is_file_valid:
        # The texts are full of accented characters; decode as UTF-8
        # explicitly rather than trusting the platform default encoding.
        with open(file_obj.name, encoding="utf-8") as file:
            lines = file.read()

    generated_text = []
    non_blank_lines = [
        line for line in (lines or "").strip().split("\n") if line.strip()
    ]
    # Only build the pipeline when there is something to translate.
    if non_blank_lines:
        text2text_generator = pipeline(
            "text2text-generation", model=model, tokenizer=tokenizer
        )
        for line in non_blank_lines:
            outputs = text2text_generator([line], max_length=150)
            generated_text.extend(output["generated_text"] for output in outputs)

    if is_file_valid:
        output_file = f"{file_obj.name.rsplit('.', 1)[0]}_modernized.txt"
    else:
        output_file = "modernized.txt"

    with open(output_file, "w", encoding="utf-8") as output:
        output.write("\n".join(generated_text))

    # The first return value is rendered as HTML, so escape markup characters
    # in the generated text; only the <br/> separators are real tags.
    rendered = "<br/>".join(
        html.escape(text, quote=False) for text in generated_text
    )
    return rendered, output_file
|
|
|
|
|
# Example inputs; one of them is picked at random as the default content of
# the text box. Each sample is a single newline-separated string.
samples = [
    "Otra vez, Don Iuan, me dad,\n"
    "y otras mil vezes los braços.\n"
    "Otra, y otras mil sean lazos\n"
    "de nuestra antigua amistad.\n"
    "Como venis?\n"
    "Yo me siento\n"
    "tan alegre, tan vfano,\n"
    "tan venturoso, tan vano,\n"
    "que no podrà el pensamiento\n"
    "encareceros jamàs\n"
    "las venturas que posseo,\n"
    "porque el pensamiento creo"
]
|
|
|
# Input widgets: a text box pre-filled with a randomly chosen sample, and an
# optional single-file upload as an alternative source of text.
input_widgets = [
    gr.inputs.Textbox(
        lines=12,
        label="Enter Spanish Golden Age text",
        default=random.choice(samples),
    ),
    gr.inputs.File(
        file_count="single",
        label="Or upload a plain text file (.txt)",
        type="file",
        optional=True,
    ),
]

# Output widgets, in the order of the tuple returned by ``modernisa``:
# the HTML rendering first, then the downloadable file.
output_widgets = [
    gr.outputs.HTML(label="Modern Spanish"),
    gr.outputs.File(label="Download file"),
]

demo = gr.Interface(fn=modernisa, inputs=input_widgets, outputs=output_widgets)
demo.launch(inline=False)
|
|