File size: 2,005 Bytes
e33fddd
68e305f
5c0d878
68e305f
312bc9a
68e305f
 
 
77f42d9
263bcbf
77f42d9
263bcbf
 
 
 
c131f81
263bcbf
 
 
 
 
 
c131f81
263bcbf
 
 
 
c131f81
 
27e7c38
68e305f
e33fddd
 
 
9328f46
 
0719bc2
 
9328f46
 
0719bc2
 
9328f46
 
 
e33fddd
9328f46
 
 
77f42d9
263bcbf
6d3ff00
77f42d9
35bb108
 
263bcbf
35bb108
c131f81
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import html
import random

import gradio as gr
from transformers import AutoTokenizer, pipeline, T5ForConditionalGeneration

# Checkpoint identifier handed to both from_pretrained() calls below; the
# trailing comment keeps the name of an alternative checkpoint.
model_name = "versae/byt5-base-finetuned-modernisa"  # "versae/modernisa-pre"
# Tokenizer and model are loaded once at import time and reused by every
# call to modernisa().
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)

def modernisa(lines=None, file_obj=None):
    """Modernise Spanish Golden Age text, one output line per input line.

    The text comes from ``file_obj`` when it is an uploaded file whose name
    ends in ``.txt`` (its contents then replace ``lines``); otherwise it comes
    from ``lines``. Blank lines are dropped and every remaining line is run
    separately through a ``text2text-generation`` pipeline built from the
    module-level ``model`` and ``tokenizer`` (``max_length=150``).

    Args:
        lines: Newline-separated text to modernise, or ``None``.
        file_obj: Uploaded file object exposing its path as ``.name``, or
            ``None``. Files whose name does not end in ``.txt`` are ignored.

    Returns:
        A ``(html, output_file)`` tuple. ``html`` is the generated lines,
        HTML-escaped and joined with ``<br/>``. ``output_file`` is the path of
        the text file the raw generated lines were written to. With no usable
        input the HTML is empty and an empty output file is still written.
    """
    is_file_valid = False
    if file_obj:
        is_file_valid = file_obj.name.endswith(".txt")
        if is_file_valid:
            # Explicit UTF-8: do not depend on the platform default encoding
            # for accented Spanish text.
            with open(file_obj.name, encoding="utf-8") as file:
                lines = file.read()

    # Pick the output path up front so it is bound even when there is nothing
    # to translate (previously this raised UnboundLocalError on empty input).
    if is_file_valid:
        output_file = f"{file_obj.name.rsplit('.', 1)[0]}_modernized.txt"
    else:
        output_file = "modernized.txt"

    text_lines = []
    if lines:
        text_lines = [line for line in lines.strip().split('\n') if line.strip()]

    generated_text = []
    if text_lines:
        # Only build the pipeline when there is at least one non-blank line.
        text2text_generator = pipeline("text2text-generation", model=model, tokenizer=tokenizer)
        outputs = []
        for line in text_lines:
            outputs.extend(text2text_generator([line], max_length=150))
        generated_text = [output["generated_text"] for output in outputs]

    with open(output_file, "w", encoding="utf-8") as output:
        output.write("\n".join(generated_text))

    # The first return value is rendered as HTML, so escape &, < and > in the
    # generated text; the downloadable file keeps the raw text.
    rendered = "<br/>".join(html.escape(text, quote=False) for text in generated_text)
    return rendered, output_file


# Example inputs for the demo; one entry is picked with random.choice() as the
# default content of the input textbox. The period spelling ("vfano",
# "braços", ...) is presumably intentional sample input — do not normalise it.
samples = [
"""Otra vez, Don Iuan, me dad,
y otras mil vezes los braços.
Otra, y otras mil sean lazos
de nuestra antigua amistad.
Como venis?
Yo me siento
tan alegre, tan vfano,
tan venturoso, tan vano,
que no podrà el pensamiento
encareceros jamàs
las venturas que posseo,
porque el pensamiento creo"""
]

# Build the web UI around modernisa() and start serving it right away.
gr.Interface(
    fn=modernisa,
    # Inputs map positionally onto modernisa(lines, file_obj).
    inputs=[
        gr.inputs.Textbox(
            lines=12,
            label="Enter Spanish Golden Age text",
            default=random.choice(samples),
        ),
        gr.inputs.File(
            file_count="single",
            label="Or upload a plain text file (.txt)",
            type="file",
            optional=True,
        ),
    ],
    # Outputs map positionally onto modernisa()'s (html, output_file) result.
    outputs=[
        gr.outputs.HTML(label="Modern Spanish"),
        gr.outputs.File(label="Download file"),
    ],
).launch(inline=False)