File size: 2,903 Bytes
b366450
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ed90147
 
b366450
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fc362f4
ed90147
b366450
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ed90147
 
b366450
 
 
 
 
 
 
 
 
 
 
 
 
d4e5a7f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
import functools
import re

import gradio as gr
from gradio.mix import Parallel
from transformers import (
    AutoTokenizer,
    AutoModelForSeq2SeqLM,
)

def clean_text(text):
    """Normalize raw article text before it is handed to a tokenizer.

    Steps, in order:
      1. Drop every non-ASCII character (this also removes e.g. Chinese text).
      2. Replace each newline and tab with a space.
      3. Collapse runs of spaces into a single space and trim surrounding
         whitespace.

    Args:
        text: The raw input string.

    Returns:
        The cleaned string.
    """
    # Round-tripping through ASCII with errors="ignore" silently discards any
    # character that cannot be represented in ASCII.
    text = text.encode("ascii", errors="ignore").decode("ascii")
    # One pass covers both newlines and tabs; a separate "\n\n" pass would
    # never match once single newlines have been replaced.
    text = re.sub(r"[\n\t]", " ", text)
    # The final strip() removes any leading/trailing whitespace, including the
    # single space a collapsed leading/trailing run leaves behind.
    return re.sub(" +", " ", text).strip()

modchoice_1 = "chinhon/headline_writer"


@functools.lru_cache(maxsize=None)
def _load_headline_model_1():
    """Load and memoize the tokenizer and seq2seq model for ``modchoice_1``.

    Loading a checkpoint is the most expensive step of a request, so it is
    done at most once per process (lazily, on first use) instead of on every
    call.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    tokenizer = AutoTokenizer.from_pretrained(modchoice_1)
    model = AutoModelForSeq2SeqLM.from_pretrained(modchoice_1)
    return tokenizer, model


def headline_writer1(text):
    """Generate a headline for ``text`` with the ``modchoice_1`` checkpoint.

    Args:
        text: Raw article text; it is normalized with ``clean_text`` first.

    Returns:
        The first decoded sequence produced by the model, or an empty string
        when the cleaned input is empty.
    """
    input_text = clean_text(text)
    if not input_text:
        # Nothing to summarize; skip model loading and generation entirely.
        return ""

    tokenizer_1, model_1 = _load_headline_model_1()

    # The article is the *source* sequence, so it is encoded with a plain
    # tokenizer call. as_target_tokenizer() is intended for encoding labels
    # and is deprecated in recent transformers releases.
    batch = tokenizer_1(
        input_text, truncation=True, padding="longest", return_tensors="pt"
    )

    translated = model_1.generate(**batch)

    summary_1 = tokenizer_1.batch_decode(translated, skip_special_tokens=True)

    return summary_1[0]


# Gradio interface wrapping headline_writer1: one text box in, one unlabeled
# text box out. It is combined with the other interfaces via Parallel(...)
# further down in this file.
headline1 = gr.Interface(
    fn=headline_writer1,
    inputs=gr.Textbox(),
    outputs=gr.Textbox(label=""),
)


modchoice_2 = "chinhon/pegasus-multi_news-headline"


@functools.lru_cache(maxsize=None)
def _load_headline_model_2():
    """Load and memoize the tokenizer and seq2seq model for ``modchoice_2``.

    Loading a checkpoint is the most expensive step of a request, so it is
    done at most once per process (lazily, on first use) instead of on every
    call.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    tokenizer = AutoTokenizer.from_pretrained(modchoice_2)
    model = AutoModelForSeq2SeqLM.from_pretrained(modchoice_2)
    return tokenizer, model


def headline_writer2(text):
    """Generate a headline for ``text`` with the ``modchoice_2`` checkpoint.

    Args:
        text: Raw article text; it is normalized with ``clean_text`` first.

    Returns:
        The first decoded sequence produced by the model, or an empty string
        when the cleaned input is empty.
    """
    input_text = clean_text(text)
    if not input_text:
        # Nothing to summarize; skip model loading and generation entirely.
        return ""

    tokenizer_2, model_2 = _load_headline_model_2()

    # The article is the *source* sequence, so it is encoded with a plain
    # tokenizer call. as_target_tokenizer() is intended for encoding labels
    # and is deprecated in recent transformers releases.
    batch = tokenizer_2(
        input_text, truncation=True, padding="longest", return_tensors="pt"
    )

    translated = model_2.generate(**batch)

    summary_2 = tokenizer_2.batch_decode(translated, skip_special_tokens=True)

    return summary_2[0]


# Gradio interface wrapping headline_writer2. Unlike the other two interfaces
# in this file, its input text box is created with lines=50. It is combined
# with the other interfaces via Parallel(...) further down in this file.
headline2 = gr.Interface(
    fn=headline_writer2,
    inputs=gr.Textbox(lines=50),
    outputs=gr.Textbox(label=""),
)


modchoice_3 = "chinhon/pegasus-newsroom-headline_writer"


@functools.lru_cache(maxsize=None)
def _load_headline_model_3():
    """Load and memoize the tokenizer and seq2seq model for ``modchoice_3``.

    Loading a checkpoint is the most expensive step of a request, so it is
    done at most once per process (lazily, on first use) instead of on every
    call.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    tokenizer = AutoTokenizer.from_pretrained(modchoice_3)
    model = AutoModelForSeq2SeqLM.from_pretrained(modchoice_3)
    return tokenizer, model


def headline_writer3(text):
    """Generate a headline for ``text`` with the ``modchoice_3`` checkpoint.

    Args:
        text: Raw article text; it is normalized with ``clean_text`` first.

    Returns:
        The first decoded sequence produced by the model, or an empty string
        when the cleaned input is empty.
    """
    input_text = clean_text(text)
    if not input_text:
        # Nothing to summarize; skip model loading and generation entirely.
        return ""

    tokenizer_3, model_3 = _load_headline_model_3()

    # The article is the *source* sequence, so it is encoded with a plain
    # tokenizer call. as_target_tokenizer() is intended for encoding labels
    # and is deprecated in recent transformers releases.
    batch = tokenizer_3(
        input_text, truncation=True, padding="longest", return_tensors="pt"
    )

    translated = model_3.generate(**batch)

    # NOTE: decoding does not limit output length, so no max_length is passed
    # here. To cap headline length, pass max_length to generate() instead.
    summary_3 = tokenizer_3.batch_decode(translated, skip_special_tokens=True)

    return summary_3[0]


# Gradio interface wrapping headline_writer3: one text box in, one unlabeled
# text box out. It is combined with the other interfaces via Parallel(...)
# further down in this file.
headline3 = gr.Interface(
    fn=headline_writer3,
    inputs=gr.Textbox(),
    outputs=gr.Textbox(label=""),
)


# Combine the three single-model interfaces into one app and start it. The
# `inputs` given here defines the shared text box for the combined app
# (presumably overriding the per-interface input boxes; confirm against the
# installed gradio version's Parallel implementation).
Parallel(
    headline1,
    headline2,
    headline3,
    title="AI Headlines Generator",
    # Use the top-level gr.Textbox component, as the rest of this file does;
    # the legacy gr.inputs namespace is deprecated.
    inputs=gr.Textbox(
        lines=20,
        label="Paste the first few paragraphs of your story here, and choose from 3 suggested headlines",
    ),
    theme="darkhuggingface",
).launch(enable_queue=True)