File size: 3,391 Bytes
784e681
5ca455e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
784e681
f521d1e
784e681
 
 
 
 
 
f521d1e
784e681
f521d1e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
784e681
 
f521d1e
 
 
 
 
 
 
 
 
 
 
 
784e681
 
 
 
 
f521d1e
7d64f09
f521d1e
 
 
 
784e681
2595b3e
f521d1e
 
2595b3e
3a8d738
f521d1e
784e681
f521d1e
784e681
 
 
f521d1e
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
"""import gradio as gr
import nltk
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

nltk.download('punkt')

def fragment_text(text, tokenizer):
    sentences = nltk.tokenize.sent_tokenize(text)
    max_len = tokenizer.max_len_single_sentence

    chunks = []
    chunk = ""
    count = -1

    for sentence in sentences:
        count += 1
        combined_length = len(tokenizer.tokenize(sentence)) + len(chunk)

        if combined_length <= max_len:
            chunk += sentence + " "
        else:
            chunks.append(chunk.strip())
            chunk = sentence + " "

    if chunk != "":
        chunks.append(chunk.strip())

    return chunks


def summarize_text(text, tokenizer, model):
    chunks = fragment_text(text, tokenizer)

    summaries = []
    for chunk in chunks:
        input = tokenizer(chunk, return_tensors='pt')
        output = model.generate(**input)
        summary = tokenizer.decode(*output, skip_special_tokens=True)
        summaries.append(summary)

    final_summary = " ".join(summaries)
    return final_summary

checkpoint = "tclopess/bart_samsum"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)

def summarize_and_display(text):
    summary = summarize_text(text, tokenizer, model)
    return summary

iface = gr.Interface(
    fn=summarize_and_display,
    inputs=gr.Textbox(label="Enter text to summarize:"),
    outputs=gr.Textbox(label="Summary:"),
    live=False,  # Set live to False to add a button
    button="Summarize",  # Add a button with the label "Summarize"
    title="Text Summarizer with Button",
)

iface.launch(share=True)
"""

import gradio as gr
import nltk
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Fetch the Punkt model required by nltk.tokenize.sent_tokenize() below.
# Runs at import time; a no-op after the first successful download.
nltk.download('punkt')


def fragment_text(text, tokenizer):
    """Split *text* into sentence-aligned chunks that fit the tokenizer's limit.

    Sentences are grouped greedily: each is appended to the current chunk
    while the chunk's *token* count stays within
    ``tokenizer.max_len_single_sentence``; otherwise a new chunk starts.

    Args:
        text: The input text to split.
        tokenizer: A Hugging Face tokenizer providing ``tokenize()`` and
            ``max_len_single_sentence``.

    Returns:
        list[str]: Non-empty, whitespace-stripped chunks. A single sentence
        longer than the limit still becomes its own (oversized) chunk.
    """
    sentences = nltk.tokenize.sent_tokenize(text)
    max_len = tokenizer.max_len_single_sentence

    chunks = []
    chunk = ""
    chunk_tokens = 0  # running *token* count of the current chunk

    for sentence in sentences:
        sentence_tokens = len(tokenizer.tokenize(sentence))
        # BUG FIX: the original compared token count + len(chunk), mixing a
        # token count with a *character* count, so chunks were cut far
        # earlier than necessary. Compare tokens against tokens instead.
        if chunk_tokens + sentence_tokens <= max_len:
            chunk += sentence + " "
            chunk_tokens += sentence_tokens
        else:
            # Guard against emitting an empty chunk when the very first
            # sentence already exceeds the limit (original appended "").
            if chunk:
                chunks.append(chunk.strip())
            chunk = sentence + " "
            chunk_tokens = sentence_tokens

    if chunk != "":
        chunks.append(chunk.strip())

    return chunks


def summarize_text(text, tokenizer, model):
    """Summarize *text* chunk-by-chunk and join the partial summaries.

    Args:
        text: The input text to summarize.
        tokenizer: The model's tokenizer.
        model: A seq2seq model exposing ``generate()``.

    Returns:
        str: The chunk summaries joined with single spaces.
    """
    chunks = fragment_text(text, tokenizer)

    summaries = []
    for chunk in chunks:
        # Renamed from `input`, which shadowed the builtin. `truncation=True`
        # guards against a single over-long sentence slipping past
        # fragment_text and overflowing the model's position embeddings.
        inputs = tokenizer(chunk, return_tensors='pt', truncation=True)
        output = model.generate(**inputs)
        # Decode the first (and only) sequence of the batch explicitly
        # rather than star-unpacking the returned tensor.
        summary = tokenizer.decode(output[0], skip_special_tokens=True)
        summaries.append(summary)

    final_summary = " ".join(summaries)
    return final_summary


# Checkpoint: a BART model fine-tuned for dialogue summarization
# (name suggests the SAMSum dataset — TODO confirm on the model card).
# Loaded once at import time; downloads weights on first run.
checkpoint = "tclopess/bart_samsum"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)


def summarize_and_display(text, button_click_event, interface_state):
    """Gradio callback: return the summary of *text*.

    The two trailing arguments receive the values of the extra Label/Button
    input components declared on the Interface; they are ignored here.
    """
    return summarize_text(text, tokenizer, model)


# Wire the summarizer into a Gradio UI. Three input components map
# positionally onto summarize_and_display's three parameters.
iface = gr.Interface(
    fn=summarize_and_display,
    inputs=[
        gr.Textbox(label="Enter text to summarize:"),
        # NOTE(review): gr.Label is normally an *output* component; as an
        # input it likely just feeds None into the callback — verify.
        gr.Label(label="Summarize"),
        # NOTE(review): gr.Button carries no data and recent Gradio versions
        # reject it inside `inputs`; Interface already renders its own
        # submit button — confirm this component is needed at all.
        gr.Button("Summarize"),  # Provide the label directly here
    ],
    outputs=gr.Textbox(label="Summary:"),
    # NOTE(review): live=True re-runs the model on every input change, which
    # is expensive for a seq2seq model and contradicts the button-driven
    # design in the commented-out draft above — confirm intent.
    live=True,
    title="Text Summarizer with Button",
)

# share=True additionally exposes a temporary public gradio.live URL.
iface.launch(share=True)