lossLopes committed on
Commit
135d3c0
·
1 Parent(s): 036fae7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +63 -1
app.py CHANGED
@@ -1,4 +1,4 @@
1
- import gradio as gr
2
  import nltk
3
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
4
 
@@ -56,3 +56,65 @@ iface = gr.Interface(
56
 
57
  # Launch the Gradio Interface
58
  iface.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """import gradio as gr
2
  import nltk
3
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
4
 
 
56
 
57
  # Launch the Gradio Interface
58
  iface.launch()
59
+ """
60
+ import gradio as gr
61
+ import nltk
62
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
63
+
64
# Fetch the Punkt sentence-tokenizer data used by nltk.tokenize.sent_tokenize.
# Newer NLTK releases load the "punkt_tab" resource instead of the pickled
# "punkt" models, so both are requested; nltk.download() reports a failed
# download via its return value instead of raising, so the extra call is
# harmless where the resource is not needed.
nltk.download('punkt')
nltk.download('punkt_tab')
65
+
66
def fragment_text(text, tokenizer):
    """Split ``text`` into sentence-aligned chunks that fit the tokenizer.

    The text is split into sentences with NLTK, and consecutive sentences
    are packed greedily into a chunk until adding the next sentence would
    push the chunk's token count above
    ``tokenizer.max_len_single_sentence``.

    Args:
        text: The raw text to split.
        tokenizer: An object exposing ``tokenize(str)`` and the
            ``max_len_single_sentence`` attribute.

    Returns:
        A list of non-empty chunk strings, in document order. The list is
        empty when ``text`` contains no sentences. A single sentence that is
        by itself longer than the limit is still returned as its own
        (oversized) chunk; callers must truncate or otherwise handle it.
    """
    sentences = nltk.tokenize.sent_tokenize(text)
    max_len = tokenizer.max_len_single_sentence

    chunks = []
    current_sentences = []
    # Running size of the current chunk measured in tokens, so it is
    # comparable with max_len (which is a token budget, not a character one).
    current_tokens = 0

    for sentence in sentences:
        sentence_tokens = len(tokenizer.tokenize(sentence))

        # Flush only when there is something to flush; this avoids emitting
        # an empty chunk when the very first sentence exceeds the budget.
        if current_sentences and current_tokens + sentence_tokens > max_len:
            chunks.append(" ".join(current_sentences).strip())
            current_sentences = []
            current_tokens = 0

        current_sentences.append(sentence)
        current_tokens += sentence_tokens

    if current_sentences:
        chunks.append(" ".join(current_sentences).strip())

    return chunks
88
+
89
+
90
def summarize_text(text, tokenizer, model):
    """Summarize ``text`` chunk by chunk and join the partial summaries.

    The text is split with ``fragment_text``; each chunk is encoded,
    passed to ``model.generate`` and decoded, and the per-chunk summaries
    are concatenated with single spaces.

    Args:
        text: The raw text to summarize.
        tokenizer: Tokenizer used both to encode chunks and decode outputs.
        model: Sequence-to-sequence model exposing ``generate``.

    Returns:
        The joined summary string; empty when there is nothing to summarize.
    """
    chunks = fragment_text(text, tokenizer)

    summaries = []
    for chunk in chunks:
        # Nothing to summarize in an empty chunk; skip the model call.
        if not chunk:
            continue

        # truncation=True guards against a single sentence that is longer
        # than the model's maximum input length.
        encoded = tokenizer(chunk, return_tensors='pt', truncation=True)
        generated = model.generate(**encoded)

        # One input sequence was encoded, so the first row is the summary.
        summary = tokenizer.decode(generated[0], skip_special_tokens=True)
        summaries.append(summary)

    return " ".join(summaries)
102
+
103
# Identifier of the pretrained checkpoint; the tokenizer and the seq2seq
# model are both loaded from it once, at import time, and shared by every
# request handled by the app.
checkpoint = "tclopess/bart_samsum"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
106
+
107
def summarize_and_display(text):
    """Return the summary of ``text`` using the module-level tokenizer and model."""
    return summarize_text(text, tokenizer, model)
110
+
111
# Build the web UI: one input textbox, one output textbox, wired to
# summarize_and_display.
#
# live=True is intentionally not set: in live mode the interface re-runs the
# function on every input change and does not show a submit button, which
# contradicts the title/description below and would run the summarization
# model on every edit. NOTE(review): the default button is labelled
# "Submit"; confirm whether the description text should be updated to match.
iface = gr.Interface(
    fn=summarize_and_display,
    inputs=gr.Textbox(label="Enter text to summarize:"),
    outputs=gr.Textbox(label="Summary:"),
    title="Text Summarizer with Button",
    description="Click the 'Summarize' button to generate a summary of the text.",
)

# Start the app and request a public share link.
iface.launch(share=True)