lossLopes committed on
Commit f521d1e · 1 Parent(s): 784e681

Update app.py

Files changed (1)
  1. app.py +53 -15
app.py CHANGED
@@ -60,38 +60,76 @@ iface = gr.Interface(
 
 iface.launch(share=True)
 """
+
 import gradio as gr
 import nltk
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
 
 nltk.download('punkt')
 
+
 def fragment_text(text, tokenizer):
-    # Your existing code for text fragmentation
+    sentences = nltk.tokenize.sent_tokenize(text)
+    max_len = tokenizer.max_len_single_sentence
+
+    chunks = []
+    chunk = ""
+    count = -1
+
+    for sentence in sentences:
+        count += 1
+        combined_length = len(tokenizer.tokenize(sentence)) + len(chunk)
+
+        if combined_length <= max_len:
+            chunk += sentence + " "
+        else:
+            chunks.append(chunk.strip())
+            chunk = sentence + " "
+
+    if chunk != "":
+        chunks.append(chunk.strip())
+
+    return chunks
+
 
 def summarize_text(text, tokenizer, model):
-    # Your existing code for text summarization
+    chunks = fragment_text(text, tokenizer)
+
+    summaries = []
+    for chunk in chunks:
+        input = tokenizer(chunk, return_tensors='pt')
+        output = model.generate(**input)
+        summary = tokenizer.decode(*output, skip_special_tokens=True)
+        summaries.append(summary)
+
+    final_summary = " ".join(summaries)
+    return final_summary
+
 
 checkpoint = "tclopess/bart_samsum"
 tokenizer = AutoTokenizer.from_pretrained(checkpoint)
 model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
 
-def summarize_with_button(text):
-    return summarize_text(text, tokenizer, model)
+
+def summarize_and_display(text):
+    summary = summarize_text(text, tokenizer, model)
+    return summary
+
+
+def start_summarization(text):
+    summary = summarize_and_display(text)
+    gr.update(summary)
+
 
 iface = gr.Interface(
-    fn=summarize_with_button,
-    inputs=gr.Textbox(label="Enter text to summarize:"),
+    fn=start_summarization,
+    inputs=[
+        gr.Textbox(label="Enter text to summarize:"),
+        gr.Button(label="Summarize"),
+    ],
     outputs=gr.Textbox(label="Summary:"),
+    live=True,
     title="Text Summarizer with Button",
 )
 
-def summarize_text_button():
-    iface.launch(share=True)
-
-gr.Interface(
-    summarize_text_button,
-    inputs=None,
-    outputs=None,
-    title="Click to Summarize"
-).launch()
+iface.launch(share=True)
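
A few review notes on the new code. In fragment_text, combined_length adds a token count (len(tokenizer.tokenize(sentence))) to a character count (len(chunk)), so the comparison against max_len mixes units and a chunk can still overflow the model's input limit; count is also incremented but never used. A minimal token-consistent sketch, using the same NLTK and Hugging Face tokenizer APIs as the commit, with behavior otherwise unchanged:

```python
import nltk


def fragment_text(text, tokenizer):
    """Split text into chunks whose *token* counts fit the model's input limit."""
    sentences = nltk.tokenize.sent_tokenize(text)
    max_len = tokenizer.max_len_single_sentence

    chunks = []
    chunk = ""
    chunk_tokens = 0  # running token count, same unit as max_len

    for sentence in sentences:
        sentence_tokens = len(tokenizer.tokenize(sentence))
        if chunk_tokens + sentence_tokens <= max_len:  # tokens vs. tokens
            chunk += sentence + " "
            chunk_tokens += sentence_tokens
        else:
            if chunk:  # avoid appending an empty chunk if one sentence is oversize
                chunks.append(chunk.strip())
            chunk = sentence + " "
            chunk_tokens = sentence_tokens

    if chunk.strip():
        chunks.append(chunk.strip())

    return chunks
```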
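In summarize_text, input shadows the Python builtin, and tokenizer.decode(*output, ...) only works because generate returns a batch of exactly one sequence; indexing output_ids[0] makes that explicit. A sketch with those points tightened, plus truncation=True as a guard in case a chunk still exceeds the model's limit (generation settings are otherwise left at their defaults):

```python
def summarize_text(text, tokenizer, model):
    chunks = fragment_text(text, tokenizer)

    summaries = []
    for chunk in chunks:
        # 'inputs' avoids shadowing the builtin input()
        inputs = tokenizer(chunk, return_tensors="pt", truncation=True)
        output_ids = model.generate(**inputs)  # shape (1, seq_len) for one chunk
        # Index the single sequence explicitly instead of unpacking with *output
        summary = tokenizer.decode(output_ids[0], skip_special_tokens=True)
        summaries.append(summary)

    return " ".join(summaries)
```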
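Finally, the interface wiring as committed will leave the Summary box empty: start_summarization builds gr.update(summary) but discards it and returns None, so nothing reaches the output Textbox. gr.Button is also not an input component that feeds a value to fn (gr.Interface renders its own submit button, and Button takes value=, not label=), and live=True re-runs fn on every input change, which is expensive for a seq2seq checkpoint. A minimal working wiring under those assumptions, returning the summary directly:

```python
def summarize_and_display(text):
    # Return the summary so Gradio can render it in the output Textbox.
    return summarize_text(text, tokenizer, model)


iface = gr.Interface(
    fn=summarize_and_display,
    inputs=gr.Textbox(label="Enter text to summarize:"),
    outputs=gr.Textbox(label="Summary:"),
    title="Text Summarizer with Button",
)

iface.launch(share=True)
```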