|
"""import gradio as gr |
|
import nltk |
|
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM |
|
|
|
nltk.download('punkt') |
|
|
|
def fragment_text(text, tokenizer): |
|
sentences = nltk.tokenize.sent_tokenize(text) |
|
max_len = tokenizer.max_len_single_sentence |
|
|
|
chunks = [] |
|
chunk = "" |
|
count = -1 |
|
|
|
for sentence in sentences: |
|
count += 1 |
|
combined_length = len(tokenizer.tokenize(sentence)) + len(chunk) |
|
|
|
if combined_length <= max_len: |
|
chunk += sentence + " " |
|
else: |
|
chunks.append(chunk.strip()) |
|
chunk = sentence + " " |
|
|
|
if chunk != "": |
|
chunks.append(chunk.strip()) |
|
|
|
return chunks |
|
|
|
|
|
def summarize_text(text, tokenizer, model): |
|
chunks = fragment_text(text, tokenizer) |
|
|
|
summaries = [] |
|
for chunk in chunks: |
|
input = tokenizer(chunk, return_tensors='pt') |
|
output = model.generate(**input) |
|
summary = tokenizer.decode(*output, skip_special_tokens=True) |
|
summaries.append(summary) |
|
|
|
final_summary = " ".join(summaries) |
|
return final_summary |
|
|
|
checkpoint = "tclopess/bart_samsum" |
|
tokenizer = AutoTokenizer.from_pretrained(checkpoint) |
|
model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint) |
|
|
|
def summarize_and_display(text): |
|
summary = summarize_text(text, tokenizer, model) |
|
return summary |
|
|
|
iface = gr.Interface( |
|
fn=summarize_and_display, |
|
inputs=gr.Textbox(label="Enter text to summarize:"), |
|
outputs=gr.Textbox(label="Summary:"), |
|
live=False, # Set live to False to add a button |
|
button="Summarize", # Add a button with the label "Summarize" |
|
title="Text Summarizer with Button", |
|
) |
|
|
|
iface.launch(share=True) |
|
""" |
|
|
|
import gradio as gr |
|
import nltk |
|
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM |
|
|
|
# Sentence-splitting data used by nltk.tokenize.sent_tokenize. Newer NLTK
# releases load the pickle-free "punkt_tab" resource instead of the legacy
# "punkt" models, so both are fetched to work with either generation.
for _nltk_resource in ("punkt", "punkt_tab"):
    nltk.download(_nltk_resource)
|
|
|
|
|
def fragment_text(text, tokenizer):
    """Split *text* into sentence-aligned chunks sized for the tokenizer.

    The text is split into sentences with NLTK, and the sentences are packed
    greedily, in order, into chunks. A chunk is closed as soon as adding the
    next sentence would push the summed per-sentence token count above
    ``tokenizer.max_len_single_sentence``.

    Args:
        text: Raw text to split.
        tokenizer: Object exposing ``max_len_single_sentence`` (token budget)
            and ``tokenize(str)`` (returns a sequence of tokens).

    Returns:
        A list of non-empty strings, each made of whole sentences joined by a
        single space. The list is empty when no sentence is found.

    Note:
        Sentences are never split. A single sentence whose token count is
        above the budget is returned as its own (oversized) chunk, so the
        caller is responsible for truncating it if required.
    """
    sentences = nltk.tokenize.sent_tokenize(text)
    max_len = tokenizer.max_len_single_sentence

    chunks = []
    current_sentences = []
    current_tokens = 0

    for sentence in sentences:
        sentence_tokens = len(tokenizer.tokenize(sentence))

        # Budget in tokens on both sides of the comparison. Only flush when
        # the current chunk holds something, so no empty chunk is emitted
        # when the very first sentence is already over the budget.
        if current_sentences and current_tokens + sentence_tokens > max_len:
            chunks.append(" ".join(current_sentences))
            current_sentences = []
            current_tokens = 0

        current_sentences.append(sentence)
        current_tokens += sentence_tokens

    if current_sentences:
        chunks.append(" ".join(current_sentences))

    return chunks
|
|
|
|
|
def summarize_text(text, tokenizer, model):
    """Summarize *text* chunk by chunk and join the partial summaries.

    The text is split with ``fragment_text``; each chunk is encoded, passed
    to ``model.generate`` and decoded, and the resulting summaries are
    concatenated in order.

    Args:
        text: Raw text to summarize.
        tokenizer: Tokenizer used both to size the chunks and to encode and
            decode them; called with ``return_tensors="pt"``.
        model: Object exposing ``generate(**encoded_inputs)``.

    Returns:
        The per-chunk summaries joined by single spaces, or an empty string
        when there is nothing to summarize.
    """
    summaries = []
    for chunk in fragment_text(text, tokenizer):
        if not chunk:
            # Nothing to summarize; avoid running the model on empty input.
            continue

        # truncation=True caps the encoded input at the tokenizer's maximum
        # length, so a chunk holding one over-long sentence cannot exceed
        # what the model accepts.
        encoded = tokenizer(chunk, return_tensors='pt', truncation=True)
        generated = model.generate(**encoded)

        # One chunk is encoded per call, so the batch has a single row.
        summary = tokenizer.decode(generated[0], skip_special_tokens=True)
        summaries.append(summary)

    return " ".join(summaries)
|
|
|
|
|
# Identifier of the pretrained checkpoint that provides both the tokenizer
# and the sequence-to-sequence model (presumably a BART model fine-tuned on
# SAMSum, judging by its name -- TODO confirm).
checkpoint = "tclopess/bart_samsum"

# Loaded once at import time and shared by every request.
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
|
|
|
|
|
def summarize_and_display(text, button_click_event=None, interface_state=None):
    """Gradio callback: return the summary of *text*.

    Args:
        text: Text entered by the user.
        button_click_event: Unused; kept (now optional) only so existing
            three-argument callers keep working.
        interface_state: Unused; kept (now optional) for the same reason.

    Returns:
        The summary produced by ``summarize_text`` with the module-level
        ``tokenizer`` and ``model``.
    """
    return summarize_text(text, tokenizer, model)


# Build the UI with an explicit button. The model runs only when
# "Summarize" is clicked, instead of on every input change (live mode),
# and no output-only components are wired in as inputs.
with gr.Blocks(title="Text Summarizer with Button") as iface:
    gr.Markdown("# Text Summarizer with Button")
    text_input = gr.Textbox(label="Enter text to summarize:")
    summarize_button = gr.Button("Summarize")
    summary_output = gr.Textbox(label="Summary:")

    summarize_button.click(
        fn=summarize_and_display,
        inputs=text_input,
        outputs=summary_output,
    )

iface.launch(share=True)
|
|