File size: 3,391 Bytes
784e681 5ca455e 784e681 f521d1e 784e681 f521d1e 784e681 f521d1e 784e681 f521d1e 784e681 f521d1e 7d64f09 f521d1e 784e681 2595b3e f521d1e 2595b3e 3a8d738 f521d1e 784e681 f521d1e 784e681 f521d1e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 |
"""import gradio as gr
import nltk
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
nltk.download('punkt')
def fragment_text(text, tokenizer):
sentences = nltk.tokenize.sent_tokenize(text)
max_len = tokenizer.max_len_single_sentence
chunks = []
chunk = ""
count = -1
for sentence in sentences:
count += 1
combined_length = len(tokenizer.tokenize(sentence)) + len(chunk)
if combined_length <= max_len:
chunk += sentence + " "
else:
chunks.append(chunk.strip())
chunk = sentence + " "
if chunk != "":
chunks.append(chunk.strip())
return chunks
def summarize_text(text, tokenizer, model):
chunks = fragment_text(text, tokenizer)
summaries = []
for chunk in chunks:
input = tokenizer(chunk, return_tensors='pt')
output = model.generate(**input)
summary = tokenizer.decode(*output, skip_special_tokens=True)
summaries.append(summary)
final_summary = " ".join(summaries)
return final_summary
checkpoint = "tclopess/bart_samsum"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
def summarize_and_display(text):
summary = summarize_text(text, tokenizer, model)
return summary
iface = gr.Interface(
fn=summarize_and_display,
inputs=gr.Textbox(label="Enter text to summarize:"),
outputs=gr.Textbox(label="Summary:"),
live=False, # Set live to False to add a button
button="Summarize", # Add a button with the label "Summarize"
title="Text Summarizer with Button",
)
iface.launch(share=True)
"""
import gradio as gr
import nltk
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
# Fetch the sentence-tokenizer data needed by nltk.tokenize.sent_tokenize.
# Older NLTK releases read the "punkt" package, while NLTK >= 3.8.2 reads
# "punkt_tab" instead, so both are requested to work across versions.
for _punkt_resource in ("punkt", "punkt_tab"):
    nltk.download(_punkt_resource)
def fragment_text(text, tokenizer):
    """Split ``text`` into sentence-aligned chunks sized for the tokenizer.

    Sentences found by ``nltk.tokenize.sent_tokenize`` are packed greedily,
    in order, into chunks whose summed per-sentence token counts stay within
    ``tokenizer.max_len_single_sentence``.

    Args:
        text: The raw text to split.
        tokenizer: Object exposing ``tokenize(str)`` and the
            ``max_len_single_sentence`` attribute.

    Returns:
        A list of chunk strings in their original order. The list is empty
        when no sentences are found. A single sentence that alone exceeds
        the limit is returned as its own (oversized) chunk.
    """
    max_len = tokenizer.max_len_single_sentence

    chunks = []
    current_sentences = []  # sentences of the chunk being built
    current_tokens = 0      # token count already used by current_sentences

    for sentence in nltk.tokenize.sent_tokenize(text):
        sentence_tokens = len(tokenizer.tokenize(sentence))

        # Compare tokens with tokens: the budget is expressed in tokens, so
        # the running size of the chunk must be tracked in tokens as well
        # (not in characters). Only flush a non-empty chunk so that an
        # oversized first sentence never produces an empty chunk.
        if current_sentences and current_tokens + sentence_tokens > max_len:
            chunks.append(" ".join(current_sentences).strip())
            current_sentences = []
            current_tokens = 0

        current_sentences.append(sentence)
        current_tokens += sentence_tokens

    if current_sentences:
        chunks.append(" ".join(current_sentences).strip())

    return chunks
def summarize_text(text, tokenizer, model):
    """Summarize ``text`` chunk by chunk and join the partial summaries.

    The text is split with ``fragment_text``; each chunk is encoded, passed
    to ``model.generate`` and decoded, and the per-chunk summaries are joined
    with single spaces.

    Args:
        text: The raw text to summarize.
        tokenizer: Callable tokenizer that also provides ``decode``.
        model: Model exposing ``generate(**encoded_inputs)``.

    Returns:
        The concatenated summary string; ``""`` when there are no chunks.
    """
    partial_summaries = []
    for chunk in fragment_text(text, tokenizer):
        # truncation=True guards against a chunk (e.g. one very long
        # sentence) that is still longer than the model's maximum input.
        encoded = tokenizer(chunk, return_tensors='pt', truncation=True)
        generated = model.generate(**encoded)
        # One chunk is encoded per call, so the generated batch holds a
        # single sequence; decode that first (only) row.
        partial_summaries.append(
            tokenizer.decode(generated[0], skip_special_tokens=True)
        )
    return " ".join(partial_summaries)
# Checkpoint identifier handed to both from_pretrained calls below.
checkpoint = "tclopess/bart_samsum"
# Tokenizer and model are created once at module import and reused for every
# request; summarize_and_display passes them on to summarize_text.
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
def summarize_and_display(text, button_click_event=None, interface_state=None):
    """Return the summary of ``text`` for display in the UI.

    Args:
        text: The text entered by the user.
        button_click_event: Unused. Kept (now optional) only so callers that
            still pass three positional arguments keep working.
        interface_state: Unused. Kept for the same reason.

    Returns:
        The summary produced by ``summarize_text`` with the module-level
        ``tokenizer`` and ``model``.
    """
    return summarize_text(text, tokenizer, model)


# Build the UI with Blocks so the "Summarize" button is a real trigger:
# summarization runs only when the button is clicked, rather than on every
# edit of the textbox (live mode), and no output/button components have to be
# misused as dummy inputs to the callback.
with gr.Blocks(title="Text Summarizer with Button") as iface:
    gr.Markdown("# Text Summarizer with Button")
    text_input = gr.Textbox(label="Enter text to summarize:")
    summarize_button = gr.Button("Summarize")
    summary_output = gr.Textbox(label="Summary:")
    summarize_button.click(
        fn=summarize_and_display,
        inputs=text_input,
        outputs=summary_output,
    )

iface.launch(share=True)
|