|
|
|
import gradio as gr |
|
import transformers |
|
from transformers import pipeline |
|
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer |
|
import torch |
|
|
|
|
|
# One-line summariser: a T5 encoder-decoder fine-tuned to emit a single
# sentence per input document.
model_name = "snrspeaks/t5-one-line-summary"

tokenizer = AutoTokenizer.from_pretrained(model_name)

model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
|
|
|
|
|
# Long-document summariser: a long-t5 checkpoint that can attend over
# inputs of up to 16k tokens. Use the first GPU when one is available,
# otherwise run on CPU (device index -1).
_device = 0 if torch.cuda.is_available() else -1

summarizer = pipeline(
    "summarization",
    "pszemraj/long-t5-tglobal-base-16384-book-summary",
    device=_device,
)
|
|
|
# Generation settings passed to the long-document summarization pipeline.
params = dict(
    num_beams=4,
    max_length=256,
    min_length=8,
    length_penalty=0.3,
    repetition_penalty=3.5,
    no_repeat_ngram_size=3,
    encoder_no_repeat_ngram_size=3,
    early_stopping=True,
)
|
|
|
|
|
|
|
def summarize(text):
    """Return a two-part summary of *text*.

    The first part is a one-sentence summary produced by the
    t5-one-line-summary model; after a blank line follows a longer
    abstractive summary from the long-t5 book-summary pipeline.

    Parameters
    ----------
    text : str
        The document to summarise.

    Returns
    -------
    str
        One-line summary, a blank line, then the longer summary.
    """
    # The t5-one-line-summary checkpoint expects a "summarize: " task prefix.
    input_ids = tokenizer.encode(
        "summarize: " + text, return_tensors="pt", add_special_tokens=True
    )

    # Inference only: no_grad avoids building the autograd graph, which
    # reduces memory use and speeds up generation.
    with torch.no_grad():
        generated_id = model.generate(
            input_ids=input_ids,
            num_beams=5,
            max_length=50,
            repetition_penalty=2.5,
            length_penalty=1,
            early_stopping=True,
            num_return_sequences=1,
        )

    pred = tokenizer.decode(
        generated_id[0], skip_special_tokens=True, clean_up_tokenization_spaces=True
    )

    # Second opinion: the long-document pipeline with the module-level params.
    result = summarizer(text, **params)
    pred2 = result[0]['summary_text']

    return pred + "\n\n" + pred2
|
|
|
|
|
# Minimal Gradio UI: one input text box, one button, one output text box.
with gr.Blocks() as demo:
    input_box = gr.Textbox(label="Text", lines=10, placeholder="Enter text here")
    output_box = gr.Textbox(label="Output")
    run_button = gr.Button("Summarise")
    # Wire the button to the summarizer: input text in, combined summary out.
    run_button.click(fn=summarize, inputs=input_box, outputs=output_box)

demo.launch()