|
import gradio as gr |
|
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM |
|
from sentence_splitter import SentenceSplitter |
|
import spaces |
|
|
|
# Device string used for both the model weights and the tokenized inputs.
device = "cuda"

# Single source of truth for the checkpoint, so the tokenizer and the model
# cannot accidentally be loaded from two different repositories.
MODEL_ID = "NoaiGPT/777"

# Both objects are created once at import time and shared by every request.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_ID).to(device)

# Splits a paragraph of English text into individual sentences.
splitter = SentenceSplitter(language='en')
|
|
|
def _paraphrase_sentence(sentence):
    """Return the model's candidate paraphrases for a single sentence.

    The sentence is prefixed with ``paraphraser: ``, tokenized (truncated to
    64 tokens), and decoded with group beam search using a diversity penalty.

    Args:
        sentence: One sentence of input text.

    Returns:
        A list of decoded strings, one per sequence returned by
        ``model.generate`` (``num_return_sequences=6`` is requested).
    """
    input_ids = tokenizer(
        f'paraphraser: {sentence}',
        return_tensors="pt",
        padding="longest",
        truncation=True,
        max_length=64,
    ).input_ids.to(device)

    # temperature / top_k / top_p are intentionally not passed: they only
    # take effect when do_sample=True, which group beam search
    # (num_beam_groups > 1 with a diversity_penalty) does not allow. Passing
    # them changed nothing and only produced generation-config warnings.
    outputs = model.generate(
        input_ids,
        num_beams=8,
        num_beam_groups=4,
        num_return_sequences=6,
        repetition_penalty=12.0,
        diversity_penalty=4.0,
        no_repeat_ngram_size=3,
        max_length=64,
    )
    return tokenizer.batch_decode(outputs, skip_special_tokens=True)


@spaces.GPU
def process_and_generate(text):
    """Paraphrase ``text`` sentence by sentence.

    The input is split into paragraphs on blank lines (``"\\n\\n"``) and each
    paragraph into sentences. Every non-blank sentence is paraphrased, and the
    first candidate of each sentence is used to rebuild the text.

    Args:
        text: The text to paraphrase. ``None`` or an empty string yields two
            empty strings.

    Returns:
        A ``(detailed_output, final_text)`` tuple of strings.
        ``detailed_output`` lists each original sentence followed by all of
        its candidates, with paragraphs separated by ``---``. ``final_text``
        is the rebuilt text, with sentences joined by a space and paragraphs
        by a blank line. Paragraphs that contain no non-blank sentence are
        omitted from both outputs.
    """
    if not text:
        return "", ""

    detailed_sections = []
    final_paragraphs = []

    for paragraph in text.split('\n\n'):
        # A blank "sentence" would leave the model with only the bare prompt
        # prefix to continue from, so such entries are dropped up front.
        sentences = [s for s in splitter.split(paragraph) if s and s.strip()]
        if not sentences:
            continue

        sentence_reports = []
        chosen = []
        for sentence in sentences:
            candidates = _paraphrase_sentence(sentence)
            sentence_reports.append(
                f"Original: {sentence}\nParaphrases:\n" + "\n".join(candidates)
            )
            # The first returned sequence is used as the final paraphrase.
            chosen.append(candidates[0])

        detailed_sections.append("\n\n".join(sentence_reports))
        final_paragraphs.append(" ".join(chosen))

    detailed_output = "\n\n---\n\n".join(detailed_sections)
    final_text = "\n\n".join(final_paragraphs)

    return detailed_output, final_text
|
|
|
# UI components: one multi-line input box and two read-only style result
# boxes (the per-sentence candidate listing and the reassembled text).
input_box = gr.Textbox(lines=10, label="Input Text")
detailed_box = gr.Textbox(lines=20, label="Detailed Paraphrases")
final_box = gr.Textbox(lines=10, label="Final Paraphrased Text")

# Wire the components to process_and_generate; the two outputs are filled
# from the function's (detailed_output, final_text) return tuple, in order.
iface = gr.Interface(
    fn=process_and_generate,
    inputs=input_box,
    outputs=[detailed_box, final_box],
    title="Diverse Paraphrase Generator",
    description="Generate multiple diverse paraphrases for each sentence in the input text using NoaiGPT/777 model.",
)

# Start the web app as soon as the module is executed.
iface.launch()