dtest / app.py
NoaiGPT's picture
asd
2663a06
raw
history blame
2.18 kB
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from sentence_splitter import SentenceSplitter
import spaces
# Device the model is placed on; input tensors are moved to the same device
# before generation.
device = "cuda"

# The tokenizer and the seq2seq weights both come from the "NoaiGPT/777"
# checkpoint.
tokenizer = AutoTokenizer.from_pretrained("NoaiGPT/777")
model = AutoModelForSeq2SeqLM.from_pretrained("NoaiGPT/777")
model = model.to(device)

# English sentence splitter used to break each paragraph into sentences.
splitter = SentenceSplitter(language="en")
def _generate_paraphrases(sentence):
    """Return the candidate paraphrases generated for a single sentence.

    The sentence is prefixed with ``paraphraser: ``, tokenized (truncated to
    at most 64 tokens) and decoded with group beam search.

    Args:
        sentence: One sentence of input text.

    Returns:
        A list of decoded strings, one per returned sequence
        (``num_return_sequences=6``), with special tokens stripped.
    """
    input_ids = tokenizer(
        f'paraphraser: {sentence}',
        return_tensors="pt",
        padding="longest",
        truncation=True,
        max_length=64,
    ).input_ids.to(device)
    # NOTE: temperature / top_k / top_p were previously passed here as well.
    # They only apply when do_sample=True, which is not set (and cannot be
    # combined with num_beam_groups > 1), so they were silently ignored and
    # only produced warnings. Dropping them does not change the output.
    outputs = model.generate(
        input_ids,
        num_beams=8,
        num_beam_groups=4,
        num_return_sequences=6,
        repetition_penalty=12.0,
        diversity_penalty=4.0,
        no_repeat_ngram_size=3,
        max_length=64,
    )
    return tokenizer.batch_decode(outputs, skip_special_tokens=True)


@spaces.GPU
def process_and_generate(text):
    """Paraphrase ``text`` sentence by sentence.

    The text is split into paragraphs on blank lines (``"\\n\\n"``) and each
    paragraph into sentences with the module-level ``splitter``. Every
    sentence is paraphrased independently.

    Args:
        text: Raw input text; may contain several paragraphs.

    Returns:
        A ``(detailed_output, final_text)`` tuple of strings:

        * ``detailed_output`` lists, per sentence, the original followed by
          all generated paraphrases; paragraphs are separated by ``---``.
        * ``final_text`` is the text rebuilt from the first paraphrase of
          each sentence, with paragraphs separated by a blank line.

        Both strings are empty when ``text`` contains nothing to paraphrase.
    """
    # Nothing to do for empty / whitespace-only input (or None).
    if not text or not text.strip():
        return "", ""

    results = []
    final_paragraphs = []
    for paragraph in text.split('\n\n'):
        # Three or more consecutive newlines yield empty paragraphs; skip
        # them so they do not produce empty sections in the output.
        if not paragraph.strip():
            continue

        paragraph_results = []
        final_sentences = []
        for sentence in splitter.split(paragraph):
            # Never send an empty prompt to the model.
            if not sentence.strip():
                continue
            paraphrases = _generate_paraphrases(sentence)
            paragraph_results.append(
                f"Original: {sentence}\nParaphrases:\n" + "\n".join(paraphrases)
            )
            # Use the first paraphrase for the final paragraph.
            final_sentences.append(paraphrases[0])

        # A paragraph that produced no sentences contributes nothing.
        if not final_sentences:
            continue
        results.append("\n\n".join(paragraph_results))
        final_paragraphs.append(" ".join(final_sentences))

    detailed_output = "\n\n---\n\n".join(results)
    final_text = "\n\n".join(final_paragraphs)
    return detailed_output, final_text
# UI components: one input box and two output boxes. The first output holds
# the per-sentence paraphrase listing, the second the reassembled text.
input_box = gr.Textbox(lines=10, label="Input Text")
detailed_box = gr.Textbox(lines=20, label="Detailed Paraphrases")
final_box = gr.Textbox(lines=10, label="Final Paraphrased Text")

# Wire process_and_generate to the components and start the app.
iface = gr.Interface(
    fn=process_and_generate,
    inputs=input_box,
    outputs=[detailed_box, final_box],
    title="Diverse Paraphrase Generator",
    description="Generate multiple diverse paraphrases for each sentence in the input text using NoaiGPT/777 model.",
)
iface.launch()