dtest / app.py
NoaiGPT's picture
asd
e5edf69
raw
history blame
1.81 kB
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import spaces
from sentence_splitter import SentenceSplitter
# Hub checkpoint shared by the tokenizer and the seq2seq model.
_CHECKPOINT = "NoaiGPT/777"

# Device the model weights and the input tensors are moved to.
device = "cuda"

tokenizer = AutoTokenizer.from_pretrained(_CHECKPOINT)
model = AutoModelForSeq2SeqLM.from_pretrained(_CHECKPOINT)
model = model.to(device)

# English sentence splitter used to break paragraphs into sentences.
splitter = SentenceSplitter(language="en")
@spaces.GPU
def generate_title(text):
    """Generate paraphrases of ``text`` with diverse (group) beam search.

    The input is prefixed with ``"paraphraser: "``, tokenized (truncated to
    64 tokens) and passed to the module-level ``model``.

    Args:
        text: The sentence to paraphrase.

    Returns:
        The decoded output sequences with special tokens removed;
        ``num_return_sequences=6`` candidates are requested per call.
    """
    input_ids = tokenizer(
        f'paraphraser: {text}',
        return_tensors="pt",
        padding="longest",
        truncation=True,
        max_length=64,
    ).input_ids.to(device)

    # NOTE: the former temperature / top_k / top_p arguments were removed.
    # They only apply to sampling (do_sample=True), which cannot be combined
    # with group beam search (num_beam_groups > 1), so they never influenced
    # the output and merely produced warnings.
    outputs = model.generate(
        input_ids,
        num_beams=8,
        num_beam_groups=4,
        num_return_sequences=6,
        repetition_penalty=12.0,
        diversity_penalty=4.0,
        no_repeat_ngram_size=3,
        max_length=64,
    )
    return tokenizer.batch_decode(outputs, skip_special_tokens=True)
def process_text(text):
    """Paraphrase every sentence of ``text`` and format the results.

    The text is split into paragraphs on blank lines (``'\\n\\n'``), each
    paragraph is split into sentences with the module-level ``splitter``,
    and every sentence is sent to ``generate_title``.

    Args:
        text: Input text; may contain several paragraphs.

    Returns:
        One string in which each sentence appears as an
        ``Original: ... / Paraphrases: ...`` entry, entries of one paragraph
        are separated by a blank line, and paragraphs are separated by a
        ``---`` line. Returns ``""`` when there is nothing to paraphrase.
    """
    if not text:
        return ""

    results = []
    for paragraph in text.split('\n\n'):
        # Runs of blank lines (or leading/trailing newlines) yield empty or
        # whitespace-only "paragraphs"; skipping them avoids empty sections
        # between '---' separators in the output.
        if not paragraph.strip():
            continue

        paragraph_results = []
        for sentence in splitter.split(paragraph):
            # Guard against blank entries from the splitter so no generation
            # call is spent on an empty prompt.
            if not sentence.strip():
                continue
            titles = generate_title(sentence)
            paragraph_results.append(
                f"Original: {sentence}\nParaphrases:\n" + "\n".join(titles)
            )

        if paragraph_results:
            results.append("\n\n".join(paragraph_results))

    return "\n\n---\n\n".join(results)
def gradio_generate_title(text):
    """Gradio callback: return the formatted paraphrase output for ``text``.

    Delegates directly to ``process_text``.
    """
    formatted = process_text(text)
    return formatted
# UI components: a multi-line input box and a larger box for the results.
_input_box = gr.Textbox(lines=10, label="Input Text")
_output_box = gr.Textbox(lines=20, label="Generated Paraphrases")

# Wire the components to the paraphrasing callback.
iface = gr.Interface(
    fn=gradio_generate_title,
    inputs=_input_box,
    outputs=_output_box,
    title="Diverse Paraphrase Generator",
    description="Generate multiple diverse paraphrases for each sentence in the input text using NoaiGPT/777 model.",
)

# Launch the interface.
iface.launch()