|
import gradio as gr |
|
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM |
|
import spaces |
|
from sentence_splitter import SentenceSplitter |
|
|
|
# Hub checkpoint id used for both the tokenizer and the seq2seq model.
_CHECKPOINT = "NoaiGPT/777"

# Device the model weights and the input tensors are moved to.
device = "cuda"

# English sentence splitter used to break paragraphs into sentences.
splitter = SentenceSplitter(language='en')

tokenizer = AutoTokenizer.from_pretrained(_CHECKPOINT)

model = AutoModelForSeq2SeqLM.from_pretrained(_CHECKPOINT)
model = model.to(device)
|
|
|
@spaces.GPU
def generate_title(text):
    """Generate paraphrase candidates for *text* using diverse beam search.

    Despite its name, this function paraphrases: the input is prefixed with
    ``paraphraser: `` before it is tokenized.

    Args:
        text: The sentence to paraphrase. The prefixed input is truncated to
            64 tokens.

    Returns:
        A list of decoded strings, one per returned sequence
        (``num_return_sequences=6``), with special tokens removed.
    """
    encoded = tokenizer(
        f'paraphraser: {text}',
        return_tensors="pt",
        padding="longest",
        truncation=True,
        max_length=64,
    ).to(device)

    outputs = model.generate(
        input_ids=encoded.input_ids,
        # Forward the mask produced by the tokenizer instead of letting
        # generate() infer one; .get() tolerates tokenizers that omit it.
        attention_mask=encoded.get("attention_mask"),
        # Diverse (group) beam search: 8 beams in 4 groups, 6 results kept.
        num_beams=8,
        num_beam_groups=4,
        num_return_sequences=6,
        diversity_penalty=4.0,
        repetition_penalty=12.0,
        no_repeat_ngram_size=3,
        # Group beam search cannot be combined with sampling, so this is
        # stated explicitly. The former temperature/top_k/top_p arguments
        # were removed: they only take effect when sampling, so here they
        # were ignored and merely produced generation-config warnings.
        do_sample=False,
        max_length=64,
    )
    return tokenizer.batch_decode(outputs, skip_special_tokens=True)
|
|
|
def process_text(text):
    """Paraphrase every sentence of *text* and format the results as a report.

    The text is split into paragraphs on blank lines (``'\\n\\n'``), and each
    paragraph is split into sentences with the module-level ``splitter``.
    Every sentence is passed to ``generate_title``.

    Args:
        text: The raw input text; may be empty or ``None``.

    Returns:
        A single string. Each sentence contributes an
        ``Original: ...`` / ``Paraphrases:`` entry; entries within a paragraph
        are separated by a blank line and paragraphs by a ``---`` rule.
        Returns ``""`` when there is nothing to paraphrase.
    """
    if not text:
        return ""

    sections = []
    for paragraph in text.split('\n\n'):
        # Trailing newlines or runs of 3+ newlines yield blank paragraphs;
        # skip them so the report has no empty "---" sections.
        if not paragraph.strip():
            continue

        entries = []
        for sentence in splitter.split(paragraph):
            # Defensive: never spend a model call on an empty sentence.
            if not sentence.strip():
                continue
            titles = generate_title(sentence)
            entries.append(
                f"Original: {sentence}\nParaphrases:\n" + "\n".join(titles)
            )

        if entries:
            sections.append("\n\n".join(entries))

    return "\n\n---\n\n".join(sections)
|
|
|
def gradio_generate_title(text):
    """Gradio callback: pass *text* to ``process_text`` and return its result."""
    report = process_text(text)
    return report
|
|
|
# Input and output widgets of the demo, built separately for readability.
_input_box = gr.Textbox(lines=10, label="Input Text")
_output_box = gr.Textbox(lines=20, label="Generated Paraphrases")

# Single-function Gradio app wired to the paraphrasing callback.
iface = gr.Interface(
    fn=gradio_generate_title,
    inputs=_input_box,
    outputs=_output_box,
    title="Diverse Paraphrase Generator",
    description="Generate multiple diverse paraphrases for each sentence in the input text using NoaiGPT/777 model.",
)

# Start the web server when the script is executed.
iface.launch()