|
import gradio as gr |
|
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM |
|
from sentence_splitter import SentenceSplitter |
|
import itertools |
|
import spaces |
|
# Hugging Face checkpoint id shared by the tokenizer and the seq2seq model.
_CHECKPOINT = "NoaiGPT/777"

# Torch device string the model weights and input tensors are moved to.
device = "cuda"

# Tokenizer and model are loaded once at import time and reused by every
# request handled by the app.
tokenizer = AutoTokenizer.from_pretrained(_CHECKPOINT)
model = AutoModelForSeq2SeqLM.from_pretrained(_CHECKPOINT)
model = model.to(device)

# English sentence splitter used to break each paragraph into sentences.
splitter = SentenceSplitter(language="en")
|
|
|
# Upper bound on how many combined variations are produced, both per
# paragraph and for the whole text.
_MAX_VARIATIONS = 100


def _paraphrase_sentence(sentence):
    """Return the decoded paraphrase candidates the model generates for *sentence*.

    The sentence is prefixed with ``paraphraser: ``, tokenized (truncated to
    64 tokens) and passed to ``model.generate``, which is asked for 6 return
    sequences. The decoded strings are returned as a list.
    """
    input_ids = tokenizer(
        f'paraphraser: {sentence}',
        return_tensors="pt",
        padding="longest",
        truncation=True,
        max_length=64,
    ).input_ids.to(device)

    outputs = model.generate(
        input_ids,
        num_beams=8,
        num_beam_groups=4,
        num_return_sequences=6,
        repetition_penalty=12.0,
        diversity_penalty=4.0,
        no_repeat_ngram_size=3,
        # NOTE(review): temperature/top_k/top_p are sampling controls and
        # do_sample is not set here -- presumably they have no effect on this
        # beam-search call; confirm against the transformers generation docs.
        temperature=1.1,
        top_k=50,
        top_p=0.95,
        max_length=64,
    )
    return tokenizer.batch_decode(outputs, skip_special_tokens=True)


def _first_combinations(pools, limit):
    """Return the first *limit* tuples of ``itertools.product(*pools)``.

    The product is consumed lazily, so only *limit* tuples are ever built
    instead of the full (exponentially large) cartesian product.
    """
    return list(itertools.islice(itertools.product(*pools), limit))


@spaces.GPU
def process_and_generate(text):
    """Paraphrase every sentence of *text* and build combined variations.

    The text is split into paragraphs on blank lines (``"\\n\\n"``) and each
    paragraph into sentences with the module-level ``splitter``. Blank
    paragraphs and blank sentences are skipped so the model is never asked to
    paraphrase empty input.

    Args:
        text: The input text; ``None`` is treated as an empty string.

    Returns:
        A ``(detailed_output, final_output)`` tuple of strings:
        ``detailed_output`` lists each original sentence followed by its
        paraphrases (paragraphs separated by ``---``); ``final_output`` lists
        up to ``_MAX_VARIATIONS`` full-text variations assembled from the
        paraphrases.
    """
    text = text or ""

    detailed_results = []
    all_paragraph_combinations = []

    for paragraph in text.split('\n\n'):
        if not paragraph.strip():
            continue

        paragraph_results = []
        paragraph_paraphrases = []

        for sentence in splitter.split(paragraph):
            if not sentence.strip():
                continue
            candidates = _paraphrase_sentence(sentence)
            paragraph_results.append(
                f"Original: {sentence}\nParaphrases:\n" + "\n".join(candidates)
            )
            paragraph_paraphrases.append(candidates)

        if not paragraph_paraphrases:
            continue

        detailed_results.append("\n\n".join(paragraph_results))

        # Only the first _MAX_VARIATIONS combinations of a paragraph can ever
        # appear in the first _MAX_VARIATIONS tuples of the cross-paragraph
        # product below (every index used by the i-th product tuple is <= i),
        # so capping here yields the same final result without building all
        # 6**n sentence combinations.
        all_paragraph_combinations.append(
            [
                " ".join(combo)
                for combo in _first_combinations(paragraph_paraphrases, _MAX_VARIATIONS)
            ]
        )

    detailed_output = "\n\n---\n\n".join(detailed_results)

    all_text_variations = [
        "\n\n".join(variation)
        for variation in _first_combinations(all_paragraph_combinations, _MAX_VARIATIONS)
    ]

    final_output = "All Paraphrase Combinations:\n\n" + "\n\n---\n\n".join(all_text_variations)

    return detailed_output, final_output
|
|
|
# Gradio UI: one multi-line text input wired to process_and_generate, whose
# two returned strings are shown in the two output text boxes (in order).
iface = gr.Interface(
    fn=process_and_generate,
    inputs=gr.Textbox(lines=10, label="Input Text"),
    outputs=[
        gr.Textbox(lines=20, label="Detailed Paraphrases"),
        gr.Textbox(lines=20, label="All Paraphrase Combinations"),
    ],
    title="Diverse Paraphrase Generator",
    description="Generate multiple diverse paraphrases for each sentence in the input text using NoaiGPT/777 model.",
)

# Start the web server only when this file is executed as a script, so that
# importing the module (e.g. for reuse or testing) does not launch the app.
if __name__ == "__main__":
    iface.launch()