dtest / app.py
NoaiGPT's picture
asd
5203ba4
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from sentence_splitter import SentenceSplitter
import itertools
import spaces
# Inference device and the Hub checkpoint shared by the tokenizer and model.
device = "cuda"
_checkpoint = "NoaiGPT/777"

# Load the tokenizer/model pair once at import time and move the model to
# the inference device so every request reuses the same weights.
tokenizer = AutoTokenizer.from_pretrained(_checkpoint)
model = AutoModelForSeq2SeqLM.from_pretrained(_checkpoint)
model = model.to(device)

# English sentence splitter used to break paragraphs into sentences.
splitter = SentenceSplitter(language="en")
@spaces.GPU
def process_and_generate(text):
    """Paraphrase every sentence of *text* and assemble whole-text variations.

    The input is split into paragraphs on blank lines, each paragraph is
    split into sentences with the module-level ``splitter``, and every
    non-blank sentence is paraphrased with the module-level ``model``.

    Args:
        text: Input text; paragraphs are separated by one blank line.

    Returns:
        A ``(detailed_output, final_output)`` tuple of strings.
        ``detailed_output`` lists each original sentence followed by its
        paraphrases, with paragraphs separated by ``---``.
        ``final_output`` lists up to ``max_variations`` full-text rewrites
        built by combining one paraphrase per sentence.
    """
    # Cap on how many combined variations are built and shown.
    max_variations = 100

    def generate_title(sentence):
        """Return the decoded paraphrase candidates for one sentence."""
        input_ids = tokenizer(
            f'paraphraser: {sentence}',
            return_tensors="pt",
            padding="longest",
            truncation=True,
            max_length=64,
        ).input_ids.to(device)
        # NOTE(review): temperature/top_k/top_p are sampling settings and
        # this call never passes do_sample, so they presumably have no
        # effect on the grouped beam search -- confirm against the model's
        # generation config before removing them.
        outputs = model.generate(
            input_ids,
            num_beams=8,
            num_beam_groups=4,
            num_return_sequences=6,
            repetition_penalty=12.0,
            diversity_penalty=4.0,
            no_repeat_ngram_size=3,
            temperature=1.1,
            top_k=50,
            top_p=0.95,
            max_length=64,
        )
        return tokenizer.batch_decode(outputs, skip_special_tokens=True)

    def first_products(pools):
        """Lazily yield at most ``max_variations`` tuples of product(*pools)."""
        return itertools.islice(itertools.product(*pools), max_variations)

    detailed_results = []
    all_paraphrases = []
    for paragraph in text.split('\n\n'):
        paragraph_results = []
        paragraph_paraphrases = []
        for sentence in splitter.split(paragraph):
            # Skip blank sentences so the model is never asked to
            # paraphrase an empty prompt.
            if not sentence.strip():
                continue
            titles = generate_title(sentence)
            paragraph_results.append(
                f"Original: {sentence}\nParaphrases:\n" + "\n".join(titles)
            )
            paragraph_paraphrases.append(titles)
        detailed_results.append("\n\n".join(paragraph_results))
        all_paraphrases.append(paragraph_paraphrases)

    detailed_output = "\n\n---\n\n".join(detailed_results)

    # Build the combinations lazily and stop at the cap. Materialising the
    # full Cartesian product grows as 6**sentences and can exhaust memory
    # long before the result is truncated. Truncating each paragraph pool
    # is safe: the n-th tuple of a product never uses an index above n in
    # any pool, so the first ``max_variations`` full-text variations are
    # exactly the ones the complete product would have produced.
    all_paragraph_combinations = []
    for paragraph_paraphrases in all_paraphrases:
        all_paragraph_combinations.append(
            [" ".join(combo) for combo in first_products(paragraph_paraphrases)]
        )

    # Join one combination per paragraph to create full-text variations.
    all_text_variations = [
        "\n\n".join(variation)
        for variation in first_products(all_paragraph_combinations)
    ]

    final_output = (
        "All Paraphrase Combinations:\n\n"
        + "\n\n---\n\n".join(all_text_variations)
    )
    return detailed_output, final_output
# UI components: one input box and two read-only result boxes, in the same
# order as the tuple returned by process_and_generate.
_input_box = gr.Textbox(lines=10, label="Input Text")
_output_boxes = [
    gr.Textbox(lines=20, label="Detailed Paraphrases"),
    gr.Textbox(lines=20, label="All Paraphrase Combinations"),
]

# Wire the paraphrasing function to the components and start the app.
iface = gr.Interface(
    fn=process_and_generate,
    inputs=_input_box,
    outputs=_output_boxes,
    title="Diverse Paraphrase Generator",
    description="Generate multiple diverse paraphrases for each sentence in the input text using NoaiGPT/777 model.",
)
iface.launch()