File size: 2,900 Bytes
f8f1745 51ef7b1 e5edf69 d9aa266 2663a06 51ef7b1 f8f1745 51ef7b1 e5edf69 2663a06 5565ef8 e5edf69 d9aa266 5565ef8 e5edf69 d9aa266 5565ef8 e5edf69 d9aa266 5565ef8 d9aa266 5203ba4 d9aa266 5565ef8 d9aa266 e5edf69 d9aa266 51ef7b1 5565ef8 e5edf69 5565ef8 d9aa266 5565ef8 e5edf69 51ef7b1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 |
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from sentence_splitter import SentenceSplitter
import itertools
import spaces
# Module-level setup: everything here runs once at import time and the
# resulting objects are reused by every request.

# Sentence splitter configured for English input.
splitter = SentenceSplitter(language='en')

# NOTE(review): the device is hard-coded to CUDA with no CPU fallback —
# presumably intentional for a `spaces.GPU` deployment; confirm before
# running this elsewhere.
device = "cuda"

# Tokenizer and seq2seq model are loaded from the same checkpoint id.
_CHECKPOINT = "NoaiGPT/777"
tokenizer = AutoTokenizer.from_pretrained(_CHECKPOINT)
model = AutoModelForSeq2SeqLM.from_pretrained(_CHECKPOINT)
model = model.to(device)
def _combine_paraphrases(all_paraphrases, limit):
    """Build at most ``limit`` full-text variations from per-sentence paraphrases.

    ``all_paraphrases`` holds one list per paragraph; each of those holds one
    list of paraphrase strings per sentence.  Within a paragraph the chosen
    sentences are joined with a single space; paragraphs are joined with a
    blank line.

    The Cartesian products are consumed lazily through ``itertools.islice``
    instead of being materialised in full: with six paraphrases per sentence
    a paragraph of k sentences has 6**k combinations, which exhausts memory
    long before any cap applied afterwards can help.
    """
    # Only the first ``limit`` combinations of each paragraph can ever appear
    # in the first ``limit`` items of the outer product (an item's index in
    # any one position never exceeds its overall index), so truncating here
    # does not change which variations are returned.
    paragraph_options = [
        [
            " ".join(combo)
            for combo in itertools.islice(itertools.product(*sentence_options), limit)
        ]
        for sentence_options in all_paraphrases
    ]
    return [
        "\n\n".join(variation)
        for variation in itertools.islice(itertools.product(*paragraph_options), limit)
    ]


@spaces.GPU
def process_and_generate(text):
    """Paraphrase every sentence of ``text`` and assemble the results.

    The input is split into paragraphs on blank lines and each paragraph into
    sentences with the module-level ``splitter``.  Every sentence is sent to
    the module-level ``model`` / ``tokenizer`` to obtain several paraphrases.

    Args:
        text: Raw input text; ``None`` is treated as an empty string.

    Returns:
        A ``(detailed_output, final_output)`` tuple of strings.
        ``detailed_output`` lists each original sentence followed by its
        paraphrases, with paragraphs separated by ``---``.  ``final_output``
        lists up to 100 complete rewrites of the text built by combining one
        paraphrase per sentence, also separated by ``---``.
    """
    # Cap on the number of full-text variations, to keep the output readable.
    max_variations = 100

    def paraphrase_sentence(sentence):
        """Return the decoded paraphrase candidates for one sentence."""
        input_ids = tokenizer(
            f'paraphraser: {sentence}',
            return_tensors="pt",
            padding="longest",
            truncation=True,
            max_length=64,
        ).input_ids.to(device)
        outputs = model.generate(
            input_ids,
            # Beam search split into groups with a diversity penalty, returning
            # six candidates per sentence.
            num_beams=8,
            num_beam_groups=4,
            num_return_sequences=6,
            repetition_penalty=12.0,
            diversity_penalty=4.0,
            no_repeat_ngram_size=3,
            # NOTE(review): do_sample is not set, so these sampling knobs are
            # presumably ignored by beam search — confirm against the installed
            # transformers version before relying on or removing them.
            temperature=1.1,
            top_k=50,
            top_p=0.95,
            max_length=64,
        )
        return tokenizer.batch_decode(outputs, skip_special_tokens=True)

    detailed_results = []
    all_paraphrases = []
    for paragraph in (text or "").split('\n\n'):
        # Extra blank lines produce empty paragraphs; skipping them avoids
        # empty sections and stray separators in both outputs.
        if not paragraph.strip():
            continue
        paragraph_results = []
        paragraph_paraphrases = []
        for sentence in splitter.split(paragraph):
            # Do not spend a model call on a blank sentence.
            if not sentence.strip():
                continue
            candidates = paraphrase_sentence(sentence)
            paragraph_results.append(
                f"Original: {sentence}\nParaphrases:\n" + "\n".join(candidates)
            )
            paragraph_paraphrases.append(candidates)
        detailed_results.append("\n\n".join(paragraph_results))
        all_paraphrases.append(paragraph_paraphrases)

    detailed_output = "\n\n---\n\n".join(detailed_results)

    # Combine one paraphrase per sentence into whole-text variations, capped.
    all_text_variations = _combine_paraphrases(all_paraphrases, max_variations)
    final_output = "All Paraphrase Combinations:\n\n" + "\n\n---\n\n".join(all_text_variations)
    return detailed_output, final_output
# Gradio UI: a single input text box is passed to process_and_generate, and
# the two strings it returns fill the two output text boxes in order.
iface = gr.Interface(
    fn=process_and_generate,
    inputs=gr.Textbox(lines=10, label="Input Text"),
    outputs=[
        gr.Textbox(lines=20, label="Detailed Paraphrases"),
        gr.Textbox(lines=20, label="All Paraphrase Combinations"),
    ],
    title="Diverse Paraphrase Generator",
    description="Generate multiple diverse paraphrases for each sentence in the input text using NoaiGPT/777 model.",
)

# Start serving the interface.
iface.launch()