"""Gradio app that paraphrases input text sentence by sentence.

Each sentence of the input is sent to the ``NoaiGPT/777`` seq2seq model, which
returns several candidate paraphrases. The app shows the per-sentence
candidates and a bounded number of full-text variations assembled from them.
"""

import itertools

import gradio as gr
import spaces
from sentence_splitter import SentenceSplitter
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

MODEL_NAME = "NoaiGPT/777"

# Upper bound on the number of full-text variations shown to the user.
MAX_VARIATIONS = 100

device = "cuda"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME).to(device)
splitter = SentenceSplitter(language="en")


def _generate_paraphrases(sentence):
    """Return the decoded paraphrase candidates generated for ``sentence``.

    The sentence is prefixed with ``"paraphraser: "`` and truncated to 64
    tokens; ``num_return_sequences=6`` candidates are requested from the model.
    """
    input_ids = tokenizer(
        f"paraphraser: {sentence}",
        return_tensors="pt",
        padding="longest",
        truncation=True,
        max_length=64,
    ).input_ids.to(device)

    # NOTE(review): temperature/top_k/top_p are sampling parameters, but
    # do_sample is not set here, so they presumably have no effect on this
    # group-beam-search call -- confirm against the transformers docs before
    # removing them.
    outputs = model.generate(
        input_ids,
        num_beams=8,
        num_beam_groups=4,
        num_return_sequences=6,
        repetition_penalty=12.0,
        diversity_penalty=4.0,
        no_repeat_ngram_size=3,
        temperature=1.1,
        top_k=50,
        top_p=0.95,
        max_length=64,
    )
    return tokenizer.batch_decode(outputs, skip_special_tokens=True)


def _first_products(choice_groups, limit):
    """Return at most ``limit`` leading tuples of the product of ``choice_groups``.

    ``itertools.product`` is consumed lazily through ``islice`` so the full
    Cartesian product (which grows exponentially with the number of groups)
    is never materialised.
    """
    return list(itertools.islice(itertools.product(*choice_groups), limit))


# NOTE(review): spaces.GPU presumably requests a GPU for the duration of the
# call when running on Hugging Face Spaces -- confirm against the Spaces docs.
@spaces.GPU
def process_and_generate(text):
    """Paraphrase ``text`` and return ``(detailed_output, final_output)``.

    ``text`` is split into paragraphs on blank lines and each paragraph into
    sentences. Blank paragraphs and blank sentences are skipped so the model
    is never asked to paraphrase an empty prompt.

    Returns:
        A pair of strings: the per-sentence listing of originals with their
        paraphrases, and up to ``MAX_VARIATIONS`` full-text variations built
        by combining one paraphrase per sentence.
    """
    detailed_results = []
    all_paraphrases = []

    for paragraph in text.split("\n\n"):
        if not paragraph.strip():
            continue
        sentences = [s for s in splitter.split(paragraph) if s.strip()]
        if not sentences:
            continue

        paragraph_results = []
        paragraph_paraphrases = []
        for sentence in sentences:
            candidates = _generate_paraphrases(sentence)
            paragraph_results.append(
                f"Original: {sentence}\nParaphrases:\n" + "\n".join(candidates)
            )
            paragraph_paraphrases.append(candidates)

        detailed_results.append("\n\n".join(paragraph_results))
        all_paraphrases.append(paragraph_paraphrases)

    detailed_output = "\n\n---\n\n".join(detailed_results)

    # Only the first MAX_VARIATIONS full-text variations are ever shown, and
    # itertools.product varies its last operand fastest, so those variations
    # never reach past position MAX_VARIATIONS - 1 in any paragraph's list.
    # Capping each paragraph at MAX_VARIATIONS therefore yields exactly the
    # same output as building every combination and truncating afterwards,
    # without the exponential memory cost.
    all_paragraph_combinations = [
        [
            " ".join(combo)
            for combo in _first_products(paragraph_paraphrases, MAX_VARIATIONS)
        ]
        for paragraph_paraphrases in all_paraphrases
    ]

    # Join one variant per paragraph to create the full-text variations.
    all_text_variations = [
        "\n\n".join(variation)
        for variation in _first_products(all_paragraph_combinations, MAX_VARIATIONS)
    ]

    final_output = "All Paraphrase Combinations:\n\n" + "\n\n---\n\n".join(
        all_text_variations
    )
    return detailed_output, final_output


iface = gr.Interface(
    fn=process_and_generate,
    inputs=gr.Textbox(lines=10, label="Input Text"),
    outputs=[
        gr.Textbox(lines=20, label="Detailed Paraphrases"),
        gr.Textbox(lines=20, label="All Paraphrase Combinations"),
    ],
    title="Diverse Paraphrase Generator",
    description="Generate multiple diverse paraphrases for each sentence in the input text using NoaiGPT/777 model.",
)

iface.launch()