"""Gradio app that paraphrases input text sentence by sentence.

The text is split into paragraphs (on blank lines) and then into sentences.
Each sentence is paraphrased with the ``NoaiGPT/777`` seq2seq model; the UI
shows every candidate per sentence plus a reassembled text built from the
first candidate of each sentence.
"""

import gradio as gr
import torch
from sentence_splitter import SentenceSplitter
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

MODEL_NAME = "NoaiGPT/777"
# Token budget used both for truncating the prompt and for the generated output.
MAX_TOKENS = 64

# Fall back to CPU so the app still starts on machines without a CUDA GPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME).to(device)
splitter = SentenceSplitter(language="en")


def _generate_paraphrases(sentence: str) -> list:
    """Return the decoded paraphrase candidates for a single sentence.

    Args:
        sentence: One sentence of input text. The prompt built from it is
            truncated to ``MAX_TOKENS`` tokens, so very long sentences are
            only partially seen by the model.

    Returns:
        A list of ``num_return_sequences`` (6) decoded strings, in the order
        returned by ``model.generate``.
    """
    encoded = tokenizer(
        f"paraphraser: {sentence}",
        return_tensors="pt",
        padding="longest",
        truncation=True,
        max_length=MAX_TOKENS,
    ).to(device)

    # Group (diverse) beam search: 8 beams in 4 groups, with a diversity
    # penalty pushing the groups apart. The sampling-only options
    # (temperature / top_k / top_p) are intentionally not passed: without
    # do_sample=True they are not used by beam search, and they only
    # produce configuration warnings.
    outputs = model.generate(
        **encoded,  # forwards input_ids together with the attention mask
        num_beams=8,
        num_beam_groups=4,
        num_return_sequences=6,
        repetition_penalty=12.0,
        diversity_penalty=4.0,
        no_repeat_ngram_size=3,
        max_length=MAX_TOKENS,
    )
    return tokenizer.batch_decode(outputs, skip_special_tokens=True)


def process_and_generate(text: str) -> tuple:
    """Paraphrase every sentence of ``text`` and build both output views.

    Args:
        text: Raw user input. Paragraphs are separated by a blank line
            (``"\\n\\n"``); each paragraph is split into sentences.

    Returns:
        A ``(detailed_output, final_text)`` pair of strings:

        * ``detailed_output`` lists each original sentence followed by all of
          its paraphrase candidates; paragraphs are separated by ``---``.
        * ``final_text`` is the text rebuilt from the first candidate of each
          sentence, with paragraphs separated by a blank line.

        Both strings are empty when ``text`` contains no sentences.
    """
    if not text or not text.strip():
        return "", ""

    results = []
    final_paragraphs = []

    for paragraph in text.split("\n\n"):
        paragraph_results = []
        final_sentences = []

        for sentence in splitter.split(paragraph):
            # Skip empty fragments so the model is never run on a bare prompt.
            if not sentence.strip():
                continue
            paraphrases = _generate_paraphrases(sentence)
            paragraph_results.append(
                f"Original: {sentence}\nParaphrases:\n" + "\n".join(paraphrases)
            )
            # The first candidate is the one used in the reassembled text.
            final_sentences.append(paraphrases[0])

        # Extra blank lines in the input yield empty paragraphs; dropping them
        # avoids empty "---" sections and stray blank lines in the outputs.
        if not paragraph_results:
            continue

        results.append("\n\n".join(paragraph_results))
        final_paragraphs.append(" ".join(final_sentences))

    detailed_output = "\n\n---\n\n".join(results)
    final_text = "\n\n".join(final_paragraphs)
    return detailed_output, final_text


iface = gr.Interface(
    fn=process_and_generate,
    inputs=gr.Textbox(lines=10, label="Input Text"),
    outputs=[
        gr.Textbox(lines=20, label="Detailed Paraphrases"),
        gr.Textbox(lines=10, label="Final Paraphrased Text"),
    ],
    title="Diverse Paraphrase Generator",
    description="Generate multiple diverse paraphrases for each sentence in the input text using NoaiGPT/777 model.",
)

# Launched at import time, as in the original script.
iface.launch()