File size: 2,900 Bytes
f8f1745
51ef7b1
e5edf69
d9aa266
2663a06
51ef7b1
f8f1745
51ef7b1
 
 
e5edf69
 
2663a06
5565ef8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e5edf69
d9aa266
 
5565ef8
e5edf69
 
 
d9aa266
5565ef8
e5edf69
 
 
d9aa266
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5565ef8
d9aa266
5203ba4
d9aa266
5565ef8
d9aa266
e5edf69
d9aa266
51ef7b1
 
5565ef8
e5edf69
5565ef8
 
d9aa266
5565ef8
e5edf69
 
51ef7b1
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from sentence_splitter import SentenceSplitter
import itertools
import spaces
# Hub identifier shared by the tokenizer and the seq2seq model so the two
# cannot drift apart.
_CHECKPOINT = "NoaiGPT/777"

# Device string that the model weights and the input tensors are moved to.
device = "cuda"

# English sentence splitter used to break each paragraph into sentences.
splitter = SentenceSplitter(language='en')

# Load the tokenizer and the model once at import time, then move the
# model onto the target device.
tokenizer = AutoTokenizer.from_pretrained(_CHECKPOINT)
model = AutoModelForSeq2SeqLM.from_pretrained(_CHECKPOINT)
model = model.to(device)

def _combine_paraphrases(all_paraphrases, max_variations):
    """Return at most ``max_variations`` full-text variations.

    ``all_paraphrases`` has one entry per paragraph; each entry is a list
    holding, for every sentence of that paragraph, the list of candidate
    paraphrases. Within a paragraph the chosen sentences are joined with a
    single space; paragraphs are joined with a blank line.

    Both Cartesian products are consumed lazily and cut off at
    ``max_variations`` instead of being fully materialised and sliced
    afterwards. With several candidates per sentence the full product grows
    exponentially with the number of sentences, so building it completely
    can exhaust memory even though only a small prefix is kept.

    Capping every paragraph at ``max_variations`` combinations does not
    change the result: ``itertools.product`` varies the last iterable
    fastest, so the first ``max_variations`` outputs never reach past the
    first ``max_variations`` combinations of any single paragraph.
    """
    per_paragraph = [
        [
            " ".join(combo)
            for combo in itertools.islice(
                itertools.product(*sentence_options), max_variations
            )
        ]
        for sentence_options in all_paraphrases
    ]
    return [
        "\n\n".join(variation)
        for variation in itertools.islice(
            itertools.product(*per_paragraph), max_variations
        )
    ]


@spaces.GPU
def process_and_generate(text):
    """Paraphrase ``text`` sentence by sentence and combine the results.

    The text is split into paragraphs on blank lines (``'\\n\\n'``) and each
    paragraph into sentences with the module-level ``splitter``. Every
    sentence is sent to the model, which returns several paraphrases.

    Args:
        text: Input text. ``None`` is treated as empty text. Paragraphs that
            are blank or yield no sentences are skipped so they do not leave
            empty sections in either output.

    Returns:
        A ``(detailed_output, final_output)`` tuple of strings:
        ``detailed_output`` lists every original sentence followed by its
        paraphrases, with paragraphs separated by ``---``;
        ``final_output`` lists up to 100 full-text variations built from
        combinations of the paraphrases, also separated by ``---``.
    """
    def paraphrase_sentence(sentence):
        # The input is prefixed with 'paraphraser: ' and truncated to 64
        # tokens before generation.
        input_ids = tokenizer(
            f'paraphraser: {sentence}',
            return_tensors="pt",
            padding="longest",
            truncation=True,
            max_length=64,
        ).input_ids.to(device)
        # NOTE(review): temperature/top_k/top_p are sampling settings and
        # do_sample is not enabled here, so they are presumably ignored by
        # the grouped beam search -- confirm against the installed
        # transformers version before relying on or removing them.
        outputs = model.generate(
            input_ids,
            num_beams=8,
            num_beam_groups=4,
            num_return_sequences=6,
            repetition_penalty=12.0,
            diversity_penalty=4.0,
            no_repeat_ngram_size=3,
            temperature=1.1,
            top_k=50,
            top_p=0.95,
            max_length=64
        )
        return tokenizer.batch_decode(outputs, skip_special_tokens=True)

    detailed_results = []
    all_paraphrases = []

    for paragraph in (text or "").split('\n\n'):
        # Runs of three or more newlines produce blank "paragraphs"; skip
        # them rather than emitting empty sections and stray separators.
        if not paragraph.strip():
            continue
        sentences = splitter.split(paragraph)
        if not sentences:
            continue

        paragraph_results = []
        paragraph_paraphrases = []
        for sentence in sentences:
            candidates = paraphrase_sentence(sentence)
            paragraph_results.append(
                f"Original: {sentence}\nParaphrases:\n" + "\n".join(candidates)
            )
            paragraph_paraphrases.append(candidates)

        detailed_results.append("\n\n".join(paragraph_results))
        all_paraphrases.append(paragraph_paraphrases)

    detailed_output = "\n\n---\n\n".join(detailed_results)

    # Limit the number of variations to prevent overwhelming output; the
    # limit is applied while combining so the full product is never built.
    max_variations = 100
    all_text_variations = _combine_paraphrases(all_paraphrases, max_variations)

    final_output = "All Paraphrase Combinations:\n\n" + "\n\n---\n\n".join(all_text_variations)

    return detailed_output, final_output

# Single multi-line text box that receives the text to paraphrase.
_input_box = gr.Textbox(lines=10, label="Input Text")

# Two text boxes, in the same order as the tuple returned by
# process_and_generate: per-sentence details first, combinations second.
_output_boxes = [
    gr.Textbox(lines=20, label=box_label)
    for box_label in ("Detailed Paraphrases", "All Paraphrase Combinations")
]

# Wire the paraphrasing function to the UI components.
iface = gr.Interface(
    fn=process_and_generate,
    inputs=_input_box,
    outputs=_output_boxes,
    title="Diverse Paraphrase Generator",
    description="Generate multiple diverse paraphrases for each sentence in the input text using NoaiGPT/777 model.",
)

# Start the Gradio app.
iface.launch()