NoaiGPT committed on
Commit
5565ef8
1 Parent(s): e5edf69
Files changed (1) hide show
  1. app.py +34 -26
app.py CHANGED
@@ -1,6 +1,5 @@
1
  import gradio as gr
2
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
3
- import spaces
4
  from sentence_splitter import SentenceSplitter
5
 
6
  device = "cuda"
@@ -8,46 +7,55 @@ device = "cuda"
# The tokenizer and the seq2seq model are both loaded from the "NoaiGPT/777"
# checkpoint; the model is then moved onto the configured device.
tokenizer = AutoTokenizer.from_pretrained("NoaiGPT/777")
model = AutoModelForSeq2SeqLM.from_pretrained("NoaiGPT/777")
model = model.to(device)

# Initialize the sentence splitter
splitter = SentenceSplitter(language='en')
13
 
@spaces.GPU
def generate_title(text):
    """Return paraphrase candidates for ``text`` from the seq2seq model.

    The text is prefixed with ``paraphraser: ``, tokenized with truncation
    at 64 tokens, and passed to ``model.generate`` using 8 beams split into
    4 beam groups.

    Args:
        text: The sentence to paraphrase.

    Returns:
        The decoded output sequences (6 are requested), with special tokens
        stripped.
    """
    prompt = f'paraphraser: {text}'
    encoded = tokenizer(
        prompt,
        return_tensors="pt",
        padding="longest",
        truncation=True,
        max_length=64,
    )
    prompt_ids = encoded.input_ids.to(device)

    # NOTE(review): temperature / top_k / top_p are sampling parameters and
    # do_sample is not enabled here; they presumably have no effect on this
    # beam-group search -- confirm against the transformers docs.
    generation_options = dict(
        num_beams=8,
        num_beam_groups=4,
        num_return_sequences=6,
        repetition_penalty=12.0,
        diversity_penalty=4.0,
        no_repeat_ngram_size=3,
        temperature=1.1,
        top_k=50,
        top_p=0.95,
        max_length=64,
    )
    generated = model.generate(prompt_ids, **generation_options)
    return tokenizer.batch_decode(generated, skip_special_tokens=True)
31
-
def process_text(text):
    """Paraphrase every sentence of ``text`` and format the results.

    The text is split into paragraphs on blank lines (``'\\n\\n'``) and each
    paragraph into sentences with ``splitter``. Every sentence is rendered
    as its original followed by the candidates from ``generate_title``.

    Args:
        text: The full input text.

    Returns:
        One string: sentence entries within a paragraph are separated by a
        blank line, and paragraphs by a ``---`` rule.
    """

    def render_sentence(sentence):
        # One entry: the source sentence followed by its paraphrases.
        candidates = generate_title(sentence)
        return f"Original: {sentence}\nParaphrases:\n" + "\n".join(candidates)

    paragraph_reports = [
        "\n\n".join(render_sentence(sentence) for sentence in splitter.split(paragraph))
        for paragraph in text.split('\n\n')
    ]
    return "\n\n---\n\n".join(paragraph_reports)
 
 
 
43
 
def gradio_generate_title(text):
    """Gradio callback: return the formatted paraphrase report for ``text``."""
    report = process_text(text)
    return report
46
 
# Gradio UI: a single input text box feeding gradio_generate_title, with the
# formatted paraphrase report shown in a single output text box.
iface = gr.Interface(
    title="Diverse Paraphrase Generator",
    description="Generate multiple diverse paraphrases for each sentence in the input text using NoaiGPT/777 model.",
    fn=gradio_generate_title,
    inputs=gr.Textbox(lines=10, label="Input Text"),
    outputs=gr.Textbox(lines=20, label="Generated Paraphrases"),
)
 
1
  import gradio as gr
2
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
 
3
  from sentence_splitter import SentenceSplitter
4
 
# Device the model is moved onto and the input ids are sent to.
device = "cuda"

# The tokenizer and the seq2seq model are both loaded from the "NoaiGPT/777"
# checkpoint; the model is then moved onto the configured device.
tokenizer = AutoTokenizer.from_pretrained("NoaiGPT/777")
model = AutoModelForSeq2SeqLM.from_pretrained("NoaiGPT/777")
model = model.to(device)

# English sentence splitter used to break paragraphs into sentences.
splitter = SentenceSplitter(language='en')
11
 
def _generate_paraphrases(sentence):
    """Return the paraphrase candidates the model produces for one sentence.

    The sentence is prefixed with ``paraphraser: ``, tokenized with
    truncation at 64 tokens, and passed to ``model.generate`` using 8 beams
    split into 4 beam groups.

    Args:
        sentence: The sentence to paraphrase.

    Returns:
        The decoded output sequences (6 are requested), with special tokens
        stripped.
    """
    input_ids = tokenizer(
        f'paraphraser: {sentence}',
        return_tensors="pt",
        padding="longest",
        truncation=True,
        max_length=64,
    ).input_ids.to(device)
    # NOTE(review): temperature / top_k / top_p are sampling parameters and
    # do_sample is not enabled here; they presumably have no effect on this
    # beam-group search -- confirm against the transformers docs.
    outputs = model.generate(
        input_ids,
        num_beams=8,
        num_beam_groups=4,
        num_return_sequences=6,
        repetition_penalty=12.0,
        diversity_penalty=4.0,
        no_repeat_ngram_size=3,
        temperature=1.1,
        top_k=50,
        top_p=0.95,
        max_length=64,
    )
    return tokenizer.batch_decode(outputs, skip_special_tokens=True)


def process_and_generate(text):
    """Paraphrase ``text`` sentence by sentence.

    The text is split into paragraphs on blank lines (``'\\n\\n'``) and each
    paragraph into sentences with ``splitter``. Blank sentences, and
    paragraphs that contain no sentences, are skipped so the model is never
    asked to paraphrase an empty prompt and no empty sections appear in the
    outputs.

    Args:
        text: The full input text. ``None``, empty, or whitespace-only input
            yields two empty strings.

    Returns:
        A ``(detailed_output, final_text)`` tuple. ``detailed_output`` lists
        every original sentence with all of its paraphrase candidates,
        paragraphs separated by a ``---`` rule. ``final_text`` rebuilds the
        text using the first candidate of every sentence, paragraphs
        separated by a blank line.
    """
    # Nothing to paraphrase: answer without touching the model.
    if text is None or not text.strip():
        return "", ""

    results = []
    final_paragraphs = []

    for paragraph in text.split('\n\n'):
        paragraph_results = []
        final_sentences = []

        for sentence in splitter.split(paragraph):
            # A blank "sentence" would make the model paraphrase an empty prompt.
            if not sentence.strip():
                continue
            titles = _generate_paraphrases(sentence)
            paragraph_results.append(
                f"Original: {sentence}\nParaphrases:\n" + "\n".join(titles)
            )
            # Use the first paraphrase for the final paragraph.
            final_sentences.append(titles[0])

        # Skip paragraphs that produced nothing (e.g. runs of blank lines).
        if not paragraph_results:
            continue
        results.append("\n\n".join(paragraph_results))
        final_paragraphs.append(" ".join(final_sentences))

    detailed_output = "\n\n---\n\n".join(results)
    final_text = "\n\n".join(final_paragraphs)
    return detailed_output, final_text
 
51
 
# Gradio UI: a single input text box feeding process_and_generate, whose two
# return values fill the detailed report box and the final-text box in order.
iface = gr.Interface(
    title="Diverse Paraphrase Generator",
    description="Generate multiple diverse paraphrases for each sentence in the input text using NoaiGPT/777 model.",
    fn=process_and_generate,
    inputs=gr.Textbox(lines=10, label="Input Text"),
    outputs=[
        gr.Textbox(lines=20, label="Detailed Paraphrases"),
        gr.Textbox(lines=10, label="Final Paraphrased Text"),
    ],
)