NoaiGPT committed on
Commit
e5edf69
1 Parent(s): b542a68
Files changed (1) hide show
  1. app.py +29 -14
app.py CHANGED
@@ -1,40 +1,55 @@
1
  import gradio as gr
2
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
3
  import spaces
 
4
 
5
  device = "cuda"
6
 
7
  tokenizer = AutoTokenizer.from_pretrained("NoaiGPT/777")
8
  model = AutoModelForSeq2SeqLM.from_pretrained("NoaiGPT/777").to(device)
9
 
 
 
 
10
  @spaces.GPU
11
  def generate_title(text):
12
  input_ids = tokenizer(f'paraphraser: {text}', return_tensors="pt", padding="longest", truncation=True, max_length=64).input_ids.to(device)
13
  outputs = model.generate(
14
  input_ids,
15
- num_beams=8, # Increased from 4
16
  num_beam_groups=4,
17
- num_return_sequences=6, # Increased from 4
18
- repetition_penalty=12.0, # Increased from 10.0
19
- diversity_penalty=4.0, # Increased from 3.0
20
- no_repeat_ngram_size=3, # Increased from 2
21
- temperature=1.1, # Increased from 0.9
22
- top_k=50, # Added top_k sampling
23
- top_p=0.95, # Added top_p sampling
24
  max_length=64
25
  )
26
  return tokenizer.batch_decode(outputs, skip_special_tokens=True)
27
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  def gradio_generate_title(text):
29
- titles = generate_title(text)
30
- return "\n\n".join(titles)
31
 
32
  iface = gr.Interface(
33
  fn=gradio_generate_title,
34
- inputs=gr.Textbox(lines=5, label="Input Text"),
35
- outputs=gr.Textbox(lines=10, label="Generated Titles"),
36
- title="Diverse Title Generator",
37
- description="Generate multiple diverse paraphrased titles from input text using NoaiGPT/777 model."
38
  )
39
 
40
  iface.launch()
 
1
  import gradio as gr
2
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
3
  import spaces
4
+ from sentence_splitter import SentenceSplitter
5
 
6
  device = "cuda"
7
 
8
  tokenizer = AutoTokenizer.from_pretrained("NoaiGPT/777")
9
  model = AutoModelForSeq2SeqLM.from_pretrained("NoaiGPT/777").to(device)
10
 
11
+ # Initialize the sentence splitter
12
+ splitter = SentenceSplitter(language='en')
13
+
14
  @spaces.GPU
15
  def generate_title(text):
16
  input_ids = tokenizer(f'paraphraser: {text}', return_tensors="pt", padding="longest", truncation=True, max_length=64).input_ids.to(device)
17
  outputs = model.generate(
18
  input_ids,
19
+ num_beams=8,
20
  num_beam_groups=4,
21
+ num_return_sequences=6,
22
+ repetition_penalty=12.0,
23
+ diversity_penalty=4.0,
24
+ no_repeat_ngram_size=3,
25
+ temperature=1.1,
26
+ top_k=50,
27
+ top_p=0.95,
28
  max_length=64
29
  )
30
  return tokenizer.batch_decode(outputs, skip_special_tokens=True)
31
 
32
def process_text(text):
    """Paraphrase every sentence of ``text`` and format the results as a report.

    The text is split into paragraphs on blank lines and each paragraph into
    sentences with the module-level ``splitter``. Every sentence is passed to
    ``generate_title``; the sentence and its paraphrases form one entry.
    Entries within a paragraph are separated by a blank line, and paragraphs
    by a ``---`` rule.

    Args:
        text: Raw user input; may be empty or ``None``.

    Returns:
        The formatted report, or an empty string when there is nothing to
        paraphrase.
    """
    # Nothing to do for missing or whitespace-only input.
    if not text or not text.strip():
        return ""

    sections = []
    for paragraph in text.split('\n\n'):
        entries = []
        for sentence in splitter.split(paragraph):
            # Blank fragments carry nothing to paraphrase; skip them rather
            # than spend a model call on them.
            if not sentence.strip():
                continue
            titles = generate_title(sentence)
            entries.append(f"Original: {sentence}\nParaphrases:\n" + "\n".join(titles))
        # Runs of three or more newlines yield empty paragraphs; dropping them
        # avoids stray "---" separators around empty sections.
        if entries:
            sections.append("\n\n".join(entries))
    return "\n\n---\n\n".join(sections)
43
+
44
  def gradio_generate_title(text):
45
+ return process_text(text)
 
46
 
47
  iface = gr.Interface(
48
  fn=gradio_generate_title,
49
+ inputs=gr.Textbox(lines=10, label="Input Text"),
50
+ outputs=gr.Textbox(lines=20, label="Generated Paraphrases"),
51
+ title="Diverse Paraphrase Generator",
52
+ description="Generate multiple diverse paraphrases for each sentence in the input text using NoaiGPT/777 model."
53
  )
54
 
55
  iface.launch()