Sibinraj committed on
Commit
90c4eb5
·
verified ·
1 Parent(s): 6b2f148

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -2
app.py CHANGED
@@ -20,14 +20,32 @@ def summarize_text(text, max_length, show_length):
20
  # Generate the summary
21
  summary_ids = model.generate(
22
  inputs,
23
- max_length=max_length + 2,
24
- min_length=max_length,
25
  num_beams=5,
 
 
26
  )
27
 
28
  # Decode the summary
29
  summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
30
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  # If show_length is True, append the length of the summary
32
  if show_length:
33
  summary_length = len(summary.split())
 
20
  # Generate the summary
21
  summary_ids = model.generate(
22
  inputs,
23
+ max_length=max_length + 20, # Allow some buffer
24
+ min_length=10, # Set a reasonable minimum length
25
  num_beams=5,
26
+ no_repeat_ngram_size=2,
27
+ early_stopping=True
28
  )
29
 
30
  # Decode the summary
31
  summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
32
 
33
+ # Trim the summary to the desired length
34
+ summary_words = summary.split()
35
+ if len(summary_words) > max_length:
36
+ summary = ' '.join(summary_words[:max_length])
37
+ elif len(summary_words) < max_length:
38
+ additional_tokens = model.generate(
39
+ tokenizer.encode(" ".join(summary_words), return_tensors='pt'),
40
+ max_length=max_length - len(summary_words) + len(summary_words),
41
+ min_length=max_length - len(summary_words) + len(summary_words),
42
+ num_beams=5,
43
+ no_repeat_ngram_size=2,
44
+ early_stopping=True
45
+ )
46
+ additional_summary = tokenizer.decode(additional_tokens[0], skip_special_tokens=True)
47
+ summary += ' ' + ' '.join(additional_summary.split()[len(summary_words):max_length])
48
+
49
  # If show_length is True, append the length of the summary
50
  if show_length:
51
  summary_length = len(summary.split())