Spaces:
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -20,14 +20,32 @@ def summarize_text(text, max_length, show_length):
|
|
20 |
# Generate the summary
|
21 |
summary_ids = model.generate(
|
22 |
inputs,
|
23 |
-
max_length=max_length +
|
24 |
-
min_length=
|
25 |
num_beams=5,
|
|
|
|
|
26 |
)
|
27 |
|
28 |
# Decode the summary
|
29 |
summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
|
30 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
31 |
# If show_length is True, append the length of the summary
|
32 |
if show_length:
|
33 |
summary_length = len(summary.split())
|
|
|
20 |
# Generate the summary
|
21 |
summary_ids = model.generate(
|
22 |
inputs,
|
23 |
+
max_length=max_length + 20, # Allow some buffer
|
24 |
+
min_length=10, # Set a reasonable minimum length
|
25 |
num_beams=5,
|
26 |
+
no_repeat_ngram_size=2,
|
27 |
+
early_stopping=True
|
28 |
)
|
29 |
|
30 |
# Decode the summary
|
31 |
summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
|
32 |
|
33 |
+
# Trim the summary to the desired length
|
34 |
+
summary_words = summary.split()
|
35 |
+
if len(summary_words) > max_length:
|
36 |
+
summary = ' '.join(summary_words[:max_length])
|
37 |
+
elif len(summary_words) < max_length:
|
38 |
+
additional_tokens = model.generate(
|
39 |
+
tokenizer.encode(" ".join(summary_words), return_tensors='pt'),
|
40 |
+
max_length=max_length - len(summary_words) + len(summary_words),
|
41 |
+
min_length=max_length - len(summary_words) + len(summary_words),
|
42 |
+
num_beams=5,
|
43 |
+
no_repeat_ngram_size=2,
|
44 |
+
early_stopping=True
|
45 |
+
)
|
46 |
+
additional_summary = tokenizer.decode(additional_tokens[0], skip_special_tokens=True)
|
47 |
+
summary += ' ' + ' '.join(additional_summary.split()[len(summary_words):max_length])
|
48 |
+
|
49 |
# If show_length is True, append the length of the summary
|
50 |
if show_length:
|
51 |
summary_length = len(summary.split())
|