Sibinraj committed on
Commit
90c4eb5
·
verified ·
1 Parent(s): 6b2f148

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -2
app.py CHANGED
@@ -20,14 +20,32 @@ def summarize_text(text, max_length, show_length):
20
  # Generate the summary
21
  summary_ids = model.generate(
22
  inputs,
23
- max_length=max_length + 2,
24
- min_length=max_length,
25
  num_beams=5,
 
 
26
  )
27
 
28
  # Decode the summary
29
  summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
30
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  # If show_length is True, append the length of the summary
32
  if show_length:
33
  summary_length = len(summary.split())
 
20
  # Generate the summary
21
  summary_ids = model.generate(
22
  inputs,
23
+ max_length=max_length + 20, # Allow some buffer
24
+ min_length=10, # Set a reasonable minimum length
25
  num_beams=5,
26
+ no_repeat_ngram_size=2,
27
+ early_stopping=True
28
  )
29
 
30
  # Decode the summary
31
  summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
32
 
33
+ # Trim the summary to the desired length
34
+ summary_words = summary.split()
35
+ if len(summary_words) > max_length:
36
+ summary = ' '.join(summary_words[:max_length])
37
+ elif len(summary_words) < max_length:
38
+ additional_tokens = model.generate(
39
+ tokenizer.encode(" ".join(summary_words), return_tensors='pt'),
40
+ max_length=max_length - len(summary_words) + len(summary_words),
41
+ min_length=max_length - len(summary_words) + len(summary_words),
42
+ num_beams=5,
43
+ no_repeat_ngram_size=2,
44
+ early_stopping=True
45
+ )
46
+ additional_summary = tokenizer.decode(additional_tokens[0], skip_special_tokens=True)
47
+ summary += ' ' + ' '.join(additional_summary.split()[len(summary_words):max_length])
48
+
49
  # If show_length is True, append the length of the summary
50
  if show_length:
51
  summary_length = len(summary.split())