Spaces:

smfaiz
/

research-assistant

Sleeping

smfaiz committed on Sep 1, 2024

Commit

a1397bb

verified ·

1 Parent(s): 9205504

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -62,8 +62,8 @@ def extract_text_from_html(url):
     except Exception as e:
         return f"Error extracting text: {str(e)}"
-tokenizer_s = AutoTokenizer.from_pretrained("pszemraj/long-t5-tglobal-base-16384-book-summary")
-model_s = AutoModelForSeq2SeqLM.from_pretrained("pszemraj/long-t5-tglobal-base-16384-book-summary")
 def summarize_article(article_text):
     """Summarize a given article's text."""
@@ -82,10 +82,10 @@ def summarize_article(article_text):
         # Generate the summary
         summary_ids = model_s.generate(
             **inputs,
-            max_new_tokens=400,  # Limit the length of the output
             min_length=100,      # Set a minimum length for the output
             # #length_penalty='1.0',  # Adjust length penalty to encourage longer output
-            # no_repeat_ngram_size=3,    # Avoid repetition of phrases
             early_stopping=True
         )

     except Exception as e:
         return f"Error extracting text: {str(e)}"
+tokenizer_s = AutoTokenizer.from_pretrained("t5-small")
+model_s = AutoModelForSeq2SeqLM.from_pretrained("t5-small")
 def summarize_article(article_text):
     """Summarize a given article's text."""
         # Generate the summary
         summary_ids = model_s.generate(
             **inputs,
+            max_new_tokens=800,  # Limit the length of the output
             min_length=100,      # Set a minimum length for the output
             # #length_penalty='1.0',  # Adjust length penalty to encourage longer output
+            no_repeat_ngram_size=3,    # Avoid repetition of phrases
             early_stopping=True
         )