Update app.py
app.py
CHANGED
@@ -62,8 +62,8 @@ def extract_text_from_html(url):
     except Exception as e:
         return f"Error extracting text: {str(e)}"
 
-
-
+tokenizer = AutoTokenizer.from_pretrained("Pegasus-X")
+model = AutoModelForSeq2SeqLM.from_pretrained("Pegasus-X")
 
 def summarize_article(article_text):
     """Summarize a given article's text."""
@@ -71,7 +71,7 @@ def summarize_article(article_text):
         if not article_text or len(article_text.split()) < 20:
             return None, "Article content is too short to summarize."
         # Ensure the input text is not too long
-        inputs =
+        inputs = tokenizer(
             article_text,
             return_tensors="pt",
             truncation=True,
@@ -80,7 +80,7 @@ def summarize_article(article_text):
         )
 
         # Generate the summary
-        summary_ids =
+        summary_ids = model.generate(
             **inputs,
             max_new_tokens=800,  # Limit the length of the output
             min_length=100,  # Set a minimum length for the output
@@ -90,15 +90,15 @@ def summarize_article(article_text):
         )
 
         # Decode the output to get the summary
-        summary =
+        summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
 
         return summary, None
     except Exception as e:
         return None, f"Exception during summarization: {str(e)}"
 
 # Load tokenizer and model
-tokenizer = AutoTokenizer.from_pretrained("scieditor/citation-generation-t5")
-model = AutoModelForSeq2SeqLM.from_pretrained("scieditor/citation-generation-t5")
+# tokenizer = AutoTokenizer.from_pretrained("scieditor/citation-generation-t5")
+# model = AutoModelForSeq2SeqLM.from_pretrained("scieditor/citation-generation-t5")
 
 def generate_citation_t5(article_title, citation_style, article_link):
     """Generate a citation using the T5 or LED model."""
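For reference, here is a minimal, self-contained sketch of how the summarization path would fit together after this change, assembled from the hunks above under a few assumptions: "Pegasus-X" as written in the diff is not a full Hub repository ID, so the sketch substitutes the published checkpoint "google/pegasus-x-base", and the tokenizer max_length plus any generate() arguments hidden by the elided diff lines are illustrative placeholders rather than values taken from app.py.

# Sketch of the post-commit summarization path (not the exact file contents).
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

MODEL_ID = "google/pegasus-x-base"  # assumed; the diff writes "Pegasus-X"

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_ID)

def summarize_article(article_text):
    """Summarize a given article's text."""
    try:
        if not article_text or len(article_text.split()) < 20:
            return None, "Article content is too short to summarize."
        # Tokenize, truncating overly long input
        inputs = tokenizer(
            article_text,
            return_tensors="pt",
            truncation=True,
            max_length=4096,  # assumed cap; not visible in the diff
        )
        # Generate the summary
        summary_ids = model.generate(
            **inputs,
            max_new_tokens=800,  # Limit the length of the output
            min_length=100,      # Set a minimum length for the output
        )
        # Decode the generated token IDs back into text
        summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
        return summary, None
    except Exception as e:
        return None, f"Exception during summarization: {str(e)}"

Called as summary, error = summarize_article(text), this returns either (summary, None) on success or (None, error_message) on failure, matching the return convention visible in the diff.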