smfaiz committed on
Commit
8fc80cf
·
verified ·
1 Parent(s): bba1cf7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -7
app.py CHANGED
@@ -62,8 +62,8 @@ def extract_text_from_html(url):
62
  except Exception as e:
63
  return f"Error extracting text: {str(e)}"
64
 
65
- tokenizer_s = AutoTokenizer.from_pretrained("scieditor/extractive-summarization")
66
- model_s = AutoModelForSeq2SeqLM.from_pretrained("scieditor/extractive-summarization")
67
 
68
  def summarize_article(article_text):
69
  """Summarize a given article's text."""
@@ -71,7 +71,7 @@ def summarize_article(article_text):
71
  if not article_text or len(article_text.split()) < 20:
72
  return None, "Article content is too short to summarize."
73
  # Ensure the input text is not too long
74
- inputs = tokenizer_s(
75
  article_text,
76
  return_tensors="pt",
77
  truncation=True,
@@ -80,7 +80,7 @@ def summarize_article(article_text):
80
  )
81
 
82
  # Generate the summary
83
- summary_ids = model_s.generate(
84
  **inputs,
85
  max_new_tokens=800, # Limit the length of the output
86
  min_length=100, # Set a minimum length for the output
@@ -90,15 +90,15 @@ def summarize_article(article_text):
90
  )
91
 
92
  # Decode the output to get the summary
93
- summary = tokenizer_s.decode(summary_ids[0], skip_special_tokens=True)
94
 
95
  return summary, None
96
  except Exception as e:
97
  return None, f"Exception during summarization: {str(e)}"
98
 
99
  # Load tokenizer and model
100
- tokenizer = AutoTokenizer.from_pretrained("scieditor/citation-generation-t5")
101
- model = AutoModelForSeq2SeqLM.from_pretrained("scieditor/citation-generation-t5")
102
 
103
  def generate_citation_t5(article_title, citation_style, article_link):
104
  """Generate a citation using the T5 or LED model."""
 
62
  except Exception as e:
63
  return f"Error extracting text: {str(e)}"
64
 
65
+ tokenizer = AutoTokenizer.from_pretrained("Pegasus-X")
66
+ model = AutoModelForSeq2SeqLM.from_pretrained("Pegasus-X")
67
 
68
  def summarize_article(article_text):
69
  """Summarize a given article's text."""
 
71
  if not article_text or len(article_text.split()) < 20:
72
  return None, "Article content is too short to summarize."
73
  # Ensure the input text is not too long
74
+ inputs = tokenizer(
75
  article_text,
76
  return_tensors="pt",
77
  truncation=True,
 
80
  )
81
 
82
  # Generate the summary
83
+ summary_ids = model.generate(
84
  **inputs,
85
  max_new_tokens=800, # Limit the length of the output
86
  min_length=100, # Set a minimum length for the output
 
90
  )
91
 
92
  # Decode the output to get the summary
93
+ summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
94
 
95
  return summary, None
96
  except Exception as e:
97
  return None, f"Exception during summarization: {str(e)}"
98
 
99
  # Load tokenizer and model
100
+ # tokenizer = AutoTokenizer.from_pretrained("scieditor/citation-generation-t5")
101
+ # model = AutoModelForSeq2SeqLM.from_pretrained("scieditor/citation-generation-t5")
102
 
103
  def generate_citation_t5(article_title, citation_style, article_link):
104
  """Generate a citation using the T5 or LED model."""