Update app.py
app.py
CHANGED
@@ -62,8 +62,8 @@ def extract_text_from_html(url):
     except Exception as e:
         return f"Error extracting text: {str(e)}"
 
-
-
+tokenizer = AutoTokenizer.from_pretrained("Pegasus-X")
+model = AutoModelForSeq2SeqLM.from_pretrained("Pegasus-X")
 
 def summarize_article(article_text):
     """Summarize a given article's text."""
@@ -71,7 +71,7 @@ def summarize_article(article_text):
         if not article_text or len(article_text.split()) < 20:
             return None, "Article content is too short to summarize."
         # Ensure the input text is not too long
-        inputs =
+        inputs = tokenizer(
             article_text,
             return_tensors="pt",
             truncation=True,
@@ -80,7 +80,7 @@ def summarize_article(article_text):
         )
 
         # Generate the summary
-        summary_ids =
+        summary_ids = model.generate(
             **inputs,
             max_new_tokens=800,  # Limit the length of the output
             min_length=100,  # Set a minimum length for the output
@@ -90,15 +90,15 @@ def summarize_article(article_text):
         )
 
         # Decode the output to get the summary
-        summary =
+        summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
 
         return summary, None
     except Exception as e:
         return None, f"Exception during summarization: {str(e)}"
 
 # Load tokenizer and model
-tokenizer = AutoTokenizer.from_pretrained("scieditor/citation-generation-t5")
-model = AutoModelForSeq2SeqLM.from_pretrained("scieditor/citation-generation-t5")
+# tokenizer = AutoTokenizer.from_pretrained("scieditor/citation-generation-t5")
+# model = AutoModelForSeq2SeqLM.from_pretrained("scieditor/citation-generation-t5")
 
 def generate_citation_t5(article_title, citation_style, article_link):
     """Generate a citation using the T5 or LED model."""
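For reference, here is a minimal, self-contained sketch of how the summarization path would fit together after this change, assembled from the hunks above under a few assumptions: "Pegasus-X" as written in the diff is not a full Hub repository ID, so the sketch substitutes the published checkpoint "google/pegasus-x-base", and the tokenizer max_length plus any generate() arguments hidden by the elided diff lines are illustrative placeholders rather than values taken from app.py.

# Sketch of the post-commit summarization path (not the exact file contents).
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

MODEL_ID = "google/pegasus-x-base"  # assumed; the diff writes "Pegasus-X"

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_ID)

def summarize_article(article_text):
    """Summarize a given article's text."""
    try:
        if not article_text or len(article_text.split()) < 20:
            return None, "Article content is too short to summarize."
        # Tokenize, truncating overly long input
        inputs = tokenizer(
            article_text,
            return_tensors="pt",
            truncation=True,
            max_length=4096,  # assumed cap; not visible in the diff
        )
        # Generate the summary
        summary_ids = model.generate(
            **inputs,
            max_new_tokens=800,  # Limit the length of the output
            min_length=100,      # Set a minimum length for the output
        )
        # Decode the generated token IDs back into text
        summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
        return summary, None
    except Exception as e:
        return None, f"Exception during summarization: {str(e)}"

Called as summary, error = summarize_article(text), this returns either (summary, None) on success or (None, error_message) on failure, matching the return convention visible in the diff.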