Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -20,7 +20,7 @@ def summarize_article(url, min_len, max_len):
|
|
20 |
soup = BeautifulSoup(r.text, 'html.parser')
|
21 |
|
22 |
# Find all the <h1> (header) and <p> (paragraph) elements in the HTML content
|
23 |
-
results = soup.find_all(['h1','p'])
|
24 |
|
25 |
# Extract the text content from each element and store it in a list called text
|
26 |
text = [result.text for result in results]
|
@@ -29,7 +29,6 @@ def summarize_article(url, min_len, max_len):
|
|
29 |
ARTICLE = ' '.join(text)
|
30 |
|
31 |
# Append a special token (<eos>) after each sentence-ending punctuation mark. This helps split the article into smaller chunks for summarization.
|
32 |
-
ARTICLE = ARTICLE.replace('\n', '')
|
33 |
ARTICLE = ARTICLE.replace('.', '.<eos>')
|
34 |
ARTICLE = ARTICLE.replace('?', '?<eos>')
|
35 |
ARTICLE = ARTICLE.replace('!', '!<eos>')
|
|
|
20 |
soup = BeautifulSoup(r.text, 'html.parser')
|
21 |
|
22 |
# Find all the <h1> and <h2> (header) and <p> (paragraph) elements in the HTML content
|
23 |
+
results = soup.find_all(['h1','h2','p'])
|
24 |
|
25 |
# Extract the text content from each element and store it in a list called text
|
26 |
text = [result.text for result in results]
|
|
|
29 |
ARTICLE = ' '.join(text)
|
30 |
|
31 |
# Append a special token (<eos>) after each sentence-ending punctuation mark. This helps split the article into smaller chunks for summarization.
|
|
|
32 |
ARTICLE = ARTICLE.replace('.', '.<eos>')
|
33 |
ARTICLE = ARTICLE.replace('?', '?<eos>')
|
34 |
ARTICLE = ARTICLE.replace('!', '!<eos>')
|