Spaces:

Kawthar12h
/

Text_Summarization

Sleeping

Kawthar12h committed on Sep 14, 2024

Commit

320ee31

verified ·

1 Parent(s): 2c04949

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -20,7 +20,7 @@ def summarize_article(url, min_len, max_len):
         soup = BeautifulSoup(r.text, 'html.parser')
         # To finds all the <h1> (header) and <p> (paragraph) elements in the HTML content
-        results = soup.find_all(['h1','p'])
         # Extract the text content from each element and store it in a list called text
         text = [result.text for result in results]
@@ -29,7 +29,6 @@ def summarize_article(url, min_len, max_len):
         ARTICLE = ' '.join(text)
         # Replace sentence-ending punctuation with a special token (<eos>) . This helps split the article into smaller chunks for summarization.
-        ARTICLE = ARTICLE.replace('\n', '')
         ARTICLE = ARTICLE.replace('.', '.<eos>')
         ARTICLE = ARTICLE.replace('?', '?<eos>')
         ARTICLE = ARTICLE.replace('!', '!<eos>')

         soup = BeautifulSoup(r.text, 'html.parser')
         # To finds all the <h1> (header) and <p> (paragraph) elements in the HTML content
+        results = soup.find_all(['h1','h2','p'])
         # Extract the text content from each element and store it in a list called text
         text = [result.text for result in results]
         ARTICLE = ' '.join(text)
         # Replace sentence-ending punctuation with a special token (<eos>) . This helps split the article into smaller chunks for summarization.
         ARTICLE = ARTICLE.replace('.', '.<eos>')
         ARTICLE = ARTICLE.replace('?', '?<eos>')
         ARTICLE = ARTICLE.replace('!', '!<eos>')