Kawthar12h committed on
Commit
320ee31
·
verified ·
1 Parent(s): 2c04949

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1 -2
app.py CHANGED
@@ -20,7 +20,7 @@ def summarize_article(url, min_len, max_len):
20
  soup = BeautifulSoup(r.text, 'html.parser')
21
 
22
  # Find all the <h1> (header) and <p> (paragraph) elements in the HTML content
23
- results = soup.find_all(['h1','p'])
24
 
25
  # Extract the text content from each element and store it in a list called text
26
  text = [result.text for result in results]
@@ -29,7 +29,6 @@ def summarize_article(url, min_len, max_len):
29
  ARTICLE = ' '.join(text)
30
 
31
  # Append a special token (<eos>) after each sentence-ending punctuation mark ('.', '?', '!'). This helps split the article into smaller chunks for summarization.
32
- ARTICLE = ARTICLE.replace('\n', '')
33
  ARTICLE = ARTICLE.replace('.', '.<eos>')
34
  ARTICLE = ARTICLE.replace('?', '?<eos>')
35
  ARTICLE = ARTICLE.replace('!', '!<eos>')
 
20
  soup = BeautifulSoup(r.text, 'html.parser')
21
 
22
  # Find all the <h1> and <h2> (header) and <p> (paragraph) elements in the HTML content
23
+ results = soup.find_all(['h1','h2','p'])
24
 
25
  # Extract the text content from each element and store it in a list called text
26
  text = [result.text for result in results]
 
29
  ARTICLE = ' '.join(text)
30
 
31
  # Append a special token (<eos>) after each sentence-ending punctuation mark ('.', '?', '!'). This helps split the article into smaller chunks for summarization.
 
32
  ARTICLE = ARTICLE.replace('.', '.<eos>')
33
  ARTICLE = ARTICLE.replace('?', '?<eos>')
34
  ARTICLE = ARTICLE.replace('!', '!<eos>')