Alberto Carmona committed on
Commit
428a5aa
·
1 Parent(s): 0e87748

Add function to extract text from URL

Browse files
Files changed (2) hide show
  1. functions.py +8 -0
  2. requirements.txt +2 -0
functions.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ from bs4 import BeautifulSoup
3
+
4
def extract_text(url, *, timeout=10):
    """Download the page at *url* and return the text of its ``<p>`` elements.

    Args:
        url: Address of the page, passed to ``requests.get``.
        timeout: Seconds to wait for the server, passed through to
            ``requests.get``. Without a timeout the request can block
            indefinitely when the host never responds.

    Returns:
        The text of every ``<p>`` element found by ``soup.find_all("p")``,
        joined with a blank line between paragraphs. An empty string when
        the page contains no ``<p>`` elements.

    Raises:
        requests.exceptions.Timeout: If the server does not respond within
            *timeout* seconds.

    Note:
        The HTTP status code is not checked, so the paragraphs of an error
        page (e.g. a 404 body) are returned like those of any other page.
    """
    response = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(response.text, "html.parser")
    # A generator expression keeps the join lazy and avoids the lambda.
    return "\n\n".join(paragraph.text for paragraph in soup.find_all("p"))
requirements.txt CHANGED
@@ -0,0 +1,2 @@
 
 
 
1
+ requests
2
+ beautifulsoup4