Spaces:
Runtime error
Runtime error
Alberto Carmona
committed on
Commit
·
428a5aa
1
Parent(s):
0e87748
Add function to extract text from URL
Browse files
- functions.py +8 -0
- requirements.txt +2 -0
functions.py
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import requests
|
2 |
+
from bs4 import BeautifulSoup
|
3 |
+
|
4 |
+
def extract_text(url, timeout=10):
    """Fetch a web page and return the text of its ``<p>`` elements.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``. Without a timeout the request
            can block indefinitely on an unresponsive host.

    Returns:
        The text of every ``<p>`` element, in document order, joined by
        blank lines. An empty string when the page has no ``<p>`` elements.

    Raises:
        requests.exceptions.RequestException: If the request fails or
            times out.
    """
    response = requests.get(url, timeout=timeout)
    # NOTE: the HTTP status is not checked here, so the paragraphs of an
    # error page (404, 500, ...) are returned like those of any other page.
    soup = BeautifulSoup(response.text, "html.parser")
    return "\n\n".join(p.text for p in soup.find_all("p"))
|
requirements.txt
CHANGED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
requests
|
2 |
+
beautifulsoup4
|