Spaces:
Running
Running
File size: 386 Bytes
f89a7d8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 |
import re
import requests
from bs4 import BeautifulSoup
def read_web(url: str, *, timeout: float = 10.0) -> str:
    """Download *url* and return the text extracted from its HTML.

    The response body is parsed with BeautifulSoup's ``html.parser`` and
    flattened with ``get_text()``. Runs of three or more consecutive
    newlines are then collapsed to exactly two.

    Args:
        url: Address to fetch. A falsy value (``''`` or ``None``) skips the
            request entirely.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``.

    Returns:
        The extracted text, or ``''`` when *url* is falsy.

    Raises:
        requests.exceptions.RequestException: Propagated unchanged from
            ``requests.get`` (connection failure, timeout, invalid URL, ...).
    """
    if not url:
        return ''

    # Without an explicit timeout, requests waits forever on a stalled
    # server, so always bound the call.
    # NOTE(review): HTTP error statuses (4xx/5xx) are not raised here; the
    # error page's text is returned as-is, matching the previous behaviour.
    resp = requests.get(url, timeout=timeout)

    soup = BeautifulSoup(resp.text, 'html.parser')
    text = soup.get_text()

    # Collapse long runs of newlines down to a single blank line.
    return re.sub(r'\n{3,}', '\n\n', text)
if __name__ == '__main__':
    # Manual smoke test: fetch a sample page and print its extracted text.
    page_text = read_web('https://en.wikipedia.org/wiki/Wiki')
    print(page_text)
|