File size: 386 Bytes
f89a7d8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
import re

import requests
from bs4 import BeautifulSoup


def read_web(url: str, timeout: float = 10.0) -> str:
    """Fetch a web page and return its visible text.

    The page is downloaded with ``requests.get``, parsed with
    BeautifulSoup's ``html.parser`` backend, and reduced to its text
    content. Runs of three or more consecutive newlines are collapsed to
    exactly two, so the result has at most one blank line in a row.

    Args:
        url: Address of the page to fetch. A falsy value (empty string)
            short-circuits and returns ``''`` without any network access.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``.

    Returns:
        The extracted text, or ``''`` when ``url`` is empty.

    Raises:
        requests.exceptions.RequestException: Propagated unchanged from
            ``requests.get`` (connection failure, timeout, invalid URL).
    """
    if not url:
        return ''
    # Without a timeout, requests waits indefinitely on a stalled server.
    resp = requests.get(url, timeout=timeout)
    # NOTE: the HTTP status is not checked, so the body of an error page
    # (404, 500, ...) is parsed and returned like any other page.
    soup = BeautifulSoup(resp.text, 'html.parser')
    text = soup.get_text()
    # Collapse 3+ newlines to a single blank line.
    return re.sub(r'\n{3,}', '\n\n', text)


if __name__ == '__main__':
    # Manual smoke test: fetch a sample page and dump its extracted text.
    print(read_web('https://en.wikipedia.org/wiki/Wiki'))