Spaces:
Sleeping
Sleeping
UPDATE: urls
Browse files
- functions.py +5 -1
functions.py
CHANGED
|
@@ -287,7 +287,11 @@ def getLinks(url: str, timeout = 30):
|
|
| 287 |
break
|
| 288 |
else:
|
| 289 |
uniqueLinks = uniqueLinks.union(set(getLinksFromPage(link)))
|
| 290 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 291 |
|
| 292 |
|
| 293 |
def getTextLength(text: str):
|
|
|
|
| 287 |
break
|
| 288 |
else:
|
| 289 |
uniqueLinks = uniqueLinks.union(set(getLinksFromPage(link)))
|
| 290 |
+
allLinks = {}
|
| 291 |
+
foundLinks = list(set([x[:len(x) - 1] if x[-1] == "/" else x for x in uniqueLinks]))
|
| 292 |
+
for link in foundLinks:
|
| 293 |
+
allLinks[link] = len(BeautifulSoup(requests.get(link).text, "lxml").body.get_text(" ", strip = True))
|
| 294 |
+
return allLinks
|
| 295 |
|
| 296 |
|
| 297 |
def getTextLength(text: str):
|