Hansimov committed on
Commit
75589ce
·
1 Parent(s): 9d61879

:gem: [Feature] ProxyScanner and ProxyRowExtractor

Browse files
networks/__init__.py CHANGED
@@ -8,3 +8,4 @@ from .conversation_create_headers_constructor import (
8
  )
9
  from .message_outputer import OpenaiStreamOutputer
10
  from .message_parser import MessageParser
 
 
8
  )
9
  from .message_outputer import OpenaiStreamOutputer
10
  from .message_parser import MessageParser
11
+ from .proxy_scanner import ProxyScanner
networks/proxy_scanner.py ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import cssutils
2
+ import re
3
+ from bs4 import BeautifulSoup
4
+ from DrissionPage import WebPage, ChromiumOptions
5
+ from pprint import pprint
6
+
7
+
8
class ProxyRowExtractor:
    """Parse the proxynova.com proxy-list HTML table into row dicts."""

    # Column order of the proxynova table. Composite columns are split
    # into separate keys during extraction.
    KEYS = [
        "ip",
        "port",
        "check_datetime_and_interval",
        "bandwidth_and_latency",
        "stability_and_samples",
        "country",
        "anonymity",
    ]

    def extract(self, table_html):
        """Extract proxy rows from ``table_html``.

        Args:
            table_html: HTML of the ``#tbl_proxy_list`` table (string).

        Returns:
            list[dict]: one dict per data row. Rows without ``<td>`` cells
            (e.g. the ``<th>`` header row) are skipped.
        """
        soup = BeautifulSoup(table_html, "html.parser")
        row_dicts = []
        for row in soup.find_all("tr"):
            cells = row.find_all("td")
            if not cells:
                # Header or empty rows have no <td>; skip them instead of
                # appending an empty dict.
                continue
            row_dict = {}
            for key, cell in zip(self.KEYS, cells):
                cell_text = re.sub(r"\s+", " ", cell.text.strip())
                if key == "bandwidth_and_latency":
                    # Bandwidth is encoded as the CSS width of a progress bar.
                    progress_bar = cell.find("div", class_="progress-bar-inner")
                    row_dict["bandwidth"] = cssutils.parseStyle(
                        progress_bar["style"]
                    )["width"]
                    row_dict["latency"] = cell_text
                elif key == "check_datetime_and_interval":
                    row_dict["check_datetime"] = cell.find("time").attrs["datetime"]
                    row_dict["check_interval"] = cell_text
                elif key == "stability_and_samples":
                    # Cell text looks like "85% (1234)"; guard against
                    # unexpected formats instead of raising AttributeError.
                    res = re.match(r"(\d+%)\s*\((\d+)\)", cell_text)
                    if res:
                        row_dict["stability"] = res.group(1)
                        row_dict["samples"] = res.group(2)
                else:
                    row_dict[key] = cell_text
            row_dicts.append(row_dict)
        # Bug fix: the original built row_dicts but never returned it.
        return row_dicts
51
+
52
+
53
class ProxyScanner:
    """Fetch proxynova.com proxy-server list pages with a Chromium page
    and extract the proxy rows."""

    def __init__(self, scan_proxy=None):
        """
        Args:
            scan_proxy: optional proxy URL (e.g. "http://localhost:11111")
                that the scanner itself browses through. When None, no
                --proxy-server argument is set.
        """
        self.scan_proxy = scan_proxy
        self.init_proxy_servers()

    def init_proxy_servers(self):
        """Build the per-country proxy-server-list URLs.

        Source: https://www.proxynova.com/proxy-server-list
        """
        self.proxy_server_list_url_base = (
            "https://www.proxynova.com/proxy-server-list/country"
        )
        countries = ["ar", "br", "co", "de", "id", "in", "mx", "sg", "us"]
        self.proxy_server_list_urls = [
            f"{self.proxy_server_list_url_base}-{country}" for country in countries
        ]

    def run(self):
        """Open the last country-list page, wait for the proxy table to
        render, and extract its rows into ``self.proxy_rows``."""
        proxy_url = self.proxy_server_list_urls[-1]
        options = ChromiumOptions()
        options.set_argument("--incognito")
        if self.scan_proxy:
            # Fix: original used a pointless f-string (f"--proxy-server"
            # with no placeholder) and passed self.scan_proxy even when
            # it was None.
            options.set_argument("--proxy-server", self.scan_proxy)
        self.options = options
        page = WebPage(driver_or_options=self.options)
        page.get(proxy_url)
        print(page.title)
        page.wait.ele_display("#tbl_proxy_list")
        ele = page.ele("#tbl_proxy_list")
        extractor = ProxyRowExtractor()
        # Fix: the extracted rows were previously discarded; keep them.
        self.proxy_rows = extractor.extract(ele.html)
        self.page = page
84
+
85
+
86
if __name__ == "__main__":
    # Manual smoke test: scan through a local proxy listening on port 11111.
    proxy_scanner = ProxyScanner(scan_proxy="http://localhost:11111")
    proxy_scanner.run()
requirements.txt CHANGED
@@ -1,4 +1,6 @@
1
  aiohttp
 
 
2
  fastapi
3
  httpx
4
  openai
 
1
  aiohttp
2
+ cssutils
3
+ DrissionPage
4
  fastapi
5
  httpx
6
  openai