Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
@@ -26,6 +26,7 @@ def get_root_domain(url):
|
|
26 |
else:
|
27 |
return domain
|
28 |
|
|
|
29 |
def filter_type(_type: str):
|
30 |
types = [
|
31 |
'application/javascript', 'application/x-javascript', 'text/css', 'webp', 'image/png', 'image/gif',
|
@@ -41,10 +42,10 @@ def main():
|
|
41 |
|
42 |
@app.get("/chrome")
|
43 |
def chrome(url:str=None,wait:int=5,header:str=None,cookie:str=None):
|
44 |
-
|
45 |
caps = {
|
46 |
"browserName": "chrome",
|
47 |
-
'goog:loggingPrefs': {'performance': 'ALL'}
|
48 |
}
|
49 |
|
50 |
# 必须有目标url
|
@@ -78,6 +79,8 @@ def chrome(url:str=None,wait:int=5,header:str=None,cookie:str=None):
|
|
78 |
|
79 |
# 设置为无头模式
|
80 |
options.add_argument('--headless')
|
|
|
|
|
81 |
for key, value in caps.items():
|
82 |
options.set_capability(key, value)
|
83 |
|
@@ -124,6 +127,7 @@ def chrome(url:str=None,wait:int=5,header:str=None,cookie:str=None):
|
|
124 |
# 完全加载完成时,页面是否有发生过 301 302 跳转过
|
125 |
is_jump = (target_url != current_url)
|
126 |
|
|
|
127 |
performance_log = driver.get_log('performance') # 获取名称为 performance 的日志
|
128 |
for packet in performance_log:
|
129 |
message = json.loads(packet.get('message')).get('message') # 获取message的数据
|
@@ -135,7 +139,10 @@ def chrome(url:str=None,wait:int=5,header:str=None,cookie:str=None):
|
|
135 |
requestId = message.get('params').get('requestId') # 唯一的请求标识符。相当于该请求的身份证
|
136 |
url = message.get('params').get('response').get('url') # 获取 该请求 url
|
137 |
try:
|
|
|
138 |
resp = driver.execute_cdp_cmd('Network.getResponseBody', {'requestId': requestId}) # selenium调用 cdp
|
|
|
|
|
139 |
print(f'type: {packet_type} url: {url}')
|
140 |
print(f'response: {resp}')
|
141 |
print()
|
@@ -146,7 +153,8 @@ def chrome(url:str=None,wait:int=5,header:str=None,cookie:str=None):
|
|
146 |
"url": current_url,
|
147 |
"page_source": page_source,
|
148 |
"cookies": cookies,
|
149 |
-
"is_jump": is_jump
|
|
|
150 |
}
|
151 |
|
152 |
driver.quit()
|
|
|
26 |
else:
|
27 |
return domain
|
28 |
|
29 |
+
# 网络抓包内容过滤
|
30 |
def filter_type(_type: str):
|
31 |
types = [
|
32 |
'application/javascript', 'application/x-javascript', 'text/css', 'webp', 'image/png', 'image/gif',
|
|
|
42 |
|
43 |
@app.get("/chrome")
|
44 |
def chrome(url:str=None,wait:int=5,header:str=None,cookie:str=None):
|
45 |
+
# 设置日志性能监听参数
|
46 |
caps = {
|
47 |
"browserName": "chrome",
|
48 |
+
'goog:loggingPrefs': {'performance': 'ALL'}
|
49 |
}
|
50 |
|
51 |
# 必须有目标url
|
|
|
79 |
|
80 |
# 设置为无头模式
|
81 |
options.add_argument('--headless')
|
82 |
+
|
83 |
+
# 开启日志性能监听
|
84 |
for key, value in caps.items():
|
85 |
options.set_capability(key, value)
|
86 |
|
|
|
127 |
# 完全加载完成时,页面是否有发生过 301 302 跳转过
|
128 |
is_jump = (target_url != current_url)
|
129 |
|
130 |
+
network = []
|
131 |
performance_log = driver.get_log('performance') # 获取名称为 performance 的日志
|
132 |
for packet in performance_log:
|
133 |
message = json.loads(packet.get('message')).get('message') # 获取message的数据
|
|
|
139 |
requestId = message.get('params').get('requestId') # 唯一的请求标识符。相当于该请求的身份证
|
140 |
url = message.get('params').get('response').get('url') # 获取 该请求 url
|
141 |
try:
|
142 |
+
network.append({"url":url, "type":packet_type})
|
143 |
resp = driver.execute_cdp_cmd('Network.getResponseBody', {'requestId': requestId}) # selenium调用 cdp
|
144 |
+
request_headers = driver.execute_cdp_cmd('Network.getRequestHeaders', {'requestId': requestId})
|
145 |
+
print(f'request_headers: {request_headers}')
|
146 |
print(f'type: {packet_type} url: {url}')
|
147 |
print(f'response: {resp}')
|
148 |
print()
|
|
|
153 |
"url": current_url,
|
154 |
"page_source": page_source,
|
155 |
"cookies": cookies,
|
156 |
+
"is_jump": is_jump,
|
157 |
+
"network": network,
|
158 |
}
|
159 |
|
160 |
driver.quit()
|