Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -26,6 +26,7 @@ def get_root_domain(url):
|
|
| 26 |
else:
|
| 27 |
return domain
|
| 28 |
|
|
|
|
| 29 |
def filter_type(_type: str):
|
| 30 |
types = [
|
| 31 |
'application/javascript', 'application/x-javascript', 'text/css', 'webp', 'image/png', 'image/gif',
|
|
@@ -41,10 +42,10 @@ def main():
|
|
| 41 |
|
| 42 |
@app.get("/chrome")
|
| 43 |
def chrome(url:str=None,wait:int=5,header:str=None,cookie:str=None):
|
| 44 |
-
|
| 45 |
caps = {
|
| 46 |
"browserName": "chrome",
|
| 47 |
-
'goog:loggingPrefs': {'performance': 'ALL'}
|
| 48 |
}
|
| 49 |
|
| 50 |
# 必须有目标url
|
|
@@ -78,6 +79,8 @@ def chrome(url:str=None,wait:int=5,header:str=None,cookie:str=None):
|
|
| 78 |
|
| 79 |
# 设置为无头模式
|
| 80 |
options.add_argument('--headless')
|
|
|
|
|
|
|
| 81 |
for key, value in caps.items():
|
| 82 |
options.set_capability(key, value)
|
| 83 |
|
|
@@ -124,6 +127,7 @@ def chrome(url:str=None,wait:int=5,header:str=None,cookie:str=None):
|
|
| 124 |
# 完全加载完成时,页面是否有发生过 301 302 跳转过
|
| 125 |
is_jump = (target_url != current_url)
|
| 126 |
|
|
|
|
| 127 |
performance_log = driver.get_log('performance') # 获取名称为 performance 的日志
|
| 128 |
for packet in performance_log:
|
| 129 |
message = json.loads(packet.get('message')).get('message') # 获取message的数据
|
|
@@ -135,7 +139,10 @@ def chrome(url:str=None,wait:int=5,header:str=None,cookie:str=None):
|
|
| 135 |
requestId = message.get('params').get('requestId') # 唯一的请求标识符。相当于该请求的身份证
|
| 136 |
url = message.get('params').get('response').get('url') # 获取 该请求 url
|
| 137 |
try:
|
|
|
|
| 138 |
resp = driver.execute_cdp_cmd('Network.getResponseBody', {'requestId': requestId}) # selenium调用 cdp
|
|
|
|
|
|
|
| 139 |
print(f'type: {packet_type} url: {url}')
|
| 140 |
print(f'response: {resp}')
|
| 141 |
print()
|
|
@@ -146,7 +153,8 @@ def chrome(url:str=None,wait:int=5,header:str=None,cookie:str=None):
|
|
| 146 |
"url": current_url,
|
| 147 |
"page_source": page_source,
|
| 148 |
"cookies": cookies,
|
| 149 |
-
"is_jump": is_jump
|
|
|
|
| 150 |
}
|
| 151 |
|
| 152 |
driver.quit()
|
|
|
|
| 26 |
else:
|
| 27 |
return domain
|
| 28 |
|
| 29 |
+
# 网络抓包内容过滤
|
| 30 |
def filter_type(_type: str):
|
| 31 |
types = [
|
| 32 |
'application/javascript', 'application/x-javascript', 'text/css', 'webp', 'image/png', 'image/gif',
|
|
|
|
| 42 |
|
| 43 |
@app.get("/chrome")
|
| 44 |
def chrome(url:str=None,wait:int=5,header:str=None,cookie:str=None):
|
| 45 |
+
# 设置日志性能监听参数
|
| 46 |
caps = {
|
| 47 |
"browserName": "chrome",
|
| 48 |
+
'goog:loggingPrefs': {'performance': 'ALL'}
|
| 49 |
}
|
| 50 |
|
| 51 |
# 必须有目标url
|
|
|
|
| 79 |
|
| 80 |
# 设置为无头模式
|
| 81 |
options.add_argument('--headless')
|
| 82 |
+
|
| 83 |
+
# 开启日志性能监听
|
| 84 |
for key, value in caps.items():
|
| 85 |
options.set_capability(key, value)
|
| 86 |
|
|
|
|
| 127 |
# 完全加载完成时,页面是否有发生过 301 302 跳转过
|
| 128 |
is_jump = (target_url != current_url)
|
| 129 |
|
| 130 |
+
network = []
|
| 131 |
performance_log = driver.get_log('performance') # 获取名称为 performance 的日志
|
| 132 |
for packet in performance_log:
|
| 133 |
message = json.loads(packet.get('message')).get('message') # 获取message的数据
|
|
|
|
| 139 |
requestId = message.get('params').get('requestId') # 唯一的请求标识符。相当于该请求的身份证
|
| 140 |
url = message.get('params').get('response').get('url') # 获取 该请求 url
|
| 141 |
try:
|
| 142 |
+
network.append({"url":url, "type":packet_type})
|
| 143 |
resp = driver.execute_cdp_cmd('Network.getResponseBody', {'requestId': requestId}) # selenium调用 cdp
|
| 144 |
+
request_headers = driver.execute_cdp_cmd('Network.getRequestHeaders', {'requestId': requestId})
|
| 145 |
+
print(f'request_headers: {request_headers}')
|
| 146 |
print(f'type: {packet_type} url: {url}')
|
| 147 |
print(f'response: {resp}')
|
| 148 |
print()
|
|
|
|
| 153 |
"url": current_url,
|
| 154 |
"page_source": page_source,
|
| 155 |
"cookies": cookies,
|
| 156 |
+
"is_jump": is_jump,
|
| 157 |
+
"network": network,
|
| 158 |
}
|
| 159 |
|
| 160 |
driver.quit()
|