Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -26,7 +26,6 @@ def get_root_domain(url):
|
|
| 26 |
else:
|
| 27 |
return domain
|
| 28 |
|
| 29 |
-
# 网络抓包内容过滤
|
| 30 |
def filter_type(_type: str):
|
| 31 |
types = [
|
| 32 |
'application/javascript', 'application/x-javascript', 'text/css', 'webp', 'image/png', 'image/gif',
|
|
@@ -42,10 +41,10 @@ def main():
|
|
| 42 |
|
| 43 |
@app.get("/chrome")
|
| 44 |
def chrome(url:str=None,wait:int=5,header:str=None,cookie:str=None):
|
| 45 |
-
|
| 46 |
caps = {
|
| 47 |
"browserName": "chrome",
|
| 48 |
-
'goog:loggingPrefs': {'performance': 'ALL'}
|
| 49 |
}
|
| 50 |
|
| 51 |
# 必须有目标url
|
|
@@ -79,8 +78,6 @@ def chrome(url:str=None,wait:int=5,header:str=None,cookie:str=None):
|
|
| 79 |
|
| 80 |
# 设置为无头模式
|
| 81 |
options.add_argument('--headless')
|
| 82 |
-
|
| 83 |
-
# 开启日志性能监听
|
| 84 |
for key, value in caps.items():
|
| 85 |
options.set_capability(key, value)
|
| 86 |
|
|
@@ -126,9 +123,7 @@ def chrome(url:str=None,wait:int=5,header:str=None,cookie:str=None):
|
|
| 126 |
|
| 127 |
# 完全加载完成时,页面是否有发生过 301 302 跳转过
|
| 128 |
is_jump = (target_url != current_url)
|
| 129 |
-
|
| 130 |
-
network = []
|
| 131 |
-
# 从日志性能监听中抓取网络包(Network),并进行过滤
|
| 132 |
performance_log = driver.get_log('performance') # 获取名称为 performance 的日志
|
| 133 |
for packet in performance_log:
|
| 134 |
message = json.loads(packet.get('message')).get('message') # 获取message的数据
|
|
@@ -141,10 +136,9 @@ def chrome(url:str=None,wait:int=5,header:str=None,cookie:str=None):
|
|
| 141 |
url = message.get('params').get('response').get('url') # 获取 该请求 url
|
| 142 |
try:
|
| 143 |
resp = driver.execute_cdp_cmd('Network.getResponseBody', {'requestId': requestId}) # selenium调用 cdp
|
| 144 |
-
|
| 145 |
-
network.append({"url":url, "type":packet_type})
|
| 146 |
print(f'response: {resp}')
|
| 147 |
-
print(
|
| 148 |
except WebDriverException: # 忽略异常
|
| 149 |
pass
|
| 150 |
|
|
@@ -152,14 +146,11 @@ def chrome(url:str=None,wait:int=5,header:str=None,cookie:str=None):
|
|
| 152 |
"url": current_url,
|
| 153 |
"page_source": page_source,
|
| 154 |
"cookies": cookies,
|
| 155 |
-
"is_jump": is_jump
|
| 156 |
-
"network": network,
|
| 157 |
}
|
| 158 |
|
| 159 |
driver.quit()
|
| 160 |
return {"code": 200,"data":data}
|
| 161 |
|
| 162 |
if __name__ == '__main__':
|
| 163 |
-
uvicorn.run(app='app:app', host="0.0.0.0", port=7860)
|
| 164 |
-
|
| 165 |
-
|
|
|
|
| 26 |
else:
|
| 27 |
return domain
|
| 28 |
|
|
|
|
| 29 |
def filter_type(_type: str):
|
| 30 |
types = [
|
| 31 |
'application/javascript', 'application/x-javascript', 'text/css', 'webp', 'image/png', 'image/gif',
|
|
|
|
| 41 |
|
| 42 |
@app.get("/chrome")
|
| 43 |
def chrome(url:str=None,wait:int=5,header:str=None,cookie:str=None):
|
| 44 |
+
|
| 45 |
caps = {
|
| 46 |
"browserName": "chrome",
|
| 47 |
+
'goog:loggingPrefs': {'performance': 'ALL'} # 开启日志性能监听
|
| 48 |
}
|
| 49 |
|
| 50 |
# 必须有目标url
|
|
|
|
| 78 |
|
| 79 |
# 设置为无头模式
|
| 80 |
options.add_argument('--headless')
|
|
|
|
|
|
|
| 81 |
for key, value in caps.items():
|
| 82 |
options.set_capability(key, value)
|
| 83 |
|
|
|
|
| 123 |
|
| 124 |
# 完全加载完成时,页面是否有发生过 301 302 跳转过
|
| 125 |
is_jump = (target_url != current_url)
|
| 126 |
+
|
|
|
|
|
|
|
| 127 |
performance_log = driver.get_log('performance') # 获取名称为 performance 的日志
|
| 128 |
for packet in performance_log:
|
| 129 |
message = json.loads(packet.get('message')).get('message') # 获取message的数据
|
|
|
|
| 136 |
url = message.get('params').get('response').get('url') # 获取 该请求 url
|
| 137 |
try:
|
| 138 |
resp = driver.execute_cdp_cmd('Network.getResponseBody', {'requestId': requestId}) # selenium调用 cdp
|
| 139 |
+
print(f'type: {packet_type} url: {url}')
|
|
|
|
| 140 |
print(f'response: {resp}')
|
| 141 |
+
print()
|
| 142 |
except WebDriverException: # 忽略异常
|
| 143 |
pass
|
| 144 |
|
|
|
|
| 146 |
"url": current_url,
|
| 147 |
"page_source": page_source,
|
| 148 |
"cookies": cookies,
|
| 149 |
+
"is_jump": is_jump
|
|
|
|
| 150 |
}
|
| 151 |
|
| 152 |
driver.quit()
|
| 153 |
return {"code": 200,"data":data}
|
| 154 |
|
| 155 |
if __name__ == '__main__':
|
| 156 |
+
uvicorn.run(app='app:app', host="0.0.0.0", port=7860)
|
|
|
|
|
|