Container committed on
Commit
4efdbc0
·
verified ·
1 Parent(s): ca9d760

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -3
app.py CHANGED
@@ -26,6 +26,7 @@ def get_root_domain(url):
26
  else:
27
  return domain
28
 
 
29
  def filter_type(_type: str):
30
  types = [
31
  'application/javascript', 'application/x-javascript', 'text/css', 'webp', 'image/png', 'image/gif',
@@ -41,10 +42,10 @@ def main():
41
 
42
  @app.get("/chrome")
43
  def chrome(url:str=None,wait:int=5,header:str=None,cookie:str=None):
44
-
45
  caps = {
46
  "browserName": "chrome",
47
- 'goog:loggingPrefs': {'performance': 'ALL'} # 开启日志性能监听
48
  }
49
 
50
  # 必须有目标url
@@ -78,6 +79,8 @@ def chrome(url:str=None,wait:int=5,header:str=None,cookie:str=None):
78
 
79
  # 设置为无头模式
80
  options.add_argument('--headless')
 
 
81
  for key, value in caps.items():
82
  options.set_capability(key, value)
83
 
@@ -124,6 +127,7 @@ def chrome(url:str=None,wait:int=5,header:str=None,cookie:str=None):
124
  # 完全加载完成时,页面是否有发生过 301 302 跳转过
125
  is_jump = (target_url != current_url)
126
 
 
127
  performance_log = driver.get_log('performance') # 获取名称为 performance 的日志
128
  for packet in performance_log:
129
  message = json.loads(packet.get('message')).get('message') # 获取message的数据
@@ -135,7 +139,10 @@ def chrome(url:str=None,wait:int=5,header:str=None,cookie:str=None):
135
  requestId = message.get('params').get('requestId') # 唯一的请求标识符。相当于该请求的身份证
136
  url = message.get('params').get('response').get('url') # 获取 该请求 url
137
  try:
 
138
  resp = driver.execute_cdp_cmd('Network.getResponseBody', {'requestId': requestId}) # selenium调用 cdp
 
 
139
  print(f'type: {packet_type} url: {url}')
140
  print(f'response: {resp}')
141
  print()
@@ -146,7 +153,8 @@ def chrome(url:str=None,wait:int=5,header:str=None,cookie:str=None):
146
  "url": current_url,
147
  "page_source": page_source,
148
  "cookies": cookies,
149
- "is_jump": is_jump
 
150
  }
151
 
152
  driver.quit()
 
26
  else:
27
  return domain
28
 
29
+ # 网络抓包内容过滤
30
  def filter_type(_type: str):
31
  types = [
32
  'application/javascript', 'application/x-javascript', 'text/css', 'webp', 'image/png', 'image/gif',
 
42
 
43
  @app.get("/chrome")
44
  def chrome(url:str=None,wait:int=5,header:str=None,cookie:str=None):
45
+ # 设置日志性能监听参数
46
  caps = {
47
  "browserName": "chrome",
48
+ 'goog:loggingPrefs': {'performance': 'ALL'}
49
  }
50
 
51
  # 必须有目标url
 
79
 
80
  # 设置为无头模式
81
  options.add_argument('--headless')
82
+
83
+ # 开启日志性能监听
84
  for key, value in caps.items():
85
  options.set_capability(key, value)
86
 
 
127
  # 完全加载完成时,页面是否有发生过 301 302 跳转过
128
  is_jump = (target_url != current_url)
129
 
130
+ network = []
131
  performance_log = driver.get_log('performance') # 获取名称为 performance 的日志
132
  for packet in performance_log:
133
  message = json.loads(packet.get('message')).get('message') # 获取message的数据
 
139
  requestId = message.get('params').get('requestId') # 唯一的请求标识符。相当于该请求的身份证
140
  url = message.get('params').get('response').get('url') # 获取 该请求 url
141
  try:
142
+ network.append({"url":url, "type":packet_type})
143
  resp = driver.execute_cdp_cmd('Network.getResponseBody', {'requestId': requestId}) # selenium调用 cdp
144
+ request_headers = driver.execute_cdp_cmd('Network.getRequestHeaders', {'requestId': requestId})
145
+ print(f'request_headers: {request_headers}')
146
  print(f'type: {packet_type} url: {url}')
147
  print(f'response: {resp}')
148
  print()
 
153
  "url": current_url,
154
  "page_source": page_source,
155
  "cookies": cookies,
156
+ "is_jump": is_jump,
157
+ "network": network,
158
  }
159
 
160
  driver.quit()