Container committed on
Commit
1be76cb
·
verified ·
1 Parent(s): 9917a76

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -9
app.py CHANGED
@@ -26,12 +26,13 @@ def get_root_domain(url):
26
  else:
27
  return domain
28
 
29
- def filter_type(_type: str):
 
30
  types = [
31
  'application/javascript', 'application/x-javascript', 'text/css', 'webp', 'image/png', 'image/gif',
32
  'image/jpeg', 'image/x-icon', 'application/octet-stream'
33
  ]
34
- if _type not in types:
35
  return True
36
  return False
37
 
@@ -41,10 +42,10 @@ def main():
41
 
42
  @app.get("/chrome")
43
  def chrome(url:str=None,wait:int=5,header:str=None,cookie:str=None):
44
-
45
  caps = {
46
  "browserName": "chrome",
47
- 'goog:loggingPrefs': {'performance': 'ALL'} # 开启日志性能监听
48
  }
49
 
50
  # 必须有目标url
@@ -78,6 +79,8 @@ def chrome(url:str=None,wait:int=5,header:str=None,cookie:str=None):
78
 
79
  # 设置为无头模式
80
  options.add_argument('--headless')
 
 
81
  for key, value in caps.items():
82
  options.set_capability(key, value)
83
 
@@ -123,22 +126,25 @@ def chrome(url:str=None,wait:int=5,header:str=None,cookie:str=None):
123
 
124
  # 完全加载完成时,页面是否有发生过 301 302 跳转过
125
  is_jump = (target_url != current_url)
126
-
 
 
127
  performance_log = driver.get_log('performance') # 获取名称为 performance 的日志
128
  for packet in performance_log:
129
  message = json.loads(packet.get('message')).get('message') # 获取message的数据
130
  if message.get('method') != 'Network.responseReceived': # 如果method 不是 responseReceived 类型就不往下执行
131
  continue
132
  packet_type = message.get('params').get('response').get('mimeType') # 获取该请求返回的type
133
- if not filter_type(_type=packet_type): # 过滤type
134
  continue
135
  requestId = message.get('params').get('requestId') # 唯一的请求标识符。相当于该请求的身份证
136
  url = message.get('params').get('response').get('url') # 获取 该请求 url
137
  try:
138
  resp = driver.execute_cdp_cmd('Network.getResponseBody', {'requestId': requestId}) # selenium调用 cdp
139
- print(f'type: {packet_type} url: {url}')
 
140
  print(f'response: {resp}')
141
- print()
142
  except WebDriverException: # 忽略异常
143
  pass
144
 
@@ -146,7 +152,8 @@ def chrome(url:str=None,wait:int=5,header:str=None,cookie:str=None):
146
  "url": current_url,
147
  "page_source": page_source,
148
  "cookies": cookies,
149
- "is_jump": is_jump
 
150
  }
151
 
152
  driver.quit()
 
26
  else:
27
  return domain
28
 
29
+ # 网络抓包内容过滤
30
def filter_type(type: str):
    """Decide whether a captured network response should be kept.

    Responses whose MIME type denotes a static asset (scripts, stylesheets,
    images, opaque binary blobs) are rejected; everything else is kept.

    Args:
        type: MIME type string to check. The name shadows the ``type``
            builtin but is kept so existing keyword callers still work.

    Returns:
        True if ``type`` is not one of the excluded MIME types, else False.
    """
    excluded_types = (
        'application/javascript', 'application/x-javascript', 'text/css',
        # 'webp' alone is not a MIME type; WebP images are served as
        # 'image/webp'. The bare value is kept for backward compatibility.
        'webp', 'image/webp',
        'image/png', 'image/gif', 'image/jpeg', 'image/x-icon',
        'application/octet-stream',
    )
    return type not in excluded_types
38
 
 
42
 
43
  @app.get("/chrome")
44
  def chrome(url:str=None,wait:int=5,header:str=None,cookie:str=None):
45
+ # 设置日志性能监听参数
46
  caps = {
47
  "browserName": "chrome",
48
+ 'goog:loggingPrefs': {'performance': 'ALL'}
49
  }
50
 
51
  # 必须有目标url
 
79
 
80
  # 设置为无头模式
81
  options.add_argument('--headless')
82
+
83
+ # 开启日志性能监听
84
  for key, value in caps.items():
85
  options.set_capability(key, value)
86
 
 
126
 
127
  # 完全加载完成时,页面是否有发生过 301 302 跳转过
128
  is_jump = (target_url != current_url)
129
+
130
network = []
# Collect network packets from the performance log and filter them by MIME type.
performance_log = driver.get_log('performance')  # entries of the log named "performance"
for packet in performance_log:
    message = json.loads(packet.get('message')).get('message')  # inner message payload
    if message.get('method') != 'Network.responseReceived':  # only received responses are of interest
        continue
    params = message.get('params')
    response = params.get('response')
    packet_type = response.get('mimeType')  # MIME type reported for this response
    if not filter_type(packet_type):  # skip excluded (static asset) types
        continue
    requestId = params.get('requestId')  # unique identifier of the request
    url = response.get('url')  # URL of the request
    try:
        # Fetch the response body through the Chrome DevTools Protocol.
        resp = driver.execute_cdp_cmd('Network.getResponseBody', {'requestId': requestId})
    except WebDriverException:  # best effort: the body may no longer be available
        continue
    # Record the packet as soon as the body lookup succeeds, so that a
    # failing header lookup below cannot drop the entry.
    network.append({"url": url, "type": packet_type})
    print(f'response: {resp}')
    try:
        # Uses requestId; the previous code referenced an undefined name
        # `request_id`, raising an uncaught NameError.
        # NOTE(review): confirm 'Network.getRequestHeaders' is a valid CDP
        # command; if it is not, this call always fails and headers should be
        # read from 'Network.requestWillBeSent' log entries instead.
        request_headers = driver.execute_cdp_cmd('Network.getRequestHeaders', {'requestId': requestId})
    except WebDriverException:  # best effort: ignore a failed header lookup
        continue
    print(f'request_headers: {request_headers}')
150
 
 
152
  "url": current_url,
153
  "page_source": page_source,
154
  "cookies": cookies,
155
+ "is_jump": is_jump,
156
+ "network": network,
157
  }
158
 
159
  driver.quit()