from seleniumwire import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.common.exceptions import WebDriverException
from fastapi import FastAPI, Request
import uvicorn
import time
import json
from urllib.parse import unquote, urlparse

# ASGI application object; the @app.get(...) decorators below register
# their route handlers on it.
app = FastAPI()

def convert_cookies_to_dict(cookies):
    """Parse a ``Cookie`` header string into a ``{name: value}`` dict.

    Pairs are separated by ``;``. Whitespace around each pair, name and
    value is stripped, so both ``"a=1; b=2"`` and ``"a=1;b=2"`` are accepted.
    Only the first ``=`` in a pair separates name from value; a pair without
    ``=`` maps the name to an empty string. When a name repeats, the last
    occurrence wins.

    Segments with an empty name (empty input, a trailing ``;``, or ``"=x"``)
    are skipped, so the result never contains an empty cookie name.

    Args:
        cookies: Raw cookie string, e.g. ``"sid=abc; theme=dark"``.

    Returns:
        dict mapping cookie names to cookie values.
    """
    parsed_cookies = {}
    for item in cookies.split(";"):
        name, _, value = item.partition("=")
        name = name.strip()
        if not name:
            # Nothing usable in this segment (e.g. the tail of "a=1;").
            continue
        parsed_cookies[name] = value.strip()
    return parsed_cookies

def get_root_domain(url):
    """Return the last two labels of the host in *url* (e.g. ``example.com``).

    The host is taken from ``urlparse(url).hostname``, so any port,
    ``user:password@`` prefix and letter case are dropped before the labels
    are counted. A trailing root dot is ignored.

    Special cases:
      * IPv4/IPv6 literals are returned whole; cutting them down to two
        components would produce a meaningless value.
      * Single-label hosts such as ``localhost`` are returned unchanged.
      * A URL without a network location (for example one missing its
        scheme) yields an empty string.

    NOTE: this is a plain "last two labels" heuristic. It does not consult a
    public-suffix list, so ``www.example.co.uk`` yields ``co.uk``.

    Args:
        url: Absolute URL string.

    Returns:
        The derived domain string, lower-cased.
    """
    # Function-scope import so this helper does not rely on a module-level
    # import being added elsewhere in the file.
    import ipaddress

    host = (urlparse(url).hostname or "").rstrip(".")

    try:
        ipaddress.ip_address(host)
    except ValueError:
        pass  # Not an IP literal: fall through to label handling.
    else:
        return host

    labels = host.split(".")
    if len(labels) > 1:
        return ".".join(labels[-2:])
    return host

# MIME types of static assets whose response bodies are not worth dumping.
# 'webp' is kept for backward compatibility, but it is not a real MIME type:
# WebP images are served as 'image/webp'. 'text/javascript' is the standard
# JavaScript MIME type alongside the legacy 'application/*' spellings.
_STATIC_MIME_TYPES = frozenset({
    'application/javascript',
    'application/x-javascript',
    'text/javascript',
    'text/css',
    'webp',
    'image/webp',
    'image/png',
    'image/gif',
    'image/jpeg',
    'image/x-icon',
    'application/octet-stream',
})


def filter_type(_type: str):
    """Tell whether a response with MIME type *_type* should be kept.

    Args:
        _type: MIME type string to test. The comparison is exact and
            case-sensitive. ``None`` is accepted and is treated as
            "not a static asset".

    Returns:
        ``True`` if *_type* is not one of the known static-asset MIME types,
        ``False`` if it is one of them.
    """
    return _type not in _STATIC_MIME_TYPES
    
@app.get("/")
def main():
    """Root endpoint: always answers with a fixed success payload."""
    payload = dict(code=200, msg="Success")
    return payload
    
def _print_network_responses(driver):
    """Print the body of each non-static response in Chrome's performance log.

    Walks the ``performance`` log of *driver*, keeps only
    ``Network.responseReceived`` events whose MIME type passes
    ``filter_type``, and prints the response body fetched through the
    ``Network.getResponseBody`` CDP command.
    """
    for packet in driver.get_log('performance'):
        # Each log entry wraps the actual CDP event as a JSON string.
        message = json.loads(packet.get('message')).get('message')
        if message.get('method') != 'Network.responseReceived':
            continue

        params = message.get('params')
        response = params.get('response')
        packet_type = response.get('mimeType')
        if not filter_type(_type=packet_type):
            continue

        request_id = params.get('requestId')  # CDP identifier of this request
        response_url = response.get('url')
        try:
            resp = driver.execute_cdp_cmd('Network.getResponseBody', {'requestId': request_id})
        except WebDriverException:
            # Best effort: skip responses whose body cannot be fetched.
            continue
        print(f'type: {packet_type} url: {response_url}')
        print(f'response: {resp}')
        print()


@app.get("/chrome")
def chrome(url:str=None,wait:int=5,header:str=None,cookie:str=None):
    """Load *url* in headless Chrome and report the rendered page state.

    The page is opened once so that cookies can be attached to its domain,
    local state is cleared, the supplied cookies and header overrides are
    installed, and the page is loaded a second time. After sleeping *wait*
    seconds the final URL, page source and cookies are captured.

    Args:
        url: Target URL (may be percent-encoded). Required.
        wait: Seconds to sleep after the second load; must be an integer
            between 0 and 30 inclusive.
        header: Optional percent-encoded JSON object of header overrides.
        cookie: Optional percent-encoded cookie string; it replaces any
            ``cookie`` entry supplied through *header*.

    Returns:
        ``{"code": 200, "data": {...}}`` on success, where ``data`` holds
        ``url``, ``page_source``, ``cookies`` and ``is_jump``; or
        ``{"code": 500, "msg": ...}`` when an argument fails validation.

    Raises:
        Exceptions raised by the browser session are not caught here; the
        browser is always shut down before they propagate.
    """
    caps = {
        "browserName": "chrome",
        'goog:loggingPrefs': {'performance': 'ALL'}  # enable performance (network) logging
    }

    # A target URL is mandatory.
    if not isinstance(url, str):
        return {"code": 500,"msg":"No target URL"}
    target_url = unquote(url)
    target_domain = get_root_domain(target_url)

    # The wait time must be an integer between 0 and 30.
    if wait not in range(0, 31):
        return {"code": 500,"msg":"The waiting time must be between 0 and 30"}
    wait_time = wait

    header_array = {}

    # Headers may be overridden, but must be supplied as a JSON object.
    if isinstance(header, str):
        try:
            header_array.update(json.loads(unquote(header)))
        except (ValueError, TypeError):
            # ValueError: not valid JSON; TypeError/ValueError: JSON that
            # is not an object and so cannot be merged into a dict.
            return {"code": 500,"msg":"The header field is not JSON"}

    # An explicit cookie argument takes precedence over a "cookie" header.
    if isinstance(cookie, str):
        header_array.update({"cookie":unquote(cookie)})

    # Configure a headless Chrome session with the capabilities above.
    options = Options()
    options.add_argument('--headless')
    for key, value in caps.items():
        options.set_capability(key, value)

    driver = webdriver.Chrome(options=options)
    try:
        # The page has to be open before driver.add_cookie can attach
        # cookies to its domain.
        driver.get(target_url)

        # Drop whatever the first load stored locally.
        driver.delete_all_cookies()
        driver.execute_script("window.sessionStorage.clear();")
        driver.execute_script("window.localStorage.clear();")

        # Install the caller's cookies in the browser rather than sending
        # them as a raw header.
        if 'cookie' in header_array:
            cookie_array = convert_cookies_to_dict(header_array.pop('cookie'))
            for key, value in cookie_array.items():
                driver.add_cookie({"name": key, "value": value, "domain": f'.{target_domain}', "path": "/", "secure": False})

        # Override request headers for the next load; headers not listed
        # keep their original values.
        # NOTE(review): header_overrides looks like selenium-wire's legacy
        # header API (newer releases favour driver.request_interceptor) -
        # confirm it is supported by the installed selenium-wire version.
        driver.header_overrides = header_array

        # Load the page again, now with cookies and headers applied.
        driver.get(target_url)

        # Print the page source as it is right after the load.
        print(driver.page_source)

        # Give in-page scripts (follow-up requests, delayed redirects) time
        # to finish before the final state is captured.
        if wait_time > 0:
            time.sleep(wait_time)

        current_url = driver.current_url
        page_source = driver.page_source
        cookies = driver.get_cookies()

        # True whenever the final URL differs from the requested one.
        is_jump = (target_url != current_url)

        _print_network_responses(driver)

        data = {
            "url": current_url,
            "page_source": page_source,
            "cookies": cookies,
            "is_jump": is_jump
        }
        return {"code": 200,"data":data}
    finally:
        # Always shut the browser down, including when a step above raises,
        # so no Chrome process is left behind.
        driver.quit()

if __name__ == "__main__":
    # Start the server when the file is executed directly. The "app:app"
    # import string presumably refers to the `app` object in a module named
    # `app` - confirm that this file is saved as app.py.
    uvicorn.run(
        app="app:app",
        host="0.0.0.0",
        port=7860,
    )