File size: 2,885 Bytes
3b2ecd2
494cdcf
973d2bc
 
ca11d6d
baf3cbb
aee7417
e37298f
973d2bc
e37298f
aee7417
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
973d2bc
 
65230b0
 
 
3895bea
65230b0
247ee08
 
 
 
65230b0
 
247ee08
77269d1
247ee08
 
02fa17e
 
3895bea
247ee08
 
65230b0
247ee08
 
65230b0
3895bea
247ee08
 
 
 
3895bea
aee7417
 
 
 
 
 
 
3895bea
 
 
 
 
 
8996078
 
 
 
7274bf1
 
 
8996078
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e37298f
973d2bc
 
 
e37298f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
from seleniumwire import webdriver
from selenium.webdriver.chrome.options import Options
from fastapi import FastAPI, Request
import uvicorn
import time
import json
from urllib.parse import unquote, urlparse

# FastAPI application instance; the @app.get(...) decorators further down
# register their route handlers on this object.
app = FastAPI()

def convert_cookies_to_dict(cookies):
    """Parse a ``Cookie``-header style string into a ``{name: value}`` dict.

    Args:
        cookies: Text such as ``"a=1; b=2"``. Pairs are separated by ``;``
            with optional surrounding whitespace.

    Returns:
        A dict mapping each cookie name to its value. Only the first ``=``
        in a pair separates name from value, so values may contain ``=``.
        Empty fragments (empty input, trailing ``;``) are skipped, and a
        fragment without ``=`` is stored with an empty-string value rather
        than raising.
    """
    parsed = {}
    for fragment in cookies.split(";"):
        fragment = fragment.strip()
        if not fragment:
            # Empty input or a trailing/duplicated separator.
            continue
        # partition never raises, unlike unpacking the result of split("=", 1).
        name, _, value = fragment.partition("=")
        parsed[name] = value
    return parsed

def get_root_domain(url):
    """Return the root domain (last two host labels) of ``url``.

    Args:
        url: An absolute URL such as ``"https://www.example.com:8080/path"``.

    Returns:
        The last two dot-separated labels of the host name, lower-cased and
        without port, credentials or a trailing dot (``"example.com"``).
        A single-label host (``"localhost"``) or an IP address literal is
        returned whole. An empty string is returned when the URL has no host
        part (for example when the scheme is missing).

    Note:
        "Last two labels" is only a heuristic; it is wrong for multi-part
        public suffixes such as ``co.uk``.
    """
    # Function-scope import keeps this block self-contained.
    import ipaddress

    # .hostname (unlike .netloc) excludes "user:pass@" and ":port".
    host = urlparse(url).hostname
    if not host:
        return ''

    # A fully-qualified name may end with a dot; it would otherwise become
    # an empty last label.
    host = host.rstrip('.')

    # Slicing labels off an IP address would produce nonsense like "0.1".
    try:
        ipaddress.ip_address(host)
    except ValueError:
        pass
    else:
        return host

    labels = host.split('.')
    # With a single label the slice is simply the whole host.
    return '.'.join(labels[-2:])
        
@app.get("/")
def main():
    """Root endpoint: always answers with a fixed success payload."""
    status = dict(code=200, msg="Success")
    return status
    
@app.get("/chrome")
def chrome(url:str=None,wait:int=5,header:str=None,cookie:str=None):
    """Load a page in headless Chrome and report what the browser ended up with.

    Args:
        url: Target URL (percent-decoded once more with ``unquote``). Required.
        wait: Seconds to sleep after navigation, 0 to 30 inclusive.
        header: Optional JSON object (URL-encoded) of request header
            overrides. A ``"cookie"`` entry is removed from the overrides and
            installed as browser cookies instead; it may be a cookie string
            or a ``{name: value}`` object.
        cookie: Optional cookie string such as ``"a=1; b=2"`` (URL-encoded).
            Takes precedence over a ``"cookie"`` entry in ``header``.

    Returns:
        ``{"code": 200, "data": {...}}`` with the final URL, page source,
        browser cookies and whether the final URL differs from the target;
        or ``{"code": 500, "msg": ...}`` when an argument is invalid.

    Errors raised by the browser itself propagate to the caller, but the
    browser process is always shut down first.
    """
    # A target URL is mandatory.
    if not isinstance(url, str):
        return {"code": 500,"msg":"No target URL"}
    target_url = unquote(url)

    # The wait time must be between 0 and 30 seconds.
    if wait not in range(0, 31):
        return {"code": 500,"msg":"The waiting time must be between 0 and 30"}
    wait_time = wait

    # Headers may be overridden, but they must arrive as a JSON object.
    header_array = {}
    if isinstance(header, str):
        try:
            parsed_header = json.loads(unquote(header))
        except (ValueError, RecursionError):
            return {"code": 500,"msg":"The header field is not JSON"}
        if not isinstance(parsed_header, dict):
            # Valid JSON, but not an object that can act as a header mapping.
            return {"code": 500,"msg":"The header field is not JSON"}
        header_array.update(parsed_header)

    # An explicit cookie argument wins over a cookie entry in the headers.
    if isinstance(cookie, str):
        header_array["cookie"] = unquote(cookie)

    # Cookies are installed in the browser rather than sent as a raw header.
    cookie_pairs = {}
    cookie_domain = None
    if 'cookie' in header_array:
        raw_cookie = header_array.pop('cookie')
        if isinstance(raw_cookie, dict):
            cookie_pairs = raw_cookie
        elif isinstance(raw_cookie, str):
            try:
                cookie_pairs = convert_cookies_to_dict(raw_cookie)
            except ValueError:
                return {"code": 500,"msg":"The cookie field is not valid"}
        else:
            return {"code": 500,"msg":"The cookie field is not valid"}
        cookie_domain = f'.{get_root_domain(target_url)}'

    options = Options()
    options.add_argument('--headless')

    driver = webdriver.Chrome(options=options)
    try:
        # NOTE(review): header_overrides is the selenium-wire attribute the
        # original code relied on; confirm the installed selenium-wire
        # version still honours it.
        driver.header_overrides = header_array

        if cookie_pairs:
            # Cookies are set through the DevTools protocol so they exist
            # before the first request. document.cookie cannot do this: no
            # page of the target domain is loaded yet, and building JS from
            # request input would allow script injection.
            cookie_expiry = 2524608000  # 2050-01-01 00:00:00 UTC, Unix seconds
            driver.execute_cdp_cmd('Network.enable', {})
            for key, value in cookie_pairs.items():
                driver.execute_cdp_cmd('Network.setCookie', {
                    'name': str(key),
                    'value': str(value),
                    'domain': cookie_domain,
                    'path': '/',
                    'expires': cookie_expiry,
                })

        driver.get(target_url)

        # Give client-side scripts and redirects time to settle.
        if wait_time > 0:
            time.sleep(wait_time)

        current_url = driver.current_url
        data = {
            "url": current_url,
            "page_source": driver.page_source,
            "cookies": driver.get_cookies(),
            # Whether the browser ended up somewhere other than the target.
            "is_jump": target_url != current_url,
        }
    finally:
        # Always release the Chrome process, even when navigation fails.
        driver.quit()

    return {"code": 200,"data":data}

# Script entry point: serve the application with uvicorn on all interfaces.
# NOTE(review): the "app:app" import string presumably requires this module
# to be importable as "app" (i.e. saved as app.py) - confirm.
if __name__ == "__main__":
    server_options = {"host": "0.0.0.0", "port": 7860}
    uvicorn.run("app:app", **server_options)