File size: 3,634 Bytes
5d66f59
f8390d3
 
 
 
 
5d66f59
 
 
 
 
 
 
 
 
 
 
 
 
 
f8390d3
5d66f59
 
 
 
 
 
 
 
 
 
 
 
f8390d3
 
5d66f59
 
f8390d3
 
5d66f59
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f8390d3
 
 
5d66f59
f8390d3
 
 
5d66f59
f8390d3
 
 
 
 
 
 
 
 
5d66f59
 
f8390d3
 
5d66f59
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
import html
import re
from urllib.parse import urlparse

import requests
from flask import Flask, request, Response

# Flask application instance; handlers are registered on it via ``@app.route``.
app = Flask(__name__)

# --- Whitelist filter rules ---
# Each rule is applied to the complete target URL parsed out of the request
# path; a URL is let through when at least one rule matches it.
_ALLOWED_URL_REGEXES = (
    # Repository release and archive paths
    r'^https://github\.com/[^/]+/[^/]+/(?:releases|archive)/.*$',
    # Repository blob and raw paths
    r'^https://github\.com/[^/]+/[^/]+/(?:blob|raw)/.*$',
    # Repository paths starting with "info" or "git-" (presumably git smart-HTTP)
    r'^https://github\.com/[^/]+/[^/]+/(?:info|git-).*/.*$',
    # Raw content hosts
    r'^https://raw\.(?:githubusercontent|github)\.com/[^/]+/[^/]+/.*/.*$',
    # Gist hosts
    r'^https://gist\.(?:githubusercontent|github)\.com/[^/]+/[^/]+/.*/.*$',
    # Repository tags
    r'^https://github\.com/[^/]+/[^/]+/tags.*$',
    # Avatar host
    r'^https://avatars\.githubusercontent\.com/.*$',
    # github.githubassets.com host
    r'^https://github\.com/[^/]+/?$'.replace('github\\.com/[^/]+/?$', 'github\\.githubassets\\.com/.*$'),
    # Single-segment path on github.com (e.g. a user or organisation page)
    r'^https://github\.com/[^/]+/?$',
    # Two-segment path on github.com (e.g. a repository root)
    r'^https://github\.com/[^/]+/[^/]+/?$',
)

# Compiled case-insensitively, in the same order as the regex strings above.
ALLOWED_PATTERNS = [
    re.compile(regex, re.IGNORECASE) for regex in _ALLOWED_URL_REGEXES
]

def is_url_allowed(url):
    """Return True when ``url`` matches at least one whitelist pattern.

    Every compiled regex in ``ALLOWED_PATTERNS`` is tried with ``match`` and
    the scan stops at the first hit; False is returned when none matches.
    """
    return any(candidate.match(url) for candidate in ALLOWED_PATTERNS)

# --- Core proxy logic ---

# Upstream response headers that are not copied back to the client: requests
# decodes the body while streaming it, so the upstream encoding and framing
# headers no longer describe what this proxy sends.
_EXCLUDED_RESPONSE_HEADERS = frozenset(
    ('content-encoding', 'content-length', 'transfer-encoding', 'connection')
)

# (connect, read) timeouts in seconds for the upstream request. Without a
# timeout, requests waits indefinitely on an unresponsive upstream.
_UPSTREAM_TIMEOUT = (10, 60)


# Catch-all routes: whatever follows the leading slash is the target URL.
@app.route('/', defaults={'path': ''})
@app.route('/<path:path>')
def proxy(path):
    """Proxy a whitelisted URL that is embedded in the request path.

    Example: ``/https://github.com/user/repo``.

    Args:
        path: Path captured by the route. Unused; the target is rebuilt from
            ``request.full_path`` so that the query string is included.

    Returns:
        A streaming ``Response`` carrying the upstream status, filtered
        headers and body, or a ``(body, status)`` tuple: 403 when the URL is
        not whitelisted, 400 when no host can be parsed from it, and 502 when
        the upstream request fails.
    """

    # --- 1. Build the target URL from the request path ---
    # full_path holds path plus query string, e.g.
    # /https://github.com/user/repo?service=...
    target_path = request.full_path

    # Werkzeug appends '?' to full_path even when there is no query string;
    # drop it so the upstream URL is not altered.
    if not request.query_string and target_path.endswith('?'):
        target_path = target_path[:-1]

    # Strip the single leading slash that precedes the embedded URL.
    if target_path.startswith('/'):
        target_path = target_path[1:]

    # Default to https:// when the path does not carry a scheme of its own.
    if target_path.startswith(('http://', 'https://')):
        target_url = target_path
    else:
        target_url = 'https://' + target_path

    # --- 2. Security filter ---
    if not is_url_allowed(target_url):
        # target_url is attacker-controlled and this body is served as HTML,
        # so it must be escaped before being echoed back.
        error_message = (
            "<h1>403 Forbidden</h1>"
            "<p>This request is blocked by the proxy's security policy.</p>"
            f"<p>Blocked URL: {html.escape(target_url)}</p>"
        )
        return error_message, 403

    # --- 3. Forward the request ---
    # Derive the Host header from the target URL.
    try:
        target_host = urlparse(target_url).hostname
    except ValueError:
        return "Invalid target URL in path", 400
    if not target_host:
        return "Invalid target URL in path", 400

    # Forward the client's headers, replacing Host with the upstream host.
    headers = {key: value for key, value in request.headers if key.lower() != 'host'}
    headers['Host'] = target_host

    try:
        resp = requests.request(
            method=request.method,
            url=target_url,
            headers=headers,
            data=request.get_data(),
            cookies=request.cookies,
            allow_redirects=False,
            stream=True,
            timeout=_UPSTREAM_TIMEOUT,
        )
    except requests.exceptions.RequestException as e:
        # Escaped for the same reason as the 403 body: the message is served
        # as HTML and may contain parts of the requested URL.
        return f"An error occurred while proxying: {html.escape(str(e))}", 502

    response_headers = [
        (name, value)
        for name, value in resp.raw.headers.items()
        if name.lower() not in _EXCLUDED_RESPONSE_HEADERS
    ]

    proxied = Response(
        resp.iter_content(chunk_size=8192),
        status=resp.status_code,
        headers=response_headers,
    )
    # With stream=True the upstream connection stays open until the body is
    # fully consumed or the response is closed; close it when this response
    # is closed so an early client disconnect does not leak it.
    proxied.call_on_close(resp.close)
    return proxied


if __name__ == '__main__':
    # When executed as a script, serve the app on all interfaces, port 7860.
    app.run(host='0.0.0.0', port=7860)