megatrump committed on
Commit
bc3acf5
·
verified ·
1 Parent(s): 915b179

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -35
app.py CHANGED
@@ -4,16 +4,19 @@ from flask import Flask, request, Response
4
  import requests
5
  from urllib.parse import urlparse
6
 
7
- # --- 设置日志,使其能输出到Hugging Face的控制台 ---
8
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
9
 
10
  app = Flask(__name__)
11
 
12
- # --- 白名单过滤规则 (保持不变) ---
13
  ALLOWED_PATTERNS = [
14
  re.compile(r'^https://github\.com/[^/]+/[^/]+/(?:releases|archive)/.*$', re.IGNORECASE),
15
  re.compile(r'^https://github\.com/[^/]+/[^/]+/(?:blob|raw)/.*$', re.IGNORECASE),
16
- re.compile(r'^https://github\.com/[^/]+/[^/]+/(?:info|git-).*/.*$', re.IGNORECASE),
 
 
 
17
  re.compile(r'^https://raw\.(?:githubusercontent|github)\.com/[^/]+/[^/]+/.*/.*$', re.IGNORECASE),
18
  re.compile(r'^https://gist\.(?:githubusercontent|github)\.com/[^/]+/[^/]+/.*/.*$', re.IGNORECASE),
19
  re.compile(r'^https://github\.com/[^/]+/[^/]+/tags.*$', re.IGNORECASE),
@@ -24,61 +27,42 @@ ALLOWED_PATTERNS = [
24
  ]
25
 
26
  def is_url_allowed(url):
27
- logging.info(f"Checking URL against whitelist: {url}")
28
- for i, pattern in enumerate(ALLOWED_PATTERNS):
29
  if pattern.match(url):
30
- logging.info(f"URL Matched! Pattern index: {i}")
31
  return True
32
- logging.warning(f"URL Denied! No pattern matched: {url}")
33
  return False
34
 
35
  # --- 核心代理逻辑 ---
36
-
37
  @app.route('/', defaults={'path': ''}, methods=['GET', 'POST', 'PUT', 'DELETE'])
38
  @app.route('/<path:path>', methods=['GET', 'POST', 'PUT', 'DELETE'])
39
  def proxy(path):
40
- logging.info("="*50)
41
- logging.info(f"--- New request received ---")
42
-
43
- # 打印最原始的请求信息
44
- logging.info(f"Request Method: {request.method}")
45
- logging.info(f"Request Path (from Flask): /<path:path> = /{path}")
46
- logging.info(f"Request Full Path (raw): {request.full_path}")
47
- logging.info(f"Request Headers:\n{request.headers}")
48
-
49
- # --- 1. 从请求路径中构建目标URL ---
50
  target_path = request.full_path
51
  if target_path.startswith('/'):
52
  target_path = target_path[1:]
53
 
54
  if not target_path:
55
- logging.info("Root path request. Returning info page.")
56
- return ("<p>GitHub reverse proxy is active. Use proxy_url/target_url format.</p>"), 200
57
 
58
  if not target_path.startswith(('http://', 'https://')):
59
  target_url = 'https://' + target_path
60
  else:
61
  target_url = target_path
62
 
63
- logging.info(f"Constructed target URL: {target_url}")
64
-
65
- # --- 2. 执行安全过滤检查 ---
66
  if not is_url_allowed(target_url):
 
67
  error_message = "<h1>403 Forbidden</h1><p>Request blocked by proxy security policy.</p>"
68
  return error_message, 403
69
 
70
- # --- 3. 转发请求 ---
71
  try:
72
  target_host = urlparse(target_url).hostname
73
  if not target_host:
74
  raise ValueError("Could not parse hostname")
75
  except Exception as e:
76
- logging.error(f"Failed to parse hostname from URL '{target_url}': {e}")
77
  return f"Invalid target URL in path: {e}", 400
78
 
79
  headers = {key: value for (key, value) in request.headers if key.lower() != 'host'}
80
  headers['Host'] = target_host
81
- logging.info(f"Forwarding request to {target_url} with headers:\n{headers}")
82
 
83
  try:
84
  resp = requests.request(
@@ -91,19 +75,12 @@ def proxy(path):
91
  stream=True,
92
  timeout=30
93
  )
94
- logging.info(f"Received response with status code: {resp.status_code}")
95
-
96
  excluded_headers = ['content-encoding', 'content-length', 'transfer-encoding', 'connection']
97
  response_headers = [(name, value) for (name, value) in resp.raw.headers.items() if name.lower() not in excluded_headers]
98
- logging.info("Streaming response back to client.")
99
- logging.info("="*50 + "\n")
100
  return Response(resp.iter_content(chunk_size=8192), status=resp.status_code, headers=response_headers)
101
-
102
  except requests.exceptions.RequestException as e:
103
- logging.error(f"Error while proxying request to {target_url}: {e}")
104
- logging.info("="*50 + "\n")
105
  return f"An error occurred while proxying: {e}", 502
106
 
107
-
108
  if __name__ == '__main__':
109
- app.run(host='0.0.0.0', port=7860)
 
 
4
  import requests
5
  from urllib.parse import urlparse
6
 
7
+ # --- 设置日志 ---
8
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
9
 
10
  app = Flask(__name__)
11
 
12
+ # --- 白名单过滤规则 ---
13
  ALLOWED_PATTERNS = [
14
  re.compile(r'^https://github\.com/[^/]+/[^/]+/(?:releases|archive)/.*$', re.IGNORECASE),
15
  re.compile(r'^https://github\.com/[^/]+/[^/]+/(?:blob|raw)/.*$', re.IGNORECASE),
16
+
17
+ # --- 这里是修正后的一行 ---
18
+ re.compile(r'^https://github\.com/[^/]+/[^/]+/(?:info|git-).*$', re.IGNORECASE),
19
+
20
  re.compile(r'^https://raw\.(?:githubusercontent|github)\.com/[^/]+/[^/]+/.*/.*$', re.IGNORECASE),
21
  re.compile(r'^https://gist\.(?:githubusercontent|github)\.com/[^/]+/[^/]+/.*/.*$', re.IGNORECASE),
22
  re.compile(r'^https://github\.com/[^/]+/[^/]+/tags.*$', re.IGNORECASE),
 
27
  ]
28
 
29
  def is_url_allowed(url):
30
+ """检查给定的URL是否匹配白名单中的任何一个模式。"""
31
+ for pattern in ALLOWED_PATTERNS:
32
  if pattern.match(url):
 
33
  return True
 
34
  return False
35
 
36
  # --- 核心代理逻辑 ---
 
37
  @app.route('/', defaults={'path': ''}, methods=['GET', 'POST', 'PUT', 'DELETE'])
38
  @app.route('/<path:path>', methods=['GET', 'POST', 'PUT', 'DELETE'])
39
  def proxy(path):
 
 
 
 
 
 
 
 
 
 
40
  target_path = request.full_path
41
  if target_path.startswith('/'):
42
  target_path = target_path[1:]
43
 
44
  if not target_path:
45
+ return ("<p>GitHub reverse proxy is active.</p>"), 200
 
46
 
47
  if not target_path.startswith(('http://', 'https://')):
48
  target_url = 'https://' + target_path
49
  else:
50
  target_url = target_path
51
 
 
 
 
52
  if not is_url_allowed(target_url):
53
+ logging.warning(f"URL Denied! No pattern matched: {target_url}")
54
  error_message = "<h1>403 Forbidden</h1><p>Request blocked by proxy security policy.</p>"
55
  return error_message, 403
56
 
 
57
  try:
58
  target_host = urlparse(target_url).hostname
59
  if not target_host:
60
  raise ValueError("Could not parse hostname")
61
  except Exception as e:
 
62
  return f"Invalid target URL in path: {e}", 400
63
 
64
  headers = {key: value for (key, value) in request.headers if key.lower() != 'host'}
65
  headers['Host'] = target_host
 
66
 
67
  try:
68
  resp = requests.request(
 
75
  stream=True,
76
  timeout=30
77
  )
 
 
78
  excluded_headers = ['content-encoding', 'content-length', 'transfer-encoding', 'connection']
79
  response_headers = [(name, value) for (name, value) in resp.raw.headers.items() if name.lower() not in excluded_headers]
 
 
80
  return Response(resp.iter_content(chunk_size=8192), status=resp.status_code, headers=response_headers)
 
81
  except requests.exceptions.RequestException as e:
 
 
82
  return f"An error occurred while proxying: {e}", 502
83
 
 
84
  if __name__ == '__main__':
85
+ app.run(host='0.0.0.0', port=7860)
86
+