megatrump committed on
Commit
915b179
·
verified ·
1 Parent(s): 1dbbd42

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -30
app.py CHANGED
@@ -1,15 +1,19 @@
1
  import re
 
2
  from flask import Flask, request, Response
3
  import requests
4
  from urllib.parse import urlparse
5
 
 
 
 
6
  app = Flask(__name__)
7
 
8
- # --- Whitelist filtering rules (Unchanged) ---
9
  ALLOWED_PATTERNS = [
10
  re.compile(r'^https://github\.com/[^/]+/[^/]+/(?:releases|archive)/.*$', re.IGNORECASE),
11
  re.compile(r'^https://github\.com/[^/]+/[^/]+/(?:blob|raw)/.*$', re.IGNORECASE),
12
- re.compile(r'^https://github\.com/[^/]+/[^/]+/(?:info|git-).*/.*$', re.IGNORECASE), # This is key for git clone
13
  re.compile(r'^https://raw\.(?:githubusercontent|github)\.com/[^/]+/[^/]+/.*/.*$', re.IGNORECASE),
14
  re.compile(r'^https://gist\.(?:githubusercontent|github)\.com/[^/]+/[^/]+/.*/.*$', re.IGNORECASE),
15
  re.compile(r'^https://github\.com/[^/]+/[^/]+/tags.*$', re.IGNORECASE),
@@ -20,61 +24,61 @@ ALLOWED_PATTERNS = [
20
  ]
21
 
22
  def is_url_allowed(url):
23
- """Checks if the given URL matches any pattern in the whitelist."""
24
- for pattern in ALLOWED_PATTERNS:
25
  if pattern.match(url):
 
26
  return True
 
27
  return False
28
 
29
- # --- Core Proxy Logic ---
30
 
31
- # A single, consolidated route to capture all requests and methods
32
  @app.route('/', defaults={'path': ''}, methods=['GET', 'POST', 'PUT', 'DELETE'])
33
  @app.route('/<path:path>', methods=['GET', 'POST', 'PUT', 'DELETE'])
34
  def proxy(path):
35
- """
36
- A universal reverse proxy that takes the target URL as part of the path.
37
- """
38
 
39
- # --- 1. Construct target URL from the request path ---
40
- # request.full_path includes the query string, which is essential for git
 
 
 
 
 
41
  target_path = request.full_path
42
-
43
- # Remove the leading slash
44
  if target_path.startswith('/'):
45
  target_path = target_path[1:]
46
 
47
- # If the path is empty (root request), return a simple landing page
48
  if not target_path:
49
- return ("<p>This is a GitHub reverse proxy. Usage:</p>"
50
- "<p><code>&lt;proxy_url&gt;/&lt;target_github_url&gt;</code></p>"
51
- "<p>Example: <code>/github.com/python/cpython.git</code></p>"), 200
52
 
53
- # Prepend https:// if no scheme is present
54
  if not target_path.startswith(('http://', 'https://')):
55
  target_url = 'https://' + target_path
56
  else:
57
  target_url = target_path
58
-
59
- # --- 2. Perform security filter check ---
 
 
60
  if not is_url_allowed(target_url):
61
- error_message = (
62
- "<h1>403 Forbidden</h1>"
63
- "<p>This request is blocked by the proxy's security policy.</p>"
64
- f"<p>Blocked URL: {target_url}</p>"
65
- )
66
  return error_message, 403
67
 
68
- # --- 3. Forward the request ---
69
  try:
70
  target_host = urlparse(target_url).hostname
71
  if not target_host:
72
- raise ValueError("Could not parse hostname from target URL")
73
  except Exception as e:
 
74
  return f"Invalid target URL in path: {e}", 400
75
 
76
  headers = {key: value for (key, value) in request.headers if key.lower() != 'host'}
77
  headers['Host'] = target_host
 
78
 
79
  try:
80
  resp = requests.request(
@@ -85,18 +89,21 @@ def proxy(path):
85
  cookies=request.cookies,
86
  allow_redirects=False,
87
  stream=True,
88
- timeout=30 # Added a timeout for robustness
89
  )
 
90
 
91
  excluded_headers = ['content-encoding', 'content-length', 'transfer-encoding', 'connection']
92
  response_headers = [(name, value) for (name, value) in resp.raw.headers.items() if name.lower() not in excluded_headers]
93
-
 
94
  return Response(resp.iter_content(chunk_size=8192), status=resp.status_code, headers=response_headers)
95
 
96
  except requests.exceptions.RequestException as e:
 
 
97
  return f"An error occurred while proxying: {e}", 502
98
 
99
 
100
  if __name__ == '__main__':
101
- # For production, use a proper WSGI server like Gunicorn
102
  app.run(host='0.0.0.0', port=7860)
 
1
  import re
2
+ import logging
3
  from flask import Flask, request, Response
4
  import requests
5
  from urllib.parse import urlparse
6
 
7
+ # --- 设置日志,使其能输出到Hugging Face的控制台 ---
8
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
9
+
10
  app = Flask(__name__)
11
 
12
+ # --- 白名单过滤规则 (保持不变) ---
13
  ALLOWED_PATTERNS = [
14
  re.compile(r'^https://github\.com/[^/]+/[^/]+/(?:releases|archive)/.*$', re.IGNORECASE),
15
  re.compile(r'^https://github\.com/[^/]+/[^/]+/(?:blob|raw)/.*$', re.IGNORECASE),
16
+ re.compile(r'^https://github\.com/[^/]+/[^/]+/(?:info|git-).*/.*$', re.IGNORECASE),
17
  re.compile(r'^https://raw\.(?:githubusercontent|github)\.com/[^/]+/[^/]+/.*/.*$', re.IGNORECASE),
18
  re.compile(r'^https://gist\.(?:githubusercontent|github)\.com/[^/]+/[^/]+/.*/.*$', re.IGNORECASE),
19
  re.compile(r'^https://github\.com/[^/]+/[^/]+/tags.*$', re.IGNORECASE),
 
24
  ]
25
 
26
  def is_url_allowed(url):
27
+ logging.info(f"Checking URL against whitelist: {url}")
28
+ for i, pattern in enumerate(ALLOWED_PATTERNS):
29
  if pattern.match(url):
30
+ logging.info(f"URL Matched! Pattern index: {i}")
31
  return True
32
+ logging.warning(f"URL Denied! No pattern matched: {url}")
33
  return False
34
 
35
+ # --- 核心代理逻辑 ---
36
 
 
37
  @app.route('/', defaults={'path': ''}, methods=['GET', 'POST', 'PUT', 'DELETE'])
38
  @app.route('/<path:path>', methods=['GET', 'POST', 'PUT', 'DELETE'])
39
  def proxy(path):
40
+ logging.info("="*50)
41
+ logging.info(f"--- New request received ---")
 
42
 
43
+ # 打印最原始的请求信息
44
+ logging.info(f"Request Method: {request.method}")
45
+ logging.info(f"Request Path (from Flask): /<path:path> = /{path}")
46
+ logging.info(f"Request Full Path (raw): {request.full_path}")
47
+ logging.info(f"Request Headers:\n{request.headers}")
48
+
49
+ # --- 1. 从请求路径中构建目标URL ---
50
  target_path = request.full_path
 
 
51
  if target_path.startswith('/'):
52
  target_path = target_path[1:]
53
 
 
54
  if not target_path:
55
+ logging.info("Root path request. Returning info page.")
56
+ return ("<p>GitHub reverse proxy is active. Use proxy_url/target_url format.</p>"), 200
 
57
 
 
58
  if not target_path.startswith(('http://', 'https://')):
59
  target_url = 'https://' + target_path
60
  else:
61
  target_url = target_path
62
+
63
+ logging.info(f"Constructed target URL: {target_url}")
64
+
65
+ # --- 2. 执行安全过滤检查 ---
66
  if not is_url_allowed(target_url):
67
+ error_message = "<h1>403 Forbidden</h1><p>Request blocked by proxy security policy.</p>"
 
 
 
 
68
  return error_message, 403
69
 
70
+ # --- 3. 转发请求 ---
71
  try:
72
  target_host = urlparse(target_url).hostname
73
  if not target_host:
74
+ raise ValueError("Could not parse hostname")
75
  except Exception as e:
76
+ logging.error(f"Failed to parse hostname from URL '{target_url}': {e}")
77
  return f"Invalid target URL in path: {e}", 400
78
 
79
  headers = {key: value for (key, value) in request.headers if key.lower() != 'host'}
80
  headers['Host'] = target_host
81
+ logging.info(f"Forwarding request to {target_url} with headers:\n{headers}")
82
 
83
  try:
84
  resp = requests.request(
 
89
  cookies=request.cookies,
90
  allow_redirects=False,
91
  stream=True,
92
+ timeout=30
93
  )
94
+ logging.info(f"Received response with status code: {resp.status_code}")
95
 
96
  excluded_headers = ['content-encoding', 'content-length', 'transfer-encoding', 'connection']
97
  response_headers = [(name, value) for (name, value) in resp.raw.headers.items() if name.lower() not in excluded_headers]
98
+ logging.info("Streaming response back to client.")
99
+ logging.info("="*50 + "\n")
100
  return Response(resp.iter_content(chunk_size=8192), status=resp.status_code, headers=response_headers)
101
 
102
  except requests.exceptions.RequestException as e:
103
+ logging.error(f"Error while proxying request to {target_url}: {e}")
104
+ logging.info("="*50 + "\n")
105
  return f"An error occurred while proxying: {e}", 502
106
 
107
 
108
  if __name__ == '__main__':
 
109
  app.run(host='0.0.0.0', port=7860)