megatrump committed on
Commit
200c491
·
verified ·
1 Parent(s): bc3acf5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +75 -18
app.py CHANGED
@@ -2,85 +2,142 @@ import re
2
  import logging
3
  from flask import Flask, request, Response
4
  import requests
5
- from urllib.parse import urlparse
6
 
7
- # --- 设置日志 ---
8
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
9
-
10
  app = Flask(__name__)
11
 
12
- # --- 白名单过滤规则 ---
 
 
13
  ALLOWED_PATTERNS = [
 
14
  re.compile(r'^https://github\.com/[^/]+/[^/]+/(?:releases|archive)/.*$', re.IGNORECASE),
15
  re.compile(r'^https://github\.com/[^/]+/[^/]+/(?:blob|raw)/.*$', re.IGNORECASE),
16
 
17
- # --- 这里是修正后的一行 ---
18
  re.compile(r'^https://github\.com/[^/]+/[^/]+/(?:info|git-).*$', re.IGNORECASE),
19
 
 
20
  re.compile(r'^https://raw\.(?:githubusercontent|github)\.com/[^/]+/[^/]+/.*/.*$', re.IGNORECASE),
21
  re.compile(r'^https://gist\.(?:githubusercontent|github)\.com/[^/]+/[^/]+/.*/.*$', re.IGNORECASE),
 
 
22
  re.compile(r'^https://github\.com/[^/]+/[^/]+/tags.*$', re.IGNORECASE),
23
  re.compile(r'^https://avatars\.githubusercontent\.com/.*$', re.IGNORECASE),
24
  re.compile(r'^https://github\.githubassets\.com/.*$', re.IGNORECASE),
 
 
25
  re.compile(r'^https://github\.com/[^/]+/?$', re.IGNORECASE),
26
  re.compile(r'^https://github\.com/[^/]+/[^/]+/?$', re.IGNORECASE),
27
  ]
28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  def is_url_allowed(url):
30
- """检查给定的URL是否匹配白名单中的任何一个模式。"""
31
  for pattern in ALLOWED_PATTERNS:
32
  if pattern.match(url):
33
  return True
34
  return False
35
 
36
- # --- 核心代理逻辑 ---
37
  @app.route('/', defaults={'path': ''}, methods=['GET', 'POST', 'PUT', 'DELETE'])
38
  @app.route('/<path:path>', methods=['GET', 'POST', 'PUT', 'DELETE'])
39
  def proxy(path):
40
- target_path = request.full_path
 
 
 
 
 
41
  if target_path.startswith('/'):
42
  target_path = target_path[1:]
43
 
 
44
  if not target_path:
45
- return ("<p>GitHub reverse proxy is active.</p>"), 200
46
 
 
47
  if not target_path.startswith(('http://', 'https://')):
48
  target_url = 'https://' + target_path
49
  else:
50
  target_url = target_path
51
 
 
52
  if not is_url_allowed(target_url):
53
  logging.warning(f"URL Denied! No pattern matched: {target_url}")
54
- error_message = "<h1>403 Forbidden</h1><p>Request blocked by proxy security policy.</p>"
55
- return error_message, 403
56
 
57
  try:
58
  target_host = urlparse(target_url).hostname
59
  if not target_host:
60
- raise ValueError("Could not parse hostname")
61
  except Exception as e:
 
62
  return f"Invalid target URL in path: {e}", 400
63
 
 
64
  headers = {key: value for (key, value) in request.headers if key.lower() != 'host'}
65
  headers['Host'] = target_host
66
 
67
  try:
 
68
  resp = requests.request(
69
  method=request.method,
70
  url=target_url,
71
  headers=headers,
72
  data=request.get_data(),
73
  cookies=request.cookies,
74
- allow_redirects=False,
75
  stream=True,
76
- timeout=30
77
  )
 
 
78
  excluded_headers = ['content-encoding', 'content-length', 'transfer-encoding', 'connection']
79
- response_headers = [(name, value) for (name, value) in resp.raw.headers.items() if name.lower() not in excluded_headers]
80
- return Response(resp.iter_content(chunk_size=8192), status=resp.status_code, headers=response_headers)
 
 
 
 
81
  except requests.exceptions.RequestException as e:
82
- return f"An error occurred while proxying: {e}", 502
 
83
 
84
  if __name__ == '__main__':
85
  app.run(host='0.0.0.0', port=7860)
86
-
 
2
  import logging
3
  from flask import Flask, request, Response
4
  import requests
5
+ from urllib.parse import urlparse, unquote
6
 
7
+ # --- Basic Configuration ---
8
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 
9
  app = Flask(__name__)
10
 
# --- Whitelisted URL Patterns for GitHub ---
# Only URLs matching at least one of these compiled, case-insensitive patterns
# are permitted to be proxied. The list is a security measure that keeps the
# proxy from being used to reach unintended domains.
ALLOWED_PATTERNS = [
    re.compile(expression, re.IGNORECASE)
    for expression in (
        # Repositories: releases, archives, blobs, raw content
        r'^https://github\.com/[^/]+/[^/]+/(?:releases|archive)/.*$',
        r'^https://github\.com/[^/]+/[^/]+/(?:blob|raw)/.*$',

        # Git operations (clone, pull, push): paths starting with "info" or "git-"
        r'^https://github\.com/[^/]+/[^/]+/(?:info|git-).*$',

        # Raw content from the raw.* and gist.* GitHub domains
        r'^https://raw\.(?:githubusercontent|github)\.com/[^/]+/[^/]+/.*/.*$',
        r'^https://gist\.(?:githubusercontent|github)\.com/[^/]+/[^/]+/.*/.*$',

        # Repository tags and static assets
        r'^https://github\.com/[^/]+/[^/]+/tags.*$',
        r'^https://avatars\.githubusercontent\.com/.*$',
        r'^https://github\.githubassets\.com/.*$',

        # Main user and repository pages
        r'^https://github\.com/[^/]+/?$',
        r'^https://github\.com/[^/]+/[^/]+/?$',
    )
]
35
 
36
+ # --- Custom Index Page ---
37
+ INDEX_PAGE_HTML = """
38
+ <!DOCTYPE html>
39
+ <html lang="en">
40
+ <head>
41
+ <meta charset="UTF-8">
42
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
43
+ <title>GitHub Proxy</title>
44
+ <style>
45
+ body { font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif; line-height: 1.6; color: #333; max-width: 800px; margin: 40px auto; padding: 20px; }
46
+ .container { border: 1px solid #ddd; border-radius: 8px; padding: 20px 40px; background-color: #f9f9f9; }
47
+ .warning { color: #856404; background-color: #fff3cd; border: 1px solid #ffeeba; padding: 15px; border-radius: 4px; margin-bottom: 20px; }
48
+ h1, h2 { border-bottom: 1px solid #eaecef; padding-bottom: 0.3em; }
49
+ code { background-color: #eef; padding: 2px 4px; border-radius: 3px; }
50
+ </style>
51
+ </head>
52
+ <body>
53
+ <div class="container">
54
+ <h1>GitHub Reverse Proxy</h1>
55
+ <div class="warning">
56
+ <strong>Warning:</strong> You are accessing GitHub content through a reverse proxy.
57
+ All content served from this page is provided directly by GitHub.
58
+ </div>
59
+ <h2>How to Use</h2>
60
+ <p>To access GitHub content, simply append the GitHub URL to this proxy's address.</p>
61
+ <p>For example, to clone a repository:</p>
62
+ <code>git clone {YOUR_PROXY_URL}/https://github.com/owner/repo.git</code>
63
+ <p>Or to view a repository page:</p>
64
+ <code>{YOUR_PROXY_URL}/https://github.com/owner/repo</code>
65
+ </div>
66
+ </body>
67
+ </html>
68
+ """
69
+
def is_url_allowed(url):
    """Return True if ``url`` matches at least one pattern in ALLOWED_PATTERNS, else False."""
    return any(candidate.match(url) for candidate in ALLOWED_PATTERNS)
76
 
# --- Core Proxy Logic ---
@app.route('/', defaults={'path': ''}, methods=['GET', 'POST', 'PUT', 'DELETE'])
@app.route('/<path:path>', methods=['GET', 'POST', 'PUT', 'DELETE'])
def proxy(path):
    """
    Proxy a request to GitHub after validating it against the whitelist.

    The target URL is rebuilt from the request path plus its query string
    (the ``path`` route argument is not used because it lacks the query
    string). The upstream response is streamed back to the client.

    Returns:
        - the index page (200) when no target is given,
        - a 403 page when the target URL matches no whitelist pattern,
        - a 400 message when no hostname can be parsed from the target URL,
        - a 502 message when the upstream request fails,
        - otherwise a streaming ``Response`` mirroring the upstream status
          and headers (minus the headers that would conflict with streaming).
    """
    # request.full_path always ends with '?', even when there is no query
    # string. That stray '?' made the root path look non-empty (so the index
    # page was never served) and broke the '/?$' whitelist patterns for URLs
    # with a trailing slash. Append the query string only when one exists.
    raw_target = request.path
    query = request.query_string.decode('utf-8', errors='replace')
    if query:
        raw_target = f"{raw_target}?{query}"

    # The target might be URL-encoded, so decode it before validation.
    target_path = unquote(raw_target)
    if target_path.startswith('/'):
        target_path = target_path[1:]

    # For the root path, display the custom index page.
    if not target_path:
        return INDEX_PAGE_HTML, 200

    # Prepend 'https://' if the scheme is missing.
    if target_path.startswith(('http://', 'https://')):
        target_url = target_path
    else:
        target_url = 'https://' + target_path

    # Security check: only whitelisted URLs may be proxied.
    if not is_url_allowed(target_url):
        logging.warning(f"URL Denied! No pattern matched: {target_url}")
        return "<h1>403 Forbidden</h1><p>Request blocked by proxy security policy.</p>", 403

    try:
        target_host = urlparse(target_url).hostname
        if not target_host:
            raise ValueError("Hostname could not be parsed from the target URL.")
    except ValueError as e:
        # urlparse signals malformed URLs (e.g. bad IPv6 brackets) with ValueError.
        logging.error(f"Invalid target URL provided: {target_url} | Error: {e}")
        return f"Invalid target URL in path: {e}", 400

    # Forward the client's headers, but point 'Host' at the target's hostname.
    headers = {key: value for (key, value) in request.headers if key.lower() != 'host'}
    headers['Host'] = target_host

    try:
        # stream=True avoids loading large files into memory.
        resp = requests.request(
            method=request.method,
            url=target_url,
            headers=headers,
            data=request.get_data(),
            cookies=request.cookies,
            allow_redirects=False,  # Redirects are passed back to the client.
            stream=True,
            timeout=30,  # Applies to both connecting and waiting for data.
        )
    except requests.exceptions.RequestException as e:
        logging.error(f"Error while proxying to {target_url}: {e}")
        return "An error occurred while proxying the request.", 502

    # iter_content() yields decoded bytes of unknown total length, so the
    # upstream encoding/length/framing headers must not be forwarded.
    excluded_headers = ['content-encoding', 'content-length', 'transfer-encoding', 'connection']
    response_headers = [(name, value) for (name, value) in resp.raw.headers.items()
                        if name.lower() not in excluded_headers]

    # Stream the response back to the original client.
    response = Response(resp.iter_content(chunk_size=8192), resp.status_code, response_headers)
    # Release the upstream connection once the response is finished or the
    # client disconnects; with stream=True it is otherwise left open.
    response.call_on_close(resp.close)
    return response
141
 
142
  if __name__ == '__main__':
143
  app.run(host='0.0.0.0', port=7860)