Spaces:

megatrump
/

deno

Running

App Files Files Community

megatrump committed on Jul 6

Commit

1dbbd42

verified ·

1 Parent(s): 5d66f59

Update app.py

Browse files

Files changed (1) hide show

app.py +29 -22

app.py CHANGED Viewed

@@ -1,15 +1,15 @@
 import re
 from flask import Flask, request, Response
 import requests
 app = Flask(__name__)
-# --- 白名单过滤规则 (保持不变) ---
-# 这些规则现在将应用到从路径中解析出的完整URL上
 ALLOWED_PATTERNS = [
     re.compile(r'^https://github\.com/[^/]+/[^/]+/(?:releases|archive)/.*$', re.IGNORECASE),
     re.compile(r'^https://github\.com/[^/]+/[^/]+/(?:blob|raw)/.*$', re.IGNORECASE),
-    re.compile(r'^https://github\.com/[^/]+/[^/]+/(?:info|git-).*/.*$', re.IGNORECASE),
     re.compile(r'^https://raw\.(?:githubusercontent|github)\.com/[^/]+/[^/]+/.*/.*$', re.IGNORECASE),
     re.compile(r'^https://gist\.(?:githubusercontent|github)\.com/[^/]+/[^/]+/.*/.*$', re.IGNORECASE),
     re.compile(r'^https://github\.com/[^/]+/[^/]+/tags.*$', re.IGNORECASE),
@@ -20,38 +20,43 @@ ALLOWED_PATTERNS = [
 ]
 def is_url_allowed(url):
-    """检查给定的URL是否匹配白名单中的任何一个模式。"""
     for pattern in ALLOWED_PATTERNS:
         if pattern.match(url):
             return True
     return False
-# --- 核心代理逻辑 ---
-# 我们现在使用一个更通用的路由来捕获所有请求
-@app.route('/', defaults={'path': ''})
-@app.route('/<path:path>')
 def proxy(path):
     """
-    一个通用的反向代理，它将目标URL作为路径的一部分。
-    例如: /https://github.com/user/repo
     """
-    # --- 1. 从请求路径中构建目标URL ---
-    # 使用 request.full_path 来获取完整的路径和查询参数, e.g., /https://github.com/user/repo?service=...
     target_path = request.full_path
-    # 移除开头的斜杠
     if target_path.startswith('/'):
         target_path = target_path[1:]
-    # 如果路径本身不是一个完整的URL，则为其添加 https://
     if not target_path.startswith(('http://', 'https://')):
         target_url = 'https://' + target_path
     else:
         target_url = target_path
-    # --- 2. 执行安全过滤检查 ---
     if not is_url_allowed(target_url):
         error_message = (
             "<h1>403 Forbidden</h1>"
@@ -60,13 +65,13 @@ def proxy(path):
         )
         return error_message, 403
-    # --- 3. 转发请求 ---
-    # 从目标URL中解析出Host头
     try:
-        from urllib.parse import urlparse
         target_host = urlparse(target_url).hostname
-    except Exception:
-        return "Invalid target URL in path", 400
     headers = {key: value for (key, value) in request.headers if key.lower() != 'host'}
     headers['Host'] = target_host
@@ -79,7 +84,8 @@ def proxy(path):
             data=request.get_data(),
             cookies=request.cookies,
             allow_redirects=False,
-            stream=True
         )
         excluded_headers = ['content-encoding', 'content-length', 'transfer-encoding', 'connection']
@@ -92,4 +98,5 @@ def proxy(path):
 if __name__ == '__main__':
-    app.run(host='0.0.0.0', port=7860)

 import re
 from flask import Flask, request, Response
 import requests
+from urllib.parse import urlparse
 app = Flask(__name__)
+# --- Whitelist filtering rules: only URLs matching one of these patterns are forwarded ---
 ALLOWED_PATTERNS = [
     re.compile(r'^https://github\.com/[^/]+/[^/]+/(?:releases|archive)/.*$', re.IGNORECASE),
     re.compile(r'^https://github\.com/[^/]+/[^/]+/(?:blob|raw)/.*$', re.IGNORECASE),
+    re.compile(r'^https://github\.com/[^/]+/[^/]+/(?:info|git-).*/.*$', re.IGNORECASE), # This is key for git clone
     re.compile(r'^https://raw\.(?:githubusercontent|github)\.com/[^/]+/[^/]+/.*/.*$', re.IGNORECASE),
     re.compile(r'^https://gist\.(?:githubusercontent|github)\.com/[^/]+/[^/]+/.*/.*$', re.IGNORECASE),
     re.compile(r'^https://github\.com/[^/]+/[^/]+/tags.*$', re.IGNORECASE),
 ]
 def is_url_allowed(url):
+    """Checks if the given URL matches any pattern in the whitelist."""
     for pattern in ALLOWED_PATTERNS:
         if pattern.match(url):
             return True
     return False
+# --- Core Proxy Logic ---
+# Catch-all routes: the root and every sub-path are handled by proxy() for GET, POST, PUT and DELETE
+@app.route('/', defaults={'path': ''}, methods=['GET', 'POST', 'PUT', 'DELETE'])
+@app.route('/<path:path>', methods=['GET', 'POST', 'PUT', 'DELETE'])
 def proxy(path):
     """
+    A universal reverse proxy that takes the target URL as part of the path.
     """
+    # --- 1. Construct target URL from the request path ---
+    # request.full_path includes the query string, which is essential for git
     target_path = request.full_path
+    # Remove the leading slash
     if target_path.startswith('/'):
         target_path = target_path[1:]
+    # If the path is empty (root request), return a simple landing page
+    if not target_path:
+        return ("<p>This is a GitHub reverse proxy. Usage:</p>"
+                "<p><code>&lt;proxy_url&gt;/&lt;target_github_url&gt;</code></p>"
+                "<p>Example: <code>/github.com/python/cpython.git</code></p>"), 200
+    # Prepend https:// if no scheme is present
     if not target_path.startswith(('http://', 'https://')):
         target_url = 'https://' + target_path
     else:
         target_url = target_path
+    # --- 2. Perform security filter check ---
     if not is_url_allowed(target_url):
         error_message = (
             "<h1>403 Forbidden</h1>"
         )
         return error_message, 403
+    # --- 3. Forward the request ---
     try:
         target_host = urlparse(target_url).hostname
+        if not target_host:
+            raise ValueError("Could not parse hostname from target URL")
+    except Exception as e:
+        return f"Invalid target URL in path: {e}", 400
     headers = {key: value for (key, value) in request.headers if key.lower() != 'host'}
     headers['Host'] = target_host
             data=request.get_data(),
             cookies=request.cookies,
             allow_redirects=False,
+            stream=True,
+            timeout=30  # Added a timeout for robustness
         )
         excluded_headers = ['content-encoding', 'content-length', 'transfer-encoding', 'connection']
 if __name__ == '__main__':
+    # For production, use a proper WSGI server like Gunicorn
+    app.run(host='0.0.0.0', port=7860)