File size: 8,419 Bytes
5d66f59 8c984b6 c4edceb f8390d3 200c491 f8390d3 200c491 915b179 f8390d3 c4edceb 8c984b6 200c491 c4edceb 5d66f59 200c491 5d66f59 bc3acf5 200c491 bc3acf5 200c491 5d66f59 200c491 5d66f59 200c491 5d66f59 f8390d3 c4edceb 200c491 8c984b6 200c491 c4edceb 200c491 8c984b6 c4edceb 200c491 c4edceb 200c491 c4edceb 200c491 c4edceb 200c491 c4edceb 42175a7 c4edceb 200c491 5d66f59 200c491 bc3acf5 5d66f59 c4edceb 1dbbd42 f8390d3 200c491 c4edceb 200c491 c4edceb 200c491 1dbbd42 c4edceb 8c984b6 c4edceb 200c491 c4edceb 5d66f59 c4edceb 915b179 200c491 5d66f59 c4edceb 200c491 5d66f59 1dbbd42 200c491 1dbbd42 200c491 1dbbd42 5d66f59 200c491 5d66f59 f8390d3 c4edceb f8390d3 5d66f59 f8390d3 c4edceb 1dbbd42 c4edceb f8390d3 c4edceb f8390d3 200c491 c4edceb 200c491 f8390d3 200c491 5d66f59 f8390d3 c4edceb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 |
import hmac
import logging
import os
import re
from urllib.parse import urlparse, unquote

from flask import Flask, request, Response
import requests
# --- Basic Configuration ---
# Root logger emits INFO and above as "<timestamp> - <level> - <message>".
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
# The Flask application object that the route decorators below attach to.
app = Flask(__name__)
# --- Load Secret Key from Environment ---
# Shared secret read once at import time from the PROXY_SECRET_KEY
# environment variable. The service refuses to start without it.
PROXY_SECRET_KEY = os.environ.get('PROXY_SECRET_KEY')
if not PROXY_SECRET_KEY:
    logging.error("PROXY_SECRET_KEY environment variable is not set!")
    # The bare `exit()` helper is injected by the `site` module for
    # interactive use and is not guaranteed to exist (e.g. `python -S`),
    # so terminate by raising SystemExit directly.
    raise SystemExit(1)
logging.info("Proxy service initialized with secret key authentication")
# --- Whitelisted URL Patterns for GitHub ---
# Every URL the proxy is willing to forward must match at least one of these
# case-insensitive patterns; anything else is rejected. This keeps the proxy
# from being used to reach unintended domains.
ALLOWED_PATTERNS = [
    re.compile(source, re.IGNORECASE)
    for source in (
        # Repositories: releases, archives, blobs, raw content
        r'^https://github\.com/[^/]+/[^/]+/(?:releases|archive)/.*$',
        r'^https://github\.com/[^/]+/[^/]+/(?:blob|raw)/.*$',
        # Git operations (clone, pull, push)
        r'^https://github\.com/[^/]+/[^/]+/(?:info|git-).*$',
        # Raw content from various GitHub domains
        r'^https://raw\.(?:githubusercontent|github)\.com/[^/]+/[^/]+/.*/.*$',
        r'^https://gist\.(?:githubusercontent|github)\.com/[^/]+/[^/]+/.*/.*$',
        # Repository tags and assets
        r'^https://github\.com/[^/]+/[^/]+/tags.*$',
        r'^https://avatars\.githubusercontent\.com/.*$',
        r'^https://github\.githubassets\.com/.*$',
        # Main repository/user pages
        r'^https://github\.com/[^/]+/?$',
        r'^https://github\.com/[^/]+/[^/]+/?$',
    )
]
# --- Custom Index Page ---
# Static HTML returned for requests to the root path. The <SECRET_KEY>
# placeholder is written as &lt;SECRET_KEY&gt; so browsers render it as text
# instead of parsing it as an (invisible) unknown HTML element.
INDEX_PAGE_HTML = """
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Private GitHub Proxy</title>
<style>
body { font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif; line-height: 1.6; color: #333; max-width: 800px; margin: 40px auto; padding: 20px; }
.container { border: 1px solid #ddd; border-radius: 8px; padding: 20px 40px; background-color: #f9f9f9; }
.warning { color: #856404; background-color: #fff3cd; border: 1px solid #ffeeba; padding: 15px; border-radius: 4px; margin-bottom: 20px; }
.security { color: #721c24; background-color: #f8d7da; border: 1px solid #f5c6cb; padding: 15px; border-radius: 4px; margin-bottom: 20px; }
h1, h2 { border-bottom: 1px solid #eaecef; padding-bottom: 0.3em; }
code { background-color: #eef; padding: 2px 4px; border-radius: 3px; }
</style>
</head>
<body>
<div class="container">
<h1>Private GitHub Reverse Proxy</h1>
<div class="security">
<strong>Security Notice:</strong> This is a private proxy service that requires authentication.
</div>
<div class="warning">
<strong>Warning:</strong> You are accessing GitHub content through a reverse proxy.
All content served from this page is provided directly by GitHub.
</div>
<h2>How to Use</h2>
<p>To access GitHub content, you need to include the secret key in the URL path:</p>
<p>For example, to clone a repository:</p>
<code>git clone {YOUR_PROXY_URL}/&lt;SECRET_KEY&gt;/https://github.com/owner/repo.git</code>
<p>Or to view a repository page:</p>
<code>{YOUR_PROXY_URL}/&lt;SECRET_KEY&gt;/https://github.com/owner/repo</code>
<p>Example:</p>
<code>https://megatrump-deno.hf.space/&lt;SECRET_KEY&gt;/https://github.com/python/cpython.git</code>
<h2>Authentication Required</h2>
<p>This proxy requires a valid secret key to access. Contact the administrator for access credentials.</p>
</div>
</body>
</html>
"""
def is_url_allowed(url):
    """Return True if *url* matches at least one ALLOWED_PATTERNS entry.

    Each pattern is applied with ``match`` (anchored at the start of the
    string); the first hit short-circuits the search.
    """
    return any(pattern.match(url) for pattern in ALLOWED_PATTERNS)
def extract_secret_and_url(path):
    """Split a request path into ``(secret_key, target_url)``.

    A single leading slash is dropped, then the path is cut at the first
    remaining ``/``: the text before it is the secret key, everything after
    it is the target URL. If the path contains no such slash, both values
    are returned as ``None``.
    """
    # Drop at most one leading slash.
    trimmed = path[1:] if path.startswith('/') else path
    # Cut at the first slash; an empty separator means there was none.
    secret_key, separator, target_url = trimmed.partition('/')
    if not separator:
        return None, None
    return secret_key, target_url
# --- Core Proxy Logic ---
def _secret_matches(provided_secret):
    """Return True when *provided_secret* equals PROXY_SECRET_KEY.

    The comparison is done with ``hmac.compare_digest`` so its duration does
    not reveal how many leading characters of a guess were correct.
    """
    if not provided_secret:
        return False
    # compare_digest only accepts ASCII-only str, and the candidate comes from
    # an arbitrary URL, so compare the UTF-8 bytes instead.
    return hmac.compare_digest(
        provided_secret.encode('utf-8', 'surrogatepass'),
        PROXY_SECRET_KEY.encode('utf-8', 'surrogatepass'),
    )


@app.route('/', defaults={'path': ''}, methods=['GET', 'POST', 'PUT', 'DELETE'])
@app.route('/<path:path>', methods=['GET', 'POST', 'PUT', 'DELETE'])
def proxy(path):
    """Proxy a request to GitHub after checking the secret key and whitelist.

    The request path is expected to look like ``/<secret>/<target-url>``.
    The ``path`` argument supplied by the route is not used; the target is
    derived from ``request.path`` and ``request.query_string``.

    Returns:
        - the index page (200) for a bare request to ``/``;
        - 403 when the secret key is missing or wrong, or the target URL
          matches no whitelist pattern;
        - 400 when no target URL follows the secret or no hostname can be
          parsed from it;
        - 502 when the upstream request raises a ``requests`` exception;
        - otherwise the upstream status, headers and body, streamed back.
    """
    # Keep the query string in its raw (still percent-encoded) form so that
    # encoded '&', '=' or '%' inside parameter values reach GitHub intact.
    raw_query = request.query_string.decode('utf-8', 'replace')

    # The path might be URL-encoded, so we decode it. request.path is used
    # rather than request.full_path because the latter always appends '?',
    # which ended up as a spurious trailing '?' on every upstream URL.
    target_path = unquote(request.path)
    if target_path.startswith('/'):
        target_path = target_path[1:]

    # For the root path (with no query string), display the custom page.
    if not target_path and not raw_query:
        return INDEX_PAGE_HTML, 200

    # Extract secret key and target URL.
    provided_secret, target_url_path = extract_secret_and_url(target_path)

    # Validate secret key.
    if not _secret_matches(provided_secret):
        logging.warning(f"Access denied: Invalid or missing secret key from IP: {request.remote_addr}")
        return "<h1>403 Forbidden</h1><p>Access denied: Invalid or missing authentication key.</p>", 403

    if not target_url_path:
        logging.warning(f"Access denied: No target URL provided from IP: {request.remote_addr}")
        return "<h1>400 Bad Request</h1><p>No target URL provided.</p>", 400

    # Prepend 'https://' if the scheme is missing.
    if not target_url_path.startswith(('http://', 'https://')):
        target_url = 'https://' + target_url_path
    else:
        target_url = target_url_path

    # The whitelist sees the decoded query (as before); the upstream request
    # carries the raw one.
    if raw_query:
        url_to_check = target_url + '?' + unquote(raw_query)
        upstream_url = target_url + '?' + raw_query
    else:
        url_to_check = target_url
        upstream_url = target_url

    # Security check: Ensure the URL is in the whitelist.
    if not is_url_allowed(url_to_check):
        logging.warning(f"URL Denied! No pattern matched: {url_to_check} from IP: {request.remote_addr}")
        return "<h1>403 Forbidden</h1><p>Request blocked by proxy security policy.</p>", 403

    try:
        target_host = urlparse(target_url).hostname
        if not target_host:
            raise ValueError("Hostname could not be parsed from the target URL.")
    except ValueError as e:
        logging.error(f"Invalid target URL provided: {target_url} | Error: {e}")
        return f"Invalid target URL in path: {e}", 400

    # Forward headers, but set the 'Host' header to the target's hostname.
    headers = {key: value for (key, value) in request.headers if key.lower() != 'host'}
    headers['Host'] = target_host

    logging.info(f"Proxying request to: {upstream_url} from IP: {request.remote_addr}")
    try:
        # Stream the request to handle large files efficiently.
        resp = requests.request(
            method=request.method,
            url=upstream_url,
            headers=headers,
            data=request.get_data(),
            cookies=request.cookies,
            allow_redirects=False,  # Redirects are handled by the client.
            stream=True,
            timeout=30,  # Timeout for the connection.
        )
    except requests.exceptions.RequestException as e:
        logging.error(f"Error while proxying to {upstream_url}: {e}")
        return "An error occurred while proxying the request.", 502

    # Exclude headers that can interfere with streaming: iter_content()
    # yields decoded chunks, so the upstream encoding/length no longer apply.
    excluded_headers = {'content-encoding', 'content-length', 'transfer-encoding', 'connection'}
    response_headers = [(name, value) for (name, value) in resp.raw.headers.items()
                        if name.lower() not in excluded_headers]

    # Stream the response back to the original client.
    response = Response(resp.iter_content(chunk_size=8192), resp.status_code, response_headers)
    # Release the upstream connection once the response is closed, even if
    # the client disconnects before the body has been fully consumed.
    response.call_on_close(resp.close)
    return response
if __name__ == '__main__':
    # When executed as a script, start Flask's built-in server listening on
    # every network interface at port 7860.
    app.run(port=7860, host='0.0.0.0')
|