import re import os import logging from flask import Flask, request, Response import requests from urllib.parse import urlparse, unquote # --- Basic Configuration --- logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') app = Flask(__name__) # --- Load Secret Key from Environment --- PROXY_SECRET_KEY = os.environ.get('PROXY_SECRET_KEY') if not PROXY_SECRET_KEY: logging.error("PROXY_SECRET_KEY environment variable is not set!") exit(1) logging.info(f"Proxy service initialized with secret key authentication") # --- Whitelisted URL Patterns for GitHub --- # This list defines all URL patterns that are permitted to be proxied. # It's a security measure to prevent the proxy from being used to access unintended domains. ALLOWED_PATTERNS = [ # Repositories: releases, archives, blobs, raw content re.compile(r'^https://github\.com/[^/]+/[^/]+/(?:releases|archive)/.*$', re.IGNORECASE), re.compile(r'^https://github\.com/[^/]+/[^/]+/(?:blob|raw)/.*$', re.IGNORECASE), # Git operations (clone, pull, push) re.compile(r'^https://github\.com/[^/]+/[^/]+/(?:info|git-).*$', re.IGNORECASE), # Raw content from various GitHub domains re.compile(r'^https://raw\.(?:githubusercontent|github)\.com/[^/]+/[^/]+/.*/.*$', re.IGNORECASE), re.compile(r'^https://gist\.(?:githubusercontent|github)\.com/[^/]+/[^/]+/.*/.*$', re.IGNORECASE), # Repository tags and assets re.compile(r'^https://github\.com/[^/]+/[^/]+/tags.*$', re.IGNORECASE), re.compile(r'^https://avatars\.githubusercontent\.com/.*$', re.IGNORECASE), re.compile(r'^https://github\.githubassets\.com/.*$', re.IGNORECASE), # Main repository/user pages re.compile(r'^https://github\.com/[^/]+/?$', re.IGNORECASE), re.compile(r'^https://github\.com/[^/]+/[^/]+/?$', re.IGNORECASE), ] # --- Custom Index Page --- INDEX_PAGE_HTML = """
To access GitHub content, you need to include the secret key in the URL path:
For example, to clone a repository:
git clone {YOUR_PROXY_URL}/<SECRET_KEY>/https://github.com/owner/repo.git
Or to view a repository page:
{YOUR_PROXY_URL}/<SECRET_KEY>/https://github.com/owner/repo
Example:
https://megatrump-deno.hf.space/<SECRET_KEY>/https://github.com/python/cpython.git
This proxy requires a valid secret key to access. Contact the administrator for access credentials.
Access denied: Invalid or missing authentication key.
", 403 if not target_url_path: logging.warning(f"Access denied: No target URL provided from IP: {request.remote_addr}") return "No target URL provided.
", 400 # Prepend 'https://' if the scheme is missing. if not target_url_path.startswith(('http://', 'https://')): target_url = 'https://' + target_url_path else: target_url = target_url_path # Security check: Ensure the URL is in the whitelist. if not is_url_allowed(target_url): logging.warning(f"URL Denied! No pattern matched: {target_url} from IP: {request.remote_addr}") return "Request blocked by proxy security policy.
", 403 try: target_host = urlparse(target_url).hostname if not target_host: raise ValueError("Hostname could not be parsed from the target URL.") except Exception as e: logging.error(f"Invalid target URL provided: {target_url} | Error: {e}") return f"Invalid target URL in path: {e}", 400 # Forward headers, but set the 'Host' header to the target's hostname. headers = {key: value for (key, value) in request.headers if key.lower() != 'host'} headers['Host'] = target_host try: # Log successful proxy request logging.info(f"Proxying request to: {target_url} from IP: {request.remote_addr}") # Stream the request to handle large files efficiently. resp = requests.request( method=request.method, url=target_url, headers=headers, data=request.get_data(), cookies=request.cookies, allow_redirects=False, # Redirects are handled by the client. stream=True, timeout=30 # Timeout for the connection. ) # Exclude headers that can interfere with streaming. excluded_headers = ['content-encoding', 'content-length', 'transfer-encoding', 'connection'] response_headers = [(name, value) for (name, value) in resp.raw.headers.items() if name.lower() not in excluded_headers] # Stream the response back to the original client. return Response(resp.iter_content(chunk_size=8192), resp.status_code, response_headers) except requests.exceptions.RequestException as e: logging.error(f"Error while proxying to {target_url}: {e}") return "An error occurred while proxying the request.", 502 if __name__ == '__main__': app.run(host='0.0.0.0', port=7860)