import re import logging from flask import Flask, request, Response import requests from urllib.parse import urlparse, unquote # --- Basic Configuration --- logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') app = Flask(__name__) # --- Whitelisted URL Patterns for GitHub --- # This list defines all URL patterns that are permitted to be proxied. # It's a security measure to prevent the proxy from being used to access unintended domains. ALLOWED_PATTERNS = [ # Repositories: releases, archives, blobs, raw content re.compile(r'^https://github\.com/[^/]+/[^/]+/(?:releases|archive)/.*$', re.IGNORECASE), re.compile(r'^https://github\.com/[^/]+/[^/]+/(?:blob|raw)/.*$', re.IGNORECASE), # Git operations (clone, pull, push) re.compile(r'^https://github\.com/[^/]+/[^/]+/(?:info|git-).*$', re.IGNORECASE), # Raw content from various GitHub domains re.compile(r'^https://raw\.(?:githubusercontent|github)\.com/[^/]+/[^/]+/.*/.*$', re.IGNORECASE), re.compile(r'^https://gist\.(?:githubusercontent|github)\.com/[^/]+/[^/]+/.*/.*$', re.IGNORECASE), # Repository tags and assets re.compile(r'^https://github\.com/[^/]+/[^/]+/tags.*$', re.IGNORECASE), re.compile(r'^https://avatars\.githubusercontent\.com/.*$', re.IGNORECASE), re.compile(r'^https://github\.githubassets\.com/.*$', re.IGNORECASE), # Main repository/user pages re.compile(r'^https://github\.com/[^/]+/?$', re.IGNORECASE), re.compile(r'^https://github\.com/[^/]+/[^/]+/?$', re.IGNORECASE), ] # --- Custom Index Page --- INDEX_PAGE_HTML = """
To access GitHub content, simply append the GitHub URL to this proxy's address.
For example, to clone a repository:
git clone {YOUR_PROXY_URL}/https://github.com/owner/repo.git
Or to view a repository page:
{YOUR_PROXY_URL}/https://github.com/owner/repo
Request blocked by proxy security policy.
", 403 try: target_host = urlparse(target_url).hostname if not target_host: raise ValueError("Hostname could not be parsed from the target URL.") except Exception as e: logging.error(f"Invalid target URL provided: {target_url} | Error: {e}") return f"Invalid target URL in path: {e}", 400 # Forward headers, but set the 'Host' header to the target's hostname. headers = {key: value for (key, value) in request.headers if key.lower() != 'host'} headers['Host'] = target_host try: # Stream the request to handle large files efficiently. resp = requests.request( method=request.method, url=target_url, headers=headers, data=request.get_data(), cookies=request.cookies, allow_redirects=False, # Redirects are handled by the client. stream=True, timeout=30 # Timeout for the connection. ) # Exclude headers that can interfere with streaming. excluded_headers = ['content-encoding', 'content-length', 'transfer-encoding', 'connection'] response_headers = [(name, value) for (name, value) in resp.raw.headers.items() if name.lower() not in excluded_headers] # Stream the response back to the original client. return Response(resp.iter_content(chunk_size=8192), resp.status_code, response_headers) except requests.exceptions.RequestException as e: logging.error(f"Error while proxying to {target_url}: {e}") return "An error occurred while proxying the request.", 502 if __name__ == '__main__': app.run(host='0.0.0.0', port=7860)