File size: 8,419 Bytes
5d66f59
8c984b6
c4edceb
f8390d3
 
200c491
f8390d3
200c491
915b179
f8390d3
 
c4edceb
 
 
 
 
 
 
8c984b6
200c491
c4edceb
 
5d66f59
200c491
5d66f59
 
bc3acf5
200c491
bc3acf5
 
200c491
5d66f59
 
200c491
 
5d66f59
 
 
200c491
 
5d66f59
 
 
f8390d3
c4edceb
 
200c491
 
 
 
 
8c984b6
200c491
c4edceb
 
 
 
 
 
200c491
 
 
 
8c984b6
c4edceb
 
 
200c491
c4edceb
 
200c491
 
c4edceb
200c491
c4edceb
200c491
c4edceb
42175a7
 
c4edceb
 
200c491
 
 
 
 
5d66f59
200c491
bc3acf5
5d66f59
 
 
 
c4edceb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1dbbd42
 
f8390d3
200c491
c4edceb
 
200c491
c4edceb
 
 
 
 
 
 
 
200c491
1dbbd42
c4edceb
 
 
 
 
 
 
8c984b6
c4edceb
 
 
 
200c491
c4edceb
 
5d66f59
c4edceb
915b179
200c491
5d66f59
c4edceb
200c491
5d66f59
 
 
1dbbd42
200c491
1dbbd42
200c491
1dbbd42
5d66f59
200c491
5d66f59
 
 
f8390d3
c4edceb
 
 
 
f8390d3
 
5d66f59
f8390d3
 
 
c4edceb
1dbbd42
c4edceb
f8390d3
c4edceb
 
f8390d3
200c491
 
c4edceb
 
200c491
 
f8390d3
200c491
 
5d66f59
f8390d3
c4edceb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
import hmac
import logging
import os
import re
from urllib.parse import urlparse, unquote

import requests
from flask import Flask, request, Response

# --- Basic Configuration ---
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
app = Flask(__name__)

# --- Load Secret Key from Environment ---
# The secret is read once at import time. Startup is aborted when the variable
# is unset or empty so the service never runs without authentication.
PROXY_SECRET_KEY = os.environ.get('PROXY_SECRET_KEY')
if not PROXY_SECRET_KEY:
    logging.error("PROXY_SECRET_KEY environment variable is not set!")
    # Raise SystemExit directly: the bare `exit()` helper is injected by the
    # `site` module for interactive use and is not guaranteed to be defined.
    raise SystemExit(1)

logging.info("Proxy service initialized with secret key authentication")

# --- Whitelisted URL Patterns for GitHub ---
# Only URLs matching one of these expressions may be proxied; everything else
# is rejected so the proxy cannot be pointed at unintended domains.
# Every expression is compiled case-insensitively, in the order listed.
ALLOWED_PATTERNS = [
    re.compile(expression, re.IGNORECASE)
    for expression in (
        # Repositories: releases, archives, blobs, raw content
        r'^https://github\.com/[^/]+/[^/]+/(?:releases|archive)/.*$',
        r'^https://github\.com/[^/]+/[^/]+/(?:blob|raw)/.*$',

        # Git operations (clone, pull, push)
        r'^https://github\.com/[^/]+/[^/]+/(?:info|git-).*$',

        # Raw content from various GitHub domains
        r'^https://raw\.(?:githubusercontent|github)\.com/[^/]+/[^/]+/.*/.*$',
        r'^https://gist\.(?:githubusercontent|github)\.com/[^/]+/[^/]+/.*/.*$',

        # Repository tags and assets
        r'^https://github\.com/[^/]+/[^/]+/tags.*$',
        r'^https://avatars\.githubusercontent\.com/.*$',
        r'^https://github\.githubassets\.com/.*$',

        # Main repository/user pages
        r'^https://github\.com/[^/]+/?$',
        r'^https://github\.com/[^/]+/[^/]+/?$',
    )
]

# --- Custom Index Page ---
# Static HTML usage page. The text is served as written: the
# `{YOUR_PROXY_URL}` and `<SECRET_KEY>` markers are literal placeholders for
# the reader and are not substituted by this module.
INDEX_PAGE_HTML = """
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Private GitHub Proxy</title>
    <style>
        body { font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif; line-height: 1.6; color: #333; max-width: 800px; margin: 40px auto; padding: 20px; }
        .container { border: 1px solid #ddd; border-radius: 8px; padding: 20px 40px; background-color: #f9f9f9; }
        .warning { color: #856404; background-color: #fff3cd; border: 1px solid #ffeeba; padding: 15px; border-radius: 4px; margin-bottom: 20px; }
        .security { color: #721c24; background-color: #f8d7da; border: 1px solid #f5c6cb; padding: 15px; border-radius: 4px; margin-bottom: 20px; }
        h1, h2 { border-bottom: 1px solid #eaecef; padding-bottom: 0.3em; }
        code { background-color: #eef; padding: 2px 4px; border-radius: 3px; }
    </style>
</head>
<body>
    <div class="container">
        <h1>Private GitHub Reverse Proxy</h1>
        <div class="security">
            <strong>Security Notice:</strong> This is a private proxy service that requires authentication.
        </div>
        <div class="warning">
            <strong>Warning:</strong> You are accessing GitHub content through a reverse proxy. 
            All content served from this page is provided directly by GitHub.
        </div>
        <h2>How to Use</h2>
        <p>To access GitHub content, you need to include the secret key in the URL path:</p>
        <p>For example, to clone a repository:</p>
        <code>git clone {YOUR_PROXY_URL}/&lt;SECRET_KEY&gt;/https://github.com/owner/repo.git</code>
        <p>Or to view a repository page:</p>
        <code>{YOUR_PROXY_URL}/&lt;SECRET_KEY&gt;/https://github.com/owner/repo</code>
        <p>Example:</p>
        <code>https://megatrump-deno.hf.space/&lt;SECRET_KEY&gt;/https://github.com/python/cpython.git</code>
        <h2>Authentication Required</h2>
        <p>This proxy requires a valid secret key to access. Contact the administrator for access credentials.</p>
    </div>
</body>
</html>
"""

def is_url_allowed(url):
    """Return True when *url* matches at least one entry of ALLOWED_PATTERNS, else False."""
    return any(allowed.match(url) for allowed in ALLOWED_PATTERNS)

def extract_secret_and_url(path):
    """Split a request path into its secret-key segment and the remaining target URL.

    A single leading slash is dropped, then the path is cut at the first
    remaining '/'. Returns a ``(secret_key, target_url)`` tuple, or
    ``(None, None)`` when no '/' separator is left after that.
    """
    trimmed = path[1:] if path.startswith('/') else path

    # Everything before the first '/' is the key; everything after is the URL.
    secret_key, separator, target_url = trimmed.partition('/')
    if not separator:
        return None, None
    return secret_key, target_url

# --- Core Proxy Logic ---
@app.route('/', defaults={'path': ''}, methods=['GET', 'POST', 'PUT', 'DELETE'])
@app.route('/<path:path>', methods=['GET', 'POST', 'PUT', 'DELETE'])
def proxy(path):
    """
    Proxy a request to GitHub after validating the secret key and URL whitelist.

    The request path is expected to look like ``/<SECRET_KEY>/<target URL>``.
    The upstream response is streamed back to the client.

    Args:
        path: Path captured by the Flask route. It is not used directly; the
            target is rebuilt from ``request.full_path`` so the query string
            is forwarded too.

    Returns:
        The index page for the root path; a 403 response for a wrong or
        missing secret or a non-whitelisted URL; a 400 response for a missing
        or unparsable target URL; a 502 response when the upstream request
        fails; otherwise a streaming ``Response`` carrying the upstream
        status code, filtered headers and body.
    """
    # `full_path` keeps its '?' separator even when the query string is empty.
    # Drop that dangling '?' so it neither leaks into the target URL (where it
    # would skew the whitelist match) nor hides an empty target URL.
    raw_path = request.full_path
    if not request.query_string and raw_path.endswith('?'):
        raw_path = raw_path[:-1]

    # The full path might be URL-encoded, so we decode it.
    target_path = unquote(raw_path)
    if target_path.startswith('/'):
        target_path = target_path[1:]

    # For the root path, display the custom usage page.
    if not target_path or target_path == '?':
        return INDEX_PAGE_HTML, 200

    # Extract secret key and target URL
    provided_secret, target_url_path = extract_secret_and_url(target_path)

    # Validate the secret key with a constant-time comparison so response
    # timing does not reveal how much of a guessed key is correct. Both sides
    # are encoded to bytes because compare_digest() rejects non-ASCII str.
    if not provided_secret or not hmac.compare_digest(
        provided_secret.encode('utf-8', 'surrogatepass'),
        PROXY_SECRET_KEY.encode('utf-8', 'surrogatepass'),
    ):
        logging.warning(f"Access denied: Invalid or missing secret key from IP: {request.remote_addr}")
        return "<h1>403 Forbidden</h1><p>Access denied: Invalid or missing authentication key.</p>", 403

    if not target_url_path:
        logging.warning(f"Access denied: No target URL provided from IP: {request.remote_addr}")
        return "<h1>400 Bad Request</h1><p>No target URL provided.</p>", 400

    # Prepend 'https://' if the scheme is missing.
    if target_url_path.startswith(('http://', 'https://')):
        target_url = target_url_path
    else:
        target_url = 'https://' + target_url_path

    # Security check: Ensure the URL is in the whitelist.
    if not is_url_allowed(target_url):
        logging.warning(f"URL Denied! No pattern matched: {target_url} from IP: {request.remote_addr}")
        return "<h1>403 Forbidden</h1><p>Request blocked by proxy security policy.</p>", 403

    try:
        target_host = urlparse(target_url).hostname
        if not target_host:
            raise ValueError("Hostname could not be parsed from the target URL.")
    except ValueError as e:
        logging.error(f"Invalid target URL provided: {target_url} | Error: {e}")
        return f"Invalid target URL in path: {e}", 400

    # Forward headers, but set the 'Host' header to the target's hostname.
    headers = {key: value for (key, value) in request.headers if key.lower() != 'host'}
    headers['Host'] = target_host

    # Log the proxied request
    logging.info(f"Proxying request to: {target_url} from IP: {request.remote_addr}")

    try:
        # Stream the request to handle large files efficiently.
        resp = requests.request(
            method=request.method,
            url=target_url,
            headers=headers,
            data=request.get_data(),
            cookies=request.cookies,
            allow_redirects=False,  # Redirects are handled by the client.
            stream=True,
            timeout=30,  # Timeout for the connection.
        )
    except requests.exceptions.RequestException as e:
        logging.error(f"Error while proxying to {target_url}: {e}")
        return "An error occurred while proxying the request.", 502

    # Exclude headers that can interfere with streaming.
    excluded_headers = {'content-encoding', 'content-length', 'transfer-encoding', 'connection'}
    response_headers = [(name, value) for (name, value) in resp.raw.headers.items()
                        if name.lower() not in excluded_headers]

    # Stream the response back to the original client.
    response = Response(resp.iter_content(chunk_size=8192), resp.status_code, response_headers)
    # Release the upstream connection once the response is closed, including
    # when the client disconnects before the body has been fully read.
    response.call_on_close(resp.close)
    return response

# Script entry point: when this file is executed directly, start the Flask
# app's own server bound to host '0.0.0.0' on port 7860.
if __name__ == '__main__':
    app.run(host='0.0.0.0', port=7860)