Arkm20 committed on
Commit
a321cf9
·
verified ·
1 Parent(s): 495dc0c

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +86 -0
app.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Flask, request, Response
2
+ import requests
3
+ from bs4 import BeautifulSoup
4
+ import os
5
+
6
+ app = Flask(__name__)
7
+
8
def create_proxy_url(original_url, base_proxy_url):
    """Return a URL that routes ``original_url`` through the proxy.

    Args:
        original_url: The URL to wrap. Protocol-relative URLs (``//host/...``)
            are upgraded to ``https://`` before being wrapped.
        base_proxy_url: The proxy endpoint; the target is appended to it as a
            percent-encoded ``url`` query parameter.

    Returns:
        ``"<base_proxy_url>?url=<quoted original_url>"``. URLs that carry an
        explicit scheme other than ``http``/``https`` (``mailto:``,
        ``javascript:``, ``data:``, ``tel:`` ...) are returned unchanged,
        because the proxy cannot fetch them and wrapping them would break the
        link.
    """
    # Same functions that requests.utils / requests.compat re-export.
    from urllib.parse import quote, urlsplit

    # Ensure protocol-relative URLs are absolute.
    if original_url.startswith('//'):
        original_url = 'https://' + original_url[2:]

    try:
        scheme = urlsplit(original_url).scheme.lower()
    except ValueError:
        # Unparseable (e.g. malformed IPv6 literal): fall back to wrapping it.
        scheme = ''

    # An empty scheme means a relative URL; it is still wrapped, as before.
    if scheme and scheme not in ('http', 'https'):
        return original_url

    return f"{base_proxy_url}?url={quote(original_url)}"
14
+
15
@app.route('/')
def proxy():
    """Fetch the page named by the ``url`` query parameter and relay it.

    Without a ``url`` parameter a small landing page is returned. Otherwise
    the target is requested with the caller's User-Agent. HTML responses are
    parsed and their ``<a>``, ``<link>``, ``<script>`` and ``<img>`` URLs are
    rewritten to point back at this proxy; every other content type is
    streamed through unchanged.

    Returns:
        A Flask response: the landing page (200), a plain-text 400 for
        URLs that are not absolute http(s) URLs, the relayed upstream response
        (upstream status code), or a plain-text 500 when the upstream request
        fails.
    """
    from urllib.parse import urljoin, urlsplit

    target_url = request.args.get('url')
    if not target_url:
        # Simple landing page for the proxy itself. The placeholder is
        # HTML-escaped so the browser displays it instead of parsing a tag.
        return """
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Proxy Service</title>
    <style>
        body { font-family: sans-serif; background-color: #121212; color: #e0e0e0; display: flex; justify-content: center; align-items: center; height: 100vh; margin: 0; }
        .container { text-align: center; padding: 2rem; background-color: #1e1e1e; border-radius: 8px; }
        code { background-color: #2d2d2d; padding: 0.2rem 0.4rem; border-radius: 4px; }
    </style>
</head>
<body>
    <div class="container">
        <h1>Proxy Service is Active</h1>
        <p>Use this service by appending <code>?url=&lt;website_url&gt;</code> to the URL.</p>
    </div>
</body>
</html>
""", 200

    # The URL is untrusted input: only absolute http(s) URLs are accepted.
    # NOTE(review): this does not stop requests to internal/private addresses
    # (SSRF); add host allow/deny-listing if the deployment needs it.
    target_url = target_url.strip()
    try:
        target_parts = urlsplit(target_url)
    except ValueError:
        target_parts = None
    if (
        target_parts is None
        or target_parts.scheme.lower() not in ('http', 'https')
        or not target_parts.netloc
    ):
        return Response(
            "Only absolute http:// and https:// URLs can be proxied.",
            status=400,
            mimetype='text/plain',
        )

    # Use a session so cookies set during redirects are handled properly.
    session = requests.Session()
    resp = None
    handed_off = False  # True once the streaming response owns the cleanup
    try:
        session.headers.update({
            'User-Agent': request.headers.get('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36')
        })

        # Make the request to the target URL; the timeout keeps a dead
        # upstream from blocking this worker indefinitely.
        resp = session.get(target_url, stream=True, timeout=(10, 30))

        # Get the content type to check if it's HTML
        content_type = resp.headers.get('Content-Type', '').lower()

        # Dropped headers: the first two block embedding; the rest describe
        # the upstream wire format, which no longer matches the body we send
        # (requests decodes it, and HTML is rewritten).
        excluded_headers = {
            'content-security-policy',
            'x-frame-options',
            'content-encoding',
            'content-length',
            'transfer-encoding',
            'connection',
        }
        headers = [
            (name, value)
            for (name, value) in resp.raw.headers.items()
            if name.lower() not in excluded_headers
        ]

        if 'text/html' in content_type:
            # Base URL of the Hugging Face Space for rewriting links.
            # SPACE_HOST is read from the environment; when it is not set,
            # fall back to the host this request arrived on.
            space_host = os.environ.get('SPACE_HOST')
            proxy_base_url = f"https://{space_host}/" if space_host else request.host_url

            # Resolve relative links against the final URL after redirects.
            page_url = resp.url or target_url

            soup = BeautifulSoup(resp.content, 'html.parser')
            base_tag = soup.new_tag('base', href=page_url)
            if soup.head is not None:
                soup.head.insert(0, base_tag)
            else:
                # Document without <head>: create one rather than crash.
                head_tag = soup.new_tag('head')
                head_tag.append(base_tag)
                parent = soup.html if soup.html is not None else soup
                parent.insert(0, head_tag)

            # Rewrite all links and resource URLs to go through the proxy
            for tag, attr in [('a', 'href'), ('link', 'href'), ('script', 'src'), ('img', 'src')]:
                for t in soup.find_all(tag, **{attr: True}):
                    original_url = str(t[attr]).strip()
                    try:
                        # Make relative URLs absolute before proxying
                        absolute_url = urljoin(page_url, original_url)
                        scheme = urlsplit(absolute_url).scheme.lower()
                    except ValueError:
                        continue  # leave unparseable URLs untouched
                    if scheme not in ('http', 'https'):
                        # mailto:, javascript:, data: ... cannot be fetched.
                        continue
                    t[attr] = create_proxy_url(absolute_url, proxy_base_url)

            content = str(soup)
            # The rewritten document is sent as UTF-8, so replace the
            # upstream Content-Type (which may name another charset).
            headers = [(n, v) for (n, v) in headers if n.lower() != 'content-type']
            headers.append(('Content-Type', 'text/html; charset=utf-8'))
            return Response(content, resp.status_code, headers)

        # For non-HTML content (images, CSS, JS), stream it directly and
        # release the upstream connection once the client response is closed.
        response = Response(resp.iter_content(chunk_size=1024), resp.status_code, headers)
        response.call_on_close(resp.close)
        response.call_on_close(session.close)
        handed_off = True
        return response

    except requests.exceptions.RequestException as e:
        # Plain text: the message can echo the user-supplied URL, so it must
        # not be interpreted as HTML by the browser.
        return Response(f"Error fetching URL: {e}", status=500, mimetype='text/plain')
    finally:
        if not handed_off:
            if resp is not None:
                resp.close()
            session.close()
84
+
85
# Script entry point: serve on all interfaces, on the port named by the PORT
# environment variable (7860 when it is not set).
if __name__ == '__main__':
    listen_port = int(os.environ.get("PORT", 7860))
    app.run(host="0.0.0.0", port=listen_port)