devendergarg14 committed (verified)
Commit 0a0bd8b · 1 Parent(s): 1a666ff

Update app.py

Files changed (1):
  1. app.py +77 -78
app.py CHANGED
@@ -11,33 +11,34 @@ import socket
 app = Flask(__name__, static_folder='.', static_url_path='')
 
 # --- Configuration ---
-DATA_FILE = "/tmp/data.json" # MODIFIED LINE: Use the /tmp directory for writing
-PING_INTERVAL_SECONDS = 60 # Backend pings every 60 seconds
-HISTORY_DURATION_SECONDS = 60 * 60 # Store history for 1 hour
+DATA_FILE = "/tmp/data.json"
+PING_INTERVAL_SECONDS = 60
+HISTORY_DURATION_SECONDS = 60 * 60
 
 # --- Data Store ---
 # Structure: { "id": "uuid", "url": "string", "status": "pending/ok/error/checking",
 #              "ip": "string", "responseTime": float_ms, "lastChecked": "iso_string_utc",
 #              "history": [{"timestamp": float_unix_ts_seconds, "status": "ok/error"}],
+#              "userId": "string_user_uuid", # ADDED: To associate URL with a user
 #              "_thread": threading.Thread_object, "_stop_event": threading.Event_object }
-monitored_urls_store = {} # In-memory store: id -> url_data
-lock = threading.Lock() # To protect access to monitored_urls_store
+monitored_urls_store = {}
+lock = threading.Lock()
 
 # --- Helper Functions ---
 def save_data_to_json():
-    # This function must be called with 'lock' acquired
-    serializable_data = {}
-    for url_id, data in monitored_urls_store.items():
-        s_data = data.copy()
-        s_data.pop("_thread", None)
-        s_data.pop("_stop_event", None)
-        serializable_data[url_id] = s_data
-    try:
-        with open(DATA_FILE, 'w') as f:
-            json.dump(serializable_data, f, indent=2)
-        print(f"Data saved to {DATA_FILE}")
-    except IOError as e:
-        print(f"Error saving data to {DATA_FILE}: {e}") # This error will now show the /tmp path
+    with lock: # Ensure lock is acquired before calling if called externally
+        serializable_data = {}
+        for url_id, data in monitored_urls_store.items():
+            s_data = data.copy()
+            s_data.pop("_thread", None)
+            s_data.pop("_stop_event", None)
+            serializable_data[url_id] = s_data
+        try:
+            with open(DATA_FILE, 'w') as f:
+                json.dump(serializable_data, f, indent=2)
+            print(f"Data saved to {DATA_FILE}")
+        except IOError as e:
+            print(f"Error saving data to {DATA_FILE}: {e}")
 
 def load_data_from_json():
     global monitored_urls_store
@@ -49,7 +50,6 @@ def load_data_from_json():
 
         temp_store = {}
         for url_id_key, data_item in loaded_json_data.items():
-            # Ensure essential fields and use 'id' from data if present, else key
             data_item.setdefault('id', url_id_key)
             current_id = data_item['id']
             data_item.setdefault('status', 'pending')
@@ -57,9 +57,10 @@ def load_data_from_json():
             data_item.setdefault('responseTime', None)
             data_item.setdefault('lastChecked', None)
             data_item.setdefault('history', data_item.get('history', []))
+            data_item.setdefault('userId', None) # ADDED: Handle old data without userId
             temp_store[current_id] = data_item
 
-        with lock: # Lock before modifying global monitored_urls_store
+        with lock:
             monitored_urls_store = temp_store
 
     except json.JSONDecodeError:
@@ -74,18 +75,19 @@ def load_data_from_json():
 
     url_ids_to_start_monitoring = []
     with lock:
+        # We don't know the user context here, so all threads for loaded URLs are started.
+        # The GET endpoint will filter by user.
         url_ids_to_start_monitoring = list(monitored_urls_store.keys())
 
     for url_id in url_ids_to_start_monitoring:
         start_url_monitoring_thread(url_id)
 
+
 def get_host_ip_address(hostname_str):
     try:
-        # Check if hostname_str is already a valid IP address
-        socket.inet_aton(hostname_str) # Throws an OSError if not a valid IPv4 string
+        socket.inet_aton(hostname_str)
         return hostname_str
     except OSError:
-        # It's not an IP, so try to resolve it as a hostname
         try:
             ip_address = socket.gethostbyname(hostname_str)
             return ip_address
@@ -97,7 +99,6 @@ def get_host_ip_address(hostname_str):
             return 'N/A'
 
 def prune_url_history(url_data_entry):
-    # Assumes 'lock' is acquired or called from the thread managing this entry
     cutoff_time = time.time() - HISTORY_DURATION_SECONDS
     url_data_entry['history'] = [
         entry for entry in url_data_entry.get('history', []) if entry['timestamp'] >= cutoff_time
@@ -113,21 +114,19 @@ def execute_url_check(url_id_to_check):
 
         print(f"Checking {current_url_data['url']} (ID: {url_id_to_check})...")
         current_url_data['status'] = 'checking'
-        url_config_snapshot = current_url_data.copy() # Snapshot for use outside lock
+        url_config_snapshot = current_url_data.copy()
 
     if not url_config_snapshot: return
 
     check_start_time = time.perf_counter()
     final_check_status = 'error'
     http_response_time_ms = None
-    # Identify your bot to website owners
     http_headers = {'User-Agent': 'URLPinger/1.0 (HuggingFace Space Bot)'}
 
     try:
-        # Attempt HEAD request first
         try:
             head_response = requests.head(url_config_snapshot['url'], timeout=10, allow_redirects=True, headers=http_headers)
-            if 200 <= head_response.status_code < 400: # OK or Redirect
+            if 200 <= head_response.status_code < 400:
                 final_check_status = 'ok'
             else:
                 print(f"HEAD for {url_config_snapshot['url']} returned {head_response.status_code}. Trying GET.")
@@ -136,11 +135,10 @@ def execute_url_check(url_id_to_check):
         except requests.RequestException as e_head:
             print(f"HEAD failed for {url_config_snapshot['url']}: {e_head}. Trying GET...")
 
-        # If HEAD was not conclusive, try GET
         if final_check_status != 'ok':
             try:
                 get_response = requests.get(url_config_snapshot['url'], timeout=15, allow_redirects=True, headers=http_headers)
-                if get_response.ok: # Only 2xx status codes
+                if get_response.ok:
                     final_check_status = 'ok'
                 else:
                     print(f"GET for {url_config_snapshot['url']} status: {get_response.status_code}")
@@ -160,15 +158,15 @@ def execute_url_check(url_id_to_check):
         final_check_status = 'error'
 
     with lock:
-        if url_id_to_check not in monitored_urls_store: return # URL might have been removed during check
+        if url_id_to_check not in monitored_urls_store: return
 
        live_url_data = monitored_urls_store[url_id_to_check]
        live_url_data['status'] = final_check_status
        live_url_data['responseTime'] = round(http_response_time_ms) if http_response_time_ms is not None else None
-        live_url_data['lastChecked'] = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()) # ISO 8601 UTC
+        live_url_data['lastChecked'] = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
 
        current_history_list = live_url_data.get('history', [])
-        current_history_list.append({'timestamp': time.time(), 'status': final_check_status}) # timestamp in seconds
+        current_history_list.append({'timestamp': time.time(), 'status': final_check_status})
        live_url_data['history'] = current_history_list
        prune_url_history(live_url_data)
 
@@ -178,7 +176,6 @@ def execute_url_check(url_id_to_check):
 def pinger_thread_function(url_id_param, stop_event_param):
     while not stop_event_param.is_set():
         execute_url_check(url_id_param)
-        # Sleep for PING_INTERVAL_SECONDS, but check stop_event periodically
         for _ in range(PING_INTERVAL_SECONDS):
             if stop_event_param.is_set(): break
             time.sleep(1)
@@ -192,15 +189,13 @@ def start_url_monitoring_thread(target_url_id):
 
     url_data_entry = monitored_urls_store[target_url_id]
 
-    # Stop existing thread if it's alive
     if "_thread" in url_data_entry and url_data_entry["_thread"].is_alive():
         print(f"Monitor for URL ID {target_url_id} already running. Attempting to restart.")
-        if "_stop_event" in url_data_entry and url_data_entry["_stop_event"]: # Check if _stop_event exists
+        if "_stop_event" in url_data_entry and url_data_entry["_stop_event"]:
             url_data_entry["_stop_event"].set()
-        url_data_entry["_thread"].join(timeout=3) # Wait for thread to stop
+        url_data_entry["_thread"].join(timeout=3)
 
     new_stop_event = threading.Event()
-    # daemon=True allows main program to exit even if threads are running
     new_thread = threading.Thread(target=pinger_thread_function, args=(target_url_id, new_stop_event), daemon=True)
 
     url_data_entry["_thread"] = new_thread
@@ -215,9 +210,8 @@ def stop_url_monitoring_thread(target_url_id):
     url_data_entry = monitored_urls_store[target_url_id]
     if "_thread" in url_data_entry and url_data_entry["_thread"].is_alive():
         print(f"Signaling stop for monitor thread of URL ID {target_url_id}")
-        if "_stop_event" in url_data_entry and url_data_entry["_stop_event"]: # Check if _stop_event exists
+        if "_stop_event" in url_data_entry and url_data_entry["_stop_event"]:
             url_data_entry["_stop_event"].set()
-    # Not joining here to keep API responsive, daemon thread will exit.
     url_data_entry.pop("_thread", None)
     url_data_entry.pop("_stop_event", None)
 
@@ -227,19 +221,28 @@ def serve_index():
     return send_from_directory(app.static_folder, 'index.html')
 
 @app.route('/api/urls', methods=['GET'])
-def get_all_urls():
+def get_all_urls_for_user():
+    user_id = request.headers.get('X-User-ID')
+    if not user_id:
+        # Return empty list if no user ID is provided, or could be 400 error
+        return jsonify([]), 200
+
     with lock:
-        # Prepare data for sending: list of url data, no thread objects
         response_list = []
         for data_item in monitored_urls_store.values():
-            display_item = data_item.copy()
-            display_item.pop("_thread", None)
-            display_item.pop("_stop_event", None)
-            response_list.append(display_item)
+            if data_item.get('userId') == user_id: # Filter by userId
+                display_item = data_item.copy()
+                display_item.pop("_thread", None)
+                display_item.pop("_stop_event", None)
+                response_list.append(display_item)
         return jsonify(response_list)
 
 @app.route('/api/urls', methods=['POST'])
-def add_new_url():
+def add_new_url_for_user():
+    user_id = request.headers.get('X-User-ID')
+    if not user_id:
+        return jsonify({"error": "User ID (X-User-ID header) is required"}), 400
+
     request_data = request.get_json()
     if not request_data or 'url' not in request_data:
         return jsonify({"error": "URL is required"}), 400
@@ -247,7 +250,7 @@ def add_new_url():
     input_url = request_data['url'].strip()
 
     if not input_url.startswith('http://') and not input_url.startswith('https://'):
-        input_url = 'https://' + input_url # Default to https
+        input_url = 'https://' + input_url
 
     try:
         parsed_input_url = urlparse(input_url)
@@ -258,66 +261,62 @@ def add_new_url():
         return jsonify({"error": "Invalid URL format"}), 400
 
     with lock:
-        # Check for duplicates (case-insensitive, ignoring trailing slashes)
         normalized_new_url = input_url.rstrip('/').lower()
-        for existing_url_id in list(monitored_urls_store.keys()): # Iterate over keys to avoid issues if store is modified
-            existing_url_data = monitored_urls_store.get(existing_url_id)
-            if existing_url_data and existing_url_data['url'].rstrip('/').lower() == normalized_new_url:
-                return jsonify({"error": "URL already monitored"}), 409 # Conflict
+        for existing_url_data in monitored_urls_store.values():
+            # Check for duplicates only for the current user
+            if existing_url_data.get('userId') == user_id and \
+               existing_url_data['url'].rstrip('/').lower() == normalized_new_url:
+                return jsonify({"error": "URL already monitored by you"}), 409
 
         new_url_id = str(uuid.uuid4())
         resolved_ip = get_host_ip_address(url_hostname) if url_hostname else 'N/A'
 
         url_entry_to_add = {
             "id": new_url_id, "url": input_url, "status": 'pending',
-            "ip": resolved_ip, "responseTime": None, "lastChecked": None, "history": []
+            "ip": resolved_ip, "responseTime": None, "lastChecked": None, "history": [],
+            "userId": user_id # Associate with the user
         }
 
-        # Make a copy of the entry for the response *before* it's potentially modified
-        # by start_url_monitoring_thread with non-serializable objects.
         response_payload = url_entry_to_add.copy()
-
-        monitored_urls_store[new_url_id] = url_entry_to_add # url_entry_to_add will be modified by start_url_monitoring_thread
+        monitored_urls_store[new_url_id] = url_entry_to_add
         save_data_to_json()
 
-        start_url_monitoring_thread(new_url_id) # This will add _thread and _stop_event to monitored_urls_store[new_url_id]
-
-        # Return the clean response_payload, which does not have _thread or _stop_event
+        start_url_monitoring_thread(new_url_id)
         return jsonify(response_payload), 201
 
 
 @app.route('/api/urls/<string:target_url_id>', methods=['DELETE'])
-def delete_existing_url(target_url_id):
+def delete_existing_url_for_user(target_url_id):
+    user_id = request.headers.get('X-User-ID')
+    if not user_id:
+        return jsonify({"error": "User ID (X-User-ID header) is required"}), 400
+
     with lock:
         if target_url_id in monitored_urls_store:
+            url_entry_to_check = monitored_urls_store[target_url_id]
+
+            # Check if the URL belongs to the requesting user
+            if url_entry_to_check.get('userId') != user_id:
+                # Do not reveal existence of URL; treat as if not found for this user
+                return jsonify({"error": "URL not found or permission denied"}), 404
+
             stop_url_monitoring_thread(target_url_id)
             removed_url_entry = monitored_urls_store.pop(target_url_id)
             save_data_to_json()
 
-            # Prepare data for response (without thread objects)
-            response_data = removed_url_entry.copy() # Copy before potential modification if stop_url_monitoring_thread didn't pop everything
+            response_data = removed_url_entry.copy()
             response_data.pop("_thread", None)
             response_data.pop("_stop_event", None)
-            print(f"Deleted URL ID {target_url_id}")
+            print(f"Deleted URL ID {target_url_id} for user {user_id}")
             return jsonify({"message": "URL removed", "url": response_data}), 200
         else:
            return jsonify({"error": "URL not found"}), 404
 
 # --- Main Execution / Gunicorn Entry Point ---
-# Load data once when the application module is initialized
-# This handles both `flask run` and gunicorn scenarios.
-if os.environ.get('WERKZEUG_RUN_MAIN') != 'true': # Avoids double load in Flask debug mode
+if os.environ.get('WERKZEUG_RUN_MAIN') != 'true':
     load_data_from_json()
 
 if __name__ == '__main__':
-    # This block is for local development (e.g., `python app.py`)
-    # `load_data_from_json()` is called above unless Werkzeug reloader is active.
-    # If using Flask's reloader, load_data_from_json will be called twice:
-    # once by the main process, once by the reloader's child process.
-    # The check for WERKZEUG_RUN_MAIN ensures it only loads in the main one or the child.
-    if os.environ.get('WERKZEUG_RUN_MAIN') == 'true': # Ensure data is loaded in the reloaded process too
+    if os.environ.get('WERKZEUG_RUN_MAIN') == 'true':
         load_data_from_json()
-    app.run(debug=True, host='0.0.0.0', port=7860)
-
-# When run with Gunicorn, Gunicorn imports `app` from this `app.py` file.
-# `load_data_from_json()` will have been called during that import (due to the WERKZEUG_RUN_MAIN check).
+    app.run(debug=True, host='0.0.0.0', port=7860)
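The net effect of the commit is that every /api/urls endpoint is now scoped to the caller identified by an X-User-ID request header: GET returns only that user's entries (an empty list when the header is missing), POST rejects header-less requests with 400 and stores the new userId field, and DELETE answers 404 for URLs owned by someone else. A minimal client sketch against a locally running instance; the base URL and the way the user ID is generated are illustrative assumptions, not part of the commit:

import uuid

import requests

BASE_URL = "http://localhost:7860"  # assumed local address; app.run() above listens on port 7860
USER_ID = str(uuid.uuid4())         # hypothetical client-chosen ID; the server treats it as an opaque string
HEADERS = {"X-User-ID": USER_ID}

# Create: the server prepends https:// when no scheme is given and answers
# 201 with the stored entry, or 409 if this user already monitors the URL.
created = requests.post(f"{BASE_URL}/api/urls", json={"url": "example.com"}, headers=HEADERS)
print(created.status_code, created.json())

# List: only this user's entries come back; without the header the result is just [].
print(requests.get(f"{BASE_URL}/api/urls", headers=HEADERS).json())

# Delete: a URL belonging to another user yields 404 rather than revealing it exists.
url_id = created.json()["id"]
print(requests.delete(f"{BASE_URL}/api/urls/{url_id}", headers=HEADERS).status_code)

The same ID has to be sent on every request: the server persists it in the userId field of each entry in /tmp/data.json and uses it both for filtering and for the per-user duplicate check.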