hermanda committed on
Commit
53cee63
·
verified ·
1 Parent(s): 50e41c4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +357 -197
app.py CHANGED
@@ -1,3 +1,6 @@
 
 
 
1
  import gradio as gr
2
  import polars as pl
3
  import time
@@ -17,19 +20,38 @@ import traceback
17
  from concurrent.futures import ThreadPoolExecutor, as_completed
18
 
19
 
 
 
 
 
 
20
 
21
- def get_geo_optimal_stop(method, selected_stops, show_top=20):
 
 
 
 
 
 
 
 
 
 
 
22
  global DISTANCE_TABLE
23
-
24
  dfs = []
25
- for si, stop in tqdm(enumerate(selected_stops), desc="Calculating optimal stops", total=len(selected_stops)):
 
 
 
 
26
  df = (
27
- DISTANCE_TABLE
28
- .filter(pl.col("from") == stop)
29
  .drop("from")
30
  .with_columns(
31
  pl.col("to").alias("target_stop"),
32
- pl.col("distance_in_km").alias(f"distance_in_km_{si}")
33
  )
34
  .select("target_stop", f"distance_in_km_{si}")
35
  )
@@ -42,8 +64,12 @@ def get_geo_optimal_stop(method, selected_stops, show_top=20):
42
 
43
  print("Finidng optimal stops ...")
44
  df = df.with_columns(
45
- pl.max_horizontal(*[f"distance_in_km_{si}" for si in range(len(selected_stops))]).alias("worst_case_km"),
46
- pl.sum_horizontal(*[f"distance_in_km_{si}" for si in range(len(selected_stops))]).alias("total_km")
 
 
 
 
47
  )
48
 
49
  if method == "minimize-worst-case":
@@ -51,42 +77,124 @@ def get_geo_optimal_stop(method, selected_stops, show_top=20):
51
  elif method == "minimize-total":
52
  df = df.sort("total_km")
53
 
54
- return df.head(show_top)
55
 
56
- def validate_date(date_str):
 
 
57
  """
58
- Validates that the date string is in DD/MM/YYYY format and represents a valid date.
 
 
 
 
 
 
 
 
 
 
 
 
59
  """
60
- try:
61
- datetime.strptime(date_str, "%d/%m/%Y")
62
- return True
63
- except ValueError:
64
- return False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
 
66
- def validate_time(time_str):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
  """
68
- Validates that the time string is in HH:MM format and represents a valid time.
 
 
 
 
 
 
 
 
 
 
 
 
 
69
  """
70
- try:
71
- datetime.strptime(time_str, "%H:%M")
72
- return True
73
- except ValueError:
74
- return False
 
 
 
 
 
 
 
75
 
76
- def validate_date_time(date_str, time_str):
77
- """
78
- Validates that:
79
- 1. The date string is in DD/MM/YYYY format and represents a valid date.
80
- 2. The time string is in HH:MM format and represents a valid time.
81
- 3. The combined datetime is in the future and not more than 3 months ahead.
82
 
 
 
 
 
 
 
 
 
 
 
 
83
  Returns:
84
- Tuple[bool, str]: (is_valid, error_message)
 
85
  """
86
  try:
87
  event_datetime = datetime.strptime(f"{date_str} {time_str}", "%d/%m/%Y %H:%M")
88
  except ValueError:
89
- return False, "Invalid date or time format. Please ensure date is DD/MM/YYYY and time is HH:MM."
 
 
 
90
 
91
  now = datetime.now()
92
  three_months_later = now + timedelta(days=90) # Approximation of 3 months
@@ -94,17 +202,30 @@ def validate_date_time(date_str, time_str):
94
  if event_datetime <= now:
95
  return False, "The selected date and time must be in the future."
96
  if event_datetime > three_months_later:
97
- return False, "The selected date and time must not be more than 3 months in the future."
 
 
 
98
 
99
  return True, ""
100
 
101
 
102
  def get_next_meetup_time(target_weekday: int, target_hour: int) -> datetime:
103
  """
104
- Returns the next meetup datetime.
105
-
106
- :param target_weekday: The target weekday (0 = Monday, 1 = Tuesday, ..., 6 = Sunday).
107
- :param target_hour: The target hour (0-23).
 
 
 
 
 
 
 
 
 
 
108
  """
109
  start_dt = datetime.now()
110
 
@@ -113,11 +234,11 @@ def get_next_meetup_time(target_weekday: int, target_hour: int) -> datetime:
113
 
114
  if days_ahead == 0:
115
  if start_dt.time() >= dt_time(target_hour, 0):
116
- days_ahead = 7
117
  else:
118
- days_ahead = 0
119
  elif days_ahead < 0:
120
- days_ahead += 7
121
 
122
  next_dt = start_dt + timedelta(days=days_ahead)
123
  next_dt = next_dt.replace(hour=target_hour, minute=0, second=0, microsecond=0)
@@ -126,35 +247,26 @@ def get_next_meetup_time(target_weekday: int, target_hour: int) -> datetime:
126
 
127
  def parse_time_to_minutes(time_str: str) -> int:
128
  """
129
- Parses a time string and returns the total number of minutes as an integer.
130
-
131
- Supported formats:
132
- - "X hod Y min" (e.g., "1 hod 1 min")
133
- - "X hod" (e.g., "2 hod")
134
- - "Y min" (e.g., "20 min")
135
-
136
- Forbidden inputs:
137
- - Negative minutes or hours (e.g., "-1 min")
138
- - Minutes equal to or exceeding 60 (e.g., "61 min")
139
- - Incorrect formats
140
 
141
  Args:
142
- time_str (str): The time string to parse.
143
 
144
  Returns:
145
- int: Total number of minutes.
146
 
147
  Raises:
148
- ValueError: If the input format is invalid or contains forbidden values.
 
149
  """
150
- pattern = r'^\s*(?:(\d+)\s*hod)?(?:\s*(\d+)\s*min)?\s*$'
151
  match = re.match(pattern, time_str, re.IGNORECASE)
152
-
153
  if not match:
154
  raise ValueError(f"Invalid time format: '{time_str}'")
155
-
156
  hours_str, minutes_str = match.groups()
157
-
158
  hours = int(hours_str) if hours_str else 0
159
  minutes = int(minutes_str) if minutes_str else 0
160
 
@@ -164,10 +276,11 @@ def parse_time_to_minutes(time_str: str) -> int:
164
  raise ValueError("Minutes cannot be negative.")
165
  if minutes >= 60:
166
  raise ValueError("Minutes must be less than 60.")
167
-
168
  total_minutes = hours * 60 + minutes
169
  return total_minutes
170
 
 
171
  def get_total_minutes(from_stop: str, to_stop: str, dt: datetime) -> int:
172
  """
173
  Sends a POST request to the specified URL using Webshare's rotating proxy and parses the response to extract time in minutes.
@@ -184,95 +297,115 @@ def get_total_minutes(from_stop: str, to_stop: str, dt: datetime) -> int:
184
  requests.HTTPError: If the HTTP request returned an unsuccessful status code.
185
  ValueError: If expected HTML elements are not found in the response.
186
  """
187
-
188
  if from_stop == to_stop:
189
  return 0
190
-
191
  day_abbreviations = {
192
- 0: 'po', # Monday -> po
193
- 1: 'út', # Tuesday -> út
194
- 2: 'st', # Wednesday -> st
195
- 3: 'čt', # Thursday -> čt
196
- 4: '', # Friday -> pá
197
- 5: 'so', # Saturday -> so
198
- 6: 'ne' # Sunday -> ne
199
  }
200
-
201
  day = dt.day
202
  month = dt.month
203
  year = dt.year
204
- weekday = dt.weekday()
205
- abbreviation = day_abbreviations.get(weekday, '')
206
  date_str = f"{day}.{month}.{year} {abbreviation}"
207
- time_str = dt.strftime('%H:%M')
208
-
209
-
210
  headers = {
211
- 'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
212
- 'accept-language': 'en-US,en;q=0.9',
213
- 'cache-control': 'max-age=0',
214
- 'content-type': 'application/x-www-form-urlencoded',
215
- 'dnt': '1',
216
- 'origin': 'https://idos.cz',
217
- 'priority': 'u=0, i',
218
- 'referer': 'https://idos.cz/pid/spojeni/',
219
  }
220
 
221
  data = [
222
- ('From', from_stop),
223
- ('positionACPosition', ''),
224
- ('To', to_stop),
225
- ('positionACPosition', ''),
226
- ('AdvancedForm.Via[0]', ''),
227
- ('AdvancedForm.ViaHidden[0]', ''),
228
- ('Date', date_str),
229
- ('Time', time_str),
230
- ('IsArr', 'True'),
231
  ]
232
 
233
- url = 'https://idos.cz/pid/spojeni/'
234
-
235
  proxy_domain = os.getenv("PROXY_DOMAIN")
236
  proxy_port = os.getenv("PROXY_PORT")
237
  proxy_username = os.getenv("PROXY_USERNAME")
238
  proxy_password = os.getenv("PROXY_PASSWORD")
239
 
240
- # Construct the proxy URL with authentication
241
  proxy_url = f"http://{proxy_username}:{proxy_password}@{proxy_domain}:{proxy_port}"
242
-
243
  proxies = {
244
- 'http': proxy_url,
245
- 'https': proxy_url,
246
  }
247
 
248
  try:
249
  if proxy_domain is None:
250
  response = requests.post(url, headers=headers, data=data, timeout=15)
251
  else:
252
- response = requests.post(url, headers=headers, data=data, proxies=proxies, timeout=15)
253
- response.raise_for_status()
 
 
254
  except requests.RequestException as e:
255
  raise requests.HTTPError(f"Failed to retrieve data from {url}.") from e
256
 
257
- soup = BeautifulSoup(response.content, 'html.parser')
258
- connection_head = soup.find(class_='connection-head')
259
 
260
  if not connection_head:
261
  raise ValueError("No elements found with the class 'connection-head'.")
262
 
263
- strong_tag = connection_head.find('strong')
264
 
265
  if not strong_tag:
266
- raise ValueError("No <strong> tag found within the first 'connection-head' element.")
 
 
267
 
268
  time_str_response = strong_tag.get_text(strip=True)
269
  total_minutes = parse_time_to_minutes(time_str_response)
270
  return total_minutes
271
 
272
- @cached(cache=TTLCache(maxsize=10**6, ttl=24*60*60))
273
- def get_total_minutes_with_retries(from_stop: str, to_stop: str, dt: datetime) -> int:
274
- max_retries = 3
275
- retry_delay = 2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
276
  attempt = 0
277
 
278
  while attempt < max_retries:
@@ -282,106 +415,132 @@ def get_total_minutes_with_retries(from_stop: str, to_stop: str, dt: datetime) -
282
  except Exception as e:
283
  attempt += 1
284
  if attempt < max_retries:
285
- print(f"Error processing pair ({from_stop}, {to_stop}): {e}. Retrying in {retry_delay} seconds... (Attempt {attempt}/{max_retries})")
 
 
286
  time.sleep(retry_delay)
287
  else:
288
- print(f"Failed to process pair ({from_stop}, {to_stop}) after {max_retries} attempts.")
 
 
289
  return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
290
 
291
- def get_time_optimal_stop(method, selected_stops, target_stops, event_datetime, show_top=20):
292
  def process_target_stop(args):
293
  target_stop, selected_stops, event_datetime = args
294
  row = {"target_stop": target_stop}
295
  for si, from_stop in enumerate(selected_stops):
296
  try:
297
- total_minutes = get_total_minutes_with_retries(from_stop, target_stop, event_datetime)
 
 
298
  row[f"total_minutes_{si}"] = total_minutes
299
  except Exception as e:
300
  print(f"Error processing pair ({from_stop}, {target_stop}): {e}")
301
  traceback.print_exc()
302
  row[f"total_minutes_{si}"] = None
303
  return row
304
-
305
  rows = []
306
- arguments = [(target_stop, selected_stops, event_datetime) for target_stop in target_stops]
 
 
307
  with ThreadPoolExecutor(max_workers=5) as executor:
308
- futures = {executor.submit(process_target_stop, arg): arg[0] for arg in arguments}
 
 
309
  for future in tqdm(as_completed(futures), total=len(arguments)):
310
  try:
311
  result = future.result()
312
  rows.append(result)
313
  except Exception as e:
314
  print(f"An error occurred with target_stop={futures[future]}: {e}")
315
-
316
- df_times = (
317
- pl.DataFrame(rows)
318
- .with_columns(
319
- pl.max_horizontal(*[f"total_minutes_{si}" for si in range(len(selected_stops))]).alias("worst_case_minutes"),
320
- pl.sum_horizontal(*[f"total_minutes_{si}" for si in range(len(selected_stops))]).alias("total_minutes")
321
- )
 
322
  )
323
 
324
  if method == "minimize-worst-case":
325
  df_times = df_times.sort("worst_case_minutes")
326
  elif method == "minimize-total":
327
  df_times = df_times.sort("total_minutes")
328
-
329
- df_times = df_times.rename({"worst_case_minutes": "Worst Case Minutes", "total_minutes": "Total Minutes"})
 
 
330
  for si in range(len(selected_stops)):
331
  df_times = df_times.rename({f"total_minutes_{si}": f"t{si+1} mins"})
332
-
333
  df_times = df_times.drop_nulls()
334
-
335
  return df_times.head(show_top)
336
 
 
337
  def cerate_app():
338
  with gr.Blocks() as app:
339
  gr.Markdown("## Optimal Public Transport Stop Finder in Prague")
340
- gr.Markdown("""
 
341
  Consider you are in Prague and you want to meet with your friends. What is the optimal stop to meet? Now you can find that with this app!
342
 
343
- Time table data are being scraped from IDOS API, IDOS uses PID timetable data.""")
 
344
 
345
- # Slider to select the number of people
346
  number_of_stops = gr.Slider(
347
- minimum=2,
348
- maximum=12,
349
- step=1,
350
- value=3,
351
- label="Number of People"
352
  )
353
 
354
- # Radio buttons to select the optimization method
355
  method = gr.Radio(
356
  choices=["Minimize worst case for each", "Minimize total time"],
357
  value="Minimize worst case for each",
358
- label="Optimization Method"
359
  )
360
 
361
  next_dt = get_next_meetup_time(4, 20) # Friday 20:00
362
  next_date = next_dt.strftime("%d/%m/%Y")
363
  next_time = next_dt.strftime("%H:%M")
364
- # Date input in DD/MM/YYYY format
365
  date_input = gr.Textbox(
366
- label="Date (DD/MM/YYYY)",
367
- placeholder=f"e.g., {next_date}",
368
- value=next_date
369
  )
370
 
371
- # Time input in HH:MM format
372
  time_input = gr.Textbox(
373
- label="Time (HH:MM)",
374
- placeholder=f"e.g., {next_time}",
375
- value=next_time
376
  )
377
 
378
- # Dropdowns for selecting starting stops, initially hidden
379
  dropdowns = []
380
  for i in range(12):
381
  dd = gr.Dropdown(
382
- choices=ALL_STOPS,
383
- label=f"Choose Starting Stop #{i+1}",
384
- visible=False # Start hidden; we will unhide as needed
385
  )
386
  dropdowns.append(dd)
387
 
@@ -394,103 +553,104 @@ def cerate_app():
394
  updates.append(gr.update(visible=False))
395
  return updates
396
 
397
- # Update the visibility of dropdowns based on the number of stops selected
398
  number_of_stops.change(
399
- fn=update_dropdowns,
400
- inputs=number_of_stops,
401
- outputs=dropdowns
402
  )
403
 
404
- # Search button to trigger the optimization
405
  search_button = gr.Button("Search")
406
 
407
- def search_optimal_stop(num_stops, chosen_method, date_str, time_str, *all_stops):
408
- # Validate Date
 
409
  is_valid, error_message = validate_date_time(date_str, time_str)
410
  if not is_valid:
411
  raise gr.Error(error_message)
412
-
413
- # Extract selected stops based on the number of stops
414
  selected_stops = [stop for stop in all_stops[:num_stops] if stop]
415
  print("Number of stops:", num_stops)
416
  print("Method selected:", chosen_method)
417
  print("Selected stops:", selected_stops)
418
  print("Selected date:", date_str)
419
  print("Selected time:", time_str)
420
-
421
  if chosen_method == "Minimize worst case for each":
422
  method = "minimize-worst-case"
423
  else:
424
  method = "minimize-total"
425
-
426
- # Here, you can modify how date_str and time_str are used in your logic
427
- # For example, you might want to convert them to datetime objects
428
  try:
429
- # Optionally parse date and time to datetime objects
430
- event_datetime = datetime.strptime(f"{date_str} {time_str}", "%d/%m/%Y %H:%M")
 
431
  print("Event DateTime:", event_datetime)
432
  except ValueError as e:
433
- # This should not happen due to prior validation, but added for safety
434
  raise gr.Error(f"Error parsing date and time: {e}")
435
-
436
- df_top = get_geo_optimal_stop(method, selected_stops, show_top=SHOW_TOP+5)
437
- target_stops = df_top["target_stop"].to_list()
438
- df_times = get_time_optimal_stop(method, selected_stops, target_stops, event_datetime, show_top=SHOW_TOP)
 
 
439
  df_times = df_times.with_row_index("#", offset=1)
440
-
441
  return df_times
442
 
443
- # Dataframe to display the results
444
  results_table = gr.Dataframe(
445
  headers=["Target Stop", "Worst Case Minutes", "Total Minutes"],
446
  datatype=["str", "number", "str"],
447
- label="Optimal Stops"
448
  )
449
 
450
- # Configure the search button to call the callback with the new inputs
451
  search_button.click(
452
  fn=search_optimal_stop,
453
  inputs=[number_of_stops, method, date_input, time_input] + dropdowns,
454
- outputs=results_table
455
  )
456
 
457
- # On load, display the first 3 dropdowns and hide the rest
458
  app.load(
459
- lambda: [gr.update(visible=True) for _ in range(3)] + [gr.update(visible=False) for _ in range(9)],
 
460
  inputs=[],
461
- outputs=dropdowns
462
  )
463
-
464
  gr.Markdown("---")
465
- gr.Markdown("""
 
466
  Created by [Daniel Herman](https://www.hermandaniel.com), check out the code [detrin/pub-finder](https://github.com/detrin/pub-finder).
467
- """)
 
468
  return app
469
 
470
- print("Loading time table file ...")
471
- prague_stops = pl.read_csv('Prague_stops_geo.csv')
472
- print("Calculating distances between stops ...")
473
- stops_geo_dist = (
474
- prague_stops.join(prague_stops, how='cross')
475
- .with_columns(
476
- pl.struct(['lat', 'lon', 'lat_right', 'lon_right']).map_elements(
477
- lambda x: geopy.distance.geodesic((x['lat'], x['lon']), (x['lat_right'], x['lon_right'])).km,
478
- return_dtype=pl.Float64
479
- ).alias('distance_in_km')
480
- )
481
- .rename({"name": "from", "name_right": "to"})
482
- .select(["from", "to", "distance_in_km"])
483
- )
484
- # stops_geo_dist = pl.read_csv("Prague_stops_geo_dist.csv")
 
 
 
 
 
 
 
485
  DISTANCE_TABLE = stops_geo_dist
486
  from_stops = DISTANCE_TABLE["from"].unique().sort().to_list()
487
  to_stops = DISTANCE_TABLE["to"].unique().sort().to_list()
488
  ALL_STOPS = sorted(list(set(from_stops) & set(to_stops)))
489
  SHOW_TOP = 15
490
- # DISTANCE_TABLE = None
491
- # ALL_STOPS = None
492
 
493
- if __name__ == "__main__":
494
  app = cerate_app()
495
- print("starting app ...")
496
- app.launch()
 
1
+ from typing import List, Dict, Any
2
+ from typing import Tuple
3
+
4
  import gradio as gr
5
  import polars as pl
6
  import time
 
20
  from concurrent.futures import ThreadPoolExecutor, as_completed
21
 
22
 
23
+ def get_geo_optimal_stop(
24
+ method: str, selected_stops: List[str], show_top: int = 20
25
+ ) -> List[str]:
26
+ """
27
+ Calculate and return the top optimal geographic stops based on a specified method.
28
 
29
+ Args:
30
+ method (str): Optimization method, either "minimize-worst-case" or "minimize-total".
31
+ selected_stops (List[str]): A list of selected stop identifiers.
32
+ show_top (int, optional): Number of top results to return. Defaults to 20.
33
+
34
+ Returns:
35
+ List[str]: A list of the top optimal stops based on the selected method.
36
+
37
+ Raises:
38
+ ValueError: If the method is not recognized.
39
+
40
+ """
41
  global DISTANCE_TABLE
42
+
43
  dfs = []
44
+ for si, stop in tqdm(
45
+ enumerate(selected_stops),
46
+ desc="Calculating optimal stops",
47
+ total=len(selected_stops),
48
+ ):
49
  df = (
50
+ DISTANCE_TABLE.filter(pl.col("from") == stop)
 
51
  .drop("from")
52
  .with_columns(
53
  pl.col("to").alias("target_stop"),
54
+ pl.col("distance_in_km").alias(f"distance_in_km_{si}"),
55
  )
56
  .select("target_stop", f"distance_in_km_{si}")
57
  )
 
64
 
65
  print("Finidng optimal stops ...")
66
  df = df.with_columns(
67
+ pl.max_horizontal(
68
+ *[f"distance_in_km_{si}" for si in range(len(selected_stops))]
69
+ ).alias("worst_case_km"),
70
+ pl.sum_horizontal(
71
+ *[f"distance_in_km_{si}" for si in range(len(selected_stops))]
72
+ ).alias("total_km"),
73
  )
74
 
75
  if method == "minimize-worst-case":
 
77
  elif method == "minimize-total":
78
  df = df.sort("total_km")
79
 
80
+ return df.head(show_top)["target_stop"].to_list()
81
 
82
def get_time_optimal_stop(
    method: str, selected_stops: List[str], show_top: int = 20
) -> List[str]:
    """
    Rank candidate meeting stops by travel time from the selected stops.

    For every selected stop, the rows of the module-level ``DISTANCE_TABLE``
    whose ``from`` column equals that stop are extracted. The per-stop tables
    are inner-joined on the destination stop (``to``), the worst-case and the
    summed travel time are computed per destination, and the destinations are
    sorted by the requested criterion.

    Args:
        method (str): "minimize-worst-case" sorts by the largest per-stop value,
            "minimize-total" sorts by the sum over all selected stops.
        selected_stops (List[str]): Names of the starting stops.
        show_top (int, optional): Number of top results to return. Defaults to 20.

    Returns:
        List[str]: Names of the best destination stops, best first.

    Raises:
        ValueError: If the method is not recognized or no stop is selected.

    """
    sort_columns = {
        "minimize-worst-case": "worst_case_minutes",
        "minimize-total": "total_minutes",
    }
    # Validate up front so bad input fails before any table work is done.
    if method not in sort_columns:
        raise ValueError(f"Unknown method: {method}")
    if not selected_stops:
        raise ValueError("At least one stop must be selected.")

    minute_columns = [f"total_minutes_{si}" for si in range(len(selected_stops))]

    # NOTE(review): relies on DISTANCE_TABLE exposing "from", "to" and
    # "total_minutes" columns; the table is built outside this function.
    dfs = []
    for column, stop in tqdm(
        zip(minute_columns, selected_stops),
        desc="Calculating optimal stops",
        total=len(selected_stops),
    ):
        dfs.append(
            DISTANCE_TABLE.filter(pl.col("from") == stop).select(
                pl.col("to").alias("target_stop"),
                pl.col("total_minutes").alias(column),
            )
        )

    print("Joining dataframes ...")
    df = dfs[0]
    for other in dfs[1:]:
        # Inner join: only destinations known for every selected stop survive.
        df = df.join(other, on="target_stop")

    print("Finding optimal stops ...")
    df = df.with_columns(
        pl.max_horizontal(*minute_columns).alias("worst_case_minutes"),
        pl.sum_horizontal(*minute_columns).alias("total_minutes"),
    ).sort(sort_columns[method])

    return df.head(show_top)["target_stop"].to_list()
142
+
143
def get_optimal_stop(
    method: str, selected_stops: List[str], show_top_geo: int = 20, show_top_time: int = 20
) -> List[str]:
    """
    Combine the distance-based and the travel-time-based candidate stops.

    Calls ``get_geo_optimal_stop`` and ``get_time_optimal_stop`` with the same
    method and selected stops and returns the union of both candidate lists.
    The union keeps first-seen order (distance-based candidates first, then the
    time-based ones that were not already present), so the result is
    deterministic across runs.

    Args:
        method (str): Optimization method, either "minimize-worst-case" or "minimize-total".
        selected_stops (List[str]): Names of the starting stops.
        show_top_geo (int, optional): Number of distance-based candidates to request. Defaults to 20.
        show_top_time (int, optional): Number of time-based candidates to request. Defaults to 20.

    Returns:
        List[str]: De-duplicated candidate stops from both rankings.

    Raises:
        ValueError: Propagated from the ranking helpers, e.g. for an unknown method.

    """
    geo_optimal_stops = get_geo_optimal_stop(method, selected_stops, show_top_geo)
    time_optimal_stops = get_time_optimal_stop(method, selected_stops, show_top_time)

    # dict.fromkeys de-duplicates while preserving insertion order; a plain
    # set union would reorder the candidates differently on every run.
    combined = list(dict.fromkeys([*geo_optimal_stops, *time_optimal_stops]))
    print(
        f"Candidate stops: {len(geo_optimal_stops)} by distance, "
        f"{len(time_optimal_stops)} by travel time, {len(combined)} combined"
    )
    return combined
177
+
178
+
179
+ def validate_date_time(date_str: str, time_str: str) -> Tuple[bool, str]:
180
+ """
181
+ Validates a date and time string against specific criteria.
182
+
183
+ Args:
184
+ date_str (str): The date string to validate, in the format 'DD/MM/YYYY'.
185
+ time_str (str): The time string to validate, in the format 'HH:MM'.
186
+
187
  Returns:
188
+ Tuple[bool, str]: A tuple containing a boolean indicating if the input is valid,
189
+ and a string message indicating the error if invalid, or an empty string if valid.
190
  """
191
  try:
192
  event_datetime = datetime.strptime(f"{date_str} {time_str}", "%d/%m/%Y %H:%M")
193
  except ValueError:
194
+ return (
195
+ False,
196
+ "Invalid date or time format. Please ensure date is DD/MM/YYYY and time is HH:MM.",
197
+ )
198
 
199
  now = datetime.now()
200
  three_months_later = now + timedelta(days=90) # Approximation of 3 months
 
202
  if event_datetime <= now:
203
  return False, "The selected date and time must be in the future."
204
  if event_datetime > three_months_later:
205
+ return (
206
+ False,
207
+ "The selected date and time must not be more than 3 months in the future.",
208
+ )
209
 
210
  return True, ""
211
 
212
 
213
  def get_next_meetup_time(target_weekday: int, target_hour: int) -> datetime:
214
  """
215
+ Calculate the next occurrence of a meetup based on the target weekday and hour.
216
+
217
+ Args:
218
+ target_weekday (int): The day of the week for the meetup, where Monday is 0
219
+ and Sunday is 6.
220
+ target_hour (int): The hour of the day for the meetup (24-hour format).
221
+
222
+ Returns:
223
+ datetime: A datetime object representing the next occurrence of the meetup
224
+ with the specified weekday and hour.
225
+
226
+ Raises:
227
+ ValueError: If `target_hour` is not between 0 and 23 inclusive.
228
+
229
  """
230
  start_dt = datetime.now()
231
 
 
234
 
235
  if days_ahead == 0:
236
  if start_dt.time() >= dt_time(target_hour, 0):
237
+ days_ahead = 7
238
  else:
239
+ days_ahead = 0
240
  elif days_ahead < 0:
241
+ days_ahead += 7
242
 
243
  next_dt = start_dt + timedelta(days=days_ahead)
244
  next_dt = next_dt.replace(hour=target_hour, minute=0, second=0, microsecond=0)
 
247
 
248
  def parse_time_to_minutes(time_str: str) -> int:
249
  """
250
+ Parses a time string and converts it to a total number of minutes.
 
 
 
 
 
 
 
 
 
 
251
 
252
  Args:
253
+ time_str (str): A string representing the time in hours and/or minutes.
254
 
255
  Returns:
256
+ int: The total number of minutes calculated from the given time string.
257
 
258
  Raises:
259
+ ValueError: If the time string is in an invalid format, or if negative values
260
+ or invalid values for hours or minutes are provided.
261
  """
262
+ pattern = r"^\s*(?:(\d+)\s*hod)?(?:\s*(\d+)\s*min)?\s*$"
263
  match = re.match(pattern, time_str, re.IGNORECASE)
264
+
265
  if not match:
266
  raise ValueError(f"Invalid time format: '{time_str}'")
267
+
268
  hours_str, minutes_str = match.groups()
269
+
270
  hours = int(hours_str) if hours_str else 0
271
  minutes = int(minutes_str) if minutes_str else 0
272
 
 
276
  raise ValueError("Minutes cannot be negative.")
277
  if minutes >= 60:
278
  raise ValueError("Minutes must be less than 60.")
279
+
280
  total_minutes = hours * 60 + minutes
281
  return total_minutes
282
 
283
+
284
def get_total_minutes(from_stop: str, to_stop: str, dt: datetime) -> int:
    """
    Look up the travel time between two stops on idos.cz.

    Sends a POST request with the connection search form to
    https://idos.cz/pid/spojeni/ (through the proxy configured by the
    ``PROXY_DOMAIN``, ``PROXY_PORT``, ``PROXY_USERNAME`` and ``PROXY_PASSWORD``
    environment variables, when ``PROXY_DOMAIN`` is set) and parses the duration
    shown in the first ``connection-head`` element of the response.

    Args:
        from_stop (str): Name of the starting stop.
        to_stop (str): Name of the destination stop.
        dt (datetime): Date and time submitted with the search form.

    Returns:
        int: Travel time in minutes; 0 when both stops are the same.

    Raises:
        requests.HTTPError: If the request fails or returns an unsuccessful status code.
        ValueError: If expected HTML elements are not found in the response or
            the duration text cannot be parsed.
    """
    # Same stop: nothing to look up, and no request is sent.
    if from_stop == to_stop:
        return 0

    # Czech weekday abbreviations appended to the date field of the form.
    day_abbreviations = {
        0: "po",  # Monday -> po
        1: "út",  # Tuesday -> út
        2: "st",  # Wednesday -> st
        3: "čt",  # Thursday -> čt
        # NOTE(review): the original comment says Friday -> pá, yet the value
        # sent is an empty string. Left unchanged; confirm whether intentional.
        4: "",
        5: "so",  # Saturday -> so
        6: "ne",  # Sunday -> ne
    }
    abbreviation = day_abbreviations.get(dt.weekday(), "")
    date_str = f"{dt.day}.{dt.month}.{dt.year} {abbreviation}"
    time_str = dt.strftime("%H:%M")

    headers = {
        "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
        "accept-language": "en-US,en;q=0.9",
        "cache-control": "max-age=0",
        "content-type": "application/x-www-form-urlencoded",
        "dnt": "1",
        "origin": "https://idos.cz",
        "priority": "u=0, i",
        "referer": "https://idos.cz/pid/spojeni/",
    }

    # A list of pairs (not a dict) because "positionACPosition" appears twice.
    data = [
        ("From", from_stop),
        ("positionACPosition", ""),
        ("To", to_stop),
        ("positionACPosition", ""),
        ("AdvancedForm.Via[0]", ""),
        ("AdvancedForm.ViaHidden[0]", ""),
        ("Date", date_str),
        ("Time", time_str),
        # presumably marks the time as an arrival time; verify against the form
        ("IsArr", "True"),
    ]

    url = "https://idos.cz/pid/spojeni/"

    request_kwargs = {"headers": headers, "data": data, "timeout": 15}

    # Only build a proxy URL when a proxy is actually configured, and leave out
    # any part whose variable is unset instead of formatting the text "None".
    proxy_domain = os.getenv("PROXY_DOMAIN")
    if proxy_domain:
        proxy_port = os.getenv("PROXY_PORT")
        proxy_username = os.getenv("PROXY_USERNAME")
        proxy_password = os.getenv("PROXY_PASSWORD")

        credentials = ""
        if proxy_username is not None:
            credentials = proxy_username
            if proxy_password is not None:
                credentials += f":{proxy_password}"
            credentials += "@"
        host = proxy_domain if proxy_port is None else f"{proxy_domain}:{proxy_port}"
        proxy_url = f"http://{credentials}{host}"
        request_kwargs["proxies"] = {
            "http": proxy_url,
            "https": proxy_url,
        }

    try:
        response = requests.post(url, **request_kwargs)
        # Checked on both the direct and the proxied path.
        response.raise_for_status()
    except requests.RequestException as e:
        raise requests.HTTPError(f"Failed to retrieve data from {url}.") from e

    soup = BeautifulSoup(response.content, "html.parser")
    connection_head = soup.find(class_="connection-head")

    if not connection_head:
        raise ValueError("No elements found with the class 'connection-head'.")

    strong_tag = connection_head.find("strong")

    if not strong_tag:
        raise ValueError(
            "No <strong> tag found within the first 'connection-head' element."
        )

    time_str_response = strong_tag.get_text(strip=True)
    return parse_time_to_minutes(time_str_response)
386
 
387
+
388
+ @cached(cache=TTLCache(maxsize=10**6, ttl=24 * 60 * 60))
389
+ def get_total_minutes_with_retries(
390
+ from_stop: str,
391
+ to_stop: str,
392
+ dt: datetime,
393
+ max_retries: int = 3,
394
+ retry_delay: int = 2,
395
+ ) -> int:
396
+ """
397
+ Calculate the total travel time in minutes between two stops with retry functionality.
398
+
399
+ Parameters:
400
+ from_stop (str): The name of the starting stop.
401
+ to_stop (str): The name of the destination stop.
402
+ dt (datetime): The date and time for which the travel time is being calculated.
403
+ max_retries (int, optional): Maximum number of retry attempts if an error occurs. Default is 3.
404
+ retry_delay (int, optional): Delay in seconds between retry attempts. Default is 2 seconds.
405
+
406
+ Returns:
407
+ int: The total travel time in minutes if successful, or `None` if all attempts fail.
408
+ """
409
  attempt = 0
410
 
411
  while attempt < max_retries:
 
415
  except Exception as e:
416
  attempt += 1
417
  if attempt < max_retries:
418
+ print(
419
+ f"Error processing pair ({from_stop}, {to_stop}): {e}. Retrying in {retry_delay} seconds... (Attempt {attempt}/{max_retries})"
420
+ )
421
  time.sleep(retry_delay)
422
  else:
423
+ print(
424
+ f"Failed to process pair ({from_stop}, {to_stop}) after {max_retries} attempts."
425
+ )
426
  return None
427
+ return None
428
+
429
+
430
def get_actual_time_optimal_stop(
    method: str,
    selected_stops: List[str],
    target_stops: List[str],
    event_datetime: datetime,
    show_top: int = 20,
) -> pl.DataFrame:
    """Look up real travel times to each target stop and rank the targets.

    For every target stop, ``get_total_minutes_with_retries`` is called once per
    selected stop (target stops are processed concurrently on a pool of five
    threads). Targets for which any lookup yielded no value are dropped from
    the result.

    Args:
        method (str): The method for optimization. Can be 'minimize-worst-case' or 'minimize-total'.
        selected_stops (List[str]): A list of selected stops to calculate travel times from.
        target_stops (List[str]): A list of target stops to calculate travel times to.
        event_datetime (datetime.datetime): The date and time of the event for which travel times are calculated.
        show_top (int, optional): The number of top optimal stops to return, defaults to 20.

    Returns:
        polars.DataFrame: Columns ``target_stop``, ``t1 mins`` ... ``tN mins``,
        ``Worst Case Minutes`` and ``Total Minutes``, sorted according to the
        selected method. Empty (same columns) when there is nothing to rank.

    Raises:
        ValueError: If the method is not recognized. Errors for individual stop
            pairs are printed and do not abort the computation.

    """
    sort_columns = {
        "minimize-worst-case": "worst_case_minutes",
        "minimize-total": "total_minutes",
    }
    # Fail before any request is sent rather than silently skipping the sort.
    if method not in sort_columns:
        raise ValueError(f"Unknown method: {method}")

    minute_columns = [f"total_minutes_{si}" for si in range(len(selected_stops))]

    # Internal column name -> name shown to the user, in output column order.
    display_names = {
        column: f"t{si+1} mins" for si, column in enumerate(minute_columns)
    }
    display_names["worst_case_minutes"] = "Worst Case Minutes"
    display_names["total_minutes"] = "Total Minutes"

    def empty_result() -> pl.DataFrame:
        # Same column layout as a regular result, but without rows.
        schema = {"target_stop": pl.Utf8}
        schema.update({name: pl.Int64 for name in display_names.values()})
        return pl.DataFrame(schema=schema)

    if not selected_stops or not target_stops:
        return empty_result()

    def process_target_stop(target_stop):
        row = {"target_stop": target_stop}
        for column, from_stop in zip(minute_columns, selected_stops):
            try:
                row[column] = get_total_minutes_with_retries(
                    from_stop, target_stop, event_datetime
                )
            except Exception as e:
                print(f"Error processing pair ({from_stop}, {target_stop}): {e}")
                traceback.print_exc()
                row[column] = None
        return row

    rows = []
    with ThreadPoolExecutor(max_workers=5) as executor:
        futures = {
            executor.submit(process_target_stop, target_stop): target_stop
            for target_stop in target_stops
        }
        for future in tqdm(as_completed(futures), total=len(futures)):
            try:
                rows.append(future.result())
            except Exception as e:
                print(f"An error occurred with target_stop={futures[future]}: {e}")

    # Every worker failed: there are no columns to aggregate.
    if not rows:
        return empty_result()

    df_times = (
        pl.DataFrame(rows)
        .with_columns(
            pl.max_horizontal(*minute_columns).alias("worst_case_minutes"),
            pl.sum_horizontal(*minute_columns).alias("total_minutes"),
        )
        .sort(sort_columns[method])
        .rename(display_names)
        # Drop targets with at least one failed lookup (stored as null).
        .drop_nulls()
    )

    return df_times.head(show_top)
507
 
508
+
509
  def cerate_app():
510
  with gr.Blocks() as app:
511
  gr.Markdown("## Optimal Public Transport Stop Finder in Prague")
512
+ gr.Markdown(
513
+ """
514
  Consider you are in Prague and you want to meet with your friends. What is the optimal stop to meet? Now you can find that with this app!
515
 
516
+ Time table data are being scraped from IDOS API, IDOS uses PID timetable data."""
517
+ )
518
 
 
519
  number_of_stops = gr.Slider(
520
+ minimum=2, maximum=12, step=1, value=3, label="Number of People"
 
 
 
 
521
  )
522
 
 
523
  method = gr.Radio(
524
  choices=["Minimize worst case for each", "Minimize total time"],
525
  value="Minimize worst case for each",
526
+ label="Optimization Method",
527
  )
528
 
529
  next_dt = get_next_meetup_time(4, 20) # Friday 20:00
530
  next_date = next_dt.strftime("%d/%m/%Y")
531
  next_time = next_dt.strftime("%H:%M")
 
532
  date_input = gr.Textbox(
533
+ label="Date (DD/MM/YYYY)", placeholder=f"e.g., {next_date}", value=next_date
 
 
534
  )
535
 
 
536
  time_input = gr.Textbox(
537
+ label="Time (HH:MM)", placeholder=f"e.g., {next_time}", value=next_time
 
 
538
  )
539
 
 
540
  dropdowns = []
541
  for i in range(12):
542
  dd = gr.Dropdown(
543
+ choices=ALL_STOPS, label=f"Choose Starting Stop #{i+1}", visible=False
 
 
544
  )
545
  dropdowns.append(dd)
546
 
 
553
  updates.append(gr.update(visible=False))
554
  return updates
555
 
 
556
  number_of_stops.change(
557
+ fn=update_dropdowns, inputs=number_of_stops, outputs=dropdowns
 
 
558
  )
559
 
 
560
  search_button = gr.Button("Search")
561
 
562
def search_optimal_stop(
    num_stops, chosen_method, date_str, time_str, *all_stops
):
    """Handle a click on the "Search" button and return the ranked stops.

    Args:
        num_stops: Value of the "Number of People" slider; only the first
            ``num_stops`` dropdown values are considered.
        chosen_method: Label of the selected optimization method.
        date_str: Event date in ``DD/MM/YYYY`` format.
        time_str: Event time in ``HH:MM`` format.
        *all_stops: Values of all starting-stop dropdowns, in order.

    Returns:
        The DataFrame produced by ``get_actual_time_optimal_stop`` with an
        added 1-based ``#`` row index column.

    Raises:
        gr.Error: If the date/time is invalid or no starting stop is chosen.
    """
    is_valid, error_message = validate_date_time(date_str, time_str)
    if not is_valid:
        raise gr.Error(error_message)

    # Gradio documents the slider value as a float, which is not a valid
    # slice bound, so convert it explicitly.
    selected_stops = [stop for stop in all_stops[: int(num_stops)] if stop]
    print("Number of stops:", num_stops)
    print("Method selected:", chosen_method)
    print("Selected stops:", selected_stops)
    print("Selected date:", date_str)
    print("Selected time:", time_str)

    # Fail with a clear message instead of passing an empty list on to the
    # search helpers.
    if not selected_stops:
        raise gr.Error("Please choose at least one starting stop.")

    if chosen_method == "Minimize worst case for each":
        method = "minimize-worst-case"
    else:
        method = "minimize-total"

    try:
        event_datetime = datetime.strptime(
            f"{date_str} {time_str}", "%d/%m/%Y %H:%M"
        )
        print("Event DateTime:", event_datetime)
    except ValueError as e:
        raise gr.Error(f"Error parsing date and time: {e}") from e

    # Pre-select candidate stops, then rank them with get_actual_time_optimal_stop.
    target_stops = get_optimal_stop(
        method, selected_stops, show_top_geo=10, show_top_time=SHOW_TOP + 10
    )
    print(target_stops)
    df_times = get_actual_time_optimal_stop(
        method, selected_stops, target_stops, event_datetime, show_top=SHOW_TOP
    )
    return df_times.with_row_index("#", offset=1)
597
 
 
598
  results_table = gr.Dataframe(
599
  headers=["Target Stop", "Worst Case Minutes", "Total Minutes"],
600
  datatype=["str", "number", "str"],
601
+ label="Optimal Stops",
602
  )
603
 
 
604
  search_button.click(
605
  fn=search_optimal_stop,
606
  inputs=[number_of_stops, method, date_input, time_input] + dropdowns,
607
+ outputs=results_table,
608
  )
609
 
 
610
  app.load(
611
+ lambda: [gr.update(visible=True) for _ in range(3)]
612
+ + [gr.update(visible=False) for _ in range(9)],
613
  inputs=[],
614
+ outputs=dropdowns,
615
  )
616
+
617
  gr.Markdown("---")
618
+ gr.Markdown(
619
+ """
620
  Created by [Daniel Herman](https://www.hermandaniel.com), check out the code [detrin/pub-finder](https://github.com/detrin/pub-finder).
621
+ """
622
+ )
623
  return app
624
 
625
+
626
# Reference only: snippet that computes pairwise geodesic distances between
# stops from "Prague_stops_geo.csv". It is disabled; the distance table is
# loaded from a parquet file below instead (presumably a precomputed result
# of this snippet -- verify before relying on it).
#
# print("Loading time table file ...")
# prague_stops = pl.read_csv("Prague_stops_geo.csv")
# print("Calculating distances between stops ...")
# stops_geo_dist = (
#     prague_stops.join(prague_stops, how="cross")
#     .with_columns(
#         pl.struct(["lat", "lon", "lat_right", "lon_right"])
#         .map_elements(
#             lambda x: geopy.distance.geodesic(
#                 (x["lat"], x["lon"]), (x["lat_right"], x["lon_right"])
#             ).km,
#             return_dtype=pl.Float64,
#         )
#         .alias("distance_in_km")
#     )
#     .rename({"name": "from", "name_right": "to"})
#     .select(["from", "to", "distance_in_km"])
# )

# Number of result rows requested by the search handler.
SHOW_TOP = 15

# Distance table with "from" and "to" stop-name columns.
DISTANCE_TABLE = stops_geo_dist = pl.read_parquet(
    "Prague_stops_combinations.parquet"
)
print(stops_geo_dist)

from_stops = DISTANCE_TABLE["from"].unique().sort().to_list()
to_stops = DISTANCE_TABLE["to"].unique().sort().to_list()
# Only stops that occur both as origin and as destination are selectable.
ALL_STOPS = sorted(set(from_stops).intersection(to_stops))


if __name__ == "__main__":
    app = cerate_app()
    print("Starting app ...")
    app.launch()