neuralworm committed
Commit cfc0aba
1 Parent(s): c6dfbe1

initial commit

Files changed (1):
  1. app.py +150 -253
app.py CHANGED
@@ -28,6 +28,7 @@ from fuzzywuzzy import fuzz
  import calendar
  import translation_utils
  import hashlib
 
  translation_utils.create_translation_table()
 
@@ -166,110 +167,29 @@ def calculate_gematria_sum(text, date_words):
      else:
          return None
 
- def perform_els_search(step, rounds_combination, tlang, strip_spaces, strip_in_braces, strip_diacritics_chk, include_torah, include_bible, include_quran, include_hindu, include_tripitaka):
-     if step == 0 or rounds_combination == "0,0":
-         return None
-
-     results = {}
-     length = 0
-
-     selected_language_long = tlang  # From the Gradio dropdown (long form)
-     tlang = LANGUAGES_SUPPORTED.get(selected_language_long)  # Get the short code.
-     if tlang is None:  # Handle unsupported languages
-         tlang = "en"
-         logger.warning(f"Unsupported language selected: {selected_language_long}. Defaulting to English (en).")
-
-     if include_torah:
-         logger.debug(f"Arguments for Torah: {(1, 39, step, rounds_combination, length, tlang, strip_spaces, strip_in_braces, strip_diacritics_chk)}")
-         results["Torah"] = cached_process_json_files(torah.process_json_files, 1, 39, step, rounds_combination, length, tlang, strip_spaces, strip_in_braces, strip_diacritics_chk)
-     else:
-         results["Torah"] = []
-
-     if include_bible:
-         results["Bible"] = cached_process_json_files(bible.process_json_files, 40, 66, step, rounds_combination, length, tlang, strip_spaces, strip_in_braces, strip_diacritics_chk)
-     else:
-         results["Bible"] = []
-
-     if include_quran:
-         results["Quran"] = cached_process_json_files(quran.process_json_files, 1, 114, step, rounds_combination, length, tlang, strip_spaces, strip_in_braces, strip_diacritics_chk)
-     else:
-         results["Quran"] = []
-
-     if include_hindu:
-         results["Rig Veda"] = cached_process_json_files(hindu.process_json_files, 1, 10, step, rounds_combination, length, tlang, False, strip_in_braces, strip_diacritics_chk)
-     else:
-         results["Rig Veda"] = []
-
-     if include_tripitaka:
-         results["Tripitaka"] = cached_process_json_files(tripitaka.process_json_files, 1, 52, step, rounds_combination, length, tlang, strip_spaces, strip_in_braces, strip_diacritics_chk)
-     else:
-         results["Tripitaka"] = []
-
-     return results
-
-
- def add_24h_projection(results_dict):  # Now takes a dictionary of results
-     for book_name, results in results_dict.items():  # Iterate per book
-         num_results = len(results)
-         if num_results > 0:
-             time_interval = timedelta(minutes=24 * 60 / num_results)
-             current_time = datetime.min.time()
-             for i in range(num_results):
-                 next_time = (datetime.combine(datetime.min, current_time) + time_interval).time()
-                 time_range_str = f"{current_time.strftime('%H:%M')}-{next_time.strftime('%H:%M')}"
-                 results[i]['24h Projection'] = time_range_str
-                 current_time = next_time
-     return results_dict
-
-
- def add_monthly_projection(results_dict, selected_date):
-     if selected_date is None:
-         return results_dict  # Return if no date is selected
-
-     for book_name, results in results_dict.items():  # Iterate per book
-         num_results = len(results)
-         if num_results > 0:
-             days_in_month = calendar.monthrange(selected_date.year, selected_date.month)[1]
-             total_seconds = (days_in_month - 1) * 24 * 3600
-             seconds_interval = total_seconds / num_results
-             start_datetime = datetime(selected_date.year, selected_date.month, 1)
-             current_datetime = start_datetime
-
-             for i in range(num_results):
-                 next_datetime = current_datetime + timedelta(seconds=seconds_interval)
-                 current_date = current_datetime.date()  # Moved assignment inside loop
-                 next_date = next_datetime.date()
-                 date_range_str = f"{current_date.strftime('%h %d')} - {next_date.strftime('%h %d')}"
-                 results[i]['Monthly Projection'] = date_range_str
-                 current_datetime = next_datetime  # Add this
-                 current_date = next_datetime.date()  # Add this too
-     return results_dict
-
-
- def add_yearly_projection(results_dict, selected_date):  # Correct name, handle dictionary input
-     if selected_date is None:
-         return results_dict  # Return if no date is selected
-
-     for book_name, results in results_dict.items():  # Iterate per book
-         num_results = len(results)
-         if num_results > 0:
-             days_in_year = 366 if calendar.isleap(selected_date.year) else 365
-             total_seconds = (days_in_year - 1) * 24 * 3600
-             seconds_interval = total_seconds / num_results
-             start_datetime = datetime(selected_date.year, 1, 1)
-             current_datetime = start_datetime
-
-             for i in range(num_results):
-                 next_datetime = current_datetime + timedelta(seconds=seconds_interval)
-                 current_date = current_datetime.date()  # Move assignment inside loop
-                 next_date = next_datetime.date()
-                 date_range_str = f"{current_date.strftime('%b %d')} - {next_date.strftime('%b %d')}"
-                 results[i]['Yearly Projection'] = date_range_str
-                 current_datetime = next_datetime  # Update current datetime for next iteration
-
-     return results_dict
 
 
  def sort_results(results):
@@ -290,24 +210,17 @@ with gr.Blocks() as app:
      with gr.Column():
          with gr.Row():
              tlang = create_language_dropdown("Target Language for Result Translation", default_value='english')
-             selected_date = Calendar(type="datetime", label="Date to investigate (optional)", info="Pick a date from the calendar")
              use_day = gr.Checkbox(label="Use Day", info="Check to include day in search", value=True)
              use_month = gr.Checkbox(label="Use Month", info="Check to include month in search", value=True)
              use_year = gr.Checkbox(label="Use Year", info="Check to include year in search", value=True)
              date_language_input = create_language_dropdown("Language of the person/topic (optional) (Date Word Language)", default_value='english')
          with gr.Row():
              gematria_text = gr.Textbox(label="Name and/or Topic (required)", value="Hans Albert Einstein Mileva Marity-Einstein")
-             date_words_output = gr.Textbox(label="Date in Words Translated (optional)")
-             gematria_result = gr.Number(label="Journal Sum")
-             #with gr.Row():
 
 
          with gr.Row():
-             step = gr.Number(label="Jump Width (Steps) for ELS")
-             float_step = gr.Number(visible=False, value=1)
-             half_step_btn = gr.Button("Steps / 2")
-             double_step_btn = gr.Button("Steps * 2")
-
              with gr.Column():
                  round_x = gr.Number(label="Round (1)", value=1)
                  round_y = gr.Number(label="Round (2)", value=-1)
@@ -335,48 +248,9 @@ with gr.Blocks() as app:
 
      # --- Event Handlers ---
 
-     def update_date_words(selected_date, date_language_input, use_day, use_month, use_year):
-         if selected_date is None:
-             return ""
-
-         if not use_year and not use_month and not use_day:
-             return translate_date_to_words(selected_date, date_language_input)
-
-         year = selected_date.year if use_year else None
-         month = selected_date.month if use_month else None
-         day = selected_date.day if use_day else None
-
-         if year is not None and month is not None and day is not None:
-             date_obj = selected_date
-         elif year is not None and month is not None:
-             date_obj = str(f"{year}-{month}")
-         elif year is not None:
-             date_obj = str(f"{year}")
-         else:  # Return empty string if no date components are selected
-             return ""
-
-
-         date_in_words = date_to_words(date_obj)
-
-
-         translator = GoogleTranslator(source='auto', target=date_language_input)
-         translated_date_words = translator.translate(date_in_words)
-         return custom_normalize(translated_date_words)
-
-     def update_journal_sum(gematria_text, date_words_output):
-         sum_value = calculate_gematria_sum(gematria_text, date_words_output)
-         return sum_value, sum_value, sum_value
-
      def update_rounds_combination(round_x, round_y):
          return f"{int(round_x)},{int(round_y)}"
 
-     def update_step_half(float_step):
-         new_step = math.ceil(float_step / 2)
-         return new_step, float_step / 2
-
-     def update_step_double(float_step):
-         new_step = math.ceil(float_step * 2)
-         return new_step, float_step * 2
 
 
      def find_closest_phrase(target_phrase, phrases):
@@ -401,98 +275,146 @@ with gr.Blocks() as app:
          logging.debug(f"Closest phrase found: {best_match} with score: {best_score}")  # Log the best match
          return best_match
 
-     def perform_search(step, rounds_combination, tlang, strip_spaces, strip_in_braces, strip_diacritics_chk, include_torah, include_bible, include_quran, include_hindu, include_tripitaka, gematria_text, date_words_output, selected_date):
-         # Inside perform_search
-         els_results = perform_els_search(step, rounds_combination, tlang, strip_spaces, strip_in_braces,
-                                          strip_diacritics_chk, include_torah, include_bible, include_quran,
-                                          include_hindu,
-                                          include_tripitaka)
 
-         # --- Network Search Integration ---
          most_frequent_phrases = {}
-         combined_and_sorted_results = []  # Combined list to hold all results
-
-         for book_name, book_results in els_results.items():
-             if book_results:  # Add this check to ensure book_results is not empty
-                 most_frequent_phrases[book_name] = ""  # Default value
-
-                 for result in book_results:
-                     try:
-                         gematria_sum = calculate_gematria(result['result_text'])  # Calculate gematria
-                         max_words = len(result['result_text'].split())
-                         matching_phrases = search_gematria_in_db(gematria_sum, max_words)
-                         max_words_limit = 20
-                         while not matching_phrases and max_words < max_words_limit:  # Increase max_words for more results
-                             max_words += 1
-                             matching_phrases = search_gematria_in_db(gematria_sum, max_words)
-
-                         if matching_phrases:
-                             most_frequent_phrase = get_most_frequent_phrase(matching_phrases)
-                             most_frequent_phrases[book_name] = most_frequent_phrase
-                         else:
-                             closest_phrase = find_closest_phrase(result['result_text'],
-                                                                  search_gematria_in_db(gematria_sum, max_words_limit))
-                             most_frequent_phrases[
-                                 book_name] = closest_phrase or ""  # Update most frequent phrases even if no phrase found
-
-                         result['Most Frequent Phrase'] = most_frequent_phrases[book_name]
-                         if 'book' in result:
-                             if isinstance(result['book'], int):  # Torah, Bible, Quran case
-                                 result['book'] = f"{book_name} {result['book']}."
-                         combined_and_sorted_results.append(result)
-
-                     except KeyError as e:
-                         print(f"DEBUG: KeyError - Key '{e.args[0]}' not found in result. Skipping this result.")
-                         continue
 
          # --- Batch Translation ---
          selected_language_long = tlang
          tlang_short = LANGUAGES_SUPPORTED.get(selected_language_long)
          if tlang_short is None:
              tlang_short = "en"
-             logger.warning(f"Unsupported language selected: {selected_language_long}. Defaulting to English (en).")
 
-         # Prepare lists for batch translation, including source language
          phrases_to_translate = []
-         phrases_source_langs = []  # Source languages for phrases
          results_to_translate = []
-         results_source_langs = []  # Source languages for results
          for result in combined_and_sorted_results:
              phrases_to_translate.append(result.get('Most Frequent Phrase', ''))
              phrases_source_langs.append(result.get("source_language", "auto"))
              results_to_translate.append(result.get('result_text', ''))
              results_source_langs.append(result.get("source_language", "auto"))
 
-
          translated_phrases = translation_utils.batch_translate(phrases_to_translate, tlang_short, phrases_source_langs)
          translated_result_texts = translation_utils.batch_translate(results_to_translate, tlang_short, results_source_langs)
 
-
          for i, result in enumerate(combined_and_sorted_results):
-             result['translated_text'] = translated_result_texts.get(results_to_translate[i], None)
-             result['Translated Most Frequent Phrase'] = translated_phrases.get(phrases_to_translate[i], None)
 
-         # Time Projections (using els_results dictionary)
-         updated_els_results = add_24h_projection(els_results)  # Use original els_results dictionary
-         updated_els_results = add_monthly_projection(updated_els_results, selected_date)  # Call correct functions with correct params
-         updated_els_results = add_yearly_projection(updated_els_results, selected_date)
 
-         combined_and_sorted_results = []
-         for book_results in updated_els_results.values():  # Combine results for dataframe and json
-             combined_and_sorted_results.extend(book_results)
-         combined_and_sorted_results = sort_results(combined_and_sorted_results)  # sort combined results
 
          df = pd.DataFrame(combined_and_sorted_results)
          df.index = range(1, len(df) + 1)
          df.reset_index(inplace=True)
          df.rename(columns={'index': 'Result Number'}, inplace=True)
 
-         for i, result in enumerate(combined_and_sorted_results):  # Iterate through the combined list
-             result['Result Number'] = i + 1
-
          search_config = {
-             "step": step,
-             "rounds_combination": rounds_combination,
              "target_language": tlang,
              "strip_spaces": strip_spaces,
              "strip_in_braces": strip_in_braces,
@@ -503,72 +425,47 @@ with gr.Blocks() as app:
              "include_hindu": include_hindu,
              "include_tripitaka": include_tripitaka,
              "gematria_text": gematria_text,
-             "date_words": date_words_output
          }
 
          output_data = {
              "search_configuration": search_config,
-             "results": combined_and_sorted_results  # Use the combined list here
          }
 
          json_data = output_data
-
-         # --- Return results ---
          combined_most_frequent = "\n".join(
-             f"{book}: {phrase}" for book, phrase in most_frequent_phrases.items())  # Combine phrases
-         return df, combined_most_frequent, json_data
 
 
      # --- Event Triggers ---
      round_x.change(update_rounds_combination, inputs=[round_x, round_y], outputs=rounds_combination)
      round_y.change(update_rounds_combination, inputs=[round_x, round_y], outputs=rounds_combination)
 
-     selected_date.change(update_date_words, inputs=[selected_date, date_language_input, use_day, use_month, use_year], outputs=[date_words_output])
-     date_language_input.change(update_date_words, inputs=[selected_date, date_language_input, use_day, use_month, use_year], outputs=[date_words_output])
 
-     gematria_text.change(update_journal_sum, inputs=[gematria_text, date_words_output], outputs=[gematria_result, step, float_step])
-     date_words_output.change(update_journal_sum, inputs=[gematria_text, date_words_output], outputs=[gematria_result, step, float_step])
 
-     half_step_btn.click(update_step_half, inputs=[float_step], outputs=[step, float_step])
-     double_step_btn.click(update_step_double, inputs=[float_step], outputs=[step, float_step])
 
      translate_btn.click(
          perform_search,
-         inputs=[step, rounds_combination, tlang, strip_spaces, strip_in_braces, strip_diacritics_chk, include_torah_chk, include_bible_chk, include_quran_chk, include_hindu_chk, include_tripitaka_chk, gematria_text, date_words_output, selected_date],
          outputs=[markdown_output, most_frequent_phrase_output, json_output]
      )
 
-     app.load(
-         update_date_words,
-         inputs=[selected_date, date_language_input, use_day, use_month, use_year],  # Include all 5 inputs
-         outputs=[date_words_output]
-     )
-
-     use_day.change(
-         update_date_words,
-         inputs=[selected_date, date_language_input, use_day, use_month, use_year],
-         outputs=[date_words_output]
-     )
-     use_month.change(
-         update_date_words,
-         inputs=[selected_date, date_language_input, use_day, use_month, use_year],
-         outputs=[date_words_output]
-     )
-     use_year.change(
-         update_date_words,
-         inputs=[selected_date, date_language_input, use_day, use_month, use_year],
-         outputs=[date_words_output]
-     )
-
-     def checkbox_behavior(use_day_value, use_month_value):
-         if use_day_value:  # Tick month and year automatically when day is ticked.
-             return True, True
-
-         return use_month_value, True  # return month value unchanged and automatically tick year if month is checked
-
-     use_day.change(checkbox_behavior, inputs=[use_day, use_month], outputs=[use_month, use_year])
-     use_month.change(checkbox_behavior, inputs=[use_day, use_month], outputs=[use_month, use_year])  # No need for use_day here, day won't be changed by month
 
 
  if __name__ == "__main__":
 
  import calendar
  import translation_utils
  import hashlib
+ import time
 
  translation_utils.create_translation_table()
 
      else:
          return None
 
 
+ def add_24h_projection(results_dict, date_str):  # Add date_str as parameter
+     combined_results = []
+     for book_name, results in results_dict.items():
+         combined_results.extend(results)
+
+     num_results = len(combined_results)
+     if num_results > 0:
+         time_interval = timedelta(minutes=24 * 60 / num_results)
+         current_datetime = datetime.combine(datetime.today(), datetime.min.time())
+         for i in range(num_results):
+             next_datetime = current_datetime + time_interval
+             time_range_str = f"{current_datetime.strftime('%H:%M')}-{next_datetime.strftime('%H:%M')}"
+             combined_results[i]['24h Projection'] = time_range_str
+             current_datetime = next_datetime
+
+     # Re-organize results back into their book dictionaries
+     reorganized_results = defaultdict(list)
+     for result in combined_results:
+         book_name = result.get('book', 'Unknown')  # Get book name to reorganize
+         reorganized_results[book_name].append(result)
+
+     return reorganized_results
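
For orientation, the projection above just slices the 24-hour day into equal intervals, one per combined result, wrapping past midnight; the date_str parameter is accepted but not used in the interval math itself. A minimal standalone sketch of that interval arithmetic (hypothetical three-result case, not part of app.py):

    from datetime import datetime, timedelta

    num_results = 3  # hypothetical result count
    interval = timedelta(minutes=24 * 60 / num_results)
    current = datetime.combine(datetime.today(), datetime.min.time())  # today at 00:00
    for _ in range(num_results):
        nxt = current + interval
        print(f"{current.strftime('%H:%M')}-{nxt.strftime('%H:%M')}")
        current = nxt
    # prints: 00:00-08:00, 08:00-16:00, 16:00-00:00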
 
 
  def sort_results(results):
 
      with gr.Column():
          with gr.Row():
              tlang = create_language_dropdown("Target Language for Result Translation", default_value='english')
+             start_date_range = Calendar(type="datetime", label="Start Date for ELS")
+             end_date_range = Calendar(type="datetime", label="End Date for ELS")
              use_day = gr.Checkbox(label="Use Day", info="Check to include day in search", value=True)
              use_month = gr.Checkbox(label="Use Month", info="Check to include month in search", value=True)
              use_year = gr.Checkbox(label="Use Year", info="Check to include year in search", value=True)
              date_language_input = create_language_dropdown("Language of the person/topic (optional) (Date Word Language)", default_value='english')
          with gr.Row():
              gematria_text = gr.Textbox(label="Name and/or Topic (required)", value="Hans Albert Einstein Mileva Marity-Einstein")
 
 
          with gr.Row():
              with gr.Column():
                  round_x = gr.Number(label="Round (1)", value=1)
                  round_y = gr.Number(label="Round (2)", value=-1)
 
 
      # --- Event Handlers ---
 
      def update_rounds_combination(round_x, round_y):
          return f"{int(round_x)},{int(round_y)}"
 
 
 
      def find_closest_phrase(target_phrase, phrases):
 
          logging.debug(f"Closest phrase found: {best_match} with score: {best_score}")  # Log the best match
          return best_match
 
 
+     def perform_search(rounds_combination, tlang, strip_spaces, strip_in_braces, strip_diacritics_chk, include_torah, include_bible, include_quran, include_hindu, include_tripitaka, gematria_text, start_date, end_date, date_language_input):
+         overall_start_time = time.time()
+
+         combined_and_sorted_results = []
          most_frequent_phrases = {}
+
+         current_date = start_date
+         while current_date <= end_date:
+             date_str = current_date.strftime("%Y-%m-%d")
+             date_words = translate_date_to_words(current_date, date_language_input)
+
+             step = calculate_gematria_sum(gematria_text, date_words)
+             logger.debug(f"Calculated step for {date_str}: {step}")
+
+             if step != 0 and rounds_combination != "0,0":
+                 # Process for the current date
+                 els_results_single_date = {}
+                 if include_torah:
+                     els_results_single_date["Torah"] = cached_process_json_files(torah.process_json_files, 1, 39, step,
+                                                                                  rounds_combination, 0, tlang, strip_spaces,
+                                                                                  strip_in_braces, strip_diacritics_chk)
+                 if include_bible:
+                     els_results_single_date["Bible"] = cached_process_json_files(bible.process_json_files, 40, 66, step,
+                                                                                  rounds_combination, 0, tlang, strip_spaces,
+                                                                                  strip_in_braces, strip_diacritics_chk)
+                 if include_quran:
+                     els_results_single_date["Quran"] = cached_process_json_files(quran.process_json_files, 1, 114, step,
+                                                                                  rounds_combination, 0, tlang, strip_spaces,
+                                                                                  strip_in_braces, strip_diacritics_chk)
+                 if include_hindu:
+                     els_results_single_date["Rig Veda"] = cached_process_json_files(hindu.process_json_files, 1, 10, step,
+                                                                                     rounds_combination, 0, tlang, False,
+                                                                                     strip_in_braces, strip_diacritics_chk)
+                 if include_tripitaka:
+                     els_results_single_date["Tripitaka"] = cached_process_json_files(tripitaka.process_json_files, 1, 52,
+                                                                                      step, rounds_combination, 0, tlang,
+                                                                                      strip_spaces, strip_in_braces,
+                                                                                      strip_diacritics_chk)
+
+                 # Add 24h projection *before* iterating through books
+                 els_results_single_date = add_24h_projection(els_results_single_date, date_str)
+
+                 for book_name, book_results in els_results_single_date.items():
+                     logger.debug(f"Processing results for book: {book_name}")
+                     if book_results:
+                         most_frequent_phrases[book_name] = ""
+                         for result in book_results:
+                             try:
+                                 gematria_sum = calculate_gematria(result['result_text'])
+                                 max_words = len(result['result_text'].split())
+                                 matching_phrases = search_gematria_in_db(gematria_sum, max_words)
+                                 max_words_limit = 20
+                                 while not matching_phrases and max_words < max_words_limit:
+                                     max_words += 1
+                                     matching_phrases = search_gematria_in_db(gematria_sum, max_words)
+
+                                 if matching_phrases:
+                                     most_frequent_phrase = get_most_frequent_phrase(matching_phrases)
+                                     most_frequent_phrases[book_name] = most_frequent_phrase
+                                 else:
+                                     closest_phrase = find_closest_phrase(result['result_text'],
+                                                                          search_gematria_in_db(gematria_sum,
+                                                                                                max_words_limit))
+                                     most_frequent_phrases[book_name] = closest_phrase or ""
+
+                                 result['Most Frequent Phrase'] = most_frequent_phrases[book_name]
+                                 result['date'] = date_str
+                                 if 'book' in result:
+                                     if isinstance(result['book'], int):
+                                         result['book'] = f"{book_name} {result['book']}."
+
+                             except KeyError as e:
+                                 print(f"DEBUG: KeyError - Key '{e.args[0]}' not found in result. Skipping this result.")
+                                 continue
+
+                     combined_and_sorted_results.extend(book_results)
+
+             current_date += timedelta(days=1)
 
          # --- Batch Translation ---
+         translation_start_time = time.time()
          selected_language_long = tlang
          tlang_short = LANGUAGES_SUPPORTED.get(selected_language_long)
          if tlang_short is None:
              tlang_short = "en"
+             logger.warning(
+                 f"Unsupported language selected: {selected_language_long}. Defaulting to English (en).")
 
          phrases_to_translate = []
+         phrases_source_langs = []
          results_to_translate = []
+         results_source_langs = []
+
          for result in combined_and_sorted_results:
              phrases_to_translate.append(result.get('Most Frequent Phrase', ''))
              phrases_source_langs.append(result.get("source_language", "auto"))
              results_to_translate.append(result.get('result_text', ''))
              results_source_langs.append(result.get("source_language", "auto"))
 
          translated_phrases = translation_utils.batch_translate(phrases_to_translate, tlang_short, phrases_source_langs)
          translated_result_texts = translation_utils.batch_translate(results_to_translate, tlang_short, results_source_langs)
 
          for i, result in enumerate(combined_and_sorted_results):
+             result['translated_text'] = translated_result_texts.get(results_to_translate[i], result.get('result_text', ''))
+             result['Translated Most Frequent Phrase'] = translated_phrases.get(phrases_to_translate[i],
+                                                                                result.get('Most Frequent Phrase', ''))
 
+         translation_end_time = time.time()
+         logger.debug(f"Batch translation took: {translation_end_time - translation_start_time} seconds")
 
+         # --- Time projections ---
+         time_projections_start_time = time.time()
+         for result in combined_and_sorted_results:
+             selected_date = datetime.strptime(result['date'], '%Y-%m-%d')
+             book_name = result.get('book', 'Unknown')
+             projection_input = {book_name: [result]}
+
+             updated_date_results = add_24h_projection(projection_input, result['date'])
+
+             result.update(updated_date_results[book_name][0])
+
+         combined_and_sorted_results = sort_results(combined_and_sorted_results)
+         time_projections_end_time = time.time()
+         logger.debug(
+             f"Time projections took: {time_projections_end_time - time_projections_start_time} seconds")
+
+         # --- Dataframe and JSON creation ---
+         dataframe_json_start_time = time.time()
 
          df = pd.DataFrame(combined_and_sorted_results)
          df.index = range(1, len(df) + 1)
          df.reset_index(inplace=True)
          df.rename(columns={'index': 'Result Number'}, inplace=True)
 
          search_config = {
+             "rounds_combination": rounds_combination,  # No more 'step'
              "target_language": tlang,
              "strip_spaces": strip_spaces,
              "strip_in_braces": strip_in_braces,
 
              "include_hindu": include_hindu,
              "include_tripitaka": include_tripitaka,
              "gematria_text": gematria_text,
+             "start_date": start_date.strftime("%Y-%m-%d"),
+             "end_date": end_date.strftime("%Y-%m-%d")
          }
 
          output_data = {
              "search_configuration": search_config,
+             "results": combined_and_sorted_results
          }
 
          json_data = output_data
 
          combined_most_frequent = "\n".join(
+             f"{book}: {phrase}" for book, phrase in most_frequent_phrases.items() if phrase)
 
+         dataframe_json_end_time = time.time()
+         logger.debug(
+             f"Dataframe and JSON creation took: {dataframe_json_end_time - dataframe_json_start_time} seconds")
 
+         overall_end_time = time.time()
+         logger.debug(f"Overall process took: {overall_end_time - overall_start_time} seconds")
+
+         return df, combined_most_frequent, json_data
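
Taken together, the new perform_search replaces the single user-supplied step with one derived per calendar day over the selected range. A minimal, self-contained sketch of just that driving loop (the names daily_steps and word_sum below are placeholders standing in for the real per-day derivation via translate_date_to_words and calculate_gematria_sum):

    from datetime import date, timedelta

    def daily_steps(start: date, end: date, text: str, word_sum) -> dict:
        """Walk the inclusive date range and derive one ELS step per day.

        word_sum is any callable taking (text, iso_date_string) -> int; the
        real app derives the step from the translated date words instead.
        """
        steps = {}
        current = start
        while current <= end:  # inclusive of the end date, as in perform_search
            date_str = current.isoformat()
            steps[date_str] = word_sum(text, date_str)
            current += timedelta(days=1)
        return steps

    # Hypothetical usage with a dummy summing function:
    demo = daily_steps(date(2024, 1, 1), date(2024, 1, 3), "example", lambda t, d: len(t) + len(d))
    print(demo)  # {'2024-01-01': 17, '2024-01-02': 17, '2024-01-03': 17}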
 
      # --- Event Triggers ---
      round_x.change(update_rounds_combination, inputs=[round_x, round_y], outputs=rounds_combination)
      round_y.change(update_rounds_combination, inputs=[round_x, round_y], outputs=rounds_combination)
 
 
+     def update_rounds_combination(round_x, round_y):
+         return f"{int(round_x)},{int(round_y)}"
+
+
+     round_x.change(update_rounds_combination, inputs=[round_x, round_y], outputs=rounds_combination)
+     round_y.change(update_rounds_combination, inputs=[round_x, round_y], outputs=rounds_combination)
 
 
      translate_btn.click(
          perform_search,
+         inputs=[rounds_combination, tlang, strip_spaces, strip_in_braces, strip_diacritics_chk, include_torah_chk, include_bible_chk, include_quran_chk, include_hindu_chk, include_tripitaka_chk, gematria_text, start_date_range, end_date_range, date_language_input],
          outputs=[markdown_output, most_frequent_phrase_output, json_output]
      )
 
 
  if __name__ == "__main__":
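
For readers less familiar with Gradio's event wiring: translate_btn.click passes the current values of the listed input components positionally into perform_search and routes its three return values to the listed outputs. A reduced, self-contained illustration of the same click-wiring pattern (a standalone demo with made-up components, not the app's actual UI):

    import gradio as gr

    def echo(name, times):
        # Stand-in for perform_search: receives component values positionally.
        return ", ".join([name] * int(times))

    with gr.Blocks() as demo:
        name = gr.Textbox(label="Name", value="ELS")
        times = gr.Number(label="Times", value=3)
        out = gr.Textbox(label="Result")
        btn = gr.Button("Run")
        # Same pattern as translate_btn.click(perform_search, inputs=[...], outputs=[...]).
        btn.click(echo, inputs=[name, times], outputs=[out])

    if __name__ == "__main__":
        demo.launch()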