neuralworm committed
Commit 4eaf9a0 · 1 Parent(s): fc031cb

add search interface

Files changed (1)
  1. app.py +347 -140
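
For context: the new "Cache Database Search" tab added in this commit queries the ELS cache database that app.py already maintains. The following is a rough, self-contained sketch (not part of the commit) of the kind of lookup it performs, assuming the els_cache table layout created in app.py: els_cache(query_hash, function_name, args, kwargs, results), with args and results stored as JSON strings. The helper name, the database path, and the example values are illustrative only.

import json
import sqlite3

ELS_CACHE_DB = "els_cache.db"  # path assumed here; app.py defines its own ELS_CACHE_DB constant

def search_cached_result_text(search_term, function_name_filter=None):
    """Return cached ELS entries whose result_text contains search_term."""
    matches = []
    with sqlite3.connect(ELS_CACHE_DB) as conn:
        cursor = conn.cursor()
        cursor.execute("SELECT function_name, args, results FROM els_cache")
        for function_name, args_json, results_json in cursor.fetchall():
            if function_name_filter and function_name != function_name_filter:
                continue  # e.g. keep only "quran.process_json_files" rows
            try:
                args = json.loads(args_json)
                entries = json.loads(results_json)
            except (json.JSONDecodeError, TypeError):
                continue  # skip rows whose cached payload cannot be parsed
            for entry in entries or []:
                if isinstance(entry, dict) and search_term in entry.get("result_text", ""):
                    matches.append({
                        "function_name": function_name,
                        "step": args.get("step"),
                        "rounds": args.get("rounds"),
                        "result": entry,
                    })
    return matches

# Example call (illustrative): search_cached_result_text("אלהים", "torah.process_json_files")
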
app.py CHANGED
@@ -1,6 +1,6 @@
1
- #TODO: Quran results have numbers
2
-
3
  import logging
 
 
4
  logger = logging.getLogger(__name__)
5
  logging.basicConfig(level=logging.INFO)
6
 
@@ -23,8 +23,8 @@ import re
23
  import sqlite3
24
  from collections import defaultdict
25
  from typing import List, Tuple
26
- import rich
27
- from fuzzywuzzy import fuzz
28
  import calendar
29
  import translation_utils
30
  import hashlib
@@ -33,9 +33,9 @@ translation_utils.create_translation_table()
33
 
34
  # Create a translator instance *once* globally
35
  translator = GoogleTranslator(source='auto', target='auto')
36
- LANGUAGES_SUPPORTED = translator.get_supported_languages(as_dict=True) # Corrected dictionary name
37
 
38
- LANGUAGE_CODE_MAP = LANGUAGES_SUPPORTED # Use deep_translator's mapping directly
39
 
40
  # --- Constants ---
41
  DATABASE_FILE = 'gematria.db'
@@ -49,7 +49,7 @@ def create_els_cache_table():
49
  if not os.path.exists(ELS_CACHE_DB):
50
  with sqlite3.connect(ELS_CACHE_DB) as conn:
51
  conn.execute('''
52
- CREATE TABLE els_cache (
53
  query_hash TEXT PRIMARY KEY,
54
  function_name TEXT,
55
  args TEXT,
@@ -111,7 +111,6 @@ def get_query_hash(func, args, kwargs):
111
  key = (func.__name__, args, kwargs)
112
  return hashlib.sha256(json.dumps(key).encode()).hexdigest()
113
 
114
-
115
  def cached_process_json_files(func, *args, **kwargs):
116
  # Create a dictionary to store the parameters
117
  params = {
@@ -154,15 +153,15 @@ def cached_process_json_files(func, *args, **kwargs):
154
  try:
155
  with sqlite3.connect(ELS_CACHE_DB, timeout=DATABASE_TIMEOUT) as conn:
156
  cursor = conn.cursor()
157
- cursor.execute("INSERT INTO els_cache (query_hash, function_name, args, kwargs, results) VALUES (?, ?, ?, ?, ?)",
158
- (query_hash, params["function"], params_json, json.dumps({}), json.dumps(results)))
 
159
  conn.commit()
160
  except sqlite3.Error as e:
161
  logger.error(f"Database error caching results: {e}")
162
 
163
  return results
164
 
165
-
166
  # --- Helper Functions (from Network app.py) ---
167
  def flatten_text(text: List) -> str:
168
  if isinstance(text, list):
@@ -185,13 +184,13 @@ def get_most_frequent_phrase(results):
185
  phrase_counts = defaultdict(int)
186
  for words, book, chapter, verse, phrase_length, word_position in results:
187
  phrase_counts[words] += 1
188
- most_frequent_phrase = max(phrase_counts, key=phrase_counts.get) if phrase_counts else None # Handle empty results
189
  return most_frequent_phrase
190
 
191
  # --- Functions from BOS app.py ---
192
- def create_language_dropdown(label, default_value='English', show_label=True): # Default value must be in LANGUAGE_CODE_MAP
193
  return gr.Dropdown(
194
- choices=list(LANGUAGE_CODE_MAP.keys()), # Correct choices
195
  label=label,
196
  value=default_value,
197
  show_label=show_label
@@ -210,22 +209,21 @@ def calculate_gematria_sum(text, date_words):
210
  else:
211
  return None
212
 
213
- def perform_els_search(step, rounds_combination, tlang, strip_spaces, strip_in_braces, strip_diacritics_chk, include_torah, include_bible, include_quran, include_hindu, include_tripitaka):
 
214
  if step == 0 or rounds_combination == "0,0":
215
  return None
216
 
217
  results = {}
218
  length = 0
219
 
220
- selected_language_long = tlang # From the Gradio dropdown (long form)
221
- # Get the short code.
222
  tlang = LANGUAGES_SUPPORTED.get(selected_language_long)
223
- if tlang is None: # Handle unsupported languages
224
  tlang = "en"
225
  logger.warning(
226
  f"Unsupported language selected: {selected_language_long}. Defaulting to English (en).")
227
 
228
- # Cache Update: Pass parameters individually
229
  if include_torah:
230
  logger.debug(
231
  f"Arguments for Torah: {(1, 39, step, rounds_combination, length, tlang, strip_spaces, strip_in_braces, strip_diacritics_chk)}")
@@ -235,35 +233,37 @@ def perform_els_search(step, rounds_combination, tlang, strip_spaces, strip_in_b
235
  results["Torah"] = []
236
 
237
  if include_bible:
238
- results["Bible"] = cached_process_json_files(bible.process_json_files, 40, 66, step, rounds_combination, length,
 
239
  tlang, strip_spaces, strip_in_braces, strip_diacritics_chk)
240
  else:
241
  results["Bible"] = []
242
 
243
  if include_quran:
244
- results["Quran"] = cached_process_json_files(quran.process_json_files, 1, 114, step, rounds_combination, length,
 
245
  tlang, strip_spaces, strip_in_braces, strip_diacritics_chk)
246
  else:
247
  results["Quran"] = []
248
 
249
  if include_hindu:
250
  results["Rig Veda"] = cached_process_json_files(
251
- hindu.process_json_files, 1, 10, step, rounds_combination, length, tlang, False, strip_in_braces, strip_diacritics_chk)
 
252
  else:
253
  results["Rig Veda"] = []
254
 
255
  if include_tripitaka:
256
  results["Tripitaka"] = cached_process_json_files(
257
- tripitaka.process_json_files, 1, 52, step, rounds_combination, length, tlang, strip_spaces, strip_in_braces, strip_diacritics_chk)
 
258
  else:
259
  results["Tripitaka"] = []
260
 
261
  return results
262
 
263
-
264
-
265
- def add_24h_projection(results_dict): #Now takes a dictionary of results
266
- for book_name, results in results_dict.items(): # Iterate per book
267
  num_results = len(results)
268
  if num_results > 0:
269
  time_interval = timedelta(minutes=24 * 60 / num_results)
@@ -275,12 +275,11 @@ def add_24h_projection(results_dict): #Now takes a dictionary of results
275
  current_time = next_time
276
  return results_dict
277
 
278
-
279
  def add_monthly_projection(results_dict, selected_date):
280
  if selected_date is None:
281
- return results_dict # Return if no date is selected
282
 
283
- for book_name, results in results_dict.items(): # Iterate per book
284
  num_results = len(results)
285
  if num_results > 0:
286
  days_in_month = calendar.monthrange(selected_date.year, selected_date.month)[1]
@@ -289,23 +288,21 @@ def add_monthly_projection(results_dict, selected_date):
289
  start_datetime = datetime(selected_date.year, selected_date.month, 1)
290
  current_datetime = start_datetime
291
 
292
-
293
  for i in range(num_results):
294
  next_datetime = current_datetime + timedelta(seconds=seconds_interval)
295
- current_date = current_datetime.date() # Moved assignment inside loop
296
  next_date = next_datetime.date()
297
  date_range_str = f"{current_date.strftime('%h %d')} - {next_date.strftime('%h %d')}"
298
  results[i]['Monthly Projection'] = date_range_str
299
- current_datetime = next_datetime # Add this
300
- current_date = next_datetime.date() # Add this too
301
  return results_dict
302
 
303
-
304
- def add_yearly_projection(results_dict, selected_date): #Correct name, handle dictionary input
305
  if selected_date is None:
306
- return results_dict # Return if no date is selected
307
 
308
- for book_name, results in results_dict.items(): # Iterate per book
309
  num_results = len(results)
310
  if num_results > 0:
311
  days_in_year = 366 if calendar.isleap(selected_date.year) else 365
@@ -314,80 +311,293 @@ def add_yearly_projection(results_dict, selected_date): #Correct name, handle di
314
  start_datetime = datetime(selected_date.year, 1, 1)
315
  current_datetime = start_datetime
316
 
317
-
318
  for i in range(num_results):
319
  next_datetime = current_datetime + timedelta(seconds=seconds_interval)
320
- current_date = current_datetime.date() # Move assignment inside loop
321
  next_date = next_datetime.date()
322
  date_range_str = f"{current_date.strftime('%b %d')} - {next_date.strftime('%b %d')}"
323
  results[i]['Yearly Projection'] = date_range_str
324
- current_datetime = next_datetime # Update current datetime for next iteration
325
 
326
  return results_dict
327
 
328
-
329
  def sort_results(results):
330
  def parse_time(time_str):
331
  try:
332
  hours, minutes = map(int, time_str.split(':'))
333
- return hours * 60 + minutes # Convert to total minutes
334
  except ValueError:
335
- return 24 * 60 # Sort invalid times to the end
336
 
337
  return sorted(results, key=lambda x: (
338
- parse_time(x.get('24h Projection', '23:59').split('-')[0]), # Sort by start time first
339
- parse_time(x.get('24h Projection', '23:59').split('-')[1]) # Then by end time
340
  ))
341
 
342
  # --- Main Gradio App ---
343
  with gr.Blocks() as app:
344
- with gr.Column():
345
- with gr.Row():
346
- tlang = create_language_dropdown("Target Language for Result Translation", default_value='english')
347
- selected_date = Calendar(type="datetime", label="Date to investigate (optional)", info="Pick a date from the calendar")
348
- use_day = gr.Checkbox(label="Use Day", info="Check to include day in search", value=True)
349
- use_month = gr.Checkbox(label="Use Month", info="Check to include month in search", value=True)
350
- use_year = gr.Checkbox(label="Use Year", info="Check to include year in search", value=True)
351
- date_language_input = create_language_dropdown("Language of the person/topic (optional) (Date Word Language)", default_value='english')
352
- with gr.Row():
353
- gematria_text = gr.Textbox(label="Name and/or Topic (required)", value="Hans Albert Einstein Mileva Marity-Einstein")
354
- date_words_output = gr.Textbox(label="Date in Words Translated (optional)")
355
- gematria_result = gr.Number(label="Journal Sum")
356
- #with gr.Row():
357
-
358
-
359
- with gr.Row():
360
- step = gr.Number(label="Jump Width (Steps) for ELS")
361
- float_step = gr.Number(visible=False, value=1)
362
- half_step_btn = gr.Button("Steps / 2")
363
- double_step_btn = gr.Button("Steps * 2")
364
-
365
- with gr.Column():
366
- round_x = gr.Number(label="Round (1)", value=1)
367
- round_y = gr.Number(label="Round (2)", value=-1)
368
-
369
- rounds_combination = gr.Textbox(label="Combined Rounds", value="1,-1")
370
-
371
- with gr.Row():
372
- include_torah_chk = gr.Checkbox(label="Include Torah", value=True)
373
- include_bible_chk = gr.Checkbox(label="Include Bible", value=True)
374
- include_quran_chk = gr.Checkbox(label="Include Quran", value=True)
375
- include_hindu_chk = gr.Checkbox(label="Include Rigveda", value=False)
376
- include_tripitaka_chk = gr.Checkbox(label="Include Tripitaka", value=False)
377
-
378
- strip_spaces = gr.Checkbox(label="Strip Spaces from Books", value=True)
379
- strip_in_braces = gr.Checkbox(label="Strip Text in Braces from Books", value=True)
380
- strip_diacritics_chk = gr.Checkbox(label="Strip Diacritics from Books", value=True)
381
-
382
- translate_btn = gr.Button("Search with ELS")
383
-
384
- # --- Output Components ---
385
- markdown_output = gr.Dataframe(label="ELS Results")
386
- most_frequent_phrase_output = gr.Textbox(label="Most Frequent Phrase in Network Search")
387
- json_output = gr.JSON(label="JSON Output")
388
 
389
  # --- Event Handlers ---
390
 
391
  def update_date_words(selected_date, date_language_input, use_day, use_month, use_year):
392
  if selected_date is None:
393
  return ""
@@ -408,10 +618,8 @@ with gr.Blocks() as app:
408
  else: # Return empty string if no date components are selected
409
  return ""
410
 
411
-
412
  date_in_words = date_to_words(date_obj)
413
 
414
-
415
  translator = GoogleTranslator(source='auto', target=date_language_input)
416
  translated_date_words = translator.translate(date_in_words)
417
  return custom_normalize(translated_date_words)
@@ -431,51 +639,50 @@ with gr.Blocks() as app:
431
  new_step = math.ceil(float_step * 2)
432
  return new_step, float_step * 2
433
 
434
-
435
  def find_closest_phrase(target_phrase, phrases):
436
  best_match = None
437
  best_score = 0
438
 
439
- logging.debug(f"Target phrase for similarity search: {target_phrase}") # Log target phrase
440
 
441
  for phrase, _, _, _, _, _ in phrases:
442
  word_length_diff = abs(len(target_phrase.split()) - len(phrase.split()))
443
  similarity_score = fuzz.ratio(target_phrase, phrase)
444
  combined_score = similarity_score - word_length_diff
445
 
446
- logging.debug(f"Comparing with phrase: {phrase}") # Log each phrase being compared
447
  logging.debug(
448
- f"Word Length Difference: {word_length_diff}, Similarity Score: {similarity_score}, Combined Score: {combined_score}") # Log scores
449
 
450
  if combined_score > best_score:
451
  best_score = combined_score
452
  best_match = phrase
453
 
454
- logging.debug(f"Closest phrase found: {best_match} with score: {best_score}") # Log the best match
455
  return best_match
456
 
457
- def perform_search(step, rounds_combination, tlang, strip_spaces, strip_in_braces, strip_diacritics_chk, include_torah, include_bible, include_quran, include_hindu, include_tripitaka, gematria_text, date_words_output, selected_date):
458
- # Inside perform_search
 
459
  els_results = perform_els_search(step, rounds_combination, tlang, strip_spaces, strip_in_braces,
460
- strip_diacritics_chk, include_torah, include_bible, include_quran,
461
- include_hindu,
462
- include_tripitaka)
463
 
464
- # --- Network Search Integration ---
465
  most_frequent_phrases = {}
466
- combined_and_sorted_results = [] # Combined list to hold all results
467
 
468
  for book_name, book_results in els_results.items():
469
- if book_results: # Add this check to ensure book_results is not empty
470
- most_frequent_phrases[book_name] = "" # Default value
471
 
472
  for result in book_results:
473
  try:
474
- gematria_sum = calculate_gematria(result['result_text']) # Calculate gematria
475
  max_words = len(result['result_text'].split())
476
  matching_phrases = search_gematria_in_db(gematria_sum, max_words)
477
  max_words_limit = 20
478
- while not matching_phrases and max_words < max_words_limit: # Increase max_words for more results
479
  max_words += 1
480
  matching_phrases = search_gematria_in_db(gematria_sum, max_words)
481
 
@@ -484,13 +691,13 @@ with gr.Blocks() as app:
484
  most_frequent_phrases[book_name] = most_frequent_phrase
485
  else:
486
  closest_phrase = find_closest_phrase(result['result_text'],
487
- search_gematria_in_db(gematria_sum, max_words_limit))
488
  most_frequent_phrases[
489
- book_name] = closest_phrase or "" # Update most frequent phrases even if no phrase found
490
 
491
  result['Most Frequent Phrase'] = most_frequent_phrases[book_name]
492
  if 'book' in result:
493
- if isinstance(result['book'], int): # Torah, Bible, Quran case
494
  result['book'] = f"{book_name} {result['book']}."
495
  combined_and_sorted_results.append(result)
496
 
@@ -498,49 +705,46 @@ with gr.Blocks() as app:
498
  print(f"DEBUG: KeyError - Key '{e.args[0]}' not found in result. Skipping this result.")
499
  continue
500
 
501
- # --- Batch Translation ---
502
  selected_language_long = tlang
503
  tlang_short = LANGUAGES_SUPPORTED.get(selected_language_long)
504
  if tlang_short is None:
505
  tlang_short = "en"
506
  logger.warning(f"Unsupported language selected: {selected_language_long}. Defaulting to English (en).")
507
 
508
- # Prepare lists for batch translation, including source language
509
  phrases_to_translate = []
510
- phrases_source_langs = [] # Source languages for phrases
511
  results_to_translate = []
512
- results_source_langs = [] # Source languages for results
513
  for result in combined_and_sorted_results:
514
  phrases_to_translate.append(result.get('Most Frequent Phrase', ''))
515
- # Always use 'iw' as the source language for "Most Frequent Phrase"
516
  phrases_source_langs.append("he")
517
  results_to_translate.append(result.get('result_text', ''))
518
  results_source_langs.append(result.get("source_language", "auto"))
519
 
520
- translated_phrases = translation_utils.batch_translate(phrases_to_translate, tlang_short, phrases_source_langs)
521
- translated_result_texts = translation_utils.batch_translate(results_to_translate, tlang_short, results_source_langs)
522
-
 
523
 
524
  for i, result in enumerate(combined_and_sorted_results):
525
  result['translated_text'] = translated_result_texts.get(results_to_translate[i], None)
526
  result['Translated Most Frequent Phrase'] = translated_phrases.get(phrases_to_translate[i], None)
527
 
528
- # Time Projections (using els_results dictionary)
529
- updated_els_results = add_24h_projection(els_results) # Use original els_results dictionary
530
- updated_els_results = add_monthly_projection(updated_els_results, selected_date) # Call correct functions with correct params
531
  updated_els_results = add_yearly_projection(updated_els_results, selected_date)
532
 
533
  combined_and_sorted_results = []
534
- for book_results in updated_els_results.values(): # Combine results for dataframe and json
535
  combined_and_sorted_results.extend(book_results)
536
- combined_and_sorted_results = sort_results(combined_and_sorted_results) # sort combined results
537
 
538
  df = pd.DataFrame(combined_and_sorted_results)
539
  df.index = range(1, len(df) + 1)
540
  df.reset_index(inplace=True)
541
  df.rename(columns={'index': 'Result Number'}, inplace=True)
542
 
543
- for i, result in enumerate(combined_and_sorted_results): # Iterate through the combined list
544
  result['Result Number'] = i + 1
545
 
546
  search_config = {
@@ -561,40 +765,44 @@ with gr.Blocks() as app:
561
 
562
  output_data = {
563
  "search_configuration": search_config,
564
- "results": combined_and_sorted_results # Use the combined list here
565
  }
566
 
567
  json_data = output_data
568
 
569
- # --- Return results ---
570
  combined_most_frequent = "\n".join(
571
- f"{book}: {phrase}" for book, phrase in most_frequent_phrases.items()) # Combine phrases
572
  return df, combined_most_frequent, json_data
573
 
574
-
575
-
576
  # --- Event Triggers ---
577
  round_x.change(update_rounds_combination, inputs=[round_x, round_y], outputs=rounds_combination)
578
  round_y.change(update_rounds_combination, inputs=[round_x, round_y], outputs=rounds_combination)
579
 
580
- selected_date.change(update_date_words, inputs=[selected_date, date_language_input, use_day, use_month, use_year], outputs=[date_words_output])
581
- date_language_input.change(update_date_words, inputs=[selected_date, date_language_input, use_day, use_month, use_year], outputs=[date_words_output])
582
 
583
- gematria_text.change(update_journal_sum, inputs=[gematria_text, date_words_output], outputs=[gematria_result, step, float_step])
584
- date_words_output.change(update_journal_sum, inputs=[gematria_text, date_words_output], outputs=[gematria_result, step, float_step])
585
 
586
  half_step_btn.click(update_step_half, inputs=[float_step], outputs=[step, float_step])
587
  double_step_btn.click(update_step_double, inputs=[float_step], outputs=[step, float_step])
588
 
589
  translate_btn.click(
590
  perform_search,
591
- inputs=[step, rounds_combination, tlang, strip_spaces, strip_in_braces, strip_diacritics_chk, include_torah_chk, include_bible_chk, include_quran_chk, include_hindu_chk, include_tripitaka_chk, gematria_text, date_words_output, selected_date],
592
  outputs=[markdown_output, most_frequent_phrase_output, json_output]
593
  )
594
 
595
  app.load(
596
  update_date_words,
597
- inputs=[selected_date, date_language_input, use_day, use_month, use_year], # Include all 5 inputs
598
  outputs=[date_words_output]
599
  )
600
 
@@ -615,14 +823,13 @@ with gr.Blocks() as app:
615
  )
616
 
617
  def checkbox_behavior(use_day_value, use_month_value):
618
- if use_day_value: # Tick month and year automatically when day is ticked.
619
  return True, True
620
 
621
- return use_month_value, True # return month value unchanged and automatically tick year if month is checked
622
 
623
  use_day.change(checkbox_behavior, inputs=[use_day, use_month], outputs=[use_month, use_year])
624
- use_month.change(checkbox_behavior, inputs=[use_day, use_month], outputs=[use_month, use_year]) #No need for use_day here, day won't be changed by month
625
-
626
 
627
  if __name__ == "__main__":
628
  app.launch(share=False)
 
 
 
1
  import logging
2
+ import os
3
+
4
  logger = logging.getLogger(__name__)
5
  logging.basicConfig(level=logging.INFO)
6
 
 
23
  import sqlite3
24
  from collections import defaultdict
25
  from typing import List, Tuple
26
+ # import rich # Removed rich
27
+ # from fuzzywuzzy import fuzz # Removed fuzzywuzzy
28
  import calendar
29
  import translation_utils
30
  import hashlib
 
33
 
34
  # Create a translator instance *once* globally
35
  translator = GoogleTranslator(source='auto', target='auto')
36
+ LANGUAGES_SUPPORTED = translator.get_supported_languages(as_dict=True)
37
 
38
+ LANGUAGE_CODE_MAP = LANGUAGES_SUPPORTED # Use deep_translator's mapping directly
39
 
40
  # --- Constants ---
41
  DATABASE_FILE = 'gematria.db'
 
49
  if not os.path.exists(ELS_CACHE_DB):
50
  with sqlite3.connect(ELS_CACHE_DB) as conn:
51
  conn.execute('''
52
+ CREATE TABLE IF NOT EXISTS els_cache (
53
  query_hash TEXT PRIMARY KEY,
54
  function_name TEXT,
55
  args TEXT,
 
111
  key = (func.__name__, args, kwargs)
112
  return hashlib.sha256(json.dumps(key).encode()).hexdigest()
113
 
 
114
  def cached_process_json_files(func, *args, **kwargs):
115
  # Create a dictionary to store the parameters
116
  params = {
 
153
  try:
154
  with sqlite3.connect(ELS_CACHE_DB, timeout=DATABASE_TIMEOUT) as conn:
155
  cursor = conn.cursor()
156
+ cursor.execute(
157
+ "INSERT INTO els_cache (query_hash, function_name, args, kwargs, results) VALUES (?, ?, ?, ?, ?)",
158
+ (query_hash, params["function"], params_json, json.dumps({}), json.dumps(results)))
159
  conn.commit()
160
  except sqlite3.Error as e:
161
  logger.error(f"Database error caching results: {e}")
162
 
163
  return results
164
 
 
165
  # --- Helper Functions (from Network app.py) ---
166
  def flatten_text(text: List) -> str:
167
  if isinstance(text, list):
 
184
  phrase_counts = defaultdict(int)
185
  for words, book, chapter, verse, phrase_length, word_position in results:
186
  phrase_counts[words] += 1
187
+ most_frequent_phrase = max(phrase_counts, key=phrase_counts.get) if phrase_counts else None
188
  return most_frequent_phrase
189
 
190
  # --- Functions from BOS app.py ---
191
+ def create_language_dropdown(label, default_value='English', show_label=True):
192
  return gr.Dropdown(
193
+ choices=list(LANGUAGE_CODE_MAP.keys()),
194
  label=label,
195
  value=default_value,
196
  show_label=show_label
 
209
  else:
210
  return None
211
 
212
+ def perform_els_search(step, rounds_combination, tlang, strip_spaces, strip_in_braces, strip_diacritics_chk,
213
+ include_torah, include_bible, include_quran, include_hindu, include_tripitaka):
214
  if step == 0 or rounds_combination == "0,0":
215
  return None
216
 
217
  results = {}
218
  length = 0
219
 
220
+ selected_language_long = tlang
 
221
  tlang = LANGUAGES_SUPPORTED.get(selected_language_long)
222
+ if tlang is None:
223
  tlang = "en"
224
  logger.warning(
225
  f"Unsupported language selected: {selected_language_long}. Defaulting to English (en).")
226
 
 
227
  if include_torah:
228
  logger.debug(
229
  f"Arguments for Torah: {(1, 39, step, rounds_combination, length, tlang, strip_spaces, strip_in_braces, strip_diacritics_chk)}")
 
233
  results["Torah"] = []
234
 
235
  if include_bible:
236
+ results["Bible"] = cached_process_json_files(bible.process_json_files, 40, 66, step, rounds_combination,
237
+ length,
238
  tlang, strip_spaces, strip_in_braces, strip_diacritics_chk)
239
  else:
240
  results["Bible"] = []
241
 
242
  if include_quran:
243
+ results["Quran"] = cached_process_json_files(quran.process_json_files, 1, 114, step, rounds_combination,
244
+ length,
245
  tlang, strip_spaces, strip_in_braces, strip_diacritics_chk)
246
  else:
247
  results["Quran"] = []
248
 
249
  if include_hindu:
250
  results["Rig Veda"] = cached_process_json_files(
251
+ hindu.process_json_files, 1, 10, step, rounds_combination, length, tlang, False, strip_in_braces,
252
+ strip_diacritics_chk)
253
  else:
254
  results["Rig Veda"] = []
255
 
256
  if include_tripitaka:
257
  results["Tripitaka"] = cached_process_json_files(
258
+ tripitaka.process_json_files, 1, 52, step, rounds_combination, length, tlang, strip_spaces,
259
+ strip_in_braces, strip_diacritics_chk)
260
  else:
261
  results["Tripitaka"] = []
262
 
263
  return results
264
 
265
+ def add_24h_projection(results_dict):
266
+ for book_name, results in results_dict.items():
 
 
267
  num_results = len(results)
268
  if num_results > 0:
269
  time_interval = timedelta(minutes=24 * 60 / num_results)
 
275
  current_time = next_time
276
  return results_dict
277
 
 
278
  def add_monthly_projection(results_dict, selected_date):
279
  if selected_date is None:
280
+ return results_dict
281
 
282
+ for book_name, results in results_dict.items():
283
  num_results = len(results)
284
  if num_results > 0:
285
  days_in_month = calendar.monthrange(selected_date.year, selected_date.month)[1]
 
288
  start_datetime = datetime(selected_date.year, selected_date.month, 1)
289
  current_datetime = start_datetime
290
 
 
291
  for i in range(num_results):
292
  next_datetime = current_datetime + timedelta(seconds=seconds_interval)
293
+ current_date = current_datetime.date()
294
  next_date = next_datetime.date()
295
  date_range_str = f"{current_date.strftime('%h %d')} - {next_date.strftime('%h %d')}"
296
  results[i]['Monthly Projection'] = date_range_str
297
+ current_datetime = next_datetime
298
+ current_date = next_datetime.date()
299
  return results_dict
300
 
301
+ def add_yearly_projection(results_dict, selected_date):
 
302
  if selected_date is None:
303
+ return results_dict
304
 
305
+ for book_name, results in results_dict.items():
306
  num_results = len(results)
307
  if num_results > 0:
308
  days_in_year = 366 if calendar.isleap(selected_date.year) else 365
 
311
  start_datetime = datetime(selected_date.year, 1, 1)
312
  current_datetime = start_datetime
313
 
 
314
  for i in range(num_results):
315
  next_datetime = current_datetime + timedelta(seconds=seconds_interval)
316
+ current_date = current_datetime.date()
317
  next_date = next_datetime.date()
318
  date_range_str = f"{current_date.strftime('%b %d')} - {next_date.strftime('%b %d')}"
319
  results[i]['Yearly Projection'] = date_range_str
320
+ current_datetime = next_datetime
321
 
322
  return results_dict
323
 
 
324
  def sort_results(results):
325
  def parse_time(time_str):
326
  try:
327
  hours, minutes = map(int, time_str.split(':'))
328
+ return hours * 60 + minutes
329
  except ValueError:
330
+ return 24 * 60
331
 
332
  return sorted(results, key=lambda x: (
333
+ parse_time(x.get('24h Projection', '23:59').split('-')[0]),
334
+ parse_time(x.get('24h Projection', '23:59').split('-')[1])
335
  ))
336
 
337
+ def extract_rounds_combinations():
338
+ """Extracts unique rounds combinations from the database."""
339
+ combinations = set()
340
+ try:
341
+ with sqlite3.connect(ELS_CACHE_DB) as conn:
342
+ cursor = conn.cursor()
343
+ cursor.execute("SELECT args FROM els_cache")
344
+ all_args = cursor.fetchall()
345
+ for args_tuple in all_args:
346
+ args_str = args_tuple[0]
347
+ try:
348
+ args_json = json.loads(args_str)
349
+ if 'rounds' in args_json:
350
+ combinations.add(args_json['rounds'])
351
+ except json.JSONDecodeError:
352
+ logger.error(f"Could not decode JSON for args: {args_str}")
353
+ except sqlite3.Error as e:
354
+ logger.error(f"Database error: {e}")
355
+ logger.info(f"Found unique rounds combinations: {combinations}")
356
+ return ["All"] + sorted(list(combinations))
357
+
358
+ def update_rounds_dropdown():
359
+ new_choices = extract_rounds_combinations()
360
+ return new_choices
361
+
362
  # --- Main Gradio App ---
363
  with gr.Blocks() as app:
364
+ with gr.Tab("ELS Search"):
365
+ with gr.Column():
366
+ with gr.Row():
367
+ tlang = create_language_dropdown("Target Language for Result Translation", default_value='english')
368
+ selected_date = Calendar(type="datetime", label="Date to investigate (optional)",
369
+ info="Pick a date from the calendar")
370
+ use_day = gr.Checkbox(label="Use Day", info="Check to include day in search", value=True)
371
+ use_month = gr.Checkbox(label="Use Month", info="Check to include month in search", value=True)
372
+ use_year = gr.Checkbox(label="Use Year", info="Check to include year in search", value=True)
373
+ date_language_input = create_language_dropdown(
374
+ "Language of the person/topic (optional) (Date Word Language)", default_value='english')
375
+ with gr.Row():
376
+ gematria_text = gr.Textbox(label="Name and/or Topic (required)",
377
+ value="Hans Albert Einstein Mileva Marity-Einstein")
378
+ date_words_output = gr.Textbox(label="Date in Words Translated (optional)")
379
+ gematria_result = gr.Number(label="Journal Sum")
380
+ # with gr.Row():
381
+
382
+ with gr.Row():
383
+ step = gr.Number(label="Jump Width (Steps) for ELS")
384
+ float_step = gr.Number(visible=False, value=1)
385
+ half_step_btn = gr.Button("Steps / 2")
386
+ double_step_btn = gr.Button("Steps * 2")
387
+
388
+ with gr.Column():
389
+ round_x = gr.Number(label="Round (1)", value=1)
390
+ round_y = gr.Number(label="Round (2)", value=-1)
391
+
392
+ rounds_combination = gr.Textbox(label="Combined Rounds", value="1,-1")
393
+
394
+ with gr.Row():
395
+ include_torah_chk = gr.Checkbox(label="Include Torah", value=True)
396
+ include_bible_chk = gr.Checkbox(label="Include Bible", value=True)
397
+ include_quran_chk = gr.Checkbox(label="Include Quran", value=True)
398
+ include_hindu_chk = gr.Checkbox(label="Include Rigveda", value=False)
399
+ include_tripitaka_chk = gr.Checkbox(label="Include Tripitaka", value=False)
400
+
401
+ strip_spaces = gr.Checkbox(label="Strip Spaces from Books", value=True)
402
+ strip_in_braces = gr.Checkbox(label="Strip Text in Braces from Books", value=True)
403
+ strip_diacritics_chk = gr.Checkbox(label="Strip Diacritics from Books", value=True)
404
+
405
+ translate_btn = gr.Button("Search with ELS")
406
+
407
+ # --- Output Components ---
408
+ markdown_output = gr.Dataframe(label="ELS Results")
409
+ most_frequent_phrase_output = gr.Textbox(label="Most Frequent Phrase in Network Search")
410
+ json_output = gr.JSON(label="JSON Output")
411
+
412
+ with gr.Tab("Cache Database Search"):
413
+ with gr.Column():
414
+ with gr.Row():
415
+ main_book_filter = gr.Dropdown(label="Filter by Main Book",
416
+ choices=["All", "Torah", "Bible", "Quran", "Rig Veda", "Tripitaka"],
417
+ value="All")
418
+ # No choices here, just the label and the initial value
419
+ rounds_filter = gr.Dropdown(label="Filter by Rounds", value="All")
420
+
421
+ with gr.Row():
422
+ search_type = gr.Radio(label="Search by",
423
+ choices=["Text in result_text", "Gematria Sum in results"],
424
+ value="Text in result_text")
425
+ with gr.Row():
426
+ search_term = gr.Textbox(label="Search Term", visible=True)
427
+ gematria_sum_search = gr.Number(label="Gematria Sum", visible=False)
428
+
429
+ with gr.Row():
430
+ search_db_btn = gr.Button("Search Cache Database")
431
+ with gr.Row():
432
+ cache_search_results = gr.JSON(label="Cache Search Results")
433
+
434
+ def update_search_components(search_type):
435
+ if search_type == "Text in result_text":
436
+ return gr.Textbox.update(visible=True), gr.Number.update(visible=False)
437
+ else:
438
+ return gr.Textbox.update(visible=False), gr.Number.update(visible=True)
439
+
440
+
441
+ def search_cache_database(search_type, search_term, gematria_sum_search, main_book_filter, rounds_filter):
442
+ """Searches the cache database based on the selected filters and search term."""
443
+ results = []
444
+ if main_book_filter == "All" and rounds_filter == "All" and not search_term and not gematria_sum_search:
445
+ return results
446
+
447
+ try:
448
+ with sqlite3.connect(ELS_CACHE_DB) as conn:
449
+ cursor = conn.cursor()
450
+
451
+ if search_type == "Text in result_text":
452
+ # Optimization: If only main_book_filter is selected, don't perform a full search
453
+ if main_book_filter != "All" and rounds_filter == "All" and not search_term:
454
+ return results
455
+
456
+ cursor.execute("SELECT * FROM els_cache")
457
+ all_results = cursor.fetchall()
458
+ columns = [desc[0] for desc in cursor.description]
459
+
460
+ for row in all_results:
461
+ row_dict = dict(zip(columns, row))
462
+ args_dict = json.loads(row_dict['args'])
463
+ function_name = row_dict['function_name']
464
+
465
+ # Function name filtering
466
+ include_result = False
467
+ if main_book_filter == "All":
468
+ include_result = True
469
+ elif main_book_filter == "Torah" and function_name == "torah.process_json_files":
470
+ include_result = True
471
+ elif main_book_filter == "Bible" and function_name == "bible.process_json_files":
472
+ include_result = True
473
+ elif main_book_filter == "Quran" and function_name == "quran.process_json_files":
474
+ include_result = True
475
+ elif main_book_filter == "Rig Veda" and function_name == "hindu.process_json_files":
476
+ include_result = True
477
+ elif main_book_filter == "Tripitaka" and function_name == "tripitaka.process_json_files":
478
+ include_result = True
479
+
480
+ if not include_result:
481
+ continue
482
+
483
+ # Rounds filtering
484
+ if rounds_filter != "All" and args_dict.get('rounds') != rounds_filter:
485
+ continue
486
+
487
+ try:
488
+ results_json = json.loads(row_dict['results'])
489
+ for result_entry in results_json:
490
+ if 'result_text' in result_entry and search_term in result_entry['result_text']:
491
+ entry = {
492
+ 'function_name': function_name,
493
+ 'step': args_dict.get('step'),
494
+ 'rounds': args_dict.get('rounds'),
495
+ 'result': result_entry
496
+ }
497
+ results.append(entry)
498
+ except (json.JSONDecodeError, TypeError) as e:
499
+ logger.error(f"Error processing row: {e}")
500
+ continue
501
+
502
+ elif search_type == "Gematria Sum in results":
503
+
504
+ # Optimization: If only main_book_filter is selected, don't perform a full search
505
+ if main_book_filter != "All" and rounds_filter == "All" and not gematria_sum_search:
506
+ return results
507
+
508
+ if not isinstance(gematria_sum_search, (int, float)):
509
+ return results
510
+
511
+ cursor.execute("SELECT * FROM els_cache")
512
+ all_results = cursor.fetchall()
513
+ columns = [desc[0] for desc in cursor.description]
514
+
515
+ for row in all_results:
516
+ row_dict = dict(zip(columns, row))
517
+ args_dict = json.loads(row_dict['args'])
518
+ function_name = row_dict['function_name']
519
+
520
+ # Function name filtering
521
+ include_result = False
522
+ if main_book_filter == "All":
523
+ include_result = True
524
+ elif main_book_filter == "Torah" and function_name == "torah.process_json_files":
525
+ include_result = True
526
+ elif main_book_filter == "Bible" and function_name == "bible.process_json_files":
527
+ include_result = True
528
+ elif main_book_filter == "Quran" and function_name == "quran.process_json_files":
529
+ include_result = True
530
+ elif main_book_filter == "Rig Veda" and function_name == "hindu.process_json_files":
531
+ include_result = True
532
+ elif main_book_filter == "Tripitaka" and function_name == "tripitaka.process_json_files":
533
+ include_result = True
534
+
535
+ if not include_result:
536
+ continue
537
+
538
+ # Rounds filtering
539
+ if rounds_filter != "All" and args_dict.get('rounds') != rounds_filter:
540
+ continue
541
+
542
+ try:
543
+ results_json = json.loads(row_dict['results'])
544
+ for result_entry in results_json:
545
+ if 'result_sum' in result_entry and result_entry[
546
+ 'result_sum'] == gematria_sum_search:
547
+ entry = {
548
+ 'function_name': function_name,
549
+ 'step': args_dict.get('step'),
550
+ 'rounds': args_dict.get('rounds'),
551
+ 'result': result_entry
552
+ }
553
+ results.append(entry)
554
+ except (json.JSONDecodeError, TypeError) as e:
555
+ logger.error(f"Error processing row: {e}")
556
+ continue
557
+
558
+ # Sort results by gematria sum
559
+ results.sort(
560
+ key=lambda x: x['result']['result_sum'] if 'result' in x and 'result_sum' in x['result'] else 0)
561
+ return results
562
+
563
+ except sqlite3.Error as e:
564
+ logger.error(f"Database error: {e}")
565
+ return []
566
+
567
+ def update_search_components(search_type):
568
+ """Updates the visibility of the search term and gematria sum input fields."""
569
+ if search_type == "Text in result_text":
570
+ return {"visible": True, "__type__": "update"}, {"visible": False, "__type__": "update"}
571
+ else:
572
+ return {"visible": False, "__type__": "update"}, {"visible": True, "__type__": "update"}
573
 
574
  # --- Event Handlers ---
575
 
576
+ search_type.change(
577
+ fn=update_search_components,
578
+ inputs=[search_type],
579
+ outputs=[search_term, gematria_sum_search]
580
+ )
581
+
582
+ search_db_btn.click(
583
+ fn=search_cache_database,
584
+ inputs=[search_type, search_term, gematria_sum_search, main_book_filter, rounds_filter],
585
+ outputs=cache_search_results
586
+ )
587
+
588
+
589
+ def update_rounds_choices():
590
+ return gr.update(choices=extract_rounds_combinations())  # gr.update, not gr.Dropdown.update
591
+
592
+ app.load(fn=update_rounds_choices, inputs=None, outputs=rounds_filter)
593
+
594
+ main_book_filter.change(
595
+ fn=update_rounds_choices,
596
+ inputs=None, # No input needed here
597
+ outputs=rounds_filter
598
+ )
599
+
600
+ # rest of the handlers
601
  def update_date_words(selected_date, date_language_input, use_day, use_month, use_year):
602
  if selected_date is None:
603
  return ""
 
618
  else: # Return empty string if no date components are selected
619
  return ""
620
 
 
621
  date_in_words = date_to_words(date_obj)
622
 
 
623
  translator = GoogleTranslator(source='auto', target=date_language_input)
624
  translated_date_words = translator.translate(date_in_words)
625
  return custom_normalize(translated_date_words)
 
639
  new_step = math.ceil(float_step * 2)
640
  return new_step, float_step * 2
641
 
 
642
  def find_closest_phrase(target_phrase, phrases):
643
  best_match = None
644
  best_score = 0
645
 
646
+ logging.debug(f"Target phrase for similarity search: {target_phrase}")
647
 
648
  for phrase, _, _, _, _, _ in phrases:
649
  word_length_diff = abs(len(target_phrase.split()) - len(phrase.split()))
650
  similarity_score = fuzz.ratio(target_phrase, phrase)
651
  combined_score = similarity_score - word_length_diff
652
 
653
+ logging.debug(f"Comparing with phrase: {phrase}")
654
  logging.debug(
655
+ f"Word Length Difference: {word_length_diff}, Similarity Score: {similarity_score}, Combined Score: {combined_score}")
656
 
657
  if combined_score > best_score:
658
  best_score = combined_score
659
  best_match = phrase
660
 
661
+ logging.debug(f"Closest phrase found: {best_match} with score: {best_score}")
662
  return best_match
663
 
664
+ def perform_search(step, rounds_combination, tlang, strip_spaces, strip_in_braces, strip_diacritics_chk,
665
+ include_torah, include_bible, include_quran, include_hindu, include_tripitaka, gematria_text,
666
+ date_words_output, selected_date):
667
  els_results = perform_els_search(step, rounds_combination, tlang, strip_spaces, strip_in_braces,
668
+ strip_diacritics_chk, include_torah, include_bible, include_quran,
669
+ include_hindu,
670
+ include_tripitaka)
671
 
 
672
  most_frequent_phrases = {}
673
+ combined_and_sorted_results = []
674
 
675
  for book_name, book_results in els_results.items():
676
+ if book_results:
677
+ most_frequent_phrases[book_name] = ""
678
 
679
  for result in book_results:
680
  try:
681
+ gematria_sum = calculate_gematria(result['result_text'])
682
  max_words = len(result['result_text'].split())
683
  matching_phrases = search_gematria_in_db(gematria_sum, max_words)
684
  max_words_limit = 20
685
+ while not matching_phrases and max_words < max_words_limit:
686
  max_words += 1
687
  matching_phrases = search_gematria_in_db(gematria_sum, max_words)
688
 
 
691
  most_frequent_phrases[book_name] = most_frequent_phrase
692
  else:
693
  closest_phrase = find_closest_phrase(result['result_text'],
694
+ search_gematria_in_db(gematria_sum, max_words_limit))
695
  most_frequent_phrases[
696
+ book_name] = closest_phrase or ""
697
 
698
  result['Most Frequent Phrase'] = most_frequent_phrases[book_name]
699
  if 'book' in result:
700
+ if isinstance(result['book'], int):
701
  result['book'] = f"{book_name} {result['book']}."
702
  combined_and_sorted_results.append(result)
703
 
 
705
  print(f"DEBUG: KeyError - Key '{e.args[0]}' not found in result. Skipping this result.")
706
  continue
707
 
 
708
  selected_language_long = tlang
709
  tlang_short = LANGUAGES_SUPPORTED.get(selected_language_long)
710
  if tlang_short is None:
711
  tlang_short = "en"
712
  logger.warning(f"Unsupported language selected: {selected_language_long}. Defaulting to English (en).")
713
 
 
714
  phrases_to_translate = []
715
+ phrases_source_langs = []
716
  results_to_translate = []
717
+ results_source_langs = []
718
  for result in combined_and_sorted_results:
719
  phrases_to_translate.append(result.get('Most Frequent Phrase', ''))
 
720
  phrases_source_langs.append("he")
721
  results_to_translate.append(result.get('result_text', ''))
722
  results_source_langs.append(result.get("source_language", "auto"))
723
 
724
+ translated_phrases = translation_utils.batch_translate(phrases_to_translate, tlang_short,
725
+ phrases_source_langs)
726
+ translated_result_texts = translation_utils.batch_translate(results_to_translate, tlang_short,
727
+ results_source_langs)
728
 
729
  for i, result in enumerate(combined_and_sorted_results):
730
  result['translated_text'] = translated_result_texts.get(results_to_translate[i], None)
731
  result['Translated Most Frequent Phrase'] = translated_phrases.get(phrases_to_translate[i], None)
732
 
733
+ updated_els_results = add_24h_projection(els_results)
734
+ updated_els_results = add_monthly_projection(updated_els_results, selected_date)
 
735
  updated_els_results = add_yearly_projection(updated_els_results, selected_date)
736
 
737
  combined_and_sorted_results = []
738
+ for book_results in updated_els_results.values():
739
  combined_and_sorted_results.extend(book_results)
740
+ combined_and_sorted_results = sort_results(combined_and_sorted_results)
741
 
742
  df = pd.DataFrame(combined_and_sorted_results)
743
  df.index = range(1, len(df) + 1)
744
  df.reset_index(inplace=True)
745
  df.rename(columns={'index': 'Result Number'}, inplace=True)
746
 
747
+ for i, result in enumerate(combined_and_sorted_results):
748
  result['Result Number'] = i + 1
749
 
750
  search_config = {
 
765
 
766
  output_data = {
767
  "search_configuration": search_config,
768
+ "results": combined_and_sorted_results
769
  }
770
 
771
  json_data = output_data
772
 
 
773
  combined_most_frequent = "\n".join(
774
+ f"{book}: {phrase}" for book, phrase in most_frequent_phrases.items())
775
  return df, combined_most_frequent, json_data
776
 
 
 
777
  # --- Event Triggers ---
778
  round_x.change(update_rounds_combination, inputs=[round_x, round_y], outputs=rounds_combination)
779
  round_y.change(update_rounds_combination, inputs=[round_x, round_y], outputs=rounds_combination)
780
 
781
+ selected_date.change(update_date_words, inputs=[selected_date, date_language_input, use_day, use_month, use_year],
782
+ outputs=[date_words_output])
783
+ date_language_input.change(update_date_words,
784
+ inputs=[selected_date, date_language_input, use_day, use_month, use_year],
785
+ outputs=[date_words_output])
786
 
787
+ gematria_text.change(update_journal_sum, inputs=[gematria_text, date_words_output],
788
+ outputs=[gematria_result, step, float_step])
789
+ date_words_output.change(update_journal_sum, inputs=[gematria_text, date_words_output],
790
+ outputs=[gematria_result, step, float_step])
791
 
792
  half_step_btn.click(update_step_half, inputs=[float_step], outputs=[step, float_step])
793
  double_step_btn.click(update_step_double, inputs=[float_step], outputs=[step, float_step])
794
 
795
  translate_btn.click(
796
  perform_search,
797
+ inputs=[step, rounds_combination, tlang, strip_spaces, strip_in_braces, strip_diacritics_chk, include_torah_chk,
798
+ include_bible_chk, include_quran_chk, include_hindu_chk, include_tripitaka_chk, gematria_text,
799
+ date_words_output, selected_date],
800
  outputs=[markdown_output, most_frequent_phrase_output, json_output]
801
  )
802
 
803
  app.load(
804
  update_date_words,
805
+ inputs=[selected_date, date_language_input, use_day, use_month, use_year],
806
  outputs=[date_words_output]
807
  )
808
 
 
823
  )
824
 
825
  def checkbox_behavior(use_day_value, use_month_value):
826
+ if use_day_value:
827
  return True, True
828
 
829
+ return use_month_value, True
830
 
831
  use_day.change(checkbox_behavior, inputs=[use_day, use_month], outputs=[use_month, use_year])
832
+ use_month.change(checkbox_behavior, inputs=[use_day, use_month], outputs=[use_month, use_year])
 
833
 
834
  if __name__ == "__main__":
835
  app.launch(share=False)
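
For reference, a minimal sketch (separate from the commit) of the dropdown-refresh pattern the new tab wires up for "Filter by Rounds": the choices are computed at runtime and pushed to the component with gr.update(choices=...), on app.load and again when the main-book filter changes. gr.update is standard Gradio Blocks API; the stand-in choice list and the refresh button below are illustrative only.

import gradio as gr

def load_rounds_choices():
    # Stand-in for extract_rounds_combinations(); app.py derives this list
    # from the rounds values stored in the ELS cache database.
    return ["All", "1,-1", "2,-2"]

def refresh_rounds_dropdown():
    # Returning gr.update(...) replaces the component's choices at runtime.
    return gr.update(choices=load_rounds_choices(), value="All")

with gr.Blocks() as demo:
    rounds_filter = gr.Dropdown(label="Filter by Rounds", choices=["All"], value="All")
    refresh_btn = gr.Button("Refresh rounds")

    # Populate once the UI loads, and again on demand.
    demo.load(fn=refresh_rounds_dropdown, inputs=None, outputs=rounds_filter)
    refresh_btn.click(fn=refresh_rounds_dropdown, inputs=None, outputs=rounds_filter)

if __name__ == "__main__":
    demo.launch()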