neuralworm committed on
Commit
e4ee4df
1 Parent(s): 95fdffd

greek gematria fix, ui fix

Browse files
Files changed (3) hide show
  1. app.py +165 -76
  2. bible.py +50 -19
  3. gematria.py +7 -2
app.py CHANGED
@@ -1,6 +1,7 @@
1
  import logging
 
2
  logger = logging.getLogger(__name__)
3
- logging.basicConfig(level=logging.INFO)
4
 
5
  import gradio as gr
6
  import torah
@@ -12,7 +13,7 @@ from gematria import calculate_gematria, strip_diacritics
12
  import pandas as pd
13
  from deep_translator import GoogleTranslator
14
  from gradio_calendar import Calendar
15
- from datetime import datetime
16
  import math
17
  import json
18
  import re
@@ -24,6 +25,7 @@ from typing import List, Tuple
24
  DATABASE_FILE = 'gematria.db'
25
  MAX_PHRASE_LENGTH_LIMIT = 20
26
 
 
27
  # --- Database Initialization ---
28
  def initialize_database():
29
  global conn
@@ -54,15 +56,18 @@ def initialize_database():
54
  ''')
55
  conn.commit()
56
 
 
57
  # --- Initialize Database ---
58
  initialize_database()
59
 
 
60
  # --- Helper Functions (from Network app.py) ---
61
  def flatten_text(text: List) -> str:
62
  if isinstance(text, list):
63
  return " ".join(flatten_text(item) if isinstance(item, list) else item for item in text)
64
  return text
65
 
 
66
  def search_gematria_in_db(gematria_sum: int, max_words: int) -> List[Tuple[str, str, int, int, int, str]]:
67
  global conn
68
  with sqlite3.connect(DATABASE_FILE) as conn:
@@ -75,6 +80,7 @@ def search_gematria_in_db(gematria_sum: int, max_words: int) -> List[Tuple[str,
75
  results = cursor.fetchall()
76
  return results
77
 
 
78
  def get_most_frequent_phrase(results):
79
  phrase_counts = defaultdict(int)
80
  for words, book, chapter, verse, phrase_length, word_position in results:
@@ -82,16 +88,18 @@ def get_most_frequent_phrase(results):
82
  most_frequent_phrase = max(phrase_counts, key=phrase_counts.get) if phrase_counts else None # Handle empty results
83
  return most_frequent_phrase
84
 
 
85
  # --- Functions from BOS app.py ---
86
  def create_language_dropdown(label, default_value='en', show_label=True):
87
  languages = GoogleTranslator(source='en', target='en').get_supported_languages(as_dict=True)
88
  return gr.Dropdown(
89
- choices=list(languages.keys()),
90
- label=label,
91
- value=default_value,
92
- show_label=show_label
93
  )
94
 
 
95
  def calculate_gematria_sum(text, date_words):
96
  if text or date_words:
97
  combined_input = f"{text} {date_words}"
@@ -104,22 +112,30 @@ def calculate_gematria_sum(text, date_words):
104
  else:
105
  return None
106
 
107
- def perform_els_search(step, rounds_combination, tlang, strip_spaces, strip_in_braces, strip_diacritics_chk, merge_results, include_torah, include_bible, include_quran):
 
 
108
  if step == 0 or rounds_combination == "0,0":
109
  return None
110
-
111
  torah_results = []
112
  bible_results = []
113
  quran_results = []
114
-
115
  if include_torah:
116
- torah_results.extend(torah.process_json_files(1, 39, step, rounds_combination, 0, tlang, strip_spaces, strip_in_braces, strip_diacritics))
117
-
 
 
118
  if include_bible:
119
- bible_results.extend(bible.process_json_files(40, 66, step, rounds_combination, 0, tlang, strip_spaces, strip_in_braces, strip_diacritics))
120
-
 
 
121
  if include_quran:
122
- quran_results.extend(quran.process_json_files(1, 114, step, rounds_combination, 0, tlang, strip_spaces, strip_in_braces, strip_diacritics))
 
 
123
 
124
  if merge_results:
125
  results = []
@@ -136,13 +152,59 @@ def perform_els_search(step, rounds_combination, tlang, strip_spaces, strip_in_b
136
 
137
  return results
138
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
139
  # --- Main Gradio App ---
140
  with gr.Blocks() as app:
 
 
 
 
141
  with gr.Row():
142
  tlang = create_language_dropdown("Target Language for Translation", default_value='english')
143
- selected_date = Calendar(type="datetime", label="Date to investigate (optional)", info="Pick a date from the calendar")
144
- date_language_input = create_language_dropdown("Language of the person/topic (optional) (Date Word Language)", default_value='english')
145
- date_words_output = gr.Textbox(label="Date in Words Translated (optional)")
146
 
147
  with gr.Row():
148
  gematria_text = gr.Textbox(label="Name and/or Topic (required)", value="Hans Albert Einstein")
@@ -150,14 +212,14 @@ with gr.Blocks() as app:
150
 
151
  with gr.Row():
152
  step = gr.Number(label="Jump Width (Steps) for ELS")
153
- float_step = gr.Number(visible=False, value=1)
154
  half_step_btn = gr.Button("Steps / 2")
155
  double_step_btn = gr.Button("Steps * 2")
156
-
157
  with gr.Column():
158
  round_x = gr.Number(label="Round (1)", value=1)
159
  round_y = gr.Number(label="Round (2)", value=-1)
160
-
161
  rounds_combination = gr.Textbox(label="Combined Rounds", value="1,-1")
162
 
163
  with gr.Row():
@@ -165,7 +227,7 @@ with gr.Blocks() as app:
165
  include_bible_chk = gr.Checkbox(label="Include Bible", value=True)
166
  include_quran_chk = gr.Checkbox(label="Include Quran", value=True)
167
  merge_results_chk = gr.Checkbox(label="Merge Results (Torah-Bible-Quran)", value=True)
168
-
169
  strip_spaces = gr.Checkbox(label="Strip Spaces from Books", value=True)
170
  strip_in_braces = gr.Checkbox(label="Strip Text in Braces from Books", value=True)
171
  strip_diacritics_chk = gr.Checkbox(label="Strip Diacritics from Books", value=True)
@@ -175,101 +237,128 @@ with gr.Blocks() as app:
175
  # --- Output Components ---
176
  markdown_output = gr.Dataframe(label="ELS Results")
177
  most_frequent_phrase_output = gr.Textbox(label="Most Frequent Phrase in Network Search")
 
 
 
178
 
179
- # --- Event Handlers ---
180
- def update_date_words(selected_date, date_language_input):
181
- return translate_date_to_words(selected_date, date_language_input)
182
 
 
183
  def update_journal_sum(gematria_text, date_words_output):
184
  sum_value = calculate_gematria_sum(gematria_text, date_words_output)
185
  return sum_value, sum_value, sum_value
186
 
 
187
  def update_rounds_combination(round_x, round_y):
188
  return f"{int(round_x)},{int(round_y)}"
189
 
 
190
  def update_step_half(float_step):
191
  new_step = math.ceil(float_step / 2)
192
  return new_step, float_step / 2
193
 
 
194
  def update_step_double(float_step):
195
  new_step = math.ceil(float_step * 2)
196
  return new_step, float_step * 2
197
 
198
 
199
- def perform_search(step, rounds_combination, tlang, strip_spaces, strip_in_braces, strip_diacritics_chk, merge_results, include_torah, include_bible, include_quran, gematria_text, date_words_output):
200
- els_results = perform_els_search(step, rounds_combination, tlang, strip_spaces, strip_in_braces, strip_diacritics_chk, merge_results, include_torah, include_bible, include_quran)
201
-
202
- # --- Network Search Integration ---
203
- updated_els_results = []
204
- for result in els_results:
205
- print("DEBUG: Result from perform_els_search:", result)
206
- try:
207
- gematria_sum = calculate_gematria(result['result_text'])
208
- except KeyError as e:
209
- print(f"DEBUG: KeyError - Key '{e.args[0]}' not found in result. Skipping this result.")
210
- continue
211
-
212
- max_words = len(result['result_text'].split())
213
- matching_phrases = search_gematria_in_db(gematria_sum, max_words)
214
-
215
- # Iteratively increase max_words if no results are found
216
- max_words_limit = 20 # Set a limit for max_words
217
- while not matching_phrases and max_words < max_words_limit:
218
- max_words += 1
 
 
 
 
 
 
219
  matching_phrases = search_gematria_in_db(gematria_sum, max_words)
220
 
221
- # Find most frequent phrase or first phrase with lowest word count
222
- if matching_phrases:
223
- most_frequent_phrase = get_most_frequent_phrase(matching_phrases)
224
- else:
225
- # Sort initial results by word count and take the first phrase
226
- sorted_results = sorted(search_gematria_in_db(gematria_sum, max_words_limit), key=lambda x: len(x[0].split()))
227
- most_frequent_phrase = sorted_results[0][0] if sorted_results else ""
228
-
229
- # Add most frequent phrase to the result dictionary
230
- result['Most Frequent Phrase'] = most_frequent_phrase
231
-
232
- updated_els_results.append(result)
 
 
 
 
 
 
 
 
 
 
 
233
 
234
  # --- Prepare Dataframe ---
235
- df = pd.DataFrame(updated_els_results)
236
  df.index = range(1, len(df) + 1)
237
  df.reset_index(inplace=True)
238
  df.rename(columns={'index': 'Result Number'}, inplace=True)
239
 
240
- return df, most_frequent_phrase
 
 
 
 
 
 
 
 
 
241
 
242
 
 
 
 
 
 
 
243
 
244
  # --- Event Triggers ---
245
  round_x.change(update_rounds_combination, inputs=[round_x, round_y], outputs=rounds_combination)
246
  round_y.change(update_rounds_combination, inputs=[round_x, round_y], outputs=rounds_combination)
247
 
248
- selected_date.change(update_date_words, inputs=[selected_date, date_language_input], outputs=[date_words_output])
249
- date_language_input.change(update_date_words, inputs=[selected_date, date_language_input], outputs=[date_words_output])
250
-
251
- gematria_text.change(update_journal_sum, inputs=[gematria_text, date_words_output], outputs=[gematria_result, step, float_step])
252
- date_words_output.change(update_journal_sum, inputs=[gematria_text, date_words_output], outputs=[gematria_result, step, float_step])
253
-
254
  half_step_btn.click(update_step_half, inputs=[float_step], outputs=[step, float_step])
255
  double_step_btn.click(update_step_double, inputs=[float_step], outputs=[step, float_step])
256
 
257
  translate_btn.click(
258
  perform_search,
259
- inputs=[step, rounds_combination, tlang, strip_spaces, strip_in_braces, strip_diacritics_chk, merge_results_chk, include_torah_chk, include_bible_chk, include_quran_chk, gematria_text, date_words_output],
260
- outputs=[markdown_output, most_frequent_phrase_output]
 
 
261
  )
262
 
263
- app.load(
264
- update_date_words,
265
- inputs=[selected_date, date_language_input],
266
- outputs=[date_words_output]
267
- )
268
- app.load(
269
- update_journal_sum,
270
- inputs=[gematria_text, date_words_output],
271
- outputs=[gematria_result, step, float_step]
272
  )
273
 
274
  if __name__ == "__main__":
275
- app.launch(share=False)
 
1
  import logging
2
+
3
  logger = logging.getLogger(__name__)
4
+ logging.basicConfig(level=logging.DEBUG)
5
 
6
  import gradio as gr
7
  import torah
 
13
  import pandas as pd
14
  from deep_translator import GoogleTranslator
15
  from gradio_calendar import Calendar
16
+ from datetime import datetime, timedelta
17
  import math
18
  import json
19
  import re
 
25
  DATABASE_FILE = 'gematria.db'
26
  MAX_PHRASE_LENGTH_LIMIT = 20
27
 
28
+
29
  # --- Database Initialization ---
30
  def initialize_database():
31
  global conn
 
56
  ''')
57
  conn.commit()
58
 
59
+
60
  # --- Initialize Database ---
61
  initialize_database()
62
 
63
+
64
  # --- Helper Functions (from Network app.py) ---
65
  def flatten_text(text: List) -> str:
66
  if isinstance(text, list):
67
  return " ".join(flatten_text(item) if isinstance(item, list) else item for item in text)
68
  return text
69
 
70
+
71
  def search_gematria_in_db(gematria_sum: int, max_words: int) -> List[Tuple[str, str, int, int, int, str]]:
72
  global conn
73
  with sqlite3.connect(DATABASE_FILE) as conn:
 
80
  results = cursor.fetchall()
81
  return results
82
 
83
+
84
  def get_most_frequent_phrase(results):
85
  phrase_counts = defaultdict(int)
86
  for words, book, chapter, verse, phrase_length, word_position in results:
 
88
  most_frequent_phrase = max(phrase_counts, key=phrase_counts.get) if phrase_counts else None # Handle empty results
89
  return most_frequent_phrase
90
 
91
+
92
  # --- Functions from BOS app.py ---
93
  def create_language_dropdown(label, default_value='en', show_label=True):
94
  languages = GoogleTranslator(source='en', target='en').get_supported_languages(as_dict=True)
95
  return gr.Dropdown(
96
+ choices=list(languages.keys()),
97
+ label=label,
98
+ value=default_value,
99
+ show_label=show_label
100
  )
101
 
102
+
103
  def calculate_gematria_sum(text, date_words):
104
  if text or date_words:
105
  combined_input = f"{text} {date_words}"
 
112
  else:
113
  return None
114
 
115
+
116
+ def perform_els_search(step, rounds_combination, tlang, strip_spaces, strip_in_braces, strip_diacritics_chk,
117
+ merge_results, include_torah, include_bible, include_quran):
118
  if step == 0 or rounds_combination == "0,0":
119
  return None
120
+
121
  torah_results = []
122
  bible_results = []
123
  quran_results = []
124
+
125
  if include_torah:
126
+ torah_results.extend(
127
+ torah.process_json_files(1, 39, step, rounds_combination, 0, tlang, strip_spaces, strip_in_braces,
128
+ strip_diacritics))
129
+
130
  if include_bible:
131
+ bible_results.extend(
132
+ bible.process_json_files(40, 66, step, rounds_combination, 0, tlang, strip_spaces, strip_in_braces,
133
+ strip_diacritics))
134
+
135
  if include_quran:
136
+ quran_results.extend(
137
+ quran.process_json_files(1, 114, step, rounds_combination, 0, tlang, strip_spaces, strip_in_braces,
138
+ strip_diacritics))
139
 
140
  if merge_results:
141
  results = []
 
152
 
153
  return results
154
 
155
+
156
+ def generate_json_dump(start, end, step, rounds_combination, tlang, strip_spaces, strip_in_braces, strip_diacritics_chk,
157
+ search_phrase, results_df, start_date, end_date):
158
+ """Generates the JSON dump with configuration, date range, and results."""
159
+ config = {
160
+ "Start Book": start,
161
+ "End Book": end,
162
+ "Step": step,
163
+ "Rounds": rounds_combination,
164
+ "Target Language": tlang,
165
+ "Strip Spaces": strip_spaces,
166
+ "Strip Text in Braces": strip_in_braces,
167
+ "Strip Diacritics": strip_diacritics_chk,
168
+ "Search Phrase": search_phrase
169
+ }
170
+ result = {
171
+ "Configuration": config,
172
+ "DateRange": {
173
+ "StartDate": start_date.strftime("%Y-%m-%d"),
174
+ "EndDate": end_date.strftime("%Y-%m-%d")
175
+ },
176
+ "Results": json.loads(results_df.to_json(orient='records', force_ascii=False))
177
+ }
178
+ logger.info(f"Generated JSON dump: {result}")
179
+ return json.dumps(result, indent=4, ensure_ascii=False)
180
+
181
+
182
+ def download_json_file(config_json, step, rounds_combination, strip_spaces, strip_in_braces, strip_diacritics_chk):
183
+ """Downloads the JSON config file with a descriptive name."""
184
+ filename_suffix = ""
185
+ if strip_spaces:
186
+ filename_suffix += "-stSp"
187
+ if strip_in_braces:
188
+ filename_suffix += "-stBr"
189
+ if strip_diacritics_chk:
190
+ filename_suffix += "-stDc"
191
+ file_path = f"step-{step}-rounds-{rounds_combination}{filename_suffix}.json" # Include rounds in filename
192
+ with open(file_path, "w", encoding='utf-8') as file:
193
+ file.write(config_json)
194
+ logger.info(f"Downloaded JSON file to: {file_path}")
195
+ return file_path
196
+
197
+
198
  # --- Main Gradio App ---
199
  with gr.Blocks() as app:
200
+ with gr.Row():
201
+ start_date = Calendar(type="datetime", label="Start Date")
202
+ end_date = Calendar(type="datetime", label="End Date")
203
+
204
  with gr.Row():
205
  tlang = create_language_dropdown("Target Language for Translation", default_value='english')
206
+ date_language_input = create_language_dropdown("Language of the person/topic (optional) (Date Word Language)",
207
+ default_value='english')
 
208
 
209
  with gr.Row():
210
  gematria_text = gr.Textbox(label="Name and/or Topic (required)", value="Hans Albert Einstein")
 
212
 
213
  with gr.Row():
214
  step = gr.Number(label="Jump Width (Steps) for ELS")
215
+ float_step = gr.Number(visible=False, value=1)
216
  half_step_btn = gr.Button("Steps / 2")
217
  double_step_btn = gr.Button("Steps * 2")
218
+
219
  with gr.Column():
220
  round_x = gr.Number(label="Round (1)", value=1)
221
  round_y = gr.Number(label="Round (2)", value=-1)
222
+
223
  rounds_combination = gr.Textbox(label="Combined Rounds", value="1,-1")
224
 
225
  with gr.Row():
 
227
  include_bible_chk = gr.Checkbox(label="Include Bible", value=True)
228
  include_quran_chk = gr.Checkbox(label="Include Quran", value=True)
229
  merge_results_chk = gr.Checkbox(label="Merge Results (Torah-Bible-Quran)", value=True)
230
+
231
  strip_spaces = gr.Checkbox(label="Strip Spaces from Books", value=True)
232
  strip_in_braces = gr.Checkbox(label="Strip Text in Braces from Books", value=True)
233
  strip_diacritics_chk = gr.Checkbox(label="Strip Diacritics from Books", value=True)
 
237
  # --- Output Components ---
238
  markdown_output = gr.Dataframe(label="ELS Results")
239
  most_frequent_phrase_output = gr.Textbox(label="Most Frequent Phrase in Network Search")
240
+ json_output = gr.Textbox(label="JSON Configuration Output")
241
+ json_download_btn = gr.Button("Prepare .json for Download")
242
+ json_file = gr.File(label="Download Config JSON", file_count="single")
243
 
 
 
 
244
 
245
+ # --- Event Handlers ---
246
  def update_journal_sum(gematria_text, date_words_output):
247
  sum_value = calculate_gematria_sum(gematria_text, date_words_output)
248
  return sum_value, sum_value, sum_value
249
 
250
+
251
  def update_rounds_combination(round_x, round_y):
252
  return f"{int(round_x)},{int(round_y)}"
253
 
254
+
255
  def update_step_half(float_step):
256
  new_step = math.ceil(float_step / 2)
257
  return new_step, float_step / 2
258
 
259
+
260
  def update_step_double(float_step):
261
  new_step = math.ceil(float_step * 2)
262
  return new_step, float_step * 2
263
 
264
 
265
+ def perform_search(start_date, end_date, date_language_input, step, rounds_combination, tlang, strip_spaces,
266
+ strip_in_braces, strip_diacritics_chk, merge_results, include_torah, include_bible,
267
+ include_quran, gematria_text):
268
+ all_results = []
269
+ delta = timedelta(days=1)
270
+ current_date = start_date
271
+
272
+ while current_date <= end_date:
273
+ date_words_output = translate_date_to_words(current_date, date_language_input)
274
+ journal_sum, _, _ = update_journal_sum(gematria_text, date_words_output)
275
+ step = journal_sum
276
+
277
+ els_results = perform_els_search(step, rounds_combination, tlang, strip_spaces, strip_in_braces,
278
+ strip_diacritics_chk, merge_results, include_torah, include_bible,
279
+ include_quran)
280
+
281
+ # --- Network Search Integration ---
282
+ updated_els_results = []
283
+ for result in els_results:
284
+ try:
285
+ gematria_sum = calculate_gematria(result['result_text'])
286
+ except KeyError as e:
287
+ print(f"DEBUG: KeyError - Key '{e.args[0]}' not found in result. Skipping this result.")
288
+ continue
289
+
290
+ max_words = len(result['result_text'].split())
291
  matching_phrases = search_gematria_in_db(gematria_sum, max_words)
292
 
293
+ # Iteratively increase max_words if no results are found
294
+ max_words_limit = 20 # Set a limit for max_words
295
+ while not matching_phrases and max_words < max_words_limit:
296
+ max_words += 1
297
+ matching_phrases = search_gematria_in_db(gematria_sum, max_words)
298
+
299
+ # Find most frequent phrase or first phrase with lowest word count
300
+ if matching_phrases:
301
+ most_frequent_phrase = get_most_frequent_phrase(matching_phrases)
302
+ else:
303
+ # Sort initial results by word count and take the first phrase
304
+ sorted_results = sorted(search_gematria_in_db(gematria_sum, max_words_limit),
305
+ key=lambda x: len(x[0].split()))
306
+ most_frequent_phrase = sorted_results[0][0] if sorted_results else ""
307
+
308
+ # Add most frequent phrase, date, and date_words to the result dictionary
309
+ result['Most Frequent Phrase'] = most_frequent_phrase
310
+ result['Date'] = current_date.strftime('%Y-%m-%d')
311
+ result['Date Words'] = date_words_output
312
+ updated_els_results.append(result)
313
+
314
+ all_results.extend(updated_els_results)
315
+ current_date += delta
316
 
317
  # --- Prepare Dataframe ---
318
+ df = pd.DataFrame(all_results)
319
  df.index = range(1, len(df) + 1)
320
  df.reset_index(inplace=True)
321
  df.rename(columns={'index': 'Result Number'}, inplace=True)
322
 
323
+ # Find the most frequent phrase across all dates
324
+ all_phrases = [result['Most Frequent Phrase'] for result in all_results]
325
+ most_frequent_phrase = max(set(all_phrases), key=all_phrases.count) if all_phrases else ""
326
+
327
+ # Generate JSON output
328
+ search_phrase = f"{gematria_text}" # Removed date_words_output as it's now included in each result
329
+ config_json = generate_json_dump(1, 180, step, rounds_combination, tlang, strip_spaces, strip_in_braces,
330
+ strip_diacritics_chk, search_phrase, df, start_date, end_date)
331
+
332
+ return df, most_frequent_phrase, config_json
333
 
334
 
335
+ def handle_json_download(config_json, step, rounds_combination, strip_spaces, strip_in_braces,
336
+ strip_diacritics_chk):
337
+ """Handles the download of the JSON config file."""
338
+ return download_json_file(config_json, step, rounds_combination, strip_spaces, strip_in_braces,
339
+ strip_diacritics_chk)
340
+
341
 
342
  # --- Event Triggers ---
343
  round_x.change(update_rounds_combination, inputs=[round_x, round_y], outputs=rounds_combination)
344
  round_y.change(update_rounds_combination, inputs=[round_x, round_y], outputs=rounds_combination)
345
 
 
 
 
 
 
 
346
  half_step_btn.click(update_step_half, inputs=[float_step], outputs=[step, float_step])
347
  double_step_btn.click(update_step_double, inputs=[float_step], outputs=[step, float_step])
348
 
349
  translate_btn.click(
350
  perform_search,
351
+ inputs=[start_date, end_date, date_language_input, step, rounds_combination, tlang, strip_spaces,
352
+ strip_in_braces, strip_diacritics_chk, merge_results_chk, include_torah_chk, include_bible_chk,
353
+ include_quran_chk, gematria_text],
354
+ outputs=[markdown_output, most_frequent_phrase_output, json_output]
355
  )
356
 
357
+ json_download_btn.click(
358
+ handle_json_download,
359
+ inputs=[json_output, step, rounds_combination, strip_spaces, strip_in_braces, strip_diacritics_chk],
360
+ outputs=[json_file]
 
 
 
 
 
361
  )
362
 
363
  if __name__ == "__main__":
364
+ app.launch(share=False)
bible.py CHANGED
@@ -57,32 +57,62 @@ import os
57
  import re
58
  import csv
59
 
60
- def process_json_files(start=1, end=66, step=1, rounds="1", length=0, tlang="en", strip_spaces=True, strip_in_braces=True, strip_diacritics=True, average_compile=False):
 
 
61
  file_name = "texts/bible/OpenGNT_version3_3.csv"
62
  translator = GoogleTranslator(source='auto', target=tlang)
63
  results = []
64
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
  try:
66
  with open(file_name, 'r', encoding='utf-8') as file:
67
  reader = csv.DictReader(file, delimiter='\t')
68
-
69
  book_texts = {}
70
  current_book = None
71
  for row in reader:
72
- #logger.debug(f"Processing row: {row}")
73
  book = int(row['〔Book|Chapter|Verse〕'].split('|')[0][1:])
74
  if book < start or book > end:
75
- #logger.debug(f"Skipping book {book} (out of range)")
76
  continue
77
  if current_book != book:
78
  current_book = book
79
  book_texts[book] = ""
80
  greek_text = row['〔OGNTk|OGNTu|OGNTa|lexeme|rmac|sn〕']
81
- greek_text = greek_text.split('〔')[1]
82
  greek_text = greek_text.split('|')[0]
83
- #print(greek_text)
84
  book_texts[book] += greek_text + " "
85
- #print(book_texts)
86
  for book, full_text in book_texts.items():
87
  logger.debug(f"Processing book {book}")
88
  clean_text = full_text
@@ -103,7 +133,7 @@ def process_json_files(start=1, end=66, step=1, rounds="1", length=0, tlang="en"
103
  for round_num in map(int, rounds.split(',')):
104
  if not (round_num == 1 and step > text_length) and not (round_num == -1 and step > text_length):
105
  if round_num > 0:
106
- current_position = step - 1
107
  else:
108
  current_position = text_length - 1 if step == 1 else text_length - step
109
 
@@ -133,24 +163,25 @@ def process_json_files(start=1, end=66, step=1, rounds="1", length=0, tlang="en"
133
 
134
  translated_text = translator.translate(result_text) if result_text else ""
135
 
 
 
136
  if result_text:
137
  logger.debug(f"Result for book {book}: {result_text}")
138
- results.append({
139
- "book": book,
140
- "result_text": result_text,
141
- "result_sum": calculate_gematria(result_text),
142
- "translated_text": translated_text
143
- })
 
 
 
144
 
145
  except FileNotFoundError:
146
  results.append({"error": f"File {file_name} not found."})
147
 
148
  return results
149
-
150
-
151
-
152
-
153
- # Tests
154
  test_results = [
155
  #(process_json_files(1, 1, 21, rounds="3", length=0), ""),
156
  #(process_json_files(1, 1, 22, rounds="1", length=0), ""),
 
57
  import re
58
  import csv
59
 
60
+
61
+ def process_json_files(start=1, end=66, step=1, rounds="1", length=0, tlang="en", strip_spaces=True,
62
+ strip_in_braces=True, strip_diacritics=True, average_compile=False):
63
  file_name = "texts/bible/OpenGNT_version3_3.csv"
64
  translator = GoogleTranslator(source='auto', target=tlang)
65
  results = []
66
 
67
+ # Dictionary für die 27 Bücher des Neuen Testaments (Englische Namen)
68
+ nt_books = {
69
+ 40: "Matthew",
70
+ 41: "Mark",
71
+ 42: "Luke",
72
+ 43: "John",
73
+ 44: "Acts",
74
+ 45: "Romans",
75
+ 46: "1. Corinthians",
76
+ 47: "2. Corinthians",
77
+ 48: "Galatians",
78
+ 49: "Ephesians",
79
+ 50: "Philippians",
80
+ 51: "Colossians",
81
+ 52: "1. Thessalonians",
82
+ 53: "2. Thessalonians",
83
+ 54: "1. Timothy",
84
+ 55: "2. Timothy",
85
+ 56: "Titus",
86
+ 57: "Philemon",
87
+ 58: "Hebrews",
88
+ 59: "James",
89
+ 60: "1. Peter",
90
+ 61: "2. Peter",
91
+ 62: "1. John",
92
+ 63: "2. John",
93
+ 64: "3. John",
94
+ 65: "Jude",
95
+ 66: "Revelation"
96
+ }
97
+
98
  try:
99
  with open(file_name, 'r', encoding='utf-8') as file:
100
  reader = csv.DictReader(file, delimiter='\t')
101
+
102
  book_texts = {}
103
  current_book = None
104
  for row in reader:
 
105
  book = int(row['〔Book|Chapter|Verse〕'].split('|')[0][1:])
106
  if book < start or book > end:
 
107
  continue
108
  if current_book != book:
109
  current_book = book
110
  book_texts[book] = ""
111
  greek_text = row['〔OGNTk|OGNTu|OGNTa|lexeme|rmac|sn〕']
112
+ greek_text = greek_text.split('〔')[1]
113
  greek_text = greek_text.split('|')[0]
 
114
  book_texts[book] += greek_text + " "
115
+
116
  for book, full_text in book_texts.items():
117
  logger.debug(f"Processing book {book}")
118
  clean_text = full_text
 
133
  for round_num in map(int, rounds.split(',')):
134
  if not (round_num == 1 and step > text_length) and not (round_num == -1 and step > text_length):
135
  if round_num > 0:
136
+ current_position = step - 1
137
  else:
138
  current_position = text_length - 1 if step == 1 else text_length - step
139
 
 
163
 
164
  translated_text = translator.translate(result_text) if result_text else ""
165
 
166
+ result_sum = calculate_gematria(result_text)
167
+
168
  if result_text:
169
  logger.debug(f"Result for book {book}: {result_text}")
170
+ result = {
171
+ 'book': book, # Use the correct 'book' variable
172
+ 'title': nt_books.get(book, "Unknown Book"), # Get book name from dictionary
173
+ 'result_text': result_text,
174
+ 'result_sum': result_sum, # Make sure result_sum is calculated correctly
175
+ 'translated_text': translated_text
176
+ }
177
+ results.append(result)
178
+
179
 
180
  except FileNotFoundError:
181
  results.append({"error": f"File {file_name} not found."})
182
 
183
  return results
184
+ # Tests
 
 
 
 
185
  test_results = [
186
  #(process_json_files(1, 1, 21, rounds="3", length=0), ""),
187
  #(process_json_files(1, 1, 22, rounds="1", length=0), ""),
gematria.py CHANGED
@@ -1,4 +1,7 @@
1
  import unicodedata
 
 
 
2
 
3
  def strip_diacritics(text):
4
  """
@@ -10,7 +13,7 @@ def strip_diacritics(text):
10
  if unicodedata.category(char) not in ['Mn', 'Cf']:
11
  stripped_text += char
12
  else:
13
- print(f"Info: Diakritisches Zeichen '{char}' wird ignoriert.")
14
  return stripped_text
15
 
16
  def letter_to_value(letter):
@@ -75,6 +78,8 @@ def letter_to_value(letter):
75
  'Σ': 200, 'Τ': 300, 'Υ': 400, 'Φ': 500, 'Χ': 600, 'Ψ': 700, 'Ω': 800, 'Ϡ': 900,
76
  'σ': 200, # Sigma
77
  'ς': 200, # Final Sigma
 
 
78
  }
79
 
80
  # Stelle sicher, dass Diakritika entfernt werden, bevor auf das Wörterbuch zugegriffen wird
@@ -86,7 +91,7 @@ def letter_to_value(letter):
86
  return 0
87
  else:
88
  # Gib eine spezifische Warnung aus, wenn das Zeichen unbekannt ist
89
- print(f"Warnung: Unbekanntes Zeichen '{letter}' ignoriert.")
90
  return 0
91
 
92
 
 
1
  import unicodedata
2
+ import logging
3
+
4
+ logger = logging.getLogger(__name__)
5
 
6
  def strip_diacritics(text):
7
  """
 
13
  if unicodedata.category(char) not in ['Mn', 'Cf']:
14
  stripped_text += char
15
  else:
16
+ logger.info(f"Info: Diakritisches Zeichen '{char}' wird ignoriert.")
17
  return stripped_text
18
 
19
  def letter_to_value(letter):
 
78
  'Σ': 200, 'Τ': 300, 'Υ': 400, 'Φ': 500, 'Χ': 600, 'Ψ': 700, 'Ω': 800, 'Ϡ': 900,
79
  'σ': 200, # Sigma
80
  'ς': 200, # Final Sigma
81
+ 'ϲ': 200, # Lunate Sigma (Greek)
82
+ 'Ϲ': 200, # Uppercase Lunate Sigma (Greek)
83
  }
84
 
85
  # Stelle sicher, dass Diakritika entfernt werden, bevor auf das Wörterbuch zugegriffen wird
 
91
  return 0
92
  else:
93
  # Gib eine spezifische Warnung aus, wenn das Zeichen unbekannt ist
94
+ logger.info(f"Warnung: Unbekanntes Zeichen '{letter}' ignoriert.")
95
  return 0
96
 
97