bartman081523 committed on
Commit
26e6a7b
·
1 Parent(s): 8be00b3

book:unknown fix

Browse files
Files changed (3) hide show
  1. .gitignore +1 -0
  2. app.py +84 -77
  3. gematria.db +2 -2
.gitignore CHANGED
@@ -1 +1,2 @@
1
  __pycache__/
 
 
1
  __pycache__/
2
+ gematria.db.bak
app.py CHANGED
@@ -10,9 +10,9 @@ from deep_translator import GoogleTranslator, exceptions
10
  from urllib.parse import quote_plus
11
 
12
  # Set up logging
13
- logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
14
 
15
- # Global variables for database connection, translator and book names
16
  conn = None
17
  translator = None
18
  book_names = {}
@@ -29,7 +29,7 @@ def initialize_database():
29
  gematria_sum INTEGER,
30
  words TEXT,
31
  translation TEXT,
32
- book INTEGER,
33
  chapter INTEGER,
34
  verse INTEGER,
35
  PRIMARY KEY (gematria_sum, book, chapter, verse)
@@ -37,7 +37,7 @@ def initialize_database():
37
  ''')
38
  cursor.execute('''
39
  CREATE TABLE IF NOT EXISTS processed_books (
40
- book INTEGER PRIMARY KEY,
41
  max_phrase_length INTEGER
42
  )
43
  ''')
@@ -50,52 +50,62 @@ def initialize_translator():
50
  translator = GoogleTranslator(source='iw', target='en')
51
  logging.info("Translator initialized.")
52
 
53
- def populate_database(tanach_texts, max_phrase_length=1):
54
  """Populates the database with phrases from the Tanach and their Gematria values."""
55
  global conn, book_names
56
- logging.info("Populating database...")
57
  cursor = conn.cursor()
58
 
59
- for book_id, book_data in tanach_texts.items():
60
- # Check if the book is already processed for this max_phrase_length
61
- cursor.execute('''SELECT max_phrase_length FROM processed_books WHERE book = ?''', (book_id,))
62
- result = cursor.fetchone()
63
- if result and result[0] >= max_phrase_length:
64
- logging.info(f"Skipping book {book_id}: Already processed with max_phrase_length {result[0]}")
65
- continue
66
 
67
- logging.info(f"Processing book {book_id} with max_phrase_length {max_phrase_length}")
 
 
 
 
 
 
 
 
 
68
 
69
- if 'text' not in book_data or not isinstance(book_data['text'], list):
70
- logging.warning(f"Skipping book {book_id} due to missing or invalid 'text' field.")
71
- continue
 
 
 
72
 
73
- title = book_data.get('title', 'Unknown')
74
- book_names[book_id] = title
75
 
76
- chapters = book_data['text']
77
- for chapter_id, chapter in enumerate(chapters):
78
- if not isinstance(chapter, list):
79
- logging.warning(f"Skipping chapter {chapter_id} in book {title} due to invalid format.")
80
  continue
81
- for verse_id, verse in enumerate(chapter):
82
- verse_text = flatten_text(verse)
83
- # Remove text in square brackets
84
- verse_text = re.sub(r'\[.*?\]', '', verse_text)
85
- verse_text = re.sub(r"[^\u05D0-\u05EA ]+", "", verse_text)
86
- verse_text = re.sub(r" +", " ", verse_text)
87
- words = verse_text.split()
88
-
89
- # Iterate through phrases of different lengths
90
- for length in range(1, max_phrase_length + 1):
91
- for start in range(len(words) - length + 1):
92
- phrase_candidate = " ".join(words[start:start + length])
93
- gematria_sum = calculate_gematria(phrase_candidate.replace(" ", ""))
94
- insert_phrase_to_db(gematria_sum, phrase_candidate, book_id, chapter_id + 1, verse_id + 1)
95
-
96
- # Mark the book as processed for this max_phrase_length
97
- cursor.execute('''INSERT OR REPLACE INTO processed_books (book, max_phrase_length) VALUES (?, ?)''', (book_id, max_phrase_length))
98
- conn.commit()
 
 
 
 
 
 
99
  logging.info("Database population complete.")
100
 
101
  def insert_phrase_to_db(gematria_sum, phrase_candidate, book, chapter, verse):
@@ -104,8 +114,8 @@ def insert_phrase_to_db(gematria_sum, phrase_candidate, book, chapter, verse):
104
  cursor = conn.cursor()
105
  try:
106
  cursor.execute('''
107
- INSERT INTO results (gematria_sum, words, book, chapter, verse)
108
- VALUES (?, ?, ?, ?, ?)
109
  ''', (gematria_sum, phrase_candidate, book, chapter, verse))
110
  conn.commit()
111
  logging.debug(f"Inserted phrase: {phrase_candidate} (Gematria: {gematria_sum}) at {book}:{chapter}:{verse}")
@@ -126,9 +136,9 @@ def get_translation(phrase):
126
  else:
127
  translation = translate_and_store(phrase)
128
  cursor.execute('''
129
- UPDATE results
130
- SET translation = ?
131
- WHERE words = ?
132
  ''', (translation, phrase))
133
  conn.commit()
134
  return translation
@@ -136,7 +146,7 @@ def get_translation(phrase):
136
  def translate_and_store(phrase):
137
  """Translates a Hebrew phrase to English using Google Translate and handles potential errors."""
138
  global translator
139
- max_retries = 3
140
  retries = 0
141
 
142
  while retries < max_retries:
@@ -145,7 +155,7 @@ def translate_and_store(phrase):
145
  logging.debug(f"Translated phrase: {translation}")
146
  return translation
147
  except (exceptions.TranslationNotFound, exceptions.NotValidPayload,
148
- exceptions.ServerException, exceptions.RequestError, requests.exceptions.ConnectionError) as e:
149
  retries += 1
150
  logging.warning(f"Error translating phrase '{phrase}': {e}. Retrying... ({retries}/{max_retries})")
151
 
@@ -191,20 +201,19 @@ def gematria_search_interface(phrase):
191
  results = []
192
  results.append("<div class='results-container'>")
193
  for book, phrases in results_by_book.items():
194
- results.append(f"<h4>Book: {book_names.get(book, 'Unknown')}</h4>")
195
  for words, chapter, verse in phrases:
196
  translation = get_translation(words)
197
- book_name_english = book_names.get(book, 'Unknown')
198
- link = f"https://www.biblegateway.com/passage/?search={quote_plus(book_name_english)}+{chapter}%3A{verse}&version=CJB"
199
  results.append(f"""
200
- <div class='result-item'>
201
- <p>Chapter: {chapter}, Verse: {verse}</p>
202
- <p class='hebrew-phrase'>Hebrew Phrase: {words}</p>
203
- <p>Translation: {translation}</p>
204
- <a href='{link}' target='_blank' class='bible-link'>[See on Bible Gateway]</a>
205
- </div>
206
- """)
207
- results.append("</div>") # Close results-container div
208
 
209
  conn.close()
210
 
@@ -212,33 +221,33 @@ def gematria_search_interface(phrase):
212
  style = """
213
  <style>
214
  .results-container {
215
- display: grid;
216
- grid-template-columns: repeat(auto-fit, minmax(300px, 1fr));
217
- gap: 20px;
218
  }
219
 
220
  .result-item {
221
- border: 1px solid #ccc;
222
- padding: 15px;
223
- border-radius: 5px;
224
- box-shadow: 2px 2px 5px rgba(0, 0, 0, 0.1);
225
  }
226
 
227
  .hebrew-phrase {
228
- font-family: 'SBL Hebrew', 'Ezra SIL', serif;
229
- direction: rtl;
230
  }
231
 
232
  .bible-link {
233
- display: block;
234
- margin-top: 10px;
235
- color: #007bff;
236
- text-decoration: none;
237
  }
238
  </style>
239
  """
240
 
241
- return style + "\n".join(results)
242
 
243
  def flatten_text(text):
244
  """Helper function to flatten nested lists into a single list."""
@@ -252,10 +261,8 @@ def run_app():
252
  initialize_translator()
253
 
254
  # Pre-populate the database
255
- tanach_texts = process_json_files(1, 39)
256
- populate_database(tanach_texts, max_phrase_length=5)
257
- tanach_texts = process_json_files(27, 27)
258
- populate_database(tanach_texts, max_phrase_length=24)
259
 
260
  iface = gr.Interface(
261
  fn=gematria_search_interface,
 
10
  from urllib.parse import quote_plus
11
 
12
  # Set up logging
13
+ logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
14
 
15
+ # Global variables for database connection, translator, and book names
16
  conn = None
17
  translator = None
18
  book_names = {}
 
29
  gematria_sum INTEGER,
30
  words TEXT,
31
  translation TEXT,
32
+ book TEXT, -- Store book name directly
33
  chapter INTEGER,
34
  verse INTEGER,
35
  PRIMARY KEY (gematria_sum, book, chapter, verse)
 
37
  ''')
38
  cursor.execute('''
39
  CREATE TABLE IF NOT EXISTS processed_books (
40
+ book TEXT PRIMARY KEY, -- Store book name directly
41
  max_phrase_length INTEGER
42
  )
43
  ''')
 
50
  translator = GoogleTranslator(source='iw', target='en')
51
  logging.info("Translator initialized.")
52
 
53
+ def populate_database(start_book, end_book, max_phrase_length=1):
54
  """Populates the database with phrases from the Tanach and their Gematria values."""
55
  global conn, book_names
56
+ logging.info(f"Populating database with books from {start_book} to {end_book}...")
57
  cursor = conn.cursor()
58
 
59
+ for book_id in range(start_book, end_book + 1):
60
+ book_data = process_json_files(book_id, book_id) # Get data for the single book
 
 
 
 
 
61
 
62
+ # process_json_files returns a dictionary with book_id as key,
63
+ # so access the book data directly
64
+ if book_id in book_data:
65
+ book_data = book_data[book_id]
66
+ if 'title' not in book_data or not isinstance(book_data['title'], str):
67
+ logging.warning(f"Skipping book {book_id} due to missing or invalid 'title' field.")
68
+ continue
69
+
70
+ title = book_data['title']
71
+ book_names[book_id] = title
72
 
73
+ # Check if the book is already processed for this max_phrase_length
74
+ cursor.execute('''SELECT max_phrase_length FROM processed_books WHERE book = ?''', (title,))
75
+ result = cursor.fetchone()
76
+ if result and result[0] >= max_phrase_length:
77
+ logging.info(f"Skipping book {title}: Already processed with max_phrase_length {result[0]}")
78
+ continue
79
 
80
+ logging.info(f"Processing book {title} with max_phrase_length {max_phrase_length}")
 
81
 
82
+ if 'text' not in book_data or not isinstance(book_data['text'], list):
83
+ logging.warning(f"Skipping book {book_id} due to missing or invalid 'text' field.")
 
 
84
  continue
85
+
86
+ chapters = book_data['text']
87
+ for chapter_id, chapter in enumerate(chapters):
88
+ if not isinstance(chapter, list):
89
+ logging.warning(f"Skipping chapter {chapter_id} in book {title} due to invalid format.")
90
+ continue
91
+ for verse_id, verse in enumerate(chapter):
92
+ verse_text = flatten_text(verse)
93
+ # Remove text in square brackets
94
+ verse_text = re.sub(r'\[.*?\]', '', verse_text)
95
+ verse_text = re.sub(r"[^\u05D0-\u05EA ]+", "", verse_text)
96
+ verse_text = re.sub(r" +", " ", verse_text)
97
+ words = verse_text.split()
98
+
99
+ # Iterate through phrases of different lengths
100
+ for length in range(1, max_phrase_length + 1):
101
+ for start in range(len(words) - length + 1):
102
+ phrase_candidate = " ".join(words[start:start + length])
103
+ gematria_sum = calculate_gematria(phrase_candidate.replace(" ", ""))
104
+ insert_phrase_to_db(gematria_sum, phrase_candidate, title, chapter_id + 1, verse_id + 1)
105
+
106
+ # Mark the book as processed for this max_phrase_length
107
+ cursor.execute('''INSERT OR REPLACE INTO processed_books (book, max_phrase_length) VALUES (?, ?)''', (title, max_phrase_length))
108
+ conn.commit()
109
  logging.info("Database population complete.")
110
 
111
  def insert_phrase_to_db(gematria_sum, phrase_candidate, book, chapter, verse):
 
114
  cursor = conn.cursor()
115
  try:
116
  cursor.execute('''
117
+ INSERT INTO results (gematria_sum, words, book, chapter, verse)
118
+ VALUES (?, ?, ?, ?, ?)
119
  ''', (gematria_sum, phrase_candidate, book, chapter, verse))
120
  conn.commit()
121
  logging.debug(f"Inserted phrase: {phrase_candidate} (Gematria: {gematria_sum}) at {book}:{chapter}:{verse}")
 
136
  else:
137
  translation = translate_and_store(phrase)
138
  cursor.execute('''
139
+ UPDATE results
140
+ SET translation = ?
141
+ WHERE words = ?
142
  ''', (translation, phrase))
143
  conn.commit()
144
  return translation
 
146
  def translate_and_store(phrase):
147
  """Translates a Hebrew phrase to English using Google Translate and handles potential errors."""
148
  global translator
149
+ max_retries = 3
150
  retries = 0
151
 
152
  while retries < max_retries:
 
155
  logging.debug(f"Translated phrase: {translation}")
156
  return translation
157
  except (exceptions.TranslationNotFound, exceptions.NotValidPayload,
158
+ exceptions.ServerException, exceptions.RequestError, requests.exceptions.ConnectionError) as e:
159
  retries += 1
160
  logging.warning(f"Error translating phrase '{phrase}': {e}. Retrying... ({retries}/{max_retries})")
161
 
 
201
  results = []
202
  results.append("<div class='results-container'>")
203
  for book, phrases in results_by_book.items():
204
+ results.append(f"<h4>Book: {book}</h4>") # Directly display book name
205
  for words, chapter, verse in phrases:
206
  translation = get_translation(words)
207
+ link = f"https://www.biblegateway.com/passage/?search={quote_plus(book)}+{chapter}%3A{verse}&version=CJB"
 
208
  results.append(f"""
209
+ <div class='result-item'>
210
+ <p>Chapter: {chapter}, Verse: {verse}</p>
211
+ <p class='hebrew-phrase'>Hebrew Phrase: {words}</p>
212
+ <p>Translation: {translation}</p>
213
+ <a href='{link}' target='_blank' class='bible-link'>[See on Bible Gateway]</a>
214
+ </div>
215
+ """)
216
+ results.append("</div>") # Close results-container div
217
 
218
  conn.close()
219
 
 
221
  style = """
222
  <style>
223
  .results-container {
224
+ display: grid;
225
+ grid-template-columns: repeat(auto-fit, minmax(300px, 1fr));
226
+ gap: 20px;
227
  }
228
 
229
  .result-item {
230
+ border: 1px solid #ccc;
231
+ padding: 15px;
232
+ border-radius: 5px;
233
+ box-shadow: 2px 2px 5px rgba(0, 0, 0, 0.1);
234
  }
235
 
236
  .hebrew-phrase {
237
+ font-family: 'SBL Hebrew', 'Ezra SIL', serif;
238
+ direction: rtl;
239
  }
240
 
241
  .bible-link {
242
+ display: block;
243
+ margin-top: 10px;
244
+ color: #007bff;
245
+ text-decoration: none;
246
  }
247
  </style>
248
  """
249
 
250
+ return style + "\n".join(results)
251
 
252
  def flatten_text(text):
253
  """Helper function to flatten nested lists into a single list."""
 
261
  initialize_translator()
262
 
263
  # Pre-populate the database
264
+ populate_database(1, 39, max_phrase_length=1) # Books 1 to 39 (adjust as needed)
265
+ #populate_database(27, 27, max_phrase_length=1) # Book 27 (Psalms) - adjust as needed
 
 
266
 
267
  iface = gr.Interface(
268
  fn=gematria_search_interface,
gematria.db CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:06faa206f211a61ddf8609791d575035fba1f81f092b9b4a5a30a24d7d4bc2f0
3
- size 69844992
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee62e36845670369178014d353e35dcc219749ff181873d7af325410479a4537
3
+ size 18497536