Spaces:
Sleeping
Sleeping
bartman081523
committed on
Commit
·
26e6a7b
1
Parent(s):
8be00b3
book:unknown fix
Browse files
- .gitignore +1 -0
- app.py +84 -77
- gematria.db +2 -2
.gitignore
CHANGED
@@ -1 +1,2 @@
|
|
1 |
__pycache__/
|
|
|
|
1 |
__pycache__/
|
2 |
+
gematria.db.bak
|
app.py
CHANGED
@@ -10,9 +10,9 @@ from deep_translator import GoogleTranslator, exceptions
|
|
10 |
from urllib.parse import quote_plus
|
11 |
|
12 |
# Set up logging
|
13 |
-
logging.basicConfig(level=logging.
|
14 |
|
15 |
-
# Global variables for database connection, translator and book names
|
16 |
conn = None
|
17 |
translator = None
|
18 |
book_names = {}
|
@@ -29,7 +29,7 @@ def initialize_database():
|
|
29 |
gematria_sum INTEGER,
|
30 |
words TEXT,
|
31 |
translation TEXT,
|
32 |
-
book
|
33 |
chapter INTEGER,
|
34 |
verse INTEGER,
|
35 |
PRIMARY KEY (gematria_sum, book, chapter, verse)
|
@@ -37,7 +37,7 @@ def initialize_database():
|
|
37 |
''')
|
38 |
cursor.execute('''
|
39 |
CREATE TABLE IF NOT EXISTS processed_books (
|
40 |
-
book
|
41 |
max_phrase_length INTEGER
|
42 |
)
|
43 |
''')
|
@@ -50,52 +50,62 @@ def initialize_translator():
|
|
50 |
translator = GoogleTranslator(source='iw', target='en')
|
51 |
logging.info("Translator initialized.")
|
52 |
|
53 |
-
def populate_database(
|
54 |
"""Populates the database with phrases from the Tanach and their Gematria values."""
|
55 |
global conn, book_names
|
56 |
-
logging.info("Populating database...")
|
57 |
cursor = conn.cursor()
|
58 |
|
59 |
-
for book_id,
|
60 |
-
|
61 |
-
cursor.execute('''SELECT max_phrase_length FROM processed_books WHERE book = ?''', (book_id,))
|
62 |
-
result = cursor.fetchone()
|
63 |
-
if result and result[0] >= max_phrase_length:
|
64 |
-
logging.info(f"Skipping book {book_id}: Already processed with max_phrase_length {result[0]}")
|
65 |
-
continue
|
66 |
|
67 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
68 |
|
69 |
-
|
70 |
-
|
71 |
-
|
|
|
|
|
|
|
72 |
|
73 |
-
|
74 |
-
book_names[book_id] = title
|
75 |
|
76 |
-
|
77 |
-
|
78 |
-
if not isinstance(chapter, list):
|
79 |
-
logging.warning(f"Skipping chapter {chapter_id} in book {title} due to invalid format.")
|
80 |
continue
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
99 |
logging.info("Database population complete.")
|
100 |
|
101 |
def insert_phrase_to_db(gematria_sum, phrase_candidate, book, chapter, verse):
|
@@ -104,8 +114,8 @@ def insert_phrase_to_db(gematria_sum, phrase_candidate, book, chapter, verse):
|
|
104 |
cursor = conn.cursor()
|
105 |
try:
|
106 |
cursor.execute('''
|
107 |
-
|
108 |
-
|
109 |
''', (gematria_sum, phrase_candidate, book, chapter, verse))
|
110 |
conn.commit()
|
111 |
logging.debug(f"Inserted phrase: {phrase_candidate} (Gematria: {gematria_sum}) at {book}:{chapter}:{verse}")
|
@@ -126,9 +136,9 @@ def get_translation(phrase):
|
|
126 |
else:
|
127 |
translation = translate_and_store(phrase)
|
128 |
cursor.execute('''
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
''', (translation, phrase))
|
133 |
conn.commit()
|
134 |
return translation
|
@@ -136,7 +146,7 @@ def get_translation(phrase):
|
|
136 |
def translate_and_store(phrase):
|
137 |
"""Translates a Hebrew phrase to English using Google Translate and handles potential errors."""
|
138 |
global translator
|
139 |
-
max_retries = 3
|
140 |
retries = 0
|
141 |
|
142 |
while retries < max_retries:
|
@@ -145,7 +155,7 @@ def translate_and_store(phrase):
|
|
145 |
logging.debug(f"Translated phrase: {translation}")
|
146 |
return translation
|
147 |
except (exceptions.TranslationNotFound, exceptions.NotValidPayload,
|
148 |
-
|
149 |
retries += 1
|
150 |
logging.warning(f"Error translating phrase '{phrase}': {e}. Retrying... ({retries}/{max_retries})")
|
151 |
|
@@ -191,20 +201,19 @@ def gematria_search_interface(phrase):
|
|
191 |
results = []
|
192 |
results.append("<div class='results-container'>")
|
193 |
for book, phrases in results_by_book.items():
|
194 |
-
results.append(f"<h4>Book: {
|
195 |
for words, chapter, verse in phrases:
|
196 |
translation = get_translation(words)
|
197 |
-
|
198 |
-
link = f"https://www.biblegateway.com/passage/?search={quote_plus(book_name_english)}+{chapter}%3A{verse}&version=CJB"
|
199 |
results.append(f"""
|
200 |
-
|
201 |
-
|
202 |
-
|
203 |
-
|
204 |
-
|
205 |
-
|
206 |
-
|
207 |
-
results.append("</div>")
|
208 |
|
209 |
conn.close()
|
210 |
|
@@ -212,33 +221,33 @@ def gematria_search_interface(phrase):
|
|
212 |
style = """
|
213 |
<style>
|
214 |
.results-container {
|
215 |
-
|
216 |
-
|
217 |
-
|
218 |
}
|
219 |
|
220 |
.result-item {
|
221 |
-
|
222 |
-
|
223 |
-
|
224 |
-
|
225 |
}
|
226 |
|
227 |
.hebrew-phrase {
|
228 |
-
|
229 |
-
|
230 |
}
|
231 |
|
232 |
.bible-link {
|
233 |
-
|
234 |
-
|
235 |
-
|
236 |
-
|
237 |
}
|
238 |
</style>
|
239 |
"""
|
240 |
|
241 |
-
return style + "\n".join(results)
|
242 |
|
243 |
def flatten_text(text):
|
244 |
"""Helper function to flatten nested lists into a single list."""
|
@@ -252,10 +261,8 @@ def run_app():
|
|
252 |
initialize_translator()
|
253 |
|
254 |
# Pre-populate the database
|
255 |
-
|
256 |
-
populate_database(
|
257 |
-
tanach_texts = process_json_files(27, 27)
|
258 |
-
populate_database(tanach_texts, max_phrase_length=24)
|
259 |
|
260 |
iface = gr.Interface(
|
261 |
fn=gematria_search_interface,
|
|
|
10 |
from urllib.parse import quote_plus
|
11 |
|
12 |
# Set up logging
|
13 |
+
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
|
14 |
|
15 |
+
# Global variables for database connection, translator, and book names
|
16 |
conn = None
|
17 |
translator = None
|
18 |
book_names = {}
|
|
|
29 |
gematria_sum INTEGER,
|
30 |
words TEXT,
|
31 |
translation TEXT,
|
32 |
+
book TEXT, -- Store book name directly
|
33 |
chapter INTEGER,
|
34 |
verse INTEGER,
|
35 |
PRIMARY KEY (gematria_sum, book, chapter, verse)
|
|
|
37 |
''')
|
38 |
cursor.execute('''
|
39 |
CREATE TABLE IF NOT EXISTS processed_books (
|
40 |
+
book TEXT PRIMARY KEY, -- Store book name directly
|
41 |
max_phrase_length INTEGER
|
42 |
)
|
43 |
''')
|
|
|
50 |
translator = GoogleTranslator(source='iw', target='en')
|
51 |
logging.info("Translator initialized.")
|
52 |
|
53 |
+
def populate_database(start_book, end_book, max_phrase_length=1):
|
54 |
"""Populates the database with phrases from the Tanach and their Gematria values."""
|
55 |
global conn, book_names
|
56 |
+
logging.info(f"Populating database with books from {start_book} to {end_book}...")
|
57 |
cursor = conn.cursor()
|
58 |
|
59 |
+
for book_id in range(start_book, end_book + 1):
|
60 |
+
book_data = process_json_files(book_id, book_id) # Get data for the single book
|
|
|
|
|
|
|
|
|
|
|
61 |
|
62 |
+
# process_json_files returns a dictionary with book_id as key,
|
63 |
+
# so access the book data directly
|
64 |
+
if book_id in book_data:
|
65 |
+
book_data = book_data[book_id]
|
66 |
+
if 'title' not in book_data or not isinstance(book_data['title'], str):
|
67 |
+
logging.warning(f"Skipping book {book_id} due to missing or invalid 'title' field.")
|
68 |
+
continue
|
69 |
+
|
70 |
+
title = book_data['title']
|
71 |
+
book_names[book_id] = title
|
72 |
|
73 |
+
# Check if the book is already processed for this max_phrase_length
|
74 |
+
cursor.execute('''SELECT max_phrase_length FROM processed_books WHERE book = ?''', (title,))
|
75 |
+
result = cursor.fetchone()
|
76 |
+
if result and result[0] >= max_phrase_length:
|
77 |
+
logging.info(f"Skipping book {title}: Already processed with max_phrase_length {result[0]}")
|
78 |
+
continue
|
79 |
|
80 |
+
logging.info(f"Processing book {title} with max_phrase_length {max_phrase_length}")
|
|
|
81 |
|
82 |
+
if 'text' not in book_data or not isinstance(book_data['text'], list):
|
83 |
+
logging.warning(f"Skipping book {book_id} due to missing or invalid 'text' field.")
|
|
|
|
|
84 |
continue
|
85 |
+
|
86 |
+
chapters = book_data['text']
|
87 |
+
for chapter_id, chapter in enumerate(chapters):
|
88 |
+
if not isinstance(chapter, list):
|
89 |
+
logging.warning(f"Skipping chapter {chapter_id} in book {title} due to invalid format.")
|
90 |
+
continue
|
91 |
+
for verse_id, verse in enumerate(chapter):
|
92 |
+
verse_text = flatten_text(verse)
|
93 |
+
# Remove text in square brackets
|
94 |
+
verse_text = re.sub(r'\[.*?\]', '', verse_text)
|
95 |
+
verse_text = re.sub(r"[^\u05D0-\u05EA ]+", "", verse_text)
|
96 |
+
verse_text = re.sub(r" +", " ", verse_text)
|
97 |
+
words = verse_text.split()
|
98 |
+
|
99 |
+
# Iterate through phrases of different lengths
|
100 |
+
for length in range(1, max_phrase_length + 1):
|
101 |
+
for start in range(len(words) - length + 1):
|
102 |
+
phrase_candidate = " ".join(words[start:start + length])
|
103 |
+
gematria_sum = calculate_gematria(phrase_candidate.replace(" ", ""))
|
104 |
+
insert_phrase_to_db(gematria_sum, phrase_candidate, title, chapter_id + 1, verse_id + 1)
|
105 |
+
|
106 |
+
# Mark the book as processed for this max_phrase_length
|
107 |
+
cursor.execute('''INSERT OR REPLACE INTO processed_books (book, max_phrase_length) VALUES (?, ?)''', (title, max_phrase_length))
|
108 |
+
conn.commit()
|
109 |
logging.info("Database population complete.")
|
110 |
|
111 |
def insert_phrase_to_db(gematria_sum, phrase_candidate, book, chapter, verse):
|
|
|
114 |
cursor = conn.cursor()
|
115 |
try:
|
116 |
cursor.execute('''
|
117 |
+
INSERT INTO results (gematria_sum, words, book, chapter, verse)
|
118 |
+
VALUES (?, ?, ?, ?, ?)
|
119 |
''', (gematria_sum, phrase_candidate, book, chapter, verse))
|
120 |
conn.commit()
|
121 |
logging.debug(f"Inserted phrase: {phrase_candidate} (Gematria: {gematria_sum}) at {book}:{chapter}:{verse}")
|
|
|
136 |
else:
|
137 |
translation = translate_and_store(phrase)
|
138 |
cursor.execute('''
|
139 |
+
UPDATE results
|
140 |
+
SET translation = ?
|
141 |
+
WHERE words = ?
|
142 |
''', (translation, phrase))
|
143 |
conn.commit()
|
144 |
return translation
|
|
|
146 |
def translate_and_store(phrase):
|
147 |
"""Translates a Hebrew phrase to English using Google Translate and handles potential errors."""
|
148 |
global translator
|
149 |
+
max_retries = 3
|
150 |
retries = 0
|
151 |
|
152 |
while retries < max_retries:
|
|
|
155 |
logging.debug(f"Translated phrase: {translation}")
|
156 |
return translation
|
157 |
except (exceptions.TranslationNotFound, exceptions.NotValidPayload,
|
158 |
+
exceptions.ServerException, exceptions.RequestError, requests.exceptions.ConnectionError) as e:
|
159 |
retries += 1
|
160 |
logging.warning(f"Error translating phrase '{phrase}': {e}. Retrying... ({retries}/{max_retries})")
|
161 |
|
|
|
201 |
results = []
|
202 |
results.append("<div class='results-container'>")
|
203 |
for book, phrases in results_by_book.items():
|
204 |
+
results.append(f"<h4>Book: {book}</h4>") # Directly display book name
|
205 |
for words, chapter, verse in phrases:
|
206 |
translation = get_translation(words)
|
207 |
+
link = f"https://www.biblegateway.com/passage/?search={quote_plus(book)}+{chapter}%3A{verse}&version=CJB"
|
|
|
208 |
results.append(f"""
|
209 |
+
<div class='result-item'>
|
210 |
+
<p>Chapter: {chapter}, Verse: {verse}</p>
|
211 |
+
<p class='hebrew-phrase'>Hebrew Phrase: {words}</p>
|
212 |
+
<p>Translation: {translation}</p>
|
213 |
+
<a href='{link}' target='_blank' class='bible-link'>[See on Bible Gateway]</a>
|
214 |
+
</div>
|
215 |
+
""")
|
216 |
+
results.append("</div>") # Close results-container div
|
217 |
|
218 |
conn.close()
|
219 |
|
|
|
221 |
style = """
|
222 |
<style>
|
223 |
.results-container {
|
224 |
+
display: grid;
|
225 |
+
grid-template-columns: repeat(auto-fit, minmax(300px, 1fr));
|
226 |
+
gap: 20px;
|
227 |
}
|
228 |
|
229 |
.result-item {
|
230 |
+
border: 1px solid #ccc;
|
231 |
+
padding: 15px;
|
232 |
+
border-radius: 5px;
|
233 |
+
box-shadow: 2px 2px 5px rgba(0, 0, 0, 0.1);
|
234 |
}
|
235 |
|
236 |
.hebrew-phrase {
|
237 |
+
font-family: 'SBL Hebrew', 'Ezra SIL', serif;
|
238 |
+
direction: rtl;
|
239 |
}
|
240 |
|
241 |
.bible-link {
|
242 |
+
display: block;
|
243 |
+
margin-top: 10px;
|
244 |
+
color: #007bff;
|
245 |
+
text-decoration: none;
|
246 |
}
|
247 |
</style>
|
248 |
"""
|
249 |
|
250 |
+
return style + "\n".join(results)
|
251 |
|
252 |
def flatten_text(text):
|
253 |
"""Helper function to flatten nested lists into a single list."""
|
|
|
261 |
initialize_translator()
|
262 |
|
263 |
# Pre-populate the database
|
264 |
+
populate_database(1, 39, max_phrase_length=1) # Books 1 to 39 (adjust as needed)
|
265 |
+
#populate_database(27, 27, max_phrase_length=1) # Book 27 (Psalms) - adjust as needed
|
|
|
|
|
266 |
|
267 |
iface = gr.Interface(
|
268 |
fn=gematria_search_interface,
|
gematria.db
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ee62e36845670369178014d353e35dcc219749ff181873d7af325410479a4537
|
3 |
+
size 18497536
|