Spaces:
Sleeping
Sleeping
neuralworm
committed on
Commit
•
56768f9
1
Parent(s):
e594864
Upload torah.py
Browse files
torah.py
ADDED
@@ -0,0 +1,178 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import logging
|
2 |
+
logger = logging.getLogger(__name__)
|
3 |
+
|
4 |
+
import json
|
5 |
+
import os
|
6 |
+
import re
|
7 |
+
from deep_translator import GoogleTranslator
|
8 |
+
from gematria import calculate_gematria
|
9 |
+
import math
|
10 |
+
|
11 |
+
# Hebrew gematria values for relevant characters
|
12 |
+
# Letter -> numeric value table. The five final forms (ך ם ן ף ץ) carry the
# extended values 500-900, so all 27 values are distinct.
# NOTE: the keys were restored from mis-decoded (mojibake) text; several
# letters had collapsed into one identical key, which silently dropped
# entries from the dictionary.
gematria_values = {
    'א': 1, 'ב': 2, 'ג': 3, 'ד': 4, 'ה': 5, 'ו': 6, 'ז': 7, 'ח': 8, 'ט': 9,
    'י': 10, 'כ': 20, 'ך': 500, 'ל': 30, 'מ': 40, 'ם': 600, 'נ': 50, 'ן': 700,
    'ס': 60, 'ע': 70, 'פ': 80, 'ף': 800, 'צ': 90, 'ץ': 900, 'ק': 100,
    'ר': 200, 'ש': 300, 'ת': 400
}

# Reverse dictionary for converting gematria values back to Hebrew characters.
# Safe to invert because every value in gematria_values is unique.
reverse_gematria_values = {v: k for k, v in gematria_values.items()}
|
21 |
+
|
22 |
+
# Function to convert a Hebrew string to its gematria values
|
23 |
+
def string_to_gematria(s):
    """Return the gematria value of each character of ``s``, in order.

    Characters that have no entry in ``gematria_values`` are reported as 0,
    so the returned list always has the same length as ``s``.
    """
    values = []
    for letter in s:
        values.append(gematria_values.get(letter, 0))
    return values
|
25 |
+
|
26 |
+
# Function to convert a single gematria value to Hebrew characters
|
27 |
+
def gematria_to_string(value):
    """Spell ``value`` as Hebrew letters using a greedy decomposition.

    The largest letter value that still fits is emitted repeatedly before
    moving on to the next smaller one. Values below the smallest table entry
    (including 0) produce an empty string.
    """
    letters = []
    remaining = value
    # Keys are unique, so ordering the (value, letter) pairs in descending
    # order walks the table from the largest letter value to the smallest.
    for amount, letter in sorted(reverse_gematria_values.items(), reverse=True):
        while remaining >= amount:
            letters.append(letter)
            remaining -= amount
    return ''.join(letters)
|
34 |
+
|
35 |
+
# Function to calculate the average gematria values of corresponding characters and convert them to Hebrew characters
|
36 |
+
def average_gematria(str1, str2):
    """Average two strings position by position and spell the result in Hebrew.

    Both strings are converted to per-character gematria values; the shorter
    sequence is padded with zeros. Each pair of values is averaged, rounded up
    with ``math.ceil``, converted back to letters with ``gematria_to_string``
    and the pieces are concatenated.
    """
    left_values = string_to_gematria(str1)
    right_values = string_to_gematria(str2)

    # Zero-pad the shorter sequence so every position has a partner.
    width = max(len(left_values), len(right_values))
    left_values += [0] * (width - len(left_values))
    right_values += [0] * (width - len(right_values))

    pieces = []
    for left, right in zip(left_values, right_values):
        rounded_mean = math.ceil((left + right) / 2)
        pieces.append(gematria_to_string(rounded_mean))
    return ''.join(pieces)
|
51 |
+
|
52 |
+
def _clean_source_text(text, strip_spaces, strip_in_braces, strip_diacritics):
    """Apply the optional clean-up passes to the raw text of one book."""
    if strip_in_braces:
        # Drop every [...] section, including ones that span line breaks.
        text = re.sub(r"\[.*?\]", "", text, flags=re.DOTALL)
    if strip_diacritics:
        # Keep only the letters U+05D0..U+05EA and plain spaces.
        text = re.sub(r"[^\u05D0-\u05EA ]+", "", text)
    if strip_spaces:
        return text.replace(" ", "")
    # Collapse runs of spaces left behind by the removals above.
    return re.sub(r" {2,}", " ", text)


def _select_round_characters(text, step, round_num):
    """Pick every ``step``-th character of ``text`` for ``abs(round_num)`` passes.

    Returns ``None`` when the round must be skipped entirely (empty text, or a
    single pass whose step is longer than the text), otherwise the selected
    characters. A negative ``round_num`` walks the text backwards.
    """
    text_length = len(text)
    if text_length == 0:
        # Nothing to select from; also avoids a modulo-by-zero below.
        return None
    if abs(round_num) == 1 and step > text_length:
        return None

    forward = round_num > 0
    position = step - 1 if forward else text_length - step
    completed_rounds = 0
    picked = []

    while completed_rounds < abs(round_num):
        picked.append(text[position % text_length])
        if forward:
            position += step
            if position >= text_length * (completed_rounds + 1):
                completed_rounds += 1
        else:
            position -= step
            # Mirror of the forward test: pass k ends once the cursor has
            # moved k full text lengths below zero. Comparing against plain 0
            # would end every pass after the first after a single character.
            if position < -text_length * completed_rounds:
                completed_rounds += 1

    return ''.join(picked)


def process_json_files(start, end, step, rounds="1", length=0, tlang="en", strip_spaces=True, strip_in_braces=True, strip_diacritics=True, average_compile=False):
    """Select every ``step``-th letter from a range of book files and translate it.

    For each book number in ``start..end`` (inclusive) the file
    ``texts/torah/NN.json`` is loaded. Its ``"text"`` entry is read as a
    sequence of blocks, each block a sequence of strings; the strings of a
    block are joined with spaces and the blocks are concatenated.

    Args:
        start: First book number.
        end: Last book number (inclusive).
        step: Distance between selected characters; must be at least 1.
        rounds: Comma-separated integers. Each number requests that many
            passes over the text; negative numbers walk backwards.
        length: If non-zero, truncate the selected text to this many
            characters.
        tlang: Target language passed to ``GoogleTranslator``.
        strip_spaces: Remove all spaces before selecting.
        strip_in_braces: Remove ``[...]`` sections before selecting.
        strip_diacritics: Remove everything except U+05D0..U+05EA and spaces.
        average_compile: When more than one round produced output, combine
            rounds with ``average_gematria`` instead of concatenating them.

    Returns:
        A list of dicts. Successful books yield ``book``, ``title``,
        ``result_text``, ``result_sum`` and ``translated_text``; books that
        could not be read yield a single ``error`` entry. Books whose
        selection is empty are omitted.

    Raises:
        ValueError: If ``step`` is smaller than 1 (the selection cursor would
            never advance past the end of the text), or if ``rounds``
            contains a non-integer entry.
    """
    if step < 1:
        raise ValueError("step must be a positive integer")

    base_path = "texts/torah"
    translator = GoogleTranslator(source='auto', target=tlang)
    results = []

    for book in range(start, end + 1):
        file_name = f"{base_path}/{book:02}.json"
        try:
            with open(file_name, 'r', encoding='utf-8') as file:
                data = json.load(file)
            text_blocks = data["text"]

            full_text = ''.join(' '.join(block) for block in text_blocks)
            clean_text = _clean_source_text(full_text, strip_spaces, strip_in_braces, strip_diacritics)

            selected_characters_per_round = {}
            for round_num in map(int, rounds.split(',')):
                selected = _select_round_characters(clean_text, step, round_num)
                if selected is not None:
                    selected_characters_per_round[round_num] = selected

            if average_compile and len(selected_characters_per_round) > 1:
                keys = sorted(selected_characters_per_round)
                # NOTE(review): as before, only the two highest round numbers
                # end up in the output (earlier pairs were overwritten) --
                # confirm whether a fold over all rounds was intended.
                result_text = average_gematria(
                    selected_characters_per_round[keys[-2]],
                    selected_characters_per_round[keys[-1]],
                )
            else:
                result_text = ''.join(selected_characters_per_round.values())

            if length != 0:
                result_text = result_text[:length]

            if result_text:  # Only report books that produced some text
                translated_text = translator.translate(result_text)
                results.append({
                    "book": book,
                    "title": data["title"],
                    "result_text": result_text,
                    "result_sum": calculate_gematria(result_text),
                    "translated_text": translated_text
                })

        except FileNotFoundError:
            results.append({"error": f"File {file_name} not found."})
        except json.JSONDecodeError as e:
            results.append({"error": f"File {file_name} could not be read as JSON: {e}"})
        except KeyError as e:
            # Report the key that was actually missing ('text' or 'title').
            results.append({"error": f"Expected key {e} is missing in {file_name}"})

    return results
|
137 |
+
|
138 |
+
|
139 |
+
# Tests
|
140 |
+
# Each entry pairs a process_json_files(...) result with the expected
# result_text of its first item (None means "no results expected").
# All cases are currently disabled. NOTE(review): the Hebrew literals below
# were reconstructed from mis-decoded text and assume texts/torah/00.json
# holds the 22-letter alphabet -- verify before re-enabling.
test_results = [
    #(process_json_files(0, 0, 21, rounds="3", length=0), "שרק"),
    #(process_json_files(0, 0, 22, rounds="1", length=0), "ת"),
    #(process_json_files(0, 0, 22, rounds="3", length=0), "תתת"),
    #(process_json_files(0, 0, 23, rounds="3", length=0), "אבג"),
    #(process_json_files(0, 0, 11, rounds="1", length=0), "כת"),
    #(process_json_files(0, 0, 2, rounds="1", length=0), "בדוחילנעצרת"),
    #(process_json_files(0, 0, 23, rounds="1", length=0), None),  # Expect None, when no results
    #(process_json_files(0, 0, 23, rounds="-1", length=0), None),  # Expect None, when no results
    #(process_json_files(0, 0, 22, rounds="-1", length=0), "א"),
    #(process_json_files(0, 0, 22, rounds="-2", length=0), "אא"),
    #(process_json_files(0, 0, 1, rounds="-1", length=0), "תשרקצפעסנמלכיטחזוהדגבא"),  # Reversed Hebrew alphabet
    #(process_json_files(0, 0, 1, rounds="1,-1", length=0), "אבגדהוזחטיכלמנסעפצקרשתתשרקצפעסנמלכיטחזוהדגבא"),  # Combined rounds
    #(process_json_files(0, 0, 22, rounds="1,-1", length=0, average_compile=True), "רא"),  # average compile test (400+1) / 2 = math.ceil(200.5)=201=200+1="רא"
]

all_tests_passed = True
for result, expected in test_results:
    # Case 1: the call is expected to produce nothing at all.
    if expected is None:
        if result:
            logger.error(f"Test failed: Expected no results, but got: {result}")
            all_tests_passed = False
        else:
            logger.info(f"Test passed: Expected no results, got no results.")
        continue

    # Case 2: output was expected but none was produced.
    if not result:
        logger.error(f"Test failed: Expected '{expected}', but got no results")
        all_tests_passed = False
        continue

    # Case 3: compare the first entry's text with the expectation.
    result_text = result[0]['result_text']
    if result_text != expected:
        logger.error(f"Test failed: Expected '{expected}', but got '{result_text}'")
        all_tests_passed = False
    else:
        logger.info(f"Test passed: Expected '{expected}', got '{result_text}'")

if all_tests_passed:
    logger.info("All round tests passed.")
|