neuralworm committed on
Commit
c45b5bd
•
1 Parent(s): 56768f9

Upload 2 files

Browse files
Files changed (2) hide show
  1. bible.py +193 -0
  2. quran.py +186 -0
bible.py ADDED
@@ -0,0 +1,193 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ logger = logging.getLogger(__name__)
3
+
4
+
5
+ import json
6
+ import os
7
+ import re
8
+ from deep_translator import GoogleTranslator
9
+ from gematria import calculate_gematria
10
+ import math
11
+
12
+
13
+ # Hebrew gematria values for relevant characters
14
# Hebrew letter -> numeric value. The 22 base letters run 1-400; the five
# final forms (ך ם ן ף ץ) are given the values 500-900.
gematria_values = {
    'א': 1, 'ב': 2, 'ג': 3, 'ד': 4, 'ה': 5, 'ו': 6, 'ז': 7, 'ח': 8, 'ט': 9,
    'י': 10, 'כ': 20, 'ך': 500, 'ל': 30, 'מ': 40, 'ם': 600, 'נ': 50, 'ן': 700,
    'ס': 60, 'ע': 70, 'פ': 80, 'ף': 800, 'צ': 90, 'ץ': 900, 'ק': 100,
    'ר': 200, 'ש': 300, 'ת': 400,
}

# Reverse dictionary for converting gematria values back to Hebrew characters.
# Every value above is unique, so no letter is lost in the inversion.
reverse_gematria_values = {v: k for k, v in gematria_values.items()}
23
+
24
+ # Function to convert a Hebrew string to its gematria values
25
def string_to_gematria(s):
    """Return the gematria value of every character in ``s`` as a list.

    Characters that are not keys of ``gematria_values`` are mapped to 0, so
    the returned list always has the same length as ``s``.
    """
    lookup = gematria_values.get
    return [lookup(letter, 0) for letter in s]
27
+
28
+ # Function to convert a single gematria value to Hebrew characters
29
def gematria_to_string(value):
    """Spell a number as Hebrew letters, largest letter values first.

    Greedily subtracts the largest value in ``reverse_gematria_values`` that
    still fits, repeating a letter as often as needed. Values below the
    smallest table entry (including 0 and negatives) yield an empty string.
    """
    letters = []
    remaining = value
    for number in sorted(reverse_gematria_values, reverse=True):
        letter = reverse_gematria_values[number]
        while remaining >= number:
            letters.append(letter)
            remaining -= number
    return ''.join(letters)
36
+
37
+ # Function to calculate the average gematria values of corresponding characters and convert them to Hebrew characters
38
def average_gematria(str1, str2):
    """Average two strings position by position and spell the result.

    Both strings are converted with ``string_to_gematria``; the shorter list
    is right-padded with zeros. Each pair of values is averaged, rounded up
    with ``math.ceil``, and converted back to letters with
    ``gematria_to_string``. The per-position spellings are concatenated.
    """
    first = string_to_gematria(str1)
    second = string_to_gematria(str2)

    # Pad the shorter list with zeros so both have the same length.
    target_len = max(len(first), len(second))
    first += [0] * (target_len - len(first))
    second += [0] * (target_len - len(second))

    pieces = []
    for left, right in zip(first, second):
        rounded_mean = math.ceil((left + right) / 2)
        pieces.append(gematria_to_string(rounded_mean))
    return ''.join(pieces)
53
+
54
+
55
+ from deep_translator import GoogleTranslator
56
+ import os
57
+ import re
58
+ import csv
59
+
60
def process_json_files(start=1, end=66, step=1, rounds="1", length=0, tlang="en", strip_spaces=True, strip_in_braces=True, strip_diacritics=True, average_compile=False):
    """Pick every ``step``-th character from each book of the OpenGNT text.

    Reads the tab-separated file ``texts/bible/OpenGNT_version3_3.csv``,
    joins per book the first field (``OGNTk``) of the OGNT column of every
    row, cleans the text, samples it once per entry in ``rounds`` and
    translates the sampled string.

    Args:
        start: First book number to include (inclusive).
        end: Last book number to include (inclusive).
        step: Distance between picked characters. Must be >= 1.
        rounds: Comma-separated integers. A positive value walks forward
            starting at index ``step - 1``; a negative value walks backward
            starting at index ``len(text) - step``. The magnitude is the
            number of rounds. A round of 1 or -1 is skipped when ``step``
            exceeds the text length.
        length: If non-zero, the result is truncated to this many characters.
        tlang: Target language handed to ``GoogleTranslator``.
        strip_spaces: Remove all spaces. When false, runs of spaces are
            collapsed to a single space instead.
        strip_in_braces: Remove text enclosed in square brackets.
        strip_diacritics: Remove every character outside U+0370-U+03FF,
            U+1F00-U+1FFF and the space character.
        average_compile: When more than one round produced output, combine
            rounds with ``average_gematria`` instead of concatenating them.

    Returns:
        A list with one dict per book that produced a non-empty result
        (keys ``book``, ``result_text``, ``result_sum``, ``translated_text``),
        or a single ``{"error": ...}`` dict when the CSV file is missing.

    Raises:
        ValueError: If ``step`` is smaller than 1 (the sampling loop would
            never terminate) or ``rounds`` contains a non-integer entry.
    """
    if step < 1:
        raise ValueError(f"step must be a positive integer, got {step!r}")
    round_numbers = [int(part) for part in rounds.split(',')]

    file_name = "texts/bible/OpenGNT_version3_3.csv"
    translator = GoogleTranslator(source='auto', target=tlang)
    results = []

    # Collect the words of every requested book, keyed by book number.
    book_words = {}
    try:
        with open(file_name, 'r', encoding='utf-8') as file:
            reader = csv.DictReader(file, delimiter='\t')
            current_book = None
            for row in reader:
                # The cell looks like "〔book｜chapter｜verse〕"; drop the
                # opening bracket and keep the book number.
                book = int(row['〔Book｜Chapter｜Verse〕'].split('｜')[0][1:])
                if book < start or book > end:
                    continue
                if current_book != book:
                    current_book = book
                    book_words[book] = []
                cell = row['〔OGNTk｜OGNTu｜OGNTa｜lexeme｜rmac｜sn〕']
                book_words[book].append(cell.split('〔')[1].split('｜')[0])
    except FileNotFoundError:
        results.append({"error": f"File {file_name} not found."})
        return results

    for book, words in book_words.items():
        logger.debug("Processing book %s", book)
        clean_text = " ".join(words) + " "
        if strip_in_braces:
            clean_text = re.sub(r"\[.*?\]", "", clean_text, flags=re.DOTALL)
        if strip_diacritics:
            clean_text = re.sub(r"[^\u0370-\u03FF\u1F00-\u1FFF ]+", "", clean_text)
        if strip_spaces:
            clean_text = clean_text.replace(" ", "")
        else:
            # Collapse runs of spaces left behind by the removals above.
            clean_text = re.sub(r" {2,}", " ", clean_text)

        text_length = len(clean_text)
        if text_length == 0:
            # Nothing to sample; indexing modulo zero would raise.
            continue

        selected_characters_per_round = {}
        for round_num in round_numbers:
            if abs(round_num) == 1 and step > text_length:
                continue
            selected_characters_per_round[round_num] = _select_characters(
                clean_text, step, round_num)

        if average_compile and len(selected_characters_per_round) > 1:
            result_text = ""
            keys = sorted(selected_characters_per_round)
            # NOTE(review): every pass overwrites result_text, so only the
            # last adjacent pair of rounds ends up in the result - confirm
            # whether accumulation was intended.
            for lower, upper in zip(keys, keys[1:]):
                result_text = average_gematria(
                    selected_characters_per_round[lower],
                    selected_characters_per_round[upper])
        else:
            result_text = ''.join(selected_characters_per_round.values())

        if length != 0:
            result_text = result_text[:length]

        if not result_text:
            continue

        translated_text = translator.translate(result_text)
        logger.debug("Result for book %s: %s", book, result_text)
        results.append({
            "book": book,
            "result_text": result_text,
            "result_sum": calculate_gematria(result_text),
            "translated_text": translated_text
        })

    return results


def _select_characters(text, step, round_num):
    """Return the characters hit when stepping through non-empty ``text``.

    Walks forward for a positive ``round_num`` and backward for a negative
    one, wrapping around the text, until ``abs(round_num)`` rounds are
    counted. ``step`` must be >= 1.
    """
    text_length = len(text)
    forward = round_num > 0
    position = step - 1 if forward else text_length - step

    completed_rounds = 0
    picked = []
    while completed_rounds < abs(round_num):
        picked.append(text[position % text_length])
        position += step if forward else -step

        # NOTE(review): once a backward walk is below zero every further
        # iteration counts as a round, so each extra negative round adds a
        # single character rather than a full pass. Kept as is.
        if (forward and position >= text_length * (completed_rounds + 1)) or \
                (round_num < 0 and position < 0):
            completed_rounds += 1
    return ''.join(picked)
149
+
150
+
151
+
152
+
153
+ # Tests
154
# Each entry pairs the return value of a process_json_files(...) call with the
# expected ``result_text`` of its first record; ``None`` means that no record
# is expected. All entries are currently disabled, so the loop below is a
# no-op apart from the final log line.
test_results = [
    #(process_json_files(1, 1, 21, rounds="3", length=0), ""),
    #(process_json_files(1, 1, 22, rounds="1", length=0), ""),
    #(process_json_files(1, 1, 22, rounds="3", length=0), ""),
    #(process_json_files(1, 1, 23, rounds="3", length=0), ""),
    #(process_json_files(1, 1, 11, rounds="1", length=0), ""),
    #(process_json_files(1, 1, 2, rounds="1", length=0), ""),
    #(process_json_files(1, 1, 23, rounds="1", length=0), None),  # Expect None, when no results
    #(process_json_files(1, 1, 23, rounds="-1", length=0), None),  # Expect None, when no results
    #(process_json_files(1, 1, 22, rounds="-1", length=0), ""),
    #(process_json_files(1, 1, 22, rounds="-2", length=0), ""),
    #(process_json_files(1, 1, 1, rounds="-1", length=0), ""),  # Reversed Hebrew alphabet
    #(process_json_files(1, 1, 1, rounds="1,-1", length=0), ""),  # Combined rounds
    #(process_json_files(1, 1, 22, rounds="1,-1", length=0, average_compile=True), ""),  # average compile test (400+1) / 2 = math.ceil(200.5)=201=200+1="רא"
]

all_tests_passed = True
for result, expected in test_results:
    if expected is None:  # No record is expected
        if not result:
            logger.info("Test passed: Expected no results, got no results.")
        else:
            logger.error(f"Test failed: Expected no results, but got: {result}")
            all_tests_passed = False
    elif result:
        # Compare against the first record; .get() keeps an {"error": ...}
        # record from raising KeyError and reports it as a mismatch instead.
        result_text = result[0].get('result_text')
        if result_text == expected:
            logger.info(f"Test passed: Expected '{expected}', got '{result_text}'")
        else:
            logger.error(f"Test failed: Expected '{expected}', but got '{result_text}'")
            all_tests_passed = False
    else:
        logger.error(f"Test failed: Expected '{expected}', but got no results")
        all_tests_passed = False

if all_tests_passed:
    logger.info("All round tests passed.")
quran.py ADDED
@@ -0,0 +1,186 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ logger = logging.getLogger(__name__)
3
+
4
+ import json
5
+ import os
6
+ import re
7
+ from deep_translator import GoogleTranslator
8
+ from gematria import calculate_gematria
9
+ import math
10
+
11
+ # Hebrew gematria values for relevant characters
12
# Hebrew letter -> numeric value. The 22 base letters run 1-400; the five
# final forms (ך ם ן ף ץ) are given the values 500-900.
gematria_values = {
    'א': 1, 'ב': 2, 'ג': 3, 'ד': 4, 'ה': 5, 'ו': 6, 'ז': 7, 'ח': 8, 'ט': 9,
    'י': 10, 'כ': 20, 'ך': 500, 'ל': 30, 'מ': 40, 'ם': 600, 'נ': 50, 'ן': 700,
    'ס': 60, 'ע': 70, 'פ': 80, 'ף': 800, 'צ': 90, 'ץ': 900, 'ק': 100,
    'ר': 200, 'ש': 300, 'ת': 400,
}

# Reverse dictionary for converting gematria values back to Hebrew characters.
# Every value above is unique, so no letter is lost in the inversion.
reverse_gematria_values = {v: k for k, v in gematria_values.items()}
21
+
22
+ # Function to convert a Hebrew string to its gematria values
23
def string_to_gematria(s):
    """Translate ``s`` into a list of gematria values, one per character.

    A character without an entry in ``gematria_values`` counts as 0.
    """
    return list(map(lambda letter: gematria_values.get(letter, 0), s))
25
+
26
+ # Function to convert a single gematria value to Hebrew characters
27
def gematria_to_string(value):
    """Write ``value`` as Hebrew letters using a greedy decomposition.

    The letter values of ``reverse_gematria_values`` are tried from largest
    to smallest; each one is emitted for as long as it still fits into the
    remainder. A value smaller than every table entry gives ''.
    """
    spelled = ''
    left_over = value
    descending_values = sorted(reverse_gematria_values.keys(), reverse=True)
    for letter_value in descending_values:
        while left_over >= letter_value:
            spelled += reverse_gematria_values[letter_value]
            left_over -= letter_value
    return spelled
34
+
35
+ # Function to calculate the average gematria values of corresponding characters and convert them to Hebrew characters
36
def average_gematria(str1, str2):
    """Combine two strings by averaging their gematria values per position.

    The value lists from ``string_to_gematria`` are zero-padded to a common
    length, averaged pairwise with the mean rounded up (``math.ceil``), and
    each mean is turned back into letters by ``gematria_to_string``.
    """
    values_a = string_to_gematria(str1)
    values_b = string_to_gematria(str2)

    # Zero-pad whichever list is shorter.
    longest = max(len(values_a), len(values_b))
    values_a = values_a + [0] * (longest - len(values_a))
    values_b = values_b + [0] * (longest - len(values_b))

    means = (math.ceil((a + b) / 2) for a, b in zip(values_a, values_b))
    return ''.join(map(gematria_to_string, means))
51
+
52
+
53
def process_json_files(start=1, end=114, step=1, rounds="1", length=0, tlang="en", strip_spaces=True,
                       strip_in_braces=True, strip_diacritics=True, average_compile=False, translate=True):
    """Pick every ``step``-th character from each Quran chapter file.

    For every number in ``start..end`` the file
    ``texts/quran/<number, zero-padded to 3 digits>.json`` is loaded, the
    values of its ``"verse"`` mapping are joined, the text is cleaned,
    sampled once per entry in ``rounds`` and optionally translated.

    Args:
        start: First file number to process (inclusive).
        end: Last file number to process (inclusive).
        step: Distance between picked characters. Must be >= 1.
        rounds: Comma-separated integers. A positive value walks forward
            starting at index ``step - 1``; a negative value walks backward
            starting at index ``len(text) - step``. The magnitude is the
            number of rounds. A round of 1 or -1 is skipped when ``step``
            exceeds the text length.
        length: If non-zero, the result is truncated to this many characters.
        tlang: Target language handed to ``GoogleTranslator``.
        strip_spaces: Remove all spaces. When false, runs of spaces are
            collapsed to a single space instead.
        strip_in_braces: Remove text enclosed in square brackets.
        strip_diacritics: Remove the characters matched by the Arabic mark
            ranges in the pattern below.
        average_compile: When more than one round produced output, combine
            rounds with ``average_gematria`` instead of concatenating them.
        translate: When false, ``translated_text`` is left empty and the
            translator is not called.

    Returns:
        A list of dicts. Files that produced a non-empty result contribute
        a record with keys ``book``, ``title``, ``result_text``,
        ``result_sum`` and ``translated_text``; files that are missing,
        are not valid JSON, or lack an expected key contribute an
        ``{"error": ...}`` record.

    Raises:
        ValueError: If ``step`` is smaller than 1 (the sampling loop would
            never terminate) or ``rounds`` contains a non-integer entry.
    """
    if step < 1:
        raise ValueError(f"step must be a positive integer, got {step!r}")
    round_numbers = [int(part) for part in rounds.split(',')]

    base_path = "texts/quran"
    translator = GoogleTranslator(source='ar', target=tlang)
    results = []

    for book_number in range(start, end + 1):
        file_name = f"{base_path}/{book_number:03}.json"
        try:
            with open(file_name, 'r', encoding='utf-8') as file:
                data = json.load(file)

            # Join all verses, each followed by a space, and drop any BOM.
            full_text = ''.join(verse_text + " " for verse_text in data["verse"].values())
            full_text = full_text.replace("\ufeff", "")

            clean_text = full_text
            if strip_in_braces:
                clean_text = re.sub(r"\[.*?\]", "", clean_text, flags=re.DOTALL)
            if strip_diacritics:
                clean_text = re.sub(
                    r"[\u0610-\u061A\u064B-\u065F\u0670\u06D6-\u06DC\u06DF-\u06E4\u06E7\u06E8\u06EA-\u06ED]+", "",
                    clean_text)
            if strip_spaces:
                clean_text = clean_text.replace(" ", "")
            else:
                # Collapse runs of spaces left behind by the removals above.
                clean_text = re.sub(r" {2,}", " ", clean_text)

            text_length = len(clean_text)
            if text_length == 0:
                # Nothing to sample; indexing modulo zero would raise.
                continue

            selected_characters_per_round = {}
            for round_num in round_numbers:
                # A single round cannot reach any character when the step
                # is longer than the text.
                if abs(round_num) == 1 and step > text_length:
                    continue
                selected_characters_per_round[round_num] = _select_characters(
                    clean_text, step, round_num)

            if average_compile and len(selected_characters_per_round) > 1:
                result_text = ""
                keys = sorted(selected_characters_per_round)
                # The pair loop uses its own names so the chapter number
                # reported under "book" is not overwritten.
                # NOTE(review): every pass overwrites result_text, so only
                # the last adjacent pair of rounds ends up in the result -
                # confirm whether accumulation was intended.
                for lower, upper in zip(keys, keys[1:]):
                    result_text = average_gematria(
                        selected_characters_per_round[lower],
                        selected_characters_per_round[upper])
            else:
                result_text = ''.join(selected_characters_per_round.values())

            if length != 0:
                result_text = result_text[:length]

            translated_text = translator.translate(result_text) if result_text and translate else ""

            if result_text:  # Only append if result_text is not empty
                results.append({
                    "book": book_number,
                    "title": data["name"],  # The files store the title under "name"
                    "result_text": result_text,
                    "result_sum": calculate_gematria(result_text),
                    "translated_text": translated_text
                })

        except FileNotFoundError:
            results.append({"error": f"File {file_name} not found."})
        except json.JSONDecodeError as e:
            results.append({"error": f"File {file_name} could not be read as JSON: {e}"})
        except KeyError as e:
            results.append({"error": f"Expected key 'verse' is missing in {file_name}: {e}"})

    return results


def _select_characters(text, step, round_num):
    """Return the characters hit when stepping through non-empty ``text``.

    Walks forward for a positive ``round_num`` and backward for a negative
    one, wrapping around the text, until ``abs(round_num)`` rounds are
    counted. ``step`` must be >= 1.
    """
    text_length = len(text)
    forward = round_num > 0
    position = step - 1 if forward else text_length - step

    completed_rounds = 0
    picked = []
    while completed_rounds < abs(round_num):
        picked.append(text[position % text_length])
        position += step if forward else -step

        # NOTE(review): once a backward walk is below zero every further
        # iteration counts as a round, so each extra negative round adds a
        # single character rather than a full pass. Kept as is.
        if (forward and position >= text_length * (completed_rounds + 1)) or \
                (round_num < 0 and position < 0):
            completed_rounds += 1
    return ''.join(picked)
144
+
145
+
146
+ # Tests
147
# Each entry pairs the return value of a process_json_files(...) call with the
# expected ``result_text`` of its first record; ``None`` means that no record
# is expected. All entries are currently disabled, so the loop below is a
# no-op apart from the final log line.
test_results = [
    #(process_json_files(0, 0, 21, rounds="3", length=0), "שרק"),
    #(process_json_files(0, 0, 22, rounds="1", length=0), "ת"),
    #(process_json_files(0, 0, 22, rounds="3", length=0), "תתת"),
    #(process_json_files(0, 0, 23, rounds="3", length=0), "אבג"),
    #(process_json_files(0, 0, 11, rounds="1", length=0), "כת"),
    #(process_json_files(0, 0, 2, rounds="1", length=0), "בדוחילנעצרת"),
    #(process_json_files(0, 0, 23, rounds="1", length=0), None),  # Expect None, when no results
    #(process_json_files(0, 0, 23, rounds="-1", length=0), None),  # Expect None, when no results
    #(process_json_files(0, 0, 22, rounds="-1", length=0), "א"),
    #(process_json_files(0, 0, 22, rounds="-2", length=0), "אא"),
    #(process_json_files(0, 0, 1, rounds="-1", length=0), "תשרקצפעסנמלכיטחזוהדגבא"),  # Reversed Hebrew alphabet
    #(process_json_files(0, 0, 1, rounds="1,-1", length=0), "אבגדהוזחטיכלמנסעפצקרשתתשרקצפעסנמלכיטחזוהדגבא"),  # Combined rounds
    #(process_json_files(0, 0, 22, rounds="1,-1", length=0, average_compile=True), "רא"),  # average compile test (400+1) / 2 = math.ceil(200.5)=201=200+1="רא"
]

all_tests_passed = True
for result, expected in test_results:
    if expected is None:  # No record is expected
        if not result:
            logger.info("Test passed: Expected no results, got no results.")
        else:
            logger.error(f"Test failed: Expected no results, but got: {result}")
            all_tests_passed = False
    elif result:
        # Compare against the first record; .get() keeps an {"error": ...}
        # record from raising KeyError and reports it as a mismatch instead.
        result_text = result[0].get('result_text')
        if result_text == expected:
            logger.info(f"Test passed: Expected '{expected}', got '{result_text}'")
        else:
            logger.error(f"Test failed: Expected '{expected}', but got '{result_text}'")
            all_tests_passed = False
    else:
        logger.error(f"Test failed: Expected '{expected}', but got no results")
        all_tests_passed = False

if all_tests_passed:
    logger.info("All round tests passed.")