Browse files
Added fast lookup for hadith
@@ -10,7 +10,9 @@ from datasets import Value
10 |
from datasets import Dataset
11 |
import matplotlib.pyplot as plt
12 |
import re
13 |
14 |
pattern = r'"(.*?)"'
15 |
# This pattern captures the text enclosed in a pair of double quotes.
16 |
@@ -56,6 +58,22 @@ matn_info['Book_ID'] = matn_info['bookid_hadithid'].apply(lambda x: int(x.split(
56 |
matn_info['Hadith Number'] = matn_info['bookid_hadithid'].apply(lambda x: int(x.split('_')[1]))
57 |
matn_info = pd.merge(matn_info, books, on='Book_ID')
58 |
59 |
def value_to_hex(value):
    """Return the colour that ``cmap`` assigns to ``value`` as ``#RRGGBB``.

    ``value`` is handed to the module-level ``cmap`` callable. The first
    three components of its result are each multiplied by 255, truncated
    with ``int`` and written as two uppercase hex digits; any further
    component is ignored.
    """
    colour = cmap(value)
    # Truncate (not round) each scaled channel, exactly three of them.
    channels = [int(colour[index] * 255) for index in range(3)]
    return "#" + "".join(format(channel, "02X") for channel in channels)
@@ -72,37 +90,52 @@ def get_node_info(node):
72 |
73 |
74 |
def visualize_isnad(taraf_num, yaxis):
75 |
taraf = matn_info[matn_info['taraf_ID'] == taraf_num]
76 |
taraf_hadith = taraf['bookid_hadithid'].to_list()
77 |
78 |
79 |
80 |
taraf_author = taraf['Author'].to_list()
81 |
taraf_hadith_number = taraf['Hadith Number'].to_list()
82 |
lst_hadith = []
83 |
hadith_cleaned = isnad_info['Tarafs Cleaned'].apply(lambda x: taraf_num in x)
84 |
isnad_hadith = isnad_info[hadith_cleaned]
85 |
for i in range(len(taraf_hadith_split)):
86 |
# For each hadith in the taraf, check whether its bookid_hadithid appears in the edges of isnad_info.
87 |
# This loop gets the end transmitter of each hadith in the taraf.
88 |
isnad_in_hadith1 = isnad_hadith['Hadiths Cleaned'].apply(lambda x: taraf_hadith_split[i] in x )
89 |
isnad_hadith1 = isnad_hadith[isnad_in_hadith1][['Source', 'Destination']]
90 |
G = nx.from_pandas_edgelist(isnad_hadith1, source = 'Source', target = 'Destination', create_using = nx.DiGraph())
91 |
node = [int(n) for n, d in G.out_degree() if d == 0]
92 |
for n in node:
93 |
gen_node = narrator_bios[narrator_bios['Rawi ID']==n]['Generation'].to_list()
94 |
if len(gen_node):
95 |
gen_node = gen_node[0]
96 |
97 |
gen_node = -1
98 |
name_node = narrator_bios[narrator_bios['Rawi ID']==n]['Famous Name'].to_list()
99 |
if len(name_node):
100 |
name_node = name_node[0]
101 |
102 |
name_node = 'ููุงู'
103 |
lst_hadith.append([taraf_matns[i], gen_node, name_node, taraf_book[i], taraf_author[i], taraf_hadith_number[i], str(n), i])
104 |
df = pd.DataFrame(lst_hadith, columns = ['Matn', 'Generation', 'Name', 'Book_Name', 'Author', 'Book Hadith Number', 'End Transmitter ID', 'Hadith Number'])
105 |
106 |
isnad_hadith['Teacher'] = isnad_hadith['Source'].apply(lambda x: narrator_bios[narrator_bios['Rawi ID'].astype(int) == int(x)]['Famous Name'].to_list())
107 |
isnad_hadith['Student'] = isnad_hadith['Destination'].apply(lambda x: narrator_bios[narrator_bios['Rawi ID'].astype(int) == int(x)]['Famous Name'].to_list())
108 |
isnad_hadith['Teacher'] = isnad_hadith['Teacher'].apply(lambda x: x[0] if len(x)==1 else 'ููุงู')
10 |
from datasets import Dataset
11 |
import matplotlib.pyplot as plt
12 |
import re
13 |
from collections import defaultdict
14 |
from huggingface_hub import hf_hub_download
15 |
16 |
pattern = r'"(.*?)"'
17 |
# This pattern captures the text enclosed in a pair of double quotes.
18 |
58 |
matn_info['Hadith Number'] = matn_info['bookid_hadithid'].apply(lambda x: int(x.split('_')[1]))
59 |
matn_info = pd.merge(matn_info, books, on='Book_ID')
60 |
61 |
62 |
from huggingface_hub import hf_hub_download
63 |
64 |
# Download and read a file
65 |
file_path = hf_hub_download(
66 |
repo_id="FDSRashid/hadith_info", # read in fast lookup data structure
67 |
68 |
69 |
70 |
71 |
72 |
with open(file_path, 'r') as f:
73 |
hadith_lookup_dict = json.load(f)
74 |
hadith_lookup = defaultdict(list, hadith_lookup_dict)
75 |
76 |
77 |
def value_to_hex(value):
    """Return the colour that ``cmap`` assigns to ``value`` as ``#RRGGBB``.

    ``value`` is handed to the module-level ``cmap`` callable. The first
    three components of its result are each multiplied by 255, truncated
    with ``int`` and written as two uppercase hex digits; any further
    component is ignored.
    """
    colour = cmap(value)
    # Truncate (not round) each scaled channel, exactly three of them.
    channels = [int(colour[index] * 255) for index in range(3)]
    return "#" + "".join(format(channel, "02X") for channel in channels)
90 |
91 |
92 |
def visualize_isnad(taraf_num, yaxis):
93 |
# Precompute filtered dataframes
94 |
taraf = matn_info[matn_info['taraf_ID'] == taraf_num]
95 |
taraf_hadith = taraf['bookid_hadithid'].to_list()
96 |
97 |
98 |
# Precompute hadiths where taraf_num exists
99 |
hadith_cleaned = isnad_info['Tarafs Cleaned'].apply(lambda x: taraf_num in x)
100 |
isnad_hadith = isnad_info[hadith_cleaned]
101 |
102 |
lst_hadith = []
103 |
104 |
for i, hadith_parts in enumerate(taraf_hadith):
105 |
# look up hadith for each bookid_hadithid
106 |
isnad_hadith1 = isnad_info.iloc[hadith_lookup[taraf_hadith[i]]][['Source', 'Destination']]
107 |
108 |
# Create graph and find end nodes
109 |
G = nx.from_pandas_edgelist(isnad_hadith1, source='Source', target='Destination', create_using=nx.DiGraph())
110 |
nodes = [int(n) for n, d in G.out_degree() if d == 0]
111 |
112 |
if nodes:
113 |
# Batch fetch data from narrator_bios for efficiency
114 |
bio_data = narrator_bios[narrator_bios['Rawi ID'].isin(nodes)]
115 |
116 |
for n in nodes:
117 |
gen_node = bio_data.loc[bio_data['Rawi ID'] == n, 'Generation'].squeeze()
118 |
gen_node = gen_node if pd.notna(gen_node) else -1
119 |
120 |
name_node = bio_data.loc[bio_data['Rawi ID'] == n, 'Famous Name'].squeeze()
121 |
name_node = name_node if pd.notna(name_node) else 'ููุงู'
122 |
123 |
# Append result for each node
124 |
125 |
126 |
127 |
128 |
129 |
130 |
taraf.iloc[i]['Hadith Number'],
131 |
132 |
133 |
134 |
135 |
# Convert to DataFrame
136 |
df = pd.DataFrame(lst_hadith, columns=['Matn', 'Generation', 'Name', 'Book_Name', 'Author', 'Book Hadith Number', 'End Transmitter ID', 'Hadith Number'])
137 |
138 |
139 |
isnad_hadith['Teacher'] = isnad_hadith['Source'].apply(lambda x: narrator_bios[narrator_bios['Rawi ID'].astype(int) == int(x)]['Famous Name'].to_list())
140 |
isnad_hadith['Student'] = isnad_hadith['Destination'].apply(lambda x: narrator_bios[narrator_bios['Rawi ID'].astype(int) == int(x)]['Famous Name'].to_list())
141 |
isnad_hadith['Teacher'] = isnad_hadith['Teacher'].apply(lambda x: x[0] if len(x)==1 else 'ููุงู')