File size: 15,018 Bytes
c71a7d8
 
 
 
 
 
 
 
 
 
 
 
 
 
a116d30
 
e072981
448861d
c71a7d8
 
 
 
261d935
c71a7d8
 
 
 
 
a116d30
 
261d935
 
c71a7d8
 
 
8edff27
c71a7d8
cffe818
 
 
 
 
 
a116d30
448861d
c71a7d8
a116d30
5d223db
a116d30
c71a7d8
 
5d223db
 
 
028d3cf
b8be8d2
2b5e2b5
5d223db
c71a7d8
 
 
 
 
ecfd1f4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c71a7d8
ecfd1f4
 
 
261d935
ecfd1f4
 
 
261d935
ef641ab
 
261d935
a116d30
 
ef641ab
 
261d935
73c9524
 
6da2368
 
 
 
 
 
 
 
 
 
ec9e7d7
bb8f84b
c71a7d8
97ebd68
 
 
 
bb8f84b
 
7d2cacf
 
 
 
261d935
7d2cacf
 
 
 
97ebd68
5ca5481
97ebd68
7d2cacf
 
97ebd68
 
 
 
7d2cacf
142c25c
 
 
 
7d2cacf
 
 
261d935
7d2cacf
 
 
 
fbde103
7d2cacf
bb8f84b
7d2cacf
 
 
 
957c876
7d2cacf
261d935
 
58515bc
261d935
c71a7d8
 
 
5187c1e
7d2cacf
c71a7d8
3291a65
 
028d3cf
3291a65
ecfd1f4
028d3cf
d5207f2
 
 
248b1ce
 
 
 
 
09f5baa
 
 
 
 
d5207f2
09f5baa
 
 
 
248b1ce
 
 
09f5baa
 
248b1ce
 
 
 
09f5baa
248b1ce
 
 
 
09f5baa
 
 
 
 
 
 
a869ffd
 
 
d5207f2
09f5baa
 
 
 
248b1ce
09f5baa
d5207f2
 
 
09f5baa
 
 
 
a116d30
c71a7d8
 
3291a65
02d71bd
3291a65
 
5187c1e
3291a65
 
 
24e5e4e
02d71bd
e78a4d2
02d71bd
028d3cf
02d71bd
 
 
4a367b3
ecfd1f4
3291a65
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
import gradio as gr
from pyvis.network import Network
import networkx as nx
import numpy as np
import pandas as pd
import os
from datasets import load_dataset
from datasets import Features
from datasets import Value
from datasets import Dataset
import matplotlib.pyplot as plt
import re

pattern = r'"(.*?)"'
# Non-greedy regex: captures whatever sits between a pair of double quotes.
# Used below to pull the "BookNum_HadithNum" keys out of the raw 'Hadiths' column.

# Access token for the Hugging Face datasets, read from the HF_token environment variable.
Secret_token = os.getenv('HF_token')

dataset = load_dataset('FDSRashid/hadith_info',data_files = 'Basic_Edge_Information.csv', token = Secret_token, split = 'train')

# NOTE(review): edge_info is not referenced anywhere else in the visible part of this file.
edge_info = dataset.to_pandas()

# Narrator biographies. Counts and generations are loaded as strings and converted to int below.
features = Features({'Rawi ID': Value('int32'), 'Famous Name': Value('string'), 'Narrator Rank': Value('string'), 'Number of Narrations': Value('string'),  'Generation': Value('string')})
narrator_bios = load_dataset("FDSRashid/hadith_info", data_files = 'Teacher_Bios.csv', token = Secret_token,features=features )
narrator_bios = narrator_bios['train'].to_pandas()
# Manual patch of the row labelled 49845: its rank is overwritten, and its narration count is
# set to 0 before the integer cast and to 327512 afterwards.
# NOTE(review): presumably the raw count of that row is not parseable as an int - confirm.
narrator_bios.loc[49845, 'Narrator Rank'] = 'ุฑุณูˆู„ ุงู„ู„ู‡'
narrator_bios.loc[49845, 'Number of Narrations'] = 0
narrator_bios['Number of Narrations'] = narrator_bios['Number of Narrations'].astype(int)
narrator_bios.loc[49845, 'Number of Narrations'] = 327512
# 8125 narrators have no Generation (None in the dataset); they are stored as -1.
narrator_bios['Generation'] = narrator_bios['Generation'].replace([None], [-1])
narrator_bios['Generation'] = narrator_bios['Generation'].astype(int)

# Hadith texts (matn) with the taraf each belongs to and a "bookid_hadithid" key.
features = Features({'matn': Value('string'), 'taraf_ID': Value('string'), 'bookid_hadithid': Value('string')})

dataset = load_dataset("FDSRashid/hadith_info", data_files = 'All_Matns.csv',token = Secret_token, features = features)
matn_info = dataset['train'].to_pandas()
# Two rows are removed by index label.
# NOTE(review): the reason is not visible here - presumably malformed records; confirm.
matn_info = matn_info.drop(97550)
matn_info = matn_info.drop(307206)
# Rows without a taraf carry the string 'KeyAbsent'; map it to -1 so the column can be cast to int.
matn_info['taraf_ID'] = matn_info['taraf_ID'].replace('KeyAbsent', -1)

matn_info['taraf_ID'] = matn_info['taraf_ID'].astype(int)

# Each isnad_info row is one edge. Its 'Hadiths' column is a string shaped like
# {"BookNum_HadithNum", ...}.
isnad_info = load_dataset('FDSRashid/hadith_info',token = Secret_token, data_files = 'isnad_info.csv', split = 'train').to_pandas()
# Strip the outer braces, split on commas, take the quoted key of every item and split it on '_'.
isnad_info['Hadiths Cleaned'] = isnad_info['Hadiths'].apply(lambda x: [re.findall(pattern, string)[0].split("_") for string in x[1:-1].split(',')])
# 'Hadiths Cleaned' is a list of lists; each sub-list is [book id, hadith id] as strings.
# Largest taraf id; used as the upper bound of the Taraf sliders in the UI.
taraf_max = np.max(matn_info['taraf_ID'].unique())
# 'Tarafs' is parsed the same way (outer delimiters stripped, comma-split) into an int array per edge.
isnad_info['Tarafs Cleaned'] = isnad_info['Tarafs'].apply(lambda x: np.array([int(i.strip(' ')) for i in x[1:-1].split(',')]))

# Matplotlib 'cool' colormap; value_to_hex() maps values through it to colour nodes and edges.
cmap = plt.colormaps['cool']

books = load_dataset('FDSRashid/Hadith_info', data_files='Books.csv', token = Secret_token)['train'].to_pandas()

# Split the "bookid_hadithid" key into numeric columns, then attach the book metadata by Book_ID.
matn_info['Book_ID'] = matn_info['bookid_hadithid'].apply(lambda x: int(x.split('_')[0]))
matn_info['Hadith Number'] = matn_info['bookid_hadithid'].apply(lambda x: int(x.split('_')[1]))
matn_info = pd.merge(matn_info, books, on='Book_ID')

def value_to_hex(value):
    """Look *value* up in the module-level ``cmap`` and return it as ``#RRGGBB``.

    The first three channels of the colour returned by ``cmap`` are each
    multiplied by 255 and truncated with ``int``; the fourth (alpha) channel
    is ignored.

    NOTE(review): the callers in this file pass integer counts. Matplotlib
    colormaps are documented to treat integers as direct lookup-table
    indices rather than 0-1 fractions - confirm that this is intended.
    """
    red, green, blue = (int(channel * 255) for channel in cmap(value)[:3])
    return f"#{red:02X}{green:02X}{blue:02X}"


def visualize_subTaraf(taraf_num, hadith_str, yaxis):
    """Parse a comma-separated hadith selection into an array of numbers.

    Every comma-separated item is either a single number (``"7"``) or a
    range written with exactly one dash (``"3-6"``). A range contributes
    both of its endpoints and everything in between. Whitespace around
    items and around range bounds is ignored.

    Args:
        taraf_num: Not used by the parser; kept so the signature is stable.
        hadith_str: The selection text, e.g. ``"1-3, 7"``.
        yaxis: Not used by the parser; kept so the signature is stable.

    Returns:
        A 1-D integer numpy array with the selected numbers in input order
        (duplicates are kept).

    Raises:
        gr.Error: If an item has more than one dash, a range bound is
            missing or not a decimal number, or an item is not a number.
    """
    numbers = []
    for item in hadith_str.split(','):
        item = item.strip()
        if '-' in item:
            if item.count('-') > 1:
                raise gr.Error('Please use only one Dash mark!')
            # Exactly one dash is present, so the split always yields two parts.
            bounds = [part.strip() for part in item.split('-')]
            if not all(bounds):
                raise gr.Error('Two numbers for a range of Hadith numbers please!')
            # isdecimal() only accepts characters int() can parse; isnumeric()
            # also lets through things like fractions, which would crash int().
            if not all(part.isdecimal() for part in bounds):
                raise gr.Error('Invalid Begining')
            first, last = (int(part) for part in bounds)
            # The upper bound is part of the requested range.
            numbers.extend(range(first, last + 1))
        elif item.isdecimal():
            numbers.append(int(item))
        else:
            raise gr.Error("Invalid Data format!")
    return np.array(numbers, dtype=int)
    

def _first_value(series, default):
    """Return the first entry of *series*, or *default* when it is empty."""
    values = series.to_list()
    return values[0] if values else default


def visualize_isnad(taraf_num, yaxis):
    """Draw the isnad graph of one taraf and tabulate its hadiths.

    The hadiths are the rows of ``matn_info`` whose ``taraf_ID`` equals
    *taraf_num*; the edges are the rows of ``isnad_info`` whose
    ``'Tarafs Cleaned'`` contains it. For every hadith, the narrators that
    have no outgoing edge inside that hadith's own sub-graph are recorded as
    its end transmitters. The full edge set is laid out with Graphviz
    ``dot`` and rendered with pyvis (physics disabled, fixed positions).

    Args:
        taraf_num: Taraf id to visualise.
        yaxis: Prefix of the count column; ``f'{yaxis} Count'`` of
            ``isnad_info`` supplies each edge's colour and label.

    Returns:
        A tuple ``(iframe_html, table)``. ``iframe_html`` is an ``<iframe>``
        whose ``srcdoc`` holds the generated pyvis page. ``table`` is a
        DataFrame with the columns ``Matn``, ``Gen.``, ``Final Narrator``,
        ``Book``, ``Author``, ``End Narrator ID`` and ``Index``, one row per
        (hadith, end transmitter) pair.
    """
    unknown = 'ูู„ุงู†'  # placeholder used when a narrator has no biography row

    taraf = matn_info[matn_info['taraf_ID'] == taraf_num]
    taraf_hadith_split = [key.split('_') for key in taraf['bookid_hadithid'].to_list()]
    taraf_rows = zip(
        taraf_hadith_split,
        taraf['matn'].to_list(),
        taraf['Book_Name'].to_list(),
        taraf['Author'].to_list(),
        taraf['Hadith Number'].to_list(),
    )

    # Edges that take part in this taraf.
    in_taraf = isnad_info['Tarafs Cleaned'].apply(lambda tarafs: taraf_num in tarafs)
    isnad_hadith = isnad_info[in_taraf]

    lst_hadith = []
    for index, (hadith_key, matn, book, author, hadith_number) in enumerate(taraf_rows):
        # Restrict the taraf's edges to the ones that carry this hadith, then
        # find the narrators nobody narrates from within that chain.
        in_hadith = isnad_hadith['Hadiths Cleaned'].apply(lambda keys: hadith_key in keys)
        hadith_edges = isnad_hadith[in_hadith][['Source', 'Destination']]
        chain = nx.from_pandas_edgelist(hadith_edges, source = 'Source', target = 'Destination', create_using = nx.DiGraph())
        end_ids = [int(n) for n, degree in chain.out_degree() if degree == 0]
        for n in end_ids:
            bio = narrator_bios[narrator_bios['Rawi ID'] == n]  # one lookup per narrator
            gen_node = _first_value(bio['Generation'], -1)
            name_node = _first_value(bio['Famous Name'], unknown)
            lst_hadith.append([matn, gen_node, name_node, book, author, hadith_number, str(n), str(index)])
    df = pd.DataFrame(lst_hadith, columns = ['Matn', 'Generation', 'Name', 'Book_Name', 'Author', 'Book Hadith Number', 'End Transmitter ID', 'Hadith Number'])

    end_nodes = df['End Transmitter ID'].tolist()
    G = nx.from_pandas_edgelist(isnad_hadith, source = 'Source', target = 'Destination', create_using = nx.DiGraph())
    isnad_pos = nx.nx_agraph.graphviz_layout(G, prog='dot')
    x_stretch = 4
    y_stretch = 4
    net = Network(directed =True)

    for node, pos in isnad_pos.items():
        node_info = narrator_bios[narrator_bios['Rawi ID'] == int(node)]
        student_narrations = _first_value(node_info['Number of Narrations'], 1)
        student_gen = _first_value(node_info['Generation'], -1)
        student_rank = _first_value(node_info["Narrator Rank"], unknown)
        node_name = _first_value(node_info['Famous Name'], unknown)
        # The layout's y coordinate is negated before being handed to pyvis.
        x_pos = pos[0]*x_stretch
        y_pos = -1*pos[1]*y_stretch
        if node == '99999':
            # ID 99999 is drawn larger and in black, without a rank line.
            # NOTE(review): presumably the root of every chain - confirm.
            net.add_node(node, font = {'size':50, 'color': 'black'}, color = '#000000', label = f'{node_name} \n ID: {node} - Gen {student_gen}', x= x_pos, y= y_pos, size= 70)
        elif node in end_nodes:
            # End transmitters also list the table indices of the hadiths they close.
            end_matn_info = df[df["End Transmitter ID"] == node]
            net.add_node(node, font = {'size':30, 'color': 'red'}, color = value_to_hex(student_narrations), label = f'{node_name} \n {student_rank} \n ID: {node} - Gen {student_gen} \n Hadith {" ".join(end_matn_info["Hadith Number"].tolist())}', x= x_pos, y= y_pos, size= 50)
        else:
            net.add_node(node, font = {'size':30, 'color': 'red'}, color = value_to_hex(student_narrations), label = f'{node_name} \n {student_rank} \n ID: {node} - Gen {student_gen}', x= x_pos, y= y_pos, size= 50)
    for _, row in isnad_hadith.iterrows():
        count = row[f'{yaxis} Count']
        net.add_edge(row['Source'], row['Destination'], color = value_to_hex(int(count)), label = f"{count}")
    net.toggle_physics(False)
    html = net.generate_html()
    # The page is embedded in a single-quoted srcdoc attribute, so single
    # quotes inside it are swapped for double quotes.
    html = html.replace("'", "\"")
    df = df.rename(columns = {'Generation': 'Gen.', 'Book Hadith Number': 'Hdth Num', 'End Transmitter ID': 'End Narrator ID', 'Hadith Number': 'Index', 'Book_Name': 'Book', 'Name':'Final Narrator'})
    iframe = f"""<iframe style="width: 100%; height: 600px;margin:0 auto" name="result" allow="midi; geolocation; microphone; camera; 
  display-capture; encrypted-media;" sandbox="allow-modals allow-forms 
  allow-scripts allow-same-origin allow-popups 
  allow-top-navigation-by-user-activation allow-downloads" allowfullscreen="" 
  allowpaymentrequest="" frameborder="0" srcdoc='{html}'></iframe>"""
    return iframe, df.drop('Hdth Num', axis=1)
    

def taraf_booknum(taraf_num):
    """Return the text and book details of every hadith in taraf *taraf_num*.

    The result is the subset of ``matn_info`` rows whose ``taraf_ID`` equals
    *taraf_num*, restricted to the matn, book id, hadith number, book name
    and author columns.
    """
    wanted_columns = ['matn', 'Book_ID', 'Hadith Number', 'Book_Name', 'Author']
    belongs_to_taraf = matn_info['taraf_ID'] == taraf_num
    return matn_info.loc[belongs_to_taraf, wanted_columns]

def _first_bio_value(series, default):
    """Return the first entry of *series*, or *default* when it is empty."""
    values = series.to_list()
    return values[0] if values else default


def visualize_hadith_isnad(df, yaxis):
    """Draw the combined isnad graph of a hand-picked set of hadiths.

    Each complete row of *df* names one hadith by book id and hadith number.
    The edges are the rows of ``isnad_info`` whose ``'Hadiths Cleaned'``
    contains at least one of the requested hadiths that also exists in
    ``matn_info``. The graph is laid out with Graphviz ``dot`` and rendered
    with pyvis (physics disabled, fixed positions).

    Args:
        df: DataFrame with ``'Book_ID'`` and ``'Hadith Number'`` columns.
            Rows where either cell is blank or not a number are skipped.
            The frame itself is not modified.
        yaxis: Prefix of the count column; ``f'{yaxis} Count'`` of
            ``isnad_info`` supplies each edge's colour and label.

    Returns:
        An ``<iframe>`` string whose ``srcdoc`` holds the generated pyvis page.
    """
    unknown = 'ูู„ุงู†'  # placeholder used when a narrator has no biography row

    # Build "book_hadith" keys from integers so that float cells (1.0) and
    # blank rows coming from the input grid cannot produce keys such as
    # "1.0_5.0" or "nan_nan" that never match.
    book_ids = pd.to_numeric(df['Book_ID'], errors='coerce')
    hadith_numbers = pd.to_numeric(df['Hadith Number'], errors='coerce')
    complete = book_ids.notna() & hadith_numbers.notna()
    requested = book_ids[complete].astype(int).astype(str) + '_' + hadith_numbers[complete].astype(int).astype(str)

    hadith = matn_info[matn_info['bookid_hadithid'].isin(requested)]
    taraf_hadith_split = [key.split('_') for key in hadith['bookid_hadithid'].to_list()]
    carries_selection = isnad_info['Hadiths Cleaned'].apply(lambda keys: any(key in keys for key in taraf_hadith_split))
    isnad_hadith = isnad_info[carries_selection]

    G = nx.from_pandas_edgelist(isnad_hadith, source = 'Source', target = 'Destination', create_using = nx.DiGraph())
    isnad_pos = nx.nx_agraph.graphviz_layout(G, prog='dot')
    x_stretch = 4
    y_stretch = 4
    net = Network(directed =True)

    for node, pos in isnad_pos.items():
        node_info = narrator_bios[narrator_bios['Rawi ID'] == int(node)]
        student_narrations = _first_bio_value(node_info['Number of Narrations'], 1)
        student_gen = _first_bio_value(node_info['Generation'], -1)
        student_rank = _first_bio_value(node_info["Narrator Rank"], unknown)
        node_name = _first_bio_value(node_info['Famous Name'], unknown)
        # The layout's y coordinate is negated before being handed to pyvis.
        x_pos = pos[0]*x_stretch
        y_pos = -1*pos[1]*y_stretch
        if node == '99999':
            # ID 99999 is drawn larger and in black, without a rank line.
            # NOTE(review): presumably the root of every chain - confirm.
            net.add_node(node, font = {'size':50, 'color': 'black'}, color = '#000000', label = f'{node_name} \n ID: {node} - Gen {student_gen}', x= x_pos, y= y_pos, size= 70)
        else:
            net.add_node(node, font = {'size':30, 'color': 'red'}, color = value_to_hex(student_narrations), label = f'{node_name} \n {student_rank} \n ID: {node} - Gen {student_gen}', x= x_pos, y= y_pos, size= 50)
    for _, row in isnad_hadith.iterrows():
        count = row[f'{yaxis} Count']
        net.add_edge(row['Source'], row['Destination'], color = value_to_hex(int(count)), label = f"{count}")
    net.toggle_physics(False)
    html = net.generate_html()
    # The page is embedded in a single-quoted srcdoc attribute, so single
    # quotes inside it are swapped for double quotes.
    html = html.replace("'", "\"")
    return f"""<iframe style="width: 100%; height: 600px;margin:0 auto" name="result" allow="midi; geolocation; microphone; camera; 
  display-capture; encrypted-media;" sandbox="allow-modals allow-forms 
  allow-scripts allow-same-origin allow-popups 
  allow-top-navigation-by-user-activation allow-downloads" allowfullscreen="" 
  allowpaymentrequest="" frameborder="0" srcdoc='{html}'></iframe>"""


# Gradio UI: three tabs, each wiring one button to one of the functions above.
# Output components are created inline in the click() calls.
with gr.Blocks() as demo:
    # Tab 1: graph of a whole taraf plus the table of its hadiths (visualize_isnad).
    with gr.Tab("Whole Taraf Visualizer"):
        # The chosen value is turned into the '<value> Count' column name by the callback.
        Yaxis = gr.Dropdown(choices = ['Taraf', 'Hadith', 'Isnad', 'Book'], value = 'Taraf', label = 'Variable to Display', info = 'Choose the variable to visualize.')  
        taraf_number = gr.Slider(1,taraf_max , value=10000, label="Taraf", info="Choose the Taraf to Input", step = 1)
        btn = gr.Button('Submit')
        # visualize_isnad returns (iframe html, table); the table has seven columns, matching column_widths.
        btn.click(fn = visualize_isnad, inputs = [taraf_number, Yaxis], outputs = [gr.HTML(), gr.DataFrame(wrap=True, column_widths=[43, 8, 11,11,10,8, 9])])
    # Tab 2: list the book id / hadith number of every hadith in a taraf (taraf_booknum).
    with gr.Tab("Book and Hadith Number Retriever"):
        taraf_num = gr.Slider(1,taraf_max , value=10000, label="Taraf", info="Choose the Taraf to Input", step = 1)
        btn_num = gr.Button('Retrieve')
        btn_num.click(fn=taraf_booknum, inputs = [taraf_num], outputs= [gr.DataFrame(wrap=True)])
    # Tab 3: graph for a hand-entered list of (Book_ID, Hadith Number) pairs (visualize_hadith_isnad).
    with gr.Tab('Select Hadith Isnad Visualizer'):
        yyaxis = gr.Dropdown(choices = ['Taraf', 'Hadith', 'Isnad', 'Book'], value = 'Taraf', label = 'Variable to Display', info = 'Choose the variable to visualize.')
        # Editable grid with two fixed numeric columns and five initial rows.
        hadith_selection =  gr.Dataframe(
            headers=["Book_ID", "Hadith Number"],
            datatype=["number", "number"],
            row_count=5,
            col_count=(2, "fixed"))
        btn_hadith = gr.Button('Visualize')
        btn_hadith.click(fn=visualize_hadith_isnad, inputs=[hadith_selection, yyaxis], outputs=[gr.HTML()])
# Start the app; this runs at import time of the module.
demo.launch()