File size: 13,565 Bytes
c71a7d8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a116d30
 
e072981
448861d
c71a7d8
 
 
 
261d935
c71a7d8
 
 
 
 
a116d30
 
261d935
 
c71a7d8
 
 
8edff27
c71a7d8
cffe818
 
 
 
 
 
a116d30
448861d
c71a7d8
a116d30
5d223db
a116d30
c71a7d8
 
5d223db
 
 
028d3cf
b8be8d2
2b5e2b5
5d223db
c71a7d8
 
 
 
 
 
 
5d223db
 
261d935
22de721
5d223db
3a572d4
261d935
 
a116d30
 
fa12504
261d935
 
 
 
 
5d223db
22de721
c71a7d8
a116d30
 
97ebd68
 
 
 
 
261d935
97ebd68
 
 
261d935
 
97ebd68
 
 
 
 
 
 
 
5ca5481
97ebd68
 
aef4401
97ebd68
 
 
 
 
46cc9fe
97ebd68
 
 
 
142c25c
 
 
 
 
cdbb8ad
 
142c25c
 
 
 
261d935
478a03c
261d935
142c25c
 
957c876
261d935
 
 
 
c71a7d8
 
 
261d935
c71a7d8
3291a65
 
028d3cf
3291a65
e78a4d2
028d3cf
d5207f2
 
 
248b1ce
 
 
 
 
d5207f2
248b1ce
 
 
d5207f2
 
aef4401
248b1ce
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d5207f2
248b1ce
d5207f2
248b1ce
 
 
d5207f2
 
 
 
02d71bd
 
 
 
a116d30
c71a7d8
 
3291a65
02d71bd
3291a65
 
a2e40dc
3291a65
 
 
7bf20fc
02d71bd
e78a4d2
02d71bd
028d3cf
02d71bd
 
 
4a367b3
e78a4d2
3291a65
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
import gradio as gr
from pyvis.network import Network
import networkx as nx
import numpy as np
import pandas as pd
import os
from datasets import load_dataset
from datasets import Features
from datasets import Value
from datasets import Dataset
import matplotlib.pyplot as plt

import re

# Regex capturing (non-greedily) whatever sits between a pair of double quotes.
pattern = r'"(.*?)"'

# Hugging Face access token read from the environment (None when the variable is unset).
Secret_token = os.getenv('HF_token')

# Per-edge summary table.
dataset = load_dataset('FDSRashid/hadith_info',data_files = 'Basic_Edge_Information.csv', token = Secret_token, split = 'train')

edge_info = dataset.to_pandas()

# Narrator biographies: every column except the ID is loaded as a string and
# converted below.
features = Features({'Rawi ID': Value('int32'), 'Famous Name': Value('string'), 'Narrator Rank': Value('string'), 'Number of Narrations': Value('string'),  'Generation': Value('string')})
narrator_bios = load_dataset("FDSRashid/hadith_info", data_files = 'Teacher_Bios.csv', token = Secret_token,features=features )
narrator_bios = narrator_bios['train'].to_pandas()
# Row 49845 is patched by hand. Its rank literal was stored as mis-decoded
# text; it is restored here to the Arabic it encodes ("Messenger of Allah").
narrator_bios.loc[49845, 'Narrator Rank'] = 'رسول الله'
# The count for that row is zeroed before the integer cast and set to its real
# value afterwards -- presumably the raw cell is not a valid integer string;
# confirm against Teacher_Bios.csv. The order of these three lines matters.
narrator_bios.loc[49845, 'Number of Narrations'] = 0
narrator_bios['Number of Narrations'] = narrator_bios['Number of Narrations'].astype(int)
narrator_bios.loc[49845, 'Number of Narrations'] = 327512
# 8125 narrators have no Generation (None in the dataset); -1 marks them.
narrator_bios['Generation'] = narrator_bios['Generation'].replace([None], [-1])
narrator_bios['Generation'] = narrator_bios['Generation'].astype(int)

# Hadith texts (matn) with their taraf ID and a "<book id>_<hadith id>" key.
features = Features({'matn': Value('string'), 'taraf_ID': Value('string'), 'bookid_hadithid': Value('string')})

dataset = load_dataset("FDSRashid/hadith_info", data_files = 'All_Matns.csv',token = Secret_token, features = features)
matn_info = dataset['train'].to_pandas()
# Two rows are dropped by index label; the reason is not recorded in this file.
matn_info = matn_info.drop(97550)
matn_info = matn_info.drop(307206)
# 'KeyAbsent' marks a hadith without a taraf; -1 keeps the column castable to int.
matn_info['taraf_ID'] = matn_info['taraf_ID'].replace('KeyAbsent', -1)

matn_info['taraf_ID'] = matn_info['taraf_ID'].astype(int)

# Isnad Info Hadiths column is structured like {"BookNum_HadithNum", ...} for each edge
isnad_info = load_dataset('FDSRashid/hadith_info',token = Secret_token, data_files = 'isnad_info.csv', split = 'train').to_pandas()
# Strip the surrounding braces, split on commas, take the quoted key from each
# piece and split it on "_".
isnad_info['Hadiths Cleaned'] = isnad_info['Hadiths'].apply(lambda x: [re.findall(pattern, string)[0].split("_") for string in x[1:-1].split(',')])
# Hadiths Cleaned is a list of lists, each sub-list is Book Id, Hadith ID
# Largest taraf ID, used as the upper bound of the UI sliders. Converted to a
# built-in int so the bound is not a numpy scalar.
taraf_max = int(matn_info['taraf_ID'].max())
# Tarafs Cleaned is a numpy array of the integer taraf IDs listed on each edge.
isnad_info['Tarafs Cleaned'] = isnad_info['Tarafs'].apply(lambda x: np.array([int(i.strip(' ')) for i in x[1:-1].split(',')]))

# Colormap used by value_to_hex for node and edge colours.
cmap = plt.colormaps['cool']

# NOTE(review): the repository id is spelled 'Hadith_info' here but
# 'hadith_info' in every other call -- confirm both resolve to the same dataset.
books = load_dataset('FDSRashid/Hadith_info', data_files='Books.csv', token = Secret_token)['train'].to_pandas()

# Split the "<book id>_<hadith id>" key into integer columns, then attach the
# book metadata (pd.merge defaults to an inner join on Book_ID).
matn_info['Book_ID'] = matn_info['bookid_hadithid'].apply(lambda x: int(x.split('_')[0]))
matn_info['Hadith Number'] = matn_info['bookid_hadithid'].apply(lambda x: int(x.split('_')[1]))
matn_info = pd.merge(matn_info, books, on='Book_ID')

def value_to_hex(value):
    """Map *value* through the module-level ``cmap`` and return it as ``#RRGGBB``.

    The alpha channel returned by the colormap is ignored; each of the
    remaining channels is scaled by 255 and truncated to an integer.
    """
    red, green, blue = (int(channel * 255) for channel in cmap(value)[:3])
    return f"#{red:02X}{green:02X}{blue:02X}"

# Module-level DataFrames loaded above: edge_info, matn_info, narrator_bios, isnad_info

# Placeholder ("so-and-so") used when a narrator cannot be resolved in
# narrator_bios. The literal was stored as mis-decoded text; this is the Arabic
# word it encodes.
_UNKNOWN_NARRATOR = 'فلان'
# Rawi ID drawn as the large black node labelled with the name only
# (presumably the root of every chain -- confirm against the dataset).
_ROOT_RAWI_ID = 99999


def _taraf_rows(taraf_num):
    """Return the rows of ``matn_info`` whose ``taraf_ID`` equals *taraf_num*."""
    return matn_info[matn_info['taraf_ID'] == taraf_num]


def _first_or(values, default):
    """Return the first element of the list *values*, or *default* when it is empty."""
    return values[0] if values else default


def _narrator_profile(rawi_id):
    """Collect the display fields of one narrator from ``narrator_bios``.

    Returns a dict with the keys ``name``, ``rank``, ``generation`` and
    ``narrations``. The name is only trusted when exactly one row matches;
    the other fields take the first matching row. Missing narrators get the
    placeholder name and rank, generation ``-1`` and ``narrations`` ``None``.
    """
    matches = narrator_bios[narrator_bios['Rawi ID'] == int(rawi_id)]
    names = matches['Famous Name'].to_list()
    return {
        'name': names[0] if len(names) == 1 else _UNKNOWN_NARRATOR,
        'rank': _first_or(matches['Narrator Rank'].to_list(), _UNKNOWN_NARRATOR),
        'generation': _first_or(matches['Generation'].to_list(), -1),
        'narrations': _first_or(matches['Number of Narrations'].to_list(), None),
    }


def _render_isnad_network(edges, yaxis):
    """Draw the rows of *edges* as a directed pyvis graph and return its HTML.

    *edges* is a slice of ``isnad_info``; it is only read, never modified.
    Nodes are coloured by the narrator's number of narrations (falling back
    to the edge's ``Hadith Count`` when the narrator is unknown) and edges by
    the ``'<yaxis> Count'`` column. Single quotes in the generated document
    are replaced by double quotes so it can be embedded in a single-quoted
    ``srcdoc`` attribute.
    """
    count_column = f'{yaxis} Count'
    profiles = {}

    def profile_of(rawi_id):
        # One narrator_bios scan per distinct narrator instead of several per edge.
        key = int(rawi_id)
        if key not in profiles:
            profiles[key] = _narrator_profile(key)
        return profiles[key]

    net = Network(directed =True)
    for _, row in edges.iterrows():
        source = row['Source']
        target = row['Destination']
        teacher = profile_of(source)
        student = profile_of(target)
        fallback_count = row['Hadith Count']
        teacher_narrations = fallback_count if teacher['narrations'] is None else teacher['narrations']
        student_narrations = fallback_count if student['narrations'] is None else student['narrations']

        # Compare as integers: the previous string comparison could never
        # match when the Source column holds integers.
        if int(source) == _ROOT_RAWI_ID:
            net.add_node(source, font = {'size':50, 'color': 'Black'}, color = '#000000', label = f'{teacher["name"]}')
        else:
            net.add_node(source, font = {'size':30, 'color': 'red'}, color = value_to_hex(teacher_narrations), label = f'{teacher["name"]} \n {teacher["rank"]} \n ID: {source} - Gen {teacher["generation"]}')
        net.add_node(target, font = {'size': 30, 'color': 'red'}, color = value_to_hex(student_narrations), label = f'{student["name"]} \n{student["rank"]} \n ID: {target} - Gen {student["generation"]}')
        net.add_edge(source, target, color = value_to_hex(int(row[count_column])), label = f"{row[count_column]}")
    net.barnes_hut(gravity=-5000, central_gravity=0.3, spring_length=200)
    html = net.generate_html()
    return html.replace("'", "\"")


def visualize_isnad(taraf_num, yaxis):
    """Visualize every isnad edge of one taraf and list its hadiths.

    Args:
        taraf_num: taraf ID to display.
        yaxis: prefix of the ``'<yaxis> Count'`` column of ``isnad_info``
            used for edge colour and label.

    Returns:
        A tuple ``(iframe_html, df)``. ``df`` has one row per hadith of the
        taraf with its matn, the generation and name of the narrator at the
        end of its chain, and the book name, author and hadith number. When
        a hadith has no isnad edges, or its end narrator is not in
        ``narrator_bios``, the generation is ``-1`` and the name is the
        placeholder instead of raising.
    """
    taraf = _taraf_rows(taraf_num)
    hadith_rows = []
    for key, matn, book, author, number in zip(
        taraf['bookid_hadithid'].to_list(),
        taraf['matn'].to_list(),
        taraf['Book_Name'].to_list(),
        taraf['Author'].to_list(),
        taraf['Hadith Number'].to_list(),
    ):
        # Edges whose hadith list contains this [book id, hadith id] pair.
        book_hadith = key.split('_')
        in_hadith = isnad_info['Hadiths Cleaned'].apply(lambda refs: book_hadith in refs)
        hadith_edges = isnad_info[in_hadith][['Source', 'Destination']]
        G = nx.from_pandas_edgelist(hadith_edges, source = 'Source', target = 'Destination', create_using = nx.DiGraph())
        # The end transmitter is a node with no outgoing edge.
        terminals = [int(n) for n, d in G.out_degree() if d == 0]
        generation, name = -1, _UNKNOWN_NARRATOR
        if terminals:
            bio = narrator_bios[narrator_bios['Rawi ID'] == terminals[0]]
            if not bio.empty:
                generation = bio['Generation'].iloc[0]
                name = bio['Famous Name'].iloc[0]
        hadith_rows.append([matn, generation, name, book, author, number])
    df = pd.DataFrame(hadith_rows, columns = ['Matn', 'Generation', 'Name', 'Book_Name', 'Author', 'Hadith Number'])

    in_taraf = isnad_info['Tarafs Cleaned'].apply(lambda tarafs: taraf_num in tarafs)
    html = _render_isnad_network(isnad_info[in_taraf], yaxis)
    return f"""<iframe style="width: 100%; height: 600px;margin:0 auto" name="result" allow="midi; geolocation; microphone; camera; 
  display-capture; encrypted-media;" sandbox="allow-modals allow-forms 
  allow-scripts allow-same-origin allow-popups 
  allow-top-navigation-by-user-activation allow-downloads" allowfullscreen="" 
  allowpaymentrequest="" frameborder="0" srcdoc='{html}'></iframe>""" , df


def taraf_booknum(taraf_num):
    """Return matn, book ID, hadith number, book name and author for one taraf."""
    return _taraf_rows(taraf_num)[['matn', 'Book_ID', 'Hadith Number', 'Book_Name', 'Author']]


def visualize_subTaraf(df, yaxis):
    """Visualize the isnad edges that carry any of the selected hadiths.

    Args:
        df: DataFrame with ``Book_ID`` and ``Hadith Number`` columns. It is
            not modified. Rows whose cells are empty or non-numeric are
            ignored, and numeric cells are converted to integers before the
            lookup key is built, so ``1.0`` and ``1`` select the same hadith.
        yaxis: prefix of the ``'<yaxis> Count'`` column of ``isnad_info``
            used for edge colour and label.

    Returns:
        The iframe HTML string embedding the graph.
    """
    book_ids = pd.to_numeric(df['Book_ID'], errors='coerce')
    hadith_numbers = pd.to_numeric(df['Hadith Number'], errors='coerce')
    complete = book_ids.notna() & hadith_numbers.notna()
    requested = set(
        book_ids[complete].astype(int).astype(str)
        + '_'
        + hadith_numbers[complete].astype(int).astype(str)
    )
    # Keep only keys that exist in matn_info, as [book id, hadith id] tuples
    # for constant-time membership tests.
    known_keys = matn_info.loc[matn_info['bookid_hadithid'].isin(requested), 'bookid_hadithid']
    wanted = {tuple(key.split('_')) for key in known_keys}
    touches_selection = isnad_info['Hadiths Cleaned'].apply(
        lambda refs: any(tuple(ref) in wanted for ref in refs)
    )
    html = _render_isnad_network(isnad_info[touches_selection], yaxis)
    return f"""<iframe style="width: 100%; height: 600px;margin:0 auto" name="result" allow="midi; geolocation; microphone; camera; 
    display-capture; encrypted-media;" sandbox="allow-modals allow-forms 
    allow-scripts allow-same-origin allow-popups 
    allow-top-navigation-by-user-activation allow-downloads" allowfullscreen="" 
    allowpaymentrequest="" frameborder="0" srcdoc='{html}'></iframe>"""


# Three-tab Gradio UI: whole-taraf graph, taraf -> hadith lookup, and a graph
# restricted to hand-picked hadiths. The app is launched as soon as it is built.
with gr.Blocks() as demo:
    with gr.Tab("Whole Taraf Visualizer"):
        Yaxis = gr.Dropdown(
            choices=['Taraf', 'Hadith', 'Isnad', 'Book'],
            value='Taraf',
            label='Variable to Display',
            info='Choose the variable to visualize.',
        )
        taraf_number = gr.Slider(
            1,
            taraf_max,
            value=10000,
            label="Taraf",
            info="Choose the Taraf to Input",
            step=1,
        )
        btn = gr.Button('Submit')
        # Output components are created after the button, so they render below it.
        graph_output = gr.HTML()
        hadith_table = gr.DataFrame(wrap=True)
        btn.click(
            fn=visualize_isnad,
            inputs=[taraf_number, Yaxis],
            outputs=[graph_output, hadith_table],
        )

    with gr.Tab("Book and Hadith Number Retriever"):
        taraf_num = gr.Slider(
            1,
            taraf_max,
            value=10000,
            label="Taraf",
            info="Choose the Taraf to Input",
            step=1,
        )
        btn_num = gr.Button('Retrieve')
        lookup_table = gr.DataFrame(wrap=True)
        btn_num.click(fn=taraf_booknum, inputs=[taraf_num], outputs=[lookup_table])

    with gr.Tab('Select Hadith Isnad Visualizer'):
        yyaxis = gr.Dropdown(
            choices=['Taraf', 'Hadith', 'Isnad', 'Book'],
            value='Taraf',
            label='Variable to Display',
            info='Choose the variable to visualize.',
        )
        # Editable two-column table in which the user enters the hadiths to draw.
        hadith_selection = gr.Dataframe(
            headers=["Book_ID", "Hadith Number"],
            datatype=["number", "number"],
            row_count=5,
            col_count=(2, "fixed"),
        )
        btn_hadith = gr.Button('Visualize')
        selection_graph = gr.HTML()
        btn_hadith.click(
            fn=visualize_subTaraf,
            inputs=[hadith_selection, yyaxis],
            outputs=[selection_graph],
        )

demo.launch()