File size: 11,516 Bytes
c71a7d8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a116d30
 
e072981
448861d
c71a7d8
 
 
 
261d935
c71a7d8
 
 
 
 
a116d30
 
261d935
 
c71a7d8
 
 
8edff27
c71a7d8
cffe818
 
 
 
 
 
a116d30
448861d
c71a7d8
a116d30
5d223db
a116d30
c71a7d8
 
5d223db
 
 
b8be8d2
 
 
5d223db
c71a7d8
 
 
 
 
 
 
5d223db
 
261d935
22de721
5d223db
3a572d4
261d935
 
a116d30
 
fa12504
261d935
 
 
 
 
5d223db
22de721
c71a7d8
a116d30
 
261d935
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
02d71bd
261d935
 
 
 
c71a7d8
 
 
261d935
c71a7d8
3291a65
 
 
 
02d71bd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a116d30
c71a7d8
 
3291a65
02d71bd
3291a65
 
6cfed4b
3291a65
 
 
6cfed4b
02d71bd
 
 
 
 
 
 
 
3291a65
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
import gradio as gr
from pyvis.network import Network
import networkx as nx
import numpy as np
import pandas as pd
import os
from datasets import load_dataset
from datasets import Features
from datasets import Value
from datasets import Dataset
import matplotlib.pyplot as plt

import re

# Regex that lazily captures the text between a pair of double quotes.
# Used further down to pull the quoted "Book_Hadith" keys out of the
# isnad_info 'Hadiths' column.
pattern = r'"(.*?)"'

# Hugging Face access token, read from the HF_token environment variable and
# passed to every load_dataset call in this file (None when the variable is unset).
Secret_token = os.getenv('HF_token')

# Edge table from Basic_Edge_Information.csv; split='train' returns the Dataset directly.
dataset = load_dataset('FDSRashid/hadith_info',data_files = 'Basic_Edge_Information.csv', token = Secret_token, split = 'train')

edge_info = dataset.to_pandas()

# Narrator biographies. Every column except the ID is read as a string so the
# irregular values can be patched here before the numeric casts below.
features = Features({'Rawi ID': Value('int32'), 'Famous Name': Value('string'), 'Narrator Rank': Value('string'), 'Number of Narrations': Value('string'), 'Generation': Value('string')})
narrator_bios = load_dataset("FDSRashid/hadith_info", data_files='Teacher_Bios.csv', token=Secret_token, features=features)
narrator_bios = narrator_bios['train'].to_pandas()
# Row 49845 is patched by hand. The rank literal was stored as mis-decoded
# UTF-8; it is restored here to the Arabic text those bytes encode.
narrator_bios.loc[49845, 'Narrator Rank'] = 'رسول الله'
# Give the row its narration count before the cast so the whole column converts
# to int in one step.
narrator_bios.loc[49845, 'Number of Narrations'] = 327512
narrator_bios['Number of Narrations'] = narrator_bios['Number of Narrations'].astype(int)
# 8125 Narrators have no Generation, listed in dataset as None; -1 marks them
# so the column can be cast to int.
narrator_bios['Generation'] = narrator_bios['Generation'].replace([None], [-1])
narrator_bios['Generation'] = narrator_bios['Generation'].astype(int)

# Hadith texts (matn). All three columns are read as strings; taraf_ID is
# converted to int below once its non-numeric marker has been replaced.
features = Features({'matn': Value('string'), 'taraf_ID': Value('string'), 'bookid_hadithid': Value('string')})

dataset = load_dataset("FDSRashid/hadith_info", data_files = 'All_Matns.csv',token = Secret_token, features = features)
matn_info = dataset['train'].to_pandas()
# Remove the rows with index labels 97550 and 307206.
# NOTE(review): the reason for dropping these two rows is not recorded here - confirm.
matn_info = matn_info.drop(97550)
matn_info = matn_info.drop(307206)
# 'KeyAbsent' marks a hadith without a taraf; map it to -1 so the column can be cast to int.
matn_info['taraf_ID'] = matn_info['taraf_ID'].replace('KeyAbsent', -1)

matn_info['taraf_ID'] = matn_info['taraf_ID'].astype(int)

# Each row of isnad_info is one edge (Source -> Destination).
# Its 'Hadiths' column is a string structured like {"BookNum_HadithNum", ...}.
isnad_info = load_dataset('FDSRashid/hadith_info',token = Secret_token, data_files = 'isnad_info.csv', split = 'train').to_pandas()
# Strip the first and last character, split on commas, take the first
# double-quoted substring of each piece and split it on "_".
isnad_info['Hadiths Cleaned'] = isnad_info['Hadiths'].apply(lambda x: [re.findall(pattern, string)[0].split("_") for string in x[1:-1].split(',')])
# 'Hadiths Cleaned' is a list of lists; each sub-list is [Book ID, Hadith ID] as strings.
# Largest taraf ID present in matn_info; used as the upper bound of the Taraf sliders.
taraf_max = np.max(matn_info['taraf_ID'].unique())
# 'Tarafs' is parsed the same way (outer characters stripped, comma-separated)
# into a numpy array of ints per edge.
isnad_info['Tarafs Cleaned'] = isnad_info['Tarafs'].apply(lambda x: np.array([int(i.strip(' ')) for i in x[1:-1].split(',')]))

# Matplotlib 'cool' colormap; value_to_hex() uses it to colour nodes and edges.
cmap = plt.colormaps['cool']

# Book metadata.
# NOTE(review): the repo id is spelled 'Hadith_info' here but 'hadith_info' in
# the other load_dataset calls - confirm the Hub resolves both spellings.
books = load_dataset('FDSRashid/Hadith_info', data_files='Books.csv', token = Secret_token)['train'].to_pandas()

# Split the "Book_Hadith" key into two integer columns.
matn_info['Book ID'] = matn_info['bookid_hadithid'].apply(lambda x: int(x.split('_')[0]))
matn_info['Hadith Number'] = matn_info['bookid_hadithid'].apply(lambda x: int(x.split('_')[1]))
# DataFrame.join(on=...) matches matn_info['Book ID'] against the *index* of books.
# NOTE(review): this presumes the row position in Books.csv equals the book id - confirm.
matn_info = matn_info.join(books, on='Book ID')

def value_to_hex(value):
    """Return the colour that the module-level ``cmap`` assigns to *value* as ``#RRGGBB``.

    The first three channels returned by ``cmap(value)`` are scaled by 255 and
    truncated to integers; the alpha channel is ignored. Hex digits are upper case.
    """
    # NOTE(review): callers pass integer counts; matplotlib colormaps appear to
    # treat ints as lookup-table indices rather than 0-1 fractions - confirm intended.
    red, green, blue = (int(channel * 255) for channel in cmap(value)[:3])
    return f"#{red:02X}{green:02X}{blue:02X}"

# Module-level DataFrames loaded above: edge_info, matn_info, narrator_bios, isnad_info

def visualize_isnad(taraf_num, yaxis):
    """Render the isnad network of one taraf and tabulate its hadiths.

    Args:
        taraf_num: Taraf ID, matched against ``matn_info['taraf_ID']`` and
            against the arrays in ``isnad_info['Tarafs Cleaned']``.
        yaxis: Prefix of the ``isnad_info`` column ``f'{yaxis} Count'`` whose
            value colours and labels every edge.

    Returns:
        A ``(html, df)`` tuple. ``html`` is an ``<iframe>`` string embedding the
        pyvis network. ``df`` has one row per hadith of the taraf: its matn,
        the generation and famous name of the narrator its chain ends with,
        and the book name, author and hadith number.

    Raises:
        IndexError: If a hadith's edges contain no narrator with out-degree 0,
            or a narrator ID has no row in ``narrator_bios``.
    """
    # Filter once; the original recomputed this mask for every column it read.
    taraf_rows = matn_info[matn_info['taraf_ID'] == taraf_num]

    # For each hadith of the taraf, find the narrator its chain ends with.
    lst_hadith = []
    hadith_fields = zip(
        taraf_rows['bookid_hadithid'],
        taraf_rows['matn'],
        taraf_rows['Book_Name'],
        taraf_rows['Author'],
        taraf_rows['Hadith Number'],
    )
    for hadith_key, matn, book, author, hadith_number in hadith_fields:
        book_hadith = hadith_key.split('_')
        # Edges whose cleaned hadith list contains this [book id, hadith id] pair.
        carries_hadith = isnad_info['Hadiths Cleaned'].apply(lambda pairs: book_hadith in pairs)
        hadith_edges = isnad_info[carries_hadith][['Source', 'Destination']]
        G = nx.from_pandas_edgelist(hadith_edges, source='Source', target='Destination', create_using=nx.DiGraph())
        # The end transmitter is the first node that passes the hadith to nobody.
        node = [int(n) for n, d in G.out_degree() if d == 0][0]
        node_bio = narrator_bios[narrator_bios['Rawi ID'] == node]
        lst_hadith.append([
            matn,
            node_bio['Generation'].iloc[0],
            node_bio['Famous Name'].iloc[0],
            book,
            author,
            hadith_number,
        ])
    df = pd.DataFrame(lst_hadith, columns=['Matn', 'Generation', 'Name', 'Book_Name', 'Author', 'Hadith Number'])

    # Every edge that belongs to the taraf.
    in_taraf = isnad_info['Tarafs Cleaned'].apply(lambda tarafs: taraf_num in tarafs)
    # .copy() so the name columns are added to an independent frame rather than
    # to a slice of isnad_info (avoids chained-assignment warnings).
    isnad_hadith = isnad_info[in_taraf][['Source', 'Destination']].copy()

    def famous_names(rawi_id):
        # All 'Famous Name' values recorded for this narrator ID.
        return narrator_bios[narrator_bios['Rawi ID'] == int(rawi_id)]['Famous Name'].to_list()

    # Column-wise apply replaces the deprecated DataFrame.applymap.
    isnad_hadith['Student'] = isnad_hadith['Destination'].apply(famous_names)
    isnad_hadith['Teacher'] = isnad_hadith['Source'].apply(famous_names)

    # Keep only edges where both ends resolve to exactly one name.
    resolved = (isnad_hadith['Teacher'].apply(len) == 1) & (isnad_hadith['Student'].apply(len) == 1)
    filtered = isnad_hadith[resolved].copy()
    filtered['Student'] = filtered['Student'].apply(lambda names: names[0])
    filtered['Teacher'] = filtered['Teacher'].apply(lambda names: names[0])

    net = Network(directed=True)
    for _, row in filtered.iterrows():
        source = row['Teacher']
        target = row['Student']
        teacher_info = narrator_bios[narrator_bios['Rawi ID'] == int(row['Source'])]
        student_info = narrator_bios[narrator_bios['Rawi ID'] == int(row['Destination'])]
        isnad = isnad_info[(isnad_info['Source'] == row['Source']) & (isnad_info['Destination'] == row['Destination'])]
        teacher_narrations = teacher_info['Number of Narrations'].to_list()[0]
        student_narrations = student_info['Number of Narrations'].to_list()[0]
        edge_count = isnad[f'{yaxis} Count'].to_list()[0]
        # Compare numerically: the original tested against the string '99999',
        # which never matches when the Source column holds integers.
        if int(row['Source']) == 99999:
            net.add_node(source, font={'size': 50, 'color': 'Black'}, color='#000000')
        else:
            net.add_node(source, font={'size': 30, 'color': 'red'}, color=value_to_hex(teacher_narrations), label=f'{source} \n {teacher_info["Narrator Rank"].to_list()[0]}')
        net.add_node(target, font={'size': 30, 'color': 'red'}, color=value_to_hex(student_narrations), label=f'{target} \n{student_info["Narrator Rank"].to_list()[0]}')
        net.add_edge(source, target, color=value_to_hex(int(edge_count)), label=f"{edge_count}")
    net.barnes_hut(gravity=-5000, central_gravity=0.3, spring_length=200)
    html = net.generate_html()
    # The page is embedded in a single-quoted srcdoc attribute, so every single
    # quote inside it is swapped for a double quote.
    html = html.replace("'", "\"")
    return f"""<iframe style="width: 100%; height: 600px;margin:0 auto" name="result" allow="midi; geolocation; microphone; camera; 
  display-capture; encrypted-media;" sandbox="allow-modals allow-forms 
  allow-scripts allow-same-origin allow-popups 
  allow-top-navigation-by-user-activation allow-downloads" allowfullscreen="" 
  allowpaymentrequest="" frameborder="0" srcdoc='{html}'></iframe>""" , df

def taraf_booknum(taraf_num):
    """Return the matn, book ID and hadith number of every hadith in a taraf.

    Args:
        taraf_num: Taraf ID compared against ``matn_info['taraf_ID']``.

    Returns:
        A DataFrame with the columns 'matn', 'Book ID' and 'Hadith Number',
        restricted to the rows of ``matn_info`` whose taraf ID equals *taraf_num*.
    """
    wanted_columns = ['matn', 'Book ID', 'Hadith Number']
    in_taraf = matn_info['taraf_ID'] == taraf_num
    return matn_info.loc[in_taraf, wanted_columns]

 def visualize_subTaraf(df):
     df['bookid_hadithid'] = df['Book ID'].astype(str) + '_' + df['Hadith Number'].astype(str)
     hadith = matn_info[matn_info['bookid_hadithid'].isin(df['bookid_hadithid'])]
     taraf_hadith_split = [i.split('_') for i in hadith['bookid_hadithid'].to_list()]
     hadith_cleaned = isnad_info['Hadiths Cleaned'].apply(lambda x: any(i in x for i in taraf_hadith_split))
     isnad_hadith = isnad_info[hadith_cleaned][['Source', 'Destination']]
     narrators = isnad_hadith.applymap(lambda x: narrator_bios[narrator_bios['Rawi ID'] == int(x)]['Famous Name'].to_list()).rename(columns={"Source": "Teacher", "Destination": "Student"})
     isnad_hadith["Student"] = narrators['Student']
     isnad_hadith["Teacher"] = narrators['Teacher']
     filtered = isnad_hadith[(isnad_hadith['Teacher'].apply(lambda x: len(x)) == 1) & (isnad_hadith['Student'].apply(lambda x: len(x)) == 1)]
     filtered['Student'] = filtered['Student'].apply(lambda x: x[0])
     filtered['Teacher'] = filtered['Teacher'].apply(lambda x: x[0])
     net = Network(directed =True)
     for _, row in filtered.iterrows():
         source = row['Teacher']
         target = row['Student']
         teacher_info = narrator_bios[narrator_bios['Rawi ID'] == int(row['Source'])]
         student_info = narrator_bios[narrator_bios['Rawi ID'] == int(row['Destination'])]
         isnad = isnad_info[(isnad_info['Source'] == row['Source']) & (isnad_info['Destination'] == row['Destination'])]
         teacher_narrations = teacher_info['Number of Narrations'].to_list()[0]
         student_narrations = student_info['Number of Narrations'].to_list()[0]
         if row['Source'] == '99999':
             net.add_node(source, font = {'size':50, 'color': 'Black'}, color = '#000000')
         else:
             net.add_node(source, font = {'size':30, 'color': 'red'}, color = value_to_hex(teacher_narrations), label = f'{source} \n {teacher_info["Narrator Rank"].to_list()[0]}')
        net.add_node(target, font = {'size': 30, 'color': 'red'}, color = value_to_hex(student_narrations), label = f'{target} \n{student_info["Narrator Rank"].to_list()[0]}')
        net.add_edge(source, target, color = value_to_hex(int(isnad[f'{yaxis} Count'].to_list()[0])), label = f"{isnad[f'{yaxis} Count'].to_list()[0]}")
     net.barnes_hut(gravity=-5000, central_gravity=0.3, spring_length=200)
     html = net.generate_html()
     html = html.replace("'", "\"")
     return f"""<iframe style="width: 100%; height: 600px;margin:0 auto" name="result" allow="midi; geolocation; microphone; camera; 
    display-capture; encrypted-media;" sandbox="allow-modals allow-forms 
    allow-scripts allow-same-origin allow-popups 
    allow-top-navigation-by-user-activation allow-downloads" allowfullscreen="" 
    allowpaymentrequest="" frameborder="0" srcdoc='{html}'></iframe>"""     


# Gradio UI: three tabs, each wiring one button to one of the functions above.
with gr.Blocks() as demo:
    # Tab 1: draw the whole network of a taraf and list its hadiths.
    with gr.Tab("Whole Taraf Visualizer"):
        Yaxis = gr.Dropdown(choices = ['Taraf', 'Hadith', 'Isnad', 'Book'], value = 'Taraf', label = 'Variable to Display', info = 'Choose the variable to visualize.')
        taraf_number = gr.Slider(1,taraf_max , value=10000, label="Taraf", info="Choose the Taraf to Input", step = 1)
        btn = gr.Button('Submit')
        # visualize_isnad returns (html, dataframe), hence the two outputs.
        btn.click(fn = visualize_isnad, inputs = [taraf_number, Yaxis], outputs = [gr.HTML(), gr.DataFrame()])
    # Tab 2: look up the book ID and hadith number of every hadith in a taraf.
    with gr.Tab("Book and Hadith Number Retriever"):
        taraf_num = gr.Slider(1,taraf_max , value=10000, label="Taraf", info="Choose the Taraf to Input", step = 1)
        btn_num = gr.Button('Retrieve')
        btn_num.click(fn=taraf_booknum, inputs = [taraf_num], outputs= [gr.DataFrame()])
    # Tab 3: draw the network of a hand-entered list of (Book ID, Hadith Number) pairs.
    with gr.Tab('Select Hadith Isnad Visualizer'):
        hadith_selection =  gr.Dataframe(
            headers=["Book ID", "Hadith Number"],
            datatype=["number", "number"],
            row_count=5,
            col_count=(2, "fixed"))
        btn_hadith = gr.Button('Search')
        # This tab supplies one DataFrame and expects one HTML string, which is
        # visualize_subTaraf's contract. The original wired visualize_isnad
        # here, which takes two arguments and returns two values.
        btn_hadith.click(fn=visualize_subTaraf, inputs=[hadith_selection], outputs=[gr.HTML()])
demo.launch()