File size: 7,931 Bytes
c71a7d8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e072981
448861d
c71a7d8
 
 
 
261d935
c71a7d8
 
 
 
 
 
261d935
 
c71a7d8
 
 
 
8edff27
c71a7d8
cffe818
 
 
 
 
 
5d223db
 
 
 
 
69e22b7
cffe818
 
448861d
c71a7d8
 
5d223db
c71a7d8
 
5d223db
 
 
b8be8d2
 
 
5d223db
c71a7d8
 
 
 
 
 
 
5d223db
 
261d935
22de721
5d223db
3a572d4
261d935
 
fa12504
261d935
 
 
 
 
5d223db
22de721
c71a7d8
261d935
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c71a7d8
 
 
261d935
c71a7d8
3291a65
 
 
 
c71a7d8
 
3291a65
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
import gradio as gr
from pyvis.network import Network
import networkx as nx
import numpy as np
import pandas as pd
import os
from datasets import load_dataset
from datasets import Features
from datasets import Value
from datasets import Dataset
import matplotlib.pyplot as plt

import re

# Regex that captures (non-greedily) the text between a pair of double quotes.
pattern = r'"(.*?)"'
# Hugging Face access token read from the HF_token environment variable
# (os.getenv returns None when the variable is unset).
Secret_token = os.getenv('HF_token')

# Load the 'train' split of Basic_Edge_Information.csv from the
# FDSRashid/hadith_info dataset repository.
dataset = load_dataset('FDSRashid/hadith_info',data_files = 'Basic_Edge_Information.csv', token = Secret_token, split = 'train')

# Keep the edge table as a pandas DataFrame.
edge_info = dataset.to_pandas()

# Explicit schema for Teacher_Bios.csv: only 'Rawi ID' is read as an integer;
# every other column is read as a string and converted below.
features = Features({
    'Rawi ID': Value('int32'),
    'Famous Name': Value('string'),
    'Narrator Rank': Value('string'),
    'Number of Narrations': Value('string'),
    'Generation': Value('string'),
})
narrator_bios = load_dataset("FDSRashid/hadith_info", data_files='Teacher_Bios.csv', token=Secret_token, features=features)
narrator_bios = narrator_bios['train'].to_pandas()

# Manual override for the row with index label 49845.
# NOTE(review): presumably this is the Prophet's entry (Rawi ID 99999 is
# special-cased in visualize_isnad) -- confirm against the CSV.
# The rank literal was mojibake (UTF-8 Arabic decoded with the wrong codec);
# it is restored to the intended Arabic text.
narrator_bios.loc[49845, 'Narrator Rank'] = 'رسول الله'

# Put a placeholder in that row before casting the whole column to int, then
# write the real count afterwards.
# NOTE(review): presumably the raw CSV value in this row is not an integer
# string, which is why it is overwritten before the cast -- confirm.
narrator_bios.loc[49845, 'Number of Narrations'] = 0
narrator_bios['Number of Narrations'] = narrator_bios['Number of Narrations'].astype(int)
narrator_bios.loc[49845, 'Number of Narrations'] = 443471

# Missing generations become -1 so the column can be stored as integers.
narrator_bios['Generation'] = narrator_bios['Generation'].replace([None], [-1])
narrator_bios['Generation'] = narrator_bios['Generation'].astype(int)


# Explicit schema for All_Matns.csv: all three columns are read as strings.
features = Features({'matn': Value('string'), 'taraf_ID': Value('string'), 'bookid_hadithid': Value('string')})

dataset = load_dataset("FDSRashid/hadith_info", data_files = 'All_Matns.csv',token = Secret_token, features = features)
matn_info = dataset['train'].to_pandas()
# Drop the rows with index labels 97550 and 307206.
# NOTE(review): presumably malformed rows in the CSV -- the reason is not
# visible here; confirm before changing.
matn_info = matn_info.drop(97550)
matn_info = matn_info.drop(307206)
# Map the 'KeyAbsent' sentinel to -1 so the column can be cast to int below.
matn_info['taraf_ID'] = matn_info['taraf_ID'].replace('KeyAbsent', -1)


matn_info['taraf_ID'] = matn_info['taraf_ID'].astype(int)
# Disabled code: renumber the distinct taraf IDs consecutively (1, 2, ...) in
# sorted order into a new 'taraf_ID_New' column.
# matn_info = matn_info.sort_values('taraf_ID')
# tarafs = matn_info['taraf_ID'].unique()
# for i, taraf in enumerate(tarafs):
#     matn_info.loc[matn_info['taraf_ID'] == taraf, 'taraf_ID_New'] = i + 1 # assign the new consecutive ID to every row of this taraf
# matn_info['taraf_ID_New'] = matn_info['taraf_ID_New'].astype(int)



def _parse_hadith_refs(raw):
    """Turn one raw 'Hadiths' cell into a list of split hadith keys.

    The first and last characters of *raw* are stripped, the remainder is
    split on commas, and from each piece the first double-quoted substring is
    taken and split on '_'.
    NOTE(review): presumably each cell is a stringified list of
    'bookid_hadithid' keys -- confirm against isnad_info.csv.
    """
    pieces = raw[1:-1].split(',')
    return [re.findall(pattern, piece)[0].split("_") for piece in pieces]


def _int_field(position):
    """Build a function returning the integer at *position* of an '_'-separated key."""
    def extract(key):
        return int(key.split('_')[position])
    return extract


# Edge table of the isnad network, with the parsed hadith keys added as an
# extra column.
isnad_info = load_dataset(
    'FDSRashid/hadith_info',
    token=Secret_token,
    data_files='isnad_info.csv',
    split='train',
).to_pandas()
isnad_info['Hadiths Cleaned'] = isnad_info['Hadiths'].apply(_parse_hadith_refs)

# Largest taraf ID present; used as the upper bound of the UI sliders.
taraf_max = np.max(matn_info['taraf_ID'].unique())

# Colormap used by value_to_hex.
cmap = plt.colormaps['cool']

books = load_dataset(
    'FDSRashid/Hadith_info',
    data_files='Books.csv',
    token=Secret_token,
)['train'].to_pandas()

# Split 'bookid_hadithid' into its two integer parts, then attach the book
# metadata. DataFrame.join with on='Book ID' matches matn_info['Book ID']
# against the index of `books`.
matn_info['Book ID'] = matn_info['bookid_hadithid'].apply(_int_field(0))
matn_info['Hadith Number'] = matn_info['bookid_hadithid'].apply(_int_field(1))
matn_info = matn_info.join(books, on='Book ID')

def value_to_hex(value):
    """Return the module colormap's color for *value* as an '#RRGGBB' string.

    The RGBA tuple produced by ``cmap(value)`` is scaled to 0-255 per channel
    (truncating toward zero) and the alpha channel is dropped.
    NOTE(review): callers in this file pass raw integer counts; matplotlib
    treats integers as lookup-table indices rather than 0-1 fractions --
    confirm this is the intended scaling.
    """
    red, green, blue = (int(channel * 255) for channel in cmap(value)[:3])
    return f"#{red:02X}{green:02X}{blue:02X}"

# Module-level DataFrames available to the functions below: edge_info, matn_info, narrator_bios, isnad_info

def _famous_names(rawi_id):
    """Return every 'Famous Name' in narrator_bios whose 'Rawi ID' equals *rawi_id*."""
    return narrator_bios[narrator_bios['Rawi ID'] == int(rawi_id)]['Famous Name'].to_list()


def _chain_end_bio(edges):
    """Return (Generation, Famous Name) of the first zero-out-degree node of *edges*.

    *edges* is a DataFrame with 'Source' and 'Destination' columns. They are
    loaded into a directed graph and the first node without outgoing edges is
    looked up in narrator_bios. Returns (None, None) when the graph has no
    such node (e.g. no edges at all) or the node has no bio row, instead of
    raising IndexError.
    """
    graph = nx.from_pandas_edgelist(edges, source='Source', target='Destination', create_using=nx.DiGraph())
    sinks = [int(n) for n, out_degree in graph.out_degree() if out_degree == 0]
    if not sinks:
        return None, None
    bio = narrator_bios[narrator_bios['Rawi ID'] == sinks[0]]
    if bio.empty:
        return None, None
    return bio['Generation'].iloc[0], bio['Famous Name'].iloc[0]


def visualize_isnad(taraf_num, yaxis):
    """Build the isnad network and the hadith summary table for one taraf.

    Args:
        taraf_num: taraf ID; rows of matn_info whose 'taraf_ID' equals it are
            selected.
        yaxis: value of the UI dropdown. It is accepted so the Gradio wiring
            keeps working but is not used by this function.

    Returns:
        A tuple ``(iframe_html, table)``:
        * ``iframe_html`` -- an ``<iframe>`` string whose ``srcdoc`` holds the
          pyvis-generated network page;
        * ``table`` -- a DataFrame with columns 'Matn', 'Generation', 'Name',
          'Book_Name', 'Author', 'Hadith Number', one row per hadith of the
          taraf. 'Generation' and 'Name' describe the node with no outgoing
          edge in that hadith's chain, or are None when none can be found.
    """
    # Select the taraf's rows once instead of re-filtering per column.
    taraf_rows = matn_info[matn_info['taraf_ID'] == taraf_num]
    hadith_keys = [key.split('_') for key in taraf_rows['bookid_hadithid'].to_list()]

    # Single pass over isnad_info: keep the edges that mention any hadith of
    # the taraf. The per-hadith lookups below then scan only this subset.
    # astype(bool) keeps the mask a valid boolean indexer even when empty.
    in_taraf = isnad_info['Hadiths Cleaned'].apply(
        lambda refs: any(key in refs for key in hadith_keys)
    ).astype(bool)
    taraf_edges = isnad_info[in_taraf]

    records = []
    per_hadith = zip(
        hadith_keys,
        taraf_rows['matn'].to_list(),
        taraf_rows['Book_Name'].to_list(),
        taraf_rows['Author'].to_list(),
        taraf_rows['Hadith Number'].to_list(),
    )
    for key, matn, book, author, number in per_hadith:
        in_hadith = taraf_edges['Hadiths Cleaned'].apply(lambda refs, key=key: key in refs).astype(bool)
        generation, name = _chain_end_bio(taraf_edges[in_hadith][['Source', 'Destination']])
        records.append([matn, generation, name, book, author, number])
    df = pd.DataFrame(records, columns=['Matn', 'Generation', 'Name', 'Book_Name', 'Author', 'Hadith Number'])

    # Work on explicit copies so the column assignments below never write
    # into a slice of isnad_info.
    edges = taraf_edges[['Source', 'Destination']].copy()
    edges['Teacher'] = edges['Source'].apply(_famous_names)
    edges['Student'] = edges['Destination'].apply(_famous_names)
    # Keep only edges whose two endpoints each resolve to exactly one bio row.
    resolved = (edges['Teacher'].apply(len) == 1) & (edges['Student'].apply(len) == 1)
    edges = edges[resolved].copy()
    edges['Teacher'] = edges['Teacher'].apply(lambda names: names[0])
    edges['Student'] = edges['Student'].apply(lambda names: names[0])

    net = Network(directed =True)
    for _, edge in edges.iterrows():
        teacher = edge['Teacher']
        student = edge['Student']
        teacher_bio = narrator_bios[narrator_bios['Rawi ID'] == int(edge['Source'])]
        student_bio = narrator_bios[narrator_bios['Rawi ID'] == int(edge['Destination'])]
        # First isnad_info row for this (Source, Destination) pair.
        link = isnad_info[(isnad_info['Source'] == edge['Source']) & (isnad_info['Destination'] == edge['Destination'])]
        hadith_count = link['Hadith Count'].to_list()[0]
        teacher_narrations = teacher_bio['Number of Narrations'].to_list()[0]
        student_narrations = student_bio['Number of Narrations'].to_list()[0]
        # Compare as integers: the ID is cast with int() everywhere else, and
        # a string comparison never matches when the column is numeric.
        if int(edge['Source']) == 99999:
            net.add_node(teacher, font = {'size':50, 'color': 'Black'}, color = '#000000')
        else:
            net.add_node(teacher, font = {'size':30, 'color': 'red'}, color = value_to_hex(teacher_narrations), label = f'{teacher} \n {teacher_bio["Narrator Rank"].to_list()[0]}')
        net.add_node(student, font = {'size': 30, 'color': 'red'}, color = value_to_hex(student_narrations), label = f'{student} \n{student_bio["Narrator Rank"].to_list()[0]}')
        net.add_edge(teacher, student, color = value_to_hex(int(hadith_count)), label = f"{hadith_count}")
    net.barnes_hut(gravity=-5000, central_gravity=0.3, spring_length=200)
    page = net.generate_html()
    # The page is embedded in a single-quoted srcdoc attribute, so every
    # single quote in it is turned into a double quote.
    # NOTE(review): this rewrites quotes inside the page's scripts as well;
    # confirm no generated content depends on single quotes.
    page = page.replace("'", "\"")
    iframe = f"""<iframe style="width: 100%; height: 600px;margin:0 auto" name="result" allow="midi; geolocation; microphone; camera; 
  display-capture; encrypted-media;" sandbox="allow-modals allow-forms 
  allow-scripts allow-same-origin allow-popups 
  allow-top-navigation-by-user-activation allow-downloads" allowfullscreen="" 
  allowpaymentrequest="" frameborder="0" srcdoc='{page}'></iframe>"""
    return iframe, df

def taraf_booknum(taraf_num):
    """Return the 'matn', 'Book ID' and 'Hadith Number' columns of one taraf.

    Rows are those of matn_info whose 'taraf_ID' equals *taraf_num*.
    """
    wanted_columns = ['matn', 'Book ID', 'Hadith Number']
    in_taraf = matn_info['taraf_ID'] == taraf_num
    return matn_info[in_taraf][wanted_columns]


# Gradio UI: one tab drives visualize_isnad, the other drives taraf_booknum.
with gr.Blocks() as demo:
    with gr.Tab("Whole Taraf Visualizer"):
        Yaxis = gr.Dropdown(
            choices=['Tarafs', 'Hadiths', 'Isnads', 'Books'],
            value='Tarafs',
            label='Variable to Display',
            info='Choose the variable to visualize.',
        )
        taraf_number = gr.Slider(
            1,
            taraf_max,
            value=10000,
            label="Taraf",
            info="Choose the Taraf to Input",
            step=1,
        )
        btn = gr.Button('Submit')
    with gr.Tab("Book and Hadith Number Retriever"):
        taraf_num = gr.Slider(
            1,
            taraf_max,
            value=10000,
            label="Taraf",
            info="Choose the Taraf to Input",
            step=1,
        )
        btn_num = gr.Button('Retrieve')

    # Output components are created inside the Blocks context but outside
    # both Tab contexts, in the same order as before.
    isnad_view = gr.HTML()
    isnad_table = gr.DataFrame()
    btn.click(
        fn=visualize_isnad,
        inputs=[taraf_number, Yaxis],
        outputs=[isnad_view, isnad_table],
    )

    booknum_table = gr.DataFrame()
    btn_num.click(
        fn=taraf_booknum,
        inputs=[taraf_num],
        outputs=[booknum_table],
    )
demo.launch()