# Web-page residue captured with this file (not part of the program):
# FDSRashid's picture
# added book and author information to Taraf
# 5d223db verified
# raw
# history blame
# 7.54 kB
import gradio as gr
from pyvis.network import Network
import networkx as nx
import numpy as np
import pandas as pd
import os
from datasets import load_dataset
from datasets import Features
from datasets import Value
from datasets import Dataset
import matplotlib.pyplot as plt
import re
# Non-greedy regex capturing the text found between a pair of double quotes.
pattern = r'"(.*?)"'
# Access token for the Hugging Face Hub, read from the HF_token environment variable.
Secret_token = os.getenv('HF_token')

# Edge table (Basic_Edge_Information.csv) as a pandas DataFrame.
dataset = load_dataset('FDSRashid/hadith_info',data_files = 'Basic_Edge_Information.csv', token = Secret_token, split = 'train')
edge_info = dataset.to_pandas()

# Narrator biographies; the numeric-looking columns are read as strings first
# and converted to int below, after the problematic cells have been patched.
features = Features({'Rawi ID': Value('int32'), 'Famous Name': Value('string'), 'Narrator Rank': Value('string'), 'Number of Narrations': Value('string'), 'Generation': Value('string')})
narrator_bios = load_dataset("FDSRashid/hadith_info", data_files = 'Teacher_Bios.csv', token = Secret_token,features=features )
narrator_bios = narrator_bios['train'].to_pandas()

# Row 49845 is patched by hand. Its rank label is the Arabic for
# "Messenger of Allah"; the literal was previously stored as mojibake
# (UTF-8 bytes decoded with a single-byte code page) and is restored here.
narrator_bios.loc[49845, 'Narrator Rank'] = 'رسول الله'
# A placeholder 0 is written before the int cast and the real count after it.
# NOTE(review): presumably the raw cell for this row is not numeric - confirm.
narrator_bios.loc[49845, 'Number of Narrations'] = 0
narrator_bios['Number of Narrations'] = narrator_bios['Number of Narrations'].astype(int)
narrator_bios.loc[49845, 'Number of Narrations'] = 443471

# Missing generations become -1 so the column can be cast to int.
narrator_bios['Generation'] = narrator_bios['Generation'].replace([None], [-1])
narrator_bios['Generation'] = narrator_bios['Generation'].astype(int)
# Schema for All_Matns.csv: every column is read as a string.
features = Features({
    'matn': Value('string'),
    'taraf_ID': Value('string'),
    'bookid_hadithid': Value('string'),
})
dataset = load_dataset("FDSRashid/hadith_info", data_files='All_Matns.csv', token=Secret_token, features=features)

# Rows labelled 97550 and 307206 are discarded.
matn_info = dataset['train'].to_pandas().drop([97550, 307206])

# 'KeyAbsent' marks a matn without a taraf; it becomes -1 so the column is integral.
matn_info['taraf_ID'] = matn_info['taraf_ID'].replace('KeyAbsent', -1).astype(int)

# A disabled step used to sort by taraf_ID and renumber the unique taraf IDs
# consecutively (1, 2, 3, ...) into an integer 'taraf_ID_New' column.
def _parse_hadith_refs(raw):
    """Split the textual 'Hadiths' cell into a list of '_'-separated parts.

    The first and last characters of ``raw`` are stripped, the remainder is
    split on commas, and from every piece the first double-quoted token is
    taken and split on '_'.
    """
    parsed = []
    for piece in raw[1:-1].split(','):
        quoted = re.findall(pattern, piece)
        parsed.append(quoted[0].split("_"))
    return parsed


# Chain edges (isnad_info.csv) with the hadith references parsed per edge.
isnad_info = load_dataset('FDSRashid/hadith_info', token=Secret_token, data_files='isnad_info.csv', split='train').to_pandas()
isnad_info['Hadiths Cleaned'] = isnad_info['Hadiths'].apply(_parse_hadith_refs)

# Largest taraf ID present in the matn table.
taraf_max = matn_info['taraf_ID'].unique().max()

# Colormap used to derive node and edge colours.
cmap = plt.colormaps['cool']
# Book metadata (Books.csv) as a pandas DataFrame.
books = load_dataset('FDSRashid/Hadith_info', data_files='Books.csv', token = Secret_token)['train'].to_pandas()

# Attach book name, author and hadith number to every matn.
# 'bookid_hadithid' has the form '<book id>_<hadith number>'. The book id is
# resolved through an index built once, instead of filtering the whole `books`
# frame for every matn row. The first row per Book_ID wins, which matches the
# previous "take the first match" behaviour. A book id that is absent from
# `books` now yields NaN rather than raising IndexError.
_book_lookup = books.drop_duplicates('Book_ID').set_index('Book_ID')
_hadith_ref_parts = matn_info['bookid_hadithid'].str.split('_')
_book_ids = _hadith_ref_parts.str[0].astype(int)
matn_info['Book'] = _book_ids.map(_book_lookup['Book_Name'])
matn_info['Author'] = _book_ids.map(_book_lookup['Author'])
matn_info['Hadith Number'] = _hadith_ref_parts.str[1]
def value_to_hex(value):
    """Map ``value`` through the module colormap and return an '#RRGGBB' string.

    The red, green and blue components returned by ``cmap`` are scaled by 255
    and truncated to integers; any alpha component is ignored.
    """
    rgba = cmap(value)
    channels = [int(rgba[index] * 255) for index in range(3)]
    return "#" + "".join("{:02X}".format(channel) for channel in channels)
# Module-level tables available to the functions below: edge_info, matn_info, narrator_bios, isnad_info
def _end_narrator_of_hadith(hadith_ref):
    """Return ``(generation, famous_name)`` of the narrator ending one hadith's chain.

    ``hadith_ref`` is a 'bookid_hadithid' value split on '_'. Every edge of
    ``isnad_info`` whose 'Hadiths Cleaned' list contains it is put into a
    directed graph (Source -> Destination); the first node without outgoing
    edges is taken as the end of the chain. ``(None, None)`` is returned when
    no such node exists or when it has no row in ``narrator_bios``.
    """
    in_hadith = isnad_info['Hadiths Cleaned'].apply(lambda refs: hadith_ref in refs)
    edges = isnad_info[in_hadith][['Source', 'Destination']]
    graph = nx.from_pandas_edgelist(edges, source='Source', target='Destination', create_using=nx.DiGraph())
    sinks = [int(node) for node, out_degree in graph.out_degree() if out_degree == 0]
    if not sinks:
        return None, None
    bio = narrator_bios[narrator_bios['Rawi ID'] == sinks[0]]
    if bio.empty:
        return None, None
    return bio['Generation'].iloc[0], bio['Famous Name'].iloc[0]


def visualize_isnad(taraf_num, yaxis):
    """Build the narrator network and the hadith table for one taraf.

    Args:
        taraf_num: taraf ID to display; matched against ``matn_info['taraf_ID']``.
        yaxis: value of the "Variable to Display" dropdown. It is accepted for
            interface compatibility but not used by the computation.

    Returns:
        A 2-tuple ``(iframe_html, df)``: an ``<iframe>`` string embedding the
        pyvis network, and a DataFrame with one row per hadith of the taraf
        (columns 'Matn', 'Generation', 'Name', 'Book', 'Author',
        'Hadith Number').
    """
    # Select the taraf's rows once instead of re-filtering for every column.
    taraf_rows = matn_info[matn_info['taraf_ID'] == taraf_num]
    taraf_hadith_split = [ref.split('_') for ref in taraf_rows['bookid_hadithid'].to_list()]

    # One table row per hadith: text, end-of-chain narrator, and source book.
    lst_hadith = []
    hadith_records = zip(
        taraf_hadith_split,
        taraf_rows['matn'].to_list(),
        taraf_rows['Book'].to_list(),
        taraf_rows['Author'].to_list(),
        taraf_rows['Hadith Number'].to_list(),
    )
    for hadith_ref, matn, book, author, hadith_number in hadith_records:
        generation, name = _end_narrator_of_hadith(hadith_ref)
        lst_hadith.append([matn, generation, name, book, author, hadith_number])
    # The column list must match the six values appended per row.
    df = pd.DataFrame(lst_hadith, columns=['Matn', 'Generation', 'Name', 'Book', 'Author', 'Hadith Number'])

    # Edges that appear in at least one hadith of this taraf.
    in_taraf = isnad_info['Hadiths Cleaned'].apply(lambda refs: any(ref in refs for ref in taraf_hadith_split))
    # Copy so the added columns do not write into a slice of isnad_info.
    isnad_hadith = isnad_info[in_taraf][['Source', 'Destination']].copy()

    def _famous_names(rawi_id):
        # All 'Famous Name' values recorded for this narrator ID.
        return narrator_bios[narrator_bios['Rawi ID'] == int(rawi_id)]['Famous Name'].to_list()

    isnad_hadith['Teacher'] = isnad_hadith['Source'].map(_famous_names)
    isnad_hadith['Student'] = isnad_hadith['Destination'].map(_famous_names)

    # Keep only edges whose two endpoints each resolve to exactly one name.
    resolved = (isnad_hadith['Teacher'].apply(len) == 1) & (isnad_hadith['Student'].apply(len) == 1)
    filtered = isnad_hadith[resolved].copy()
    filtered['Student'] = filtered['Student'].apply(lambda names: names[0])
    filtered['Teacher'] = filtered['Teacher'].apply(lambda names: names[0])

    net = Network(directed =True)
    for _, row in filtered.iterrows():
        source = row['Teacher']
        target = row['Student']
        teacher_info = narrator_bios[narrator_bios['Rawi ID'] == int(row['Source'])]
        student_info = narrator_bios[narrator_bios['Rawi ID'] == int(row['Destination'])]
        isnad = isnad_info[(isnad_info['Source'] == row['Source']) & (isnad_info['Destination'] == row['Destination'])]
        teacher_narrations = teacher_info['Number of Narrations'].to_list()[0]
        student_narrations = student_info['Number of Narrations'].to_list()[0]
        hadith_count = isnad['Hadith Count'].to_list()[0]
        # NOTE(review): raw counts are passed to the colormap without
        # normalisation - confirm this is the intended colour scale.
        # Narrator 99999 is drawn as a larger black node. The ID is compared as
        # an integer, like every other ID lookup in this function.
        if int(row['Source']) == 99999:
            net.add_node(source, font = {'size':50, 'color': 'Black'}, color = '#000000')
        else:
            net.add_node(source, font = {'size':30, 'color': 'red'}, color = value_to_hex(teacher_narrations), label = f'{source} \n {teacher_info["Narrator Rank"].to_list()[0]}')
        net.add_node(target, font = {'size': 30, 'color': 'red'}, color = value_to_hex(student_narrations), label = f'{target} \n{student_info["Narrator Rank"].to_list()[0]}')
        net.add_edge(source, target, color = value_to_hex(int(hadith_count)), label = f"{hadith_count}")
    net.barnes_hut(gravity=-5000, central_gravity=0.3, spring_length=200)

    html = net.generate_html()
    # The document is embedded in a single-quoted srcdoc attribute, so it must
    # not contain single quotes itself.
    html = html.replace("'", "\"")
    iframe = f"""<iframe style="width: 100%; height: 600px;margin:0 auto" name="result" allow="midi; geolocation; microphone; camera;
display-capture; encrypted-media;" sandbox="allow-modals allow-forms
allow-scripts allow-same-origin allow-popups
allow-top-navigation-by-user-activation allow-downloads" allowfullscreen=""
allowpaymentrequest="" frameborder="0" srcdoc='{html}'></iframe>"""
    return iframe, df
# Gradio UI: a dropdown, a taraf slider and a submit button; the network view
# and the hadith table are rendered below the button.
with gr.Blocks() as demo:
    Yaxis = gr.Dropdown(
        choices=['Tarafs', 'Hadiths', 'Isnads', 'Books'],
        value='Tarafs',
        label='Variable to Display',
        info='Choose the variable to visualize.',
    )
    taraf_number = gr.Slider(
        1,
        taraf_max,
        value=10000,
        label="Taraf",
        info="Choose the Taraf to Input",
        step=1,
    )
    btn = gr.Button('Submit')
    # Output components are created after the button, in the same order as before.
    network_view = gr.HTML()
    hadith_table = gr.DataFrame()
    btn.click(fn=visualize_isnad, inputs=[taraf_number, Yaxis], outputs=[network_view, hadith_table])

demo.launch()