Spaces:

HikmaLabs
/

Isnad_Taraf_Visualizer

Sleeping

App Files Files Community

FDSRashid committed on Jul 11, 2024

Commit

a116d30

verified ·

1 Parent(s): 6cfed4b

Update app.py

Browse files

Files changed (1) hide show

app.py +15 -12

app.py CHANGED Viewed

@@ -13,6 +13,8 @@ import matplotlib.pyplot as plt
 import re
 pattern = r'"(.*?)"'
 Secret_token = os.getenv('HF_token')
 dataset = load_dataset('FDSRashid/hadith_info',data_files = 'Basic_Edge_Information.csv', token = Secret_token, split = 'train')
@@ -25,11 +27,11 @@ narrator_bios = narrator_bios['train'].to_pandas()
 narrator_bios.loc[49845, 'Narrator Rank'] = 'رسول الله'
 narrator_bios.loc[49845, 'Number of Narrations'] = 0
 narrator_bios['Number of Narrations'] = narrator_bios['Number of Narrations'].astype(int)
-narrator_bios.loc[49845, 'Number of Narrations'] = 443471
 narrator_bios['Generation'] = narrator_bios['Generation'].replace([None], [-1])
 narrator_bios['Generation'] = narrator_bios['Generation'].astype(int)
 features = Features({'matn': Value('string'), 'taraf_ID': Value('string'), 'bookid_hadithid': Value('string')})
 dataset = load_dataset("FDSRashid/hadith_info", data_files = 'All_Matns.csv',token = Secret_token, features = features)
@@ -38,20 +40,14 @@ matn_info = matn_info.drop(97550)
 matn_info = matn_info.drop(307206)
 matn_info['taraf_ID'] = matn_info['taraf_ID'].replace('KeyAbsent', -1)
 matn_info['taraf_ID'] = matn_info['taraf_ID'].astype(int)
-# matn_info = matn_info.sort_values('taraf_ID')
-# tarafs = matn_info['taraf_ID'].unique()
-# for i, taraf in enumerate(tarafs):
-#     matn_info.loc[matn_info['taraf_ID'] == taraf, 'taraf_ID_New'] = i + 1 # Replace 'a' with 'e' in column 'C' where the condition is met
-# matn_info['taraf_ID_New'] = matn_info['taraf_ID_New'].astype(int)
 isnad_info = load_dataset('FDSRashid/hadith_info',token = Secret_token, data_files = 'isnad_info.csv', split = 'train').to_pandas()
 isnad_info['Hadiths Cleaned'] = isnad_info['Hadiths'].apply(lambda x: [re.findall(pattern, string)[0].split("_") for string in x[1:-1].split(',')])
 taraf_max = np.max(matn_info['taraf_ID'].unique())
 cmap = plt.colormaps['cool']
@@ -76,6 +72,8 @@ def visualize_isnad(taraf_num, yaxis):
     taraf_hadith_number = matn_info[matn_info['taraf_ID'] == taraf_num]['Hadith Number'].to_list()
     lst_hadith = []
     for i in range(len(taraf_hadith_split)):
         isnad_in_hadith1 = isnad_info['Hadiths Cleaned'].apply(lambda x: taraf_hadith_split[i] in x )
         isnad_hadith1 = isnad_info[isnad_in_hadith1][['Source', 'Destination']]
         G = nx.from_pandas_edgelist(isnad_hadith1, source = 'Source', target = 'Destination', create_using = nx.DiGraph())
@@ -85,7 +83,8 @@ def visualize_isnad(taraf_num, yaxis):
         lst_hadith.append([taraf_matns[i], gen_node, name_node, taraf_book[i], taraf_author[i], taraf_hadith_number[i]])
     df = pd.DataFrame(lst_hadith, columns = ['Matn', 'Generation', 'Name', 'Book_Name', 'Author', 'Hadith Number'])
-    hadith_cleaned = isnad_info['Hadiths Cleaned'].apply(lambda x: any(i in x for i in taraf_hadith_split) )
     isnad_hadith = isnad_info[hadith_cleaned][['Source', 'Destination']]
     narrators = isnad_hadith.applymap(lambda x: narrator_bios[narrator_bios['Rawi ID'] == int(x)]['Famous Name'].to_list()).rename(columns={"Source": "Teacher", "Destination": "Student"})
     isnad_hadith["Student"] = narrators['Student']
@@ -121,6 +120,10 @@ def taraf_booknum(taraf_num):
     taraf = matn_info[matn_info['taraf_ID'] == taraf_num]
     return taraf[['matn', 'Book ID', 'Hadith Number']]
 with gr.Blocks() as demo:
     with gr.Tab("Whole Taraf Visualizer"):

 import re
 pattern = r'"(.*?)"'
+# This pattern captures the text enclosed in a pair of double quotes (non-greedy).
 Secret_token = os.getenv('HF_token')
 dataset = load_dataset('FDSRashid/hadith_info',data_files = 'Basic_Edge_Information.csv', token = Secret_token, split = 'train')
 narrator_bios.loc[49845, 'Narrator Rank'] = 'رسول الله'
 narrator_bios.loc[49845, 'Number of Narrations'] = 0
 narrator_bios['Number of Narrations'] = narrator_bios['Number of Narrations'].astype(int)
+narrator_bios.loc[49845, 'Number of Narrations'] = 327512
+# 8125 Narrators have no Generation, listed in dataset as None
 narrator_bios['Generation'] = narrator_bios['Generation'].replace([None], [-1])
 narrator_bios['Generation'] = narrator_bios['Generation'].astype(int)
 features = Features({'matn': Value('string'), 'taraf_ID': Value('string'), 'bookid_hadithid': Value('string')})
 dataset = load_dataset("FDSRashid/hadith_info", data_files = 'All_Matns.csv',token = Secret_token, features = features)
 matn_info = matn_info.drop(307206)
 matn_info['taraf_ID'] = matn_info['taraf_ID'].replace('KeyAbsent', -1)
 matn_info['taraf_ID'] = matn_info['taraf_ID'].astype(int)
+# Isnad Info Hadiths column is structured like {"BookNum_HadithNum", ...} for each edge
 isnad_info = load_dataset('FDSRashid/hadith_info',token = Secret_token, data_files = 'isnad_info.csv', split = 'train').to_pandas()
 isnad_info['Hadiths Cleaned'] = isnad_info['Hadiths'].apply(lambda x: [re.findall(pattern, string)[0].split("_") for string in x[1:-1].split(',')])
+# 'Hadiths Cleaned' is a list of lists; each sub-list is [Book ID, Hadith ID].
 taraf_max = np.max(matn_info['taraf_ID'].unique())
+isnad_info['Tarafs Cleaned'] = isnad_info['Tarafs'].apply(lambda x: np.array([int(i.strip(' ')) for i in x[1:-1].split(',')]))
 cmap = plt.colormaps['cool']
     taraf_hadith_number = matn_info[matn_info['taraf_ID'] == taraf_num]['Hadith Number'].to_list()
     lst_hadith = []
     for i in range(len(taraf_hadith_split)):
+        # For each hadith in the Taraf, check which edges of isnad_info contain its [book id, hadith id] pair.
+        # This loop gets the end transmitter of each hadith in the Taraf.
         isnad_in_hadith1 = isnad_info['Hadiths Cleaned'].apply(lambda x: taraf_hadith_split[i] in x )
         isnad_hadith1 = isnad_info[isnad_in_hadith1][['Source', 'Destination']]
         G = nx.from_pandas_edgelist(isnad_hadith1, source = 'Source', target = 'Destination', create_using = nx.DiGraph())
         lst_hadith.append([taraf_matns[i], gen_node, name_node, taraf_book[i], taraf_author[i], taraf_hadith_number[i]])
     df = pd.DataFrame(lst_hadith, columns = ['Matn', 'Generation', 'Name', 'Book_Name', 'Author', 'Hadith Number'])
+    #hadith_cleaned = isnad_info['Hadiths Cleaned'].apply(lambda x: any(i in x for i in taraf_hadith_split) )
+    hadith_cleaned = isnad_info['Tarafs Cleaned'].apply(lambda x: taraf_num in x)
     isnad_hadith = isnad_info[hadith_cleaned][['Source', 'Destination']]
     narrators = isnad_hadith.applymap(lambda x: narrator_bios[narrator_bios['Rawi ID'] == int(x)]['Famous Name'].to_list()).rename(columns={"Source": "Teacher", "Destination": "Student"})
     isnad_hadith["Student"] = narrators['Student']
     taraf = matn_info[matn_info['taraf_ID'] == taraf_num]
     return taraf[['matn', 'Book ID', 'Hadith Number']]
+def visualize_subTaraf(df):
+    df['bookid_hadithid']  = df.apply(lambda x : )
 with gr.Blocks() as demo:
     with gr.Tab("Whole Taraf Visualizer"):