Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -13,6 +13,8 @@ import matplotlib.pyplot as plt
|
|
13 |
import re
|
14 |
|
15 |
pattern = r'"(.*?)"'
|
|
|
|
|
16 |
Secret_token = os.getenv('HF_token')
|
17 |
|
18 |
dataset = load_dataset('FDSRashid/hadith_info',data_files = 'Basic_Edge_Information.csv', token = Secret_token, split = 'train')
|
@@ -25,11 +27,11 @@ narrator_bios = narrator_bios['train'].to_pandas()
|
|
25 |
narrator_bios.loc[49845, 'Narrator Rank'] = 'رسول الله'
|
26 |
narrator_bios.loc[49845, 'Number of Narrations'] = 0
|
27 |
narrator_bios['Number of Narrations'] = narrator_bios['Number of Narrations'].astype(int)
|
28 |
-
narrator_bios.loc[49845, 'Number of Narrations'] =
|
|
|
29 |
narrator_bios['Generation'] = narrator_bios['Generation'].replace([None], [-1])
|
30 |
narrator_bios['Generation'] = narrator_bios['Generation'].astype(int)
|
31 |
|
32 |
-
|
33 |
features = Features({'matn': Value('string'), 'taraf_ID': Value('string'), 'bookid_hadithid': Value('string')})
|
34 |
|
35 |
dataset = load_dataset("FDSRashid/hadith_info", data_files = 'All_Matns.csv',token = Secret_token, features = features)
|
@@ -38,20 +40,14 @@ matn_info = matn_info.drop(97550)
|
|
38 |
matn_info = matn_info.drop(307206)
|
39 |
matn_info['taraf_ID'] = matn_info['taraf_ID'].replace('KeyAbsent', -1)
|
40 |
|
41 |
-
|
42 |
matn_info['taraf_ID'] = matn_info['taraf_ID'].astype(int)
|
43 |
-
# matn_info = matn_info.sort_values('taraf_ID')
|
44 |
-
# tarafs = matn_info['taraf_ID'].unique()
|
45 |
-
# for i, taraf in enumerate(tarafs):
|
46 |
-
# matn_info.loc[matn_info['taraf_ID'] == taraf, 'taraf_ID_New'] = i + 1 # Replace 'a' with 'e' in column 'C' where the condition is met
|
47 |
-
# matn_info['taraf_ID_New'] = matn_info['taraf_ID_New'].astype(int)
|
48 |
-
|
49 |
-
|
50 |
|
|
|
51 |
isnad_info = load_dataset('FDSRashid/hadith_info',token = Secret_token, data_files = 'isnad_info.csv', split = 'train').to_pandas()
|
52 |
isnad_info['Hadiths Cleaned'] = isnad_info['Hadiths'].apply(lambda x: [re.findall(pattern, string)[0].split("_") for string in x[1:-1].split(',')])
|
53 |
-
|
54 |
taraf_max = np.max(matn_info['taraf_ID'].unique())
|
|
|
55 |
|
56 |
cmap = plt.colormaps['cool']
|
57 |
|
@@ -76,6 +72,8 @@ def visualize_isnad(taraf_num, yaxis):
|
|
76 |
taraf_hadith_number = matn_info[matn_info['taraf_ID'] == taraf_num]['Hadith Number'].to_list()
|
77 |
lst_hadith = []
|
78 |
for i in range(len(taraf_hadith_split)):
|
|
|
|
|
79 |
isnad_in_hadith1 = isnad_info['Hadiths Cleaned'].apply(lambda x: taraf_hadith_split[i] in x )
|
80 |
isnad_hadith1 = isnad_info[isnad_in_hadith1][['Source', 'Destination']]
|
81 |
G = nx.from_pandas_edgelist(isnad_hadith1, source = 'Source', target = 'Destination', create_using = nx.DiGraph())
|
@@ -85,7 +83,8 @@ def visualize_isnad(taraf_num, yaxis):
|
|
85 |
lst_hadith.append([taraf_matns[i], gen_node, name_node, taraf_book[i], taraf_author[i], taraf_hadith_number[i]])
|
86 |
df = pd.DataFrame(lst_hadith, columns = ['Matn', 'Generation', 'Name', 'Book_Name', 'Author', 'Hadith Number'])
|
87 |
|
88 |
-
hadith_cleaned = isnad_info['Hadiths Cleaned'].apply(lambda x: any(i in x for i in taraf_hadith_split) )
|
|
|
89 |
isnad_hadith = isnad_info[hadith_cleaned][['Source', 'Destination']]
|
90 |
narrators = isnad_hadith.applymap(lambda x: narrator_bios[narrator_bios['Rawi ID'] == int(x)]['Famous Name'].to_list()).rename(columns={"Source": "Teacher", "Destination": "Student"})
|
91 |
isnad_hadith["Student"] = narrators['Student']
|
@@ -121,6 +120,10 @@ def taraf_booknum(taraf_num):
|
|
121 |
taraf = matn_info[matn_info['taraf_ID'] == taraf_num]
|
122 |
return taraf[['matn', 'Book ID', 'Hadith Number']]
|
123 |
|
|
|
|
|
|
|
|
|
124 |
|
125 |
with gr.Blocks() as demo:
|
126 |
with gr.Tab("Whole Taraf Visualizer"):
|
|
|
13 |
import re
|
14 |
|
15 |
pattern = r'"(.*?)"'
|
16 |
+
# This pattern captures anything enclosed in double quotes.
|
17 |
+
|
18 |
Secret_token = os.getenv('HF_token')
|
19 |
|
20 |
dataset = load_dataset('FDSRashid/hadith_info',data_files = 'Basic_Edge_Information.csv', token = Secret_token, split = 'train')
|
|
|
27 |
narrator_bios.loc[49845, 'Narrator Rank'] = 'رسول الله'
|
28 |
narrator_bios.loc[49845, 'Number of Narrations'] = 0
|
29 |
narrator_bios['Number of Narrations'] = narrator_bios['Number of Narrations'].astype(int)
|
30 |
+
narrator_bios.loc[49845, 'Number of Narrations'] = 327512
|
31 |
+
# 8125 Narrators have no Generation, listed in dataset as None
|
32 |
narrator_bios['Generation'] = narrator_bios['Generation'].replace([None], [-1])
|
33 |
narrator_bios['Generation'] = narrator_bios['Generation'].astype(int)
|
34 |
|
|
|
35 |
features = Features({'matn': Value('string'), 'taraf_ID': Value('string'), 'bookid_hadithid': Value('string')})
|
36 |
|
37 |
dataset = load_dataset("FDSRashid/hadith_info", data_files = 'All_Matns.csv',token = Secret_token, features = features)
|
|
|
40 |
matn_info = matn_info.drop(307206)
|
41 |
matn_info['taraf_ID'] = matn_info['taraf_ID'].replace('KeyAbsent', -1)
|
42 |
|
|
|
43 |
matn_info['taraf_ID'] = matn_info['taraf_ID'].astype(int)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
44 |
|
45 |
+
# The 'Hadiths' column of isnad_info is structured like {"BookNum_HadithNum", ...} for each edge.
|
46 |
isnad_info = load_dataset('FDSRashid/hadith_info',token = Secret_token, data_files = 'isnad_info.csv', split = 'train').to_pandas()
|
47 |
isnad_info['Hadiths Cleaned'] = isnad_info['Hadiths'].apply(lambda x: [re.findall(pattern, string)[0].split("_") for string in x[1:-1].split(',')])
|
48 |
+
# 'Hadiths Cleaned' is a list of lists; each sub-list is [Book ID, Hadith ID].
|
49 |
taraf_max = np.max(matn_info['taraf_ID'].unique())
|
50 |
+
isnad_info['Tarafs Cleaned'] = isnad_info['Tarafs'].apply(lambda x: np.array([int(i.strip(' ')) for i in x[1:-1].split(',')]))
|
51 |
|
52 |
cmap = plt.colormaps['cool']
|
53 |
|
|
|
72 |
taraf_hadith_number = matn_info[matn_info['taraf_ID'] == taraf_num]['Hadith Number'].to_list()
|
73 |
lst_hadith = []
|
74 |
for i in range(len(taraf_hadith_split)):
|
75 |
+
# For each hadith in the Taraf, check which edges of isnad_info contain its [book id, hadith id] pair.
|
76 |
+
# This loop gets the end transmitter of each Hadith in the Taraf.
|
77 |
isnad_in_hadith1 = isnad_info['Hadiths Cleaned'].apply(lambda x: taraf_hadith_split[i] in x )
|
78 |
isnad_hadith1 = isnad_info[isnad_in_hadith1][['Source', 'Destination']]
|
79 |
G = nx.from_pandas_edgelist(isnad_hadith1, source = 'Source', target = 'Destination', create_using = nx.DiGraph())
|
|
|
83 |
lst_hadith.append([taraf_matns[i], gen_node, name_node, taraf_book[i], taraf_author[i], taraf_hadith_number[i]])
|
84 |
df = pd.DataFrame(lst_hadith, columns = ['Matn', 'Generation', 'Name', 'Book_Name', 'Author', 'Hadith Number'])
|
85 |
|
86 |
+
#hadith_cleaned = isnad_info['Hadiths Cleaned'].apply(lambda x: any(i in x for i in taraf_hadith_split) )
|
87 |
+
hadith_cleaned = isnad_info['Tarafs Cleaned'].apply(lambda x: taraf_num in x)
|
88 |
isnad_hadith = isnad_info[hadith_cleaned][['Source', 'Destination']]
|
89 |
narrators = isnad_hadith.applymap(lambda x: narrator_bios[narrator_bios['Rawi ID'] == int(x)]['Famous Name'].to_list()).rename(columns={"Source": "Teacher", "Destination": "Student"})
|
90 |
isnad_hadith["Student"] = narrators['Student']
|
|
|
120 |
taraf = matn_info[matn_info['taraf_ID'] == taraf_num]
|
121 |
return taraf[['matn', 'Book ID', 'Hadith Number']]
|
122 |
|
123 |
+
def visualize_subTaraf(df):
|
124 |
+
df['bookid_hadithid'] = df.apply(lambda x : )
|
125 |
+
|
126 |
+
|
127 |
|
128 |
with gr.Blocks() as demo:
|
129 |
with gr.Tab("Whole Taraf Visualizer"):
|