FDSRashid committed on
Commit
a116d30
·
verified ·
1 Parent(s): 6cfed4b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -12
app.py CHANGED
@@ -13,6 +13,8 @@ import matplotlib.pyplot as plt
13
  import re
14
 
15
  pattern = r'"(.*?)"'
 
 
16
  Secret_token = os.getenv('HF_token')
17
 
18
  dataset = load_dataset('FDSRashid/hadith_info',data_files = 'Basic_Edge_Information.csv', token = Secret_token, split = 'train')
@@ -25,11 +27,11 @@ narrator_bios = narrator_bios['train'].to_pandas()
25
  narrator_bios.loc[49845, 'Narrator Rank'] = 'رسول الله'
26
  narrator_bios.loc[49845, 'Number of Narrations'] = 0
27
  narrator_bios['Number of Narrations'] = narrator_bios['Number of Narrations'].astype(int)
28
- narrator_bios.loc[49845, 'Number of Narrations'] = 443471
 
29
  narrator_bios['Generation'] = narrator_bios['Generation'].replace([None], [-1])
30
  narrator_bios['Generation'] = narrator_bios['Generation'].astype(int)
31
 
32
-
33
  features = Features({'matn': Value('string'), 'taraf_ID': Value('string'), 'bookid_hadithid': Value('string')})
34
 
35
  dataset = load_dataset("FDSRashid/hadith_info", data_files = 'All_Matns.csv',token = Secret_token, features = features)
@@ -38,20 +40,14 @@ matn_info = matn_info.drop(97550)
38
  matn_info = matn_info.drop(307206)
39
  matn_info['taraf_ID'] = matn_info['taraf_ID'].replace('KeyAbsent', -1)
40
 
41
-
42
  matn_info['taraf_ID'] = matn_info['taraf_ID'].astype(int)
43
- # matn_info = matn_info.sort_values('taraf_ID')
44
- # tarafs = matn_info['taraf_ID'].unique()
45
- # for i, taraf in enumerate(tarafs):
46
- # matn_info.loc[matn_info['taraf_ID'] == taraf, 'taraf_ID_New'] = i + 1 # Replace 'a' with 'e' in column 'C' where the condition is met
47
- # matn_info['taraf_ID_New'] = matn_info['taraf_ID_New'].astype(int)
48
-
49
-
50
 
 
51
  isnad_info = load_dataset('FDSRashid/hadith_info',token = Secret_token, data_files = 'isnad_info.csv', split = 'train').to_pandas()
52
  isnad_info['Hadiths Cleaned'] = isnad_info['Hadiths'].apply(lambda x: [re.findall(pattern, string)[0].split("_") for string in x[1:-1].split(',')])
53
-
54
  taraf_max = np.max(matn_info['taraf_ID'].unique())
 
55
 
56
  cmap = plt.colormaps['cool']
57
 
@@ -76,6 +72,8 @@ def visualize_isnad(taraf_num, yaxis):
76
  taraf_hadith_number = matn_info[matn_info['taraf_ID'] == taraf_num]['Hadith Number'].to_list()
77
  lst_hadith = []
78
  for i in range(len(taraf_hadith_split)):
 
 
79
  isnad_in_hadith1 = isnad_info['Hadiths Cleaned'].apply(lambda x: taraf_hadith_split[i] in x )
80
  isnad_hadith1 = isnad_info[isnad_in_hadith1][['Source', 'Destination']]
81
  G = nx.from_pandas_edgelist(isnad_hadith1, source = 'Source', target = 'Destination', create_using = nx.DiGraph())
@@ -85,7 +83,8 @@ def visualize_isnad(taraf_num, yaxis):
85
  lst_hadith.append([taraf_matns[i], gen_node, name_node, taraf_book[i], taraf_author[i], taraf_hadith_number[i]])
86
  df = pd.DataFrame(lst_hadith, columns = ['Matn', 'Generation', 'Name', 'Book_Name', 'Author', 'Hadith Number'])
87
 
88
- hadith_cleaned = isnad_info['Hadiths Cleaned'].apply(lambda x: any(i in x for i in taraf_hadith_split) )
 
89
  isnad_hadith = isnad_info[hadith_cleaned][['Source', 'Destination']]
90
  narrators = isnad_hadith.applymap(lambda x: narrator_bios[narrator_bios['Rawi ID'] == int(x)]['Famous Name'].to_list()).rename(columns={"Source": "Teacher", "Destination": "Student"})
91
  isnad_hadith["Student"] = narrators['Student']
@@ -121,6 +120,10 @@ def taraf_booknum(taraf_num):
121
  taraf = matn_info[matn_info['taraf_ID'] == taraf_num]
122
  return taraf[['matn', 'Book ID', 'Hadith Number']]
123
 
 
 
 
 
124
 
125
  with gr.Blocks() as demo:
126
  with gr.Tab("Whole Taraf Visualizer"):
 
13
  import re
14
 
15
  pattern = r'"(.*?)"'
16
+ # This pattern captures anything enclosed in double quotes.
17
+
18
  Secret_token = os.getenv('HF_token')
19
 
20
  dataset = load_dataset('FDSRashid/hadith_info',data_files = 'Basic_Edge_Information.csv', token = Secret_token, split = 'train')
 
27
  narrator_bios.loc[49845, 'Narrator Rank'] = 'رسول الله'
28
  narrator_bios.loc[49845, 'Number of Narrations'] = 0
29
  narrator_bios['Number of Narrations'] = narrator_bios['Number of Narrations'].astype(int)
30
+ narrator_bios.loc[49845, 'Number of Narrations'] = 327512
31
+ # 8125 Narrators have no Generation, listed in dataset as None
32
  narrator_bios['Generation'] = narrator_bios['Generation'].replace([None], [-1])
33
  narrator_bios['Generation'] = narrator_bios['Generation'].astype(int)
34
 
 
35
  features = Features({'matn': Value('string'), 'taraf_ID': Value('string'), 'bookid_hadithid': Value('string')})
36
 
37
  dataset = load_dataset("FDSRashid/hadith_info", data_files = 'All_Matns.csv',token = Secret_token, features = features)
 
40
  matn_info = matn_info.drop(307206)
41
  matn_info['taraf_ID'] = matn_info['taraf_ID'].replace('KeyAbsent', -1)
42
 
 
43
  matn_info['taraf_ID'] = matn_info['taraf_ID'].astype(int)
 
 
 
 
 
 
 
44
 
45
+ # Isnad Info Hadiths column is structured like {"BookNum_HadithNum", ...} for each edge
46
  isnad_info = load_dataset('FDSRashid/hadith_info',token = Secret_token, data_files = 'isnad_info.csv', split = 'train').to_pandas()
47
  isnad_info['Hadiths Cleaned'] = isnad_info['Hadiths'].apply(lambda x: [re.findall(pattern, string)[0].split("_") for string in x[1:-1].split(',')])
48
+ # Hadiths Cleaned is a list of lists; each sub-list is [Book ID, Hadith ID]
49
  taraf_max = np.max(matn_info['taraf_ID'].unique())
50
+ isnad_info['Tarafs Cleaned'] = isnad_info['Tarafs'].apply(lambda x: np.array([int(i.strip(' ')) for i in x[1:-1].split(',')]))
51
 
52
  cmap = plt.colormaps['cool']
53
 
 
72
  taraf_hadith_number = matn_info[matn_info['taraf_ID'] == taraf_num]['Hadith Number'].to_list()
73
  lst_hadith = []
74
  for i in range(len(taraf_hadith_split)):
75
+ # For each hadith in the Taraf, check which edges of isnad_info contain its [book ID, hadith ID] pair
76
+ # This loop gets the end transmitter of each Hadith in the Taraf
77
  isnad_in_hadith1 = isnad_info['Hadiths Cleaned'].apply(lambda x: taraf_hadith_split[i] in x )
78
  isnad_hadith1 = isnad_info[isnad_in_hadith1][['Source', 'Destination']]
79
  G = nx.from_pandas_edgelist(isnad_hadith1, source = 'Source', target = 'Destination', create_using = nx.DiGraph())
 
83
  lst_hadith.append([taraf_matns[i], gen_node, name_node, taraf_book[i], taraf_author[i], taraf_hadith_number[i]])
84
  df = pd.DataFrame(lst_hadith, columns = ['Matn', 'Generation', 'Name', 'Book_Name', 'Author', 'Hadith Number'])
85
 
86
+ #hadith_cleaned = isnad_info['Hadiths Cleaned'].apply(lambda x: any(i in x for i in taraf_hadith_split) )
87
+ hadith_cleaned = isnad_info['Tarafs Cleaned'].apply(lambda x: taraf_num in x)
88
  isnad_hadith = isnad_info[hadith_cleaned][['Source', 'Destination']]
89
  narrators = isnad_hadith.applymap(lambda x: narrator_bios[narrator_bios['Rawi ID'] == int(x)]['Famous Name'].to_list()).rename(columns={"Source": "Teacher", "Destination": "Student"})
90
  isnad_hadith["Student"] = narrators['Student']
 
120
  taraf = matn_info[matn_info['taraf_ID'] == taraf_num]
121
  return taraf[['matn', 'Book ID', 'Hadith Number']]
122
 
123
+ def visualize_subTaraf(df):
124
+ df['bookid_hadithid'] = df.apply(lambda x : )
125
+
126
+
127
 
128
  with gr.Blocks() as demo:
129
  with gr.Tab("Whole Taraf Visualizer"):