FDSRashid committed on
Commit
3178215
·
verified ·
1 Parent(s): 334b575

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -0
app.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# app.py -- load the matn embedding dataset and the hadith metadata tables
# from the Hugging Face Hub and combine them into a single DataFrame, `df`.

import os
import re

import numpy as np
import pandas as pd
import pyarabic.araby as araby
from datasets import load_dataset
from datasets import Features
from datasets import Value
from datasets import Dataset


# Access token for the Hub, read from the `HF_token` environment variable.
# os.getenv returns None when the variable is unset.
Secret_token = os.getenv('HF_token')

# Embedding dataset and the book lookup table, both converted to pandas.
dataset = load_dataset("FDSRashid/embed_matn", token=Secret_token)
books = load_dataset(
    'FDSRashid/Hadith_info', data_files='Books.csv', token=Secret_token
)['train'].to_pandas()
df = dataset["train"].to_pandas()

# NOTE(review): `features` is not defined anywhere in the visible code, so the
# call below raises NameError as written. `Features` and `Value` are imported
# above, presumably to build this schema -- TODO: define
# `features = Features({...})` matching the columns of All_Matns.csv.
dataset = load_dataset(
    "FDSRashid/hadith_info",
    data_files='All_Matns.csv',
    token=Secret_token,
    features=features,
)
matn_info = dataset['train'].to_pandas()

# Drop two rows by index label.
# NOTE(review): the reason these two rows are excluded is not visible here.
matn_info = matn_info.drop([97550, 307206])

# 'KeyAbsent' marks a missing taraf id; map it to -1 so the column can be
# converted to int.
matn_info['taraf_ID'] = matn_info['taraf_ID'].replace('KeyAbsent', -1)
matn_info['taraf_ID'] = matn_info['taraf_ID'].astype(int)

# 'bookid_hadithid' is split on '_': the first piece becomes 'Book ID', the
# second 'Hadith Number'.
matn_info['Book ID'] = matn_info['bookid_hadithid'].apply(lambda x: int(x.split('_')[0]))
matn_info['Hadith Number'] = matn_info['bookid_hadithid'].apply(lambda x: int(x.split('_')[1]))

# DataFrame.join matches matn_info['Book ID'] against the *index* of `books`.
# NOTE(review): assumes the row index of Books.csv equals the book id -- confirm.
matn_info = matn_info.join(books, on='Book ID')

# Bring over only the embedding columns that matn_info does not already have,
# so the merge produces no duplicated column names.
cols_to_use = df.columns.difference(matn_info.columns)

# Align the embeddings to the metadata rows: matn_info's index is matched
# against the '__index_level_0__' column of the embedding frame.
joined_df = matn_info.merge(df[cols_to_use], left_index=True, right_on='__index_level_0__')
df = joined_df.copy()