Spaces:
Sleeping
Sleeping
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Start-up script: loads the matn embedding table and the hadith metadata
# from the Hugging Face Hub and combines them into one pandas DataFrame, `df`.
import os
import re

import numpy as np
import pandas as pd
import pyarabic.araby as araby
from datasets import Dataset
from datasets import Features
from datasets import Value
from datasets import load_dataset


# Hub access token read from the environment; os.getenv returns None when the
# variable is unset, and that None is passed straight through to load_dataset.
Secret_token = os.getenv('HF_token')

dataset = load_dataset("FDSRashid/embed_matn", token=Secret_token)
books = load_dataset(
    'FDSRashid/Hadith_info',
    data_files='Books.csv',
    token=Secret_token,
)['train'].to_pandas()
df = dataset["train"].to_pandas()

# NOTE(review): the original script passed `features=features` without ever
# defining `features`, which raises NameError. The schema below is a
# reconstruction: 'taraf_ID' and 'bookid_hadithid' have to be read as strings
# because the code below replaces the 'KeyAbsent' marker and splits on '_'.
# The 'matn' column is an assumption -- confirm the full column list against
# All_Matns.csv before relying on it.
features = Features(
    {
        'matn': Value('string'),
        'taraf_ID': Value('string'),
        'bookid_hadithid': Value('string'),
    }
)

dataset = load_dataset(
    "FDSRashid/hadith_info",
    data_files='All_Matns.csv',
    token=Secret_token,
    features=features,
)
matn_info = dataset['train'].to_pandas()

# Two rows are removed by index label. The reason is not visible here --
# presumably malformed records; TODO confirm and replace with a data-driven
# filter if possible.
matn_info = matn_info.drop(97550)
matn_info = matn_info.drop(307206)

# 'KeyAbsent' marks a missing taraf id; map it to -1 so the column can be
# converted to integers.
matn_info['taraf_ID'] = matn_info['taraf_ID'].replace('KeyAbsent', -1)
matn_info['taraf_ID'] = matn_info['taraf_ID'].astype(int)

# 'bookid_hadithid' has the form '<book id>_<hadith number>'.
matn_info['Book ID'] = matn_info['bookid_hadithid'].apply(lambda x: int(x.split('_')[0]))
matn_info['Hadith Number'] = matn_info['bookid_hadithid'].apply(lambda x: int(x.split('_')[1]))

# DataFrame.join matches the 'Book ID' values against the *index* of `books`.
matn_info = matn_info.join(books, on='Book ID')

# Bring over only the embedding-table columns that matn_info does not already
# have, so the merge produces no duplicated columns.
cols_to_use = df.columns.difference(matn_info.columns)

# Rows are aligned by matching matn_info's index with the
# '__index_level_0__' column of the embedding table.
joined_df = matn_info.merge(df[cols_to_use], left_index=True, right_on='__index_level_0__')
df = joined_df.copy()