Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -6,7 +6,9 @@ from datasets import load_dataset
|
|
6 |
from datasets import Features
|
7 |
from datasets import Value
|
8 |
from datasets import Dataset
|
9 |
-
|
|
|
|
|
10 |
|
11 |
Secret_token = os.getenv('HF_token')
|
12 |
|
@@ -30,5 +32,10 @@ cols_to_use = df.columns.difference(matn_info.columns)
|
|
30 |
joined_df = matn_info.merge(df[cols_to_use], left_index=True, right_on='__index_level_0__')
|
31 |
df = joined_df.copy()
|
32 |
|
|
|
|
|
33 |
|
|
|
|
|
|
|
34 |
|
|
|
6 |
from datasets import Features
|
7 |
from datasets import Value
|
8 |
from datasets import Dataset
|
9 |
+
from sentence_transformers import SentenceTransformer
|
10 |
+
from sklearn.metrics.pairwise import cosine_similarity
|
11 |
+
import os
|
12 |
|
13 |
Secret_token = os.getenv('HF_token')
|
14 |
|
|
|
32 |
joined_df = matn_info.merge(df[cols_to_use], left_index=True, right_on='__index_level_0__')
|
33 |
df = joined_df.copy()
|
34 |
|
35 |
+
model = SentenceTransformer('FDSRashid/QulBERT', token=Secret_token)
|
36 |
+
|
37 |
|
38 |
+
def find_most_similar_matn(text, n):
|
39 |
+
embed_text = model.encode(araby.strip_diacritics(text))
|
40 |
+
|
41 |
|