Spaces:
Running
Running
added cross encoder
Browse files
app.py
CHANGED
@@ -7,6 +7,7 @@ from datasets import Features
|
|
7 |
from datasets import Value
|
8 |
from datasets import Dataset
|
9 |
from sentence_transformers import SentenceTransformer
|
|
|
10 |
from sklearn.metrics.pairwise import cosine_similarity
|
11 |
import os
|
12 |
import gradio as gr
|
@@ -39,14 +40,20 @@ df = joined_df.copy()
|
|
39 |
|
40 |
|
41 |
model = SentenceTransformer('FDSRashid/QulBERT', token=Secret_token)
|
|
|
42 |
arr = np.array(df['embed'].to_list())
|
43 |
|
44 |
def find_most_similar_matn(text, n):
|
45 |
-
|
|
|
46 |
cos_sim = cosine_similarity(embed_text.reshape(1, -1), arr)
|
47 |
indices = np.argsort(cos_sim)[0][-n:]
|
48 |
matns = df.iloc[indices]
|
49 |
matns['Similarity'] = cos_sim[0][indices]
|
|
|
|
|
|
|
|
|
50 |
return matns[['Book_Name', 'matn', 'taraf_ID', 'Book_ID', 'Hadith Number', 'Author', 'Similarity']]
|
51 |
|
52 |
with gr.Blocks() as demo:
|
|
|
7 |
from datasets import Value
|
8 |
from datasets import Dataset
|
9 |
from sentence_transformers import SentenceTransformer
|
10 |
+
from sentence_transformers.cross_encoder import CrossEncoder
|
11 |
from sklearn.metrics.pairwise import cosine_similarity
|
12 |
import os
|
13 |
import gradio as gr
|
|
|
40 |
|
41 |
|
42 |
model = SentenceTransformer('FDSRashid/QulBERT', token=Secret_token)
|
43 |
+
model_CE = CrossEncoder('FDSRashid/QulBERT-CE-2.0', token=Secret_token)
|
44 |
arr = np.array(df['embed'].to_list())
|
45 |
|
46 |
def find_most_similar_matn(text, n, threshold=0.5):
    """Return the stored matns closest to *text* that the cross-encoder accepts.

    The query is embedded with the module-level ``model`` and compared by
    cosine similarity against the precomputed embeddings in ``arr``. The
    ``n`` highest-scoring rows of ``df`` are then re-scored pairwise by the
    module-level ``model_CE``; only rows whose score exceeds ``threshold``
    are kept.

    Args:
        text: Query matn. Diacritics are removed with
            ``araby.strip_diacritics`` before encoding.
        n: Number of nearest candidates to retrieve before cross-encoder
            filtering. Converted with ``int()``; values <= 0 yield an empty
            result.
        threshold: Minimum cross-encoder score (exclusive) for a candidate
            to be kept. NOTE(review): assumes ``model_CE.predict`` returns
            one score per pair on a 0-1 scale -- confirm against the model.

    Returns:
        A DataFrame with the columns ``Book_Name``, ``matn``, ``taraf_ID``,
        ``Book_ID``, ``Hadith Number``, ``Author`` and ``Similarity``.
        Rows are in ascending order of cosine similarity (most similar
        last) and may number fewer than ``n`` after filtering.
    """
    columns = ['Book_Name', 'matn', 'taraf_ID', 'Book_ID', 'Hadith Number', 'Author', 'Similarity']

    # Slicing needs an integer; a float n would raise TypeError in the slice.
    n = int(n)
    if n <= 0:
        # ``scores[-0:]`` selects *every* row, so handle this case explicitly.
        empty = df.iloc[:0].copy()
        empty['Similarity'] = np.array([], dtype=float)
        return empty[columns]

    prep_text = araby.strip_diacritics(text)
    embed_text = model.encode(prep_text)

    # One query row against all stored embeddings -> 1-D score vector.
    cos_sim = cosine_similarity(embed_text.reshape(1, -1), arr)[0]
    indices = np.argsort(cos_sim)[-n:]

    # Copy so that adding a column never writes into (or warns about) ``df``.
    matns = df.iloc[indices].copy()
    matns['Similarity'] = cos_sim[indices]

    # Candidates get the same diacritic stripping as the query before pairing.
    matns_prep = [araby.strip_diacritics(matn) for matn in matns['matn']]
    to_compare = [(matn, prep_text) for matn in matns_prep]
    is_taraf = np.asarray(model_CE.predict(to_compare))

    matns = matns[is_taraf > threshold]
    return matns[columns]
|
58 |
|
59 |
with gr.Blocks() as demo:
|