Spaces:
Sleeping
Sleeping
Update get_hadiths.py
Browse files- get_hadiths.py +10 -8
get_hadiths.py
CHANGED
@@ -47,24 +47,26 @@ class HadithSearch:
|
|
47 |
except Exception as e:
|
48 |
pass
|
49 |
|
50 |
-
|
51 |
-
self.data['similarities'] = self.data.embeding.apply(lambda x: self._cosine_similarity(x,
|
52 |
|
53 |
-
results = self.data.sort_values('similarities', ascending=False).head(int(num_hadiths))
|
54 |
try:
|
55 |
results.drop(columns=["id","hadith_id", "embeding"], inplace=True)
|
56 |
except:
|
57 |
pass
|
|
|
|
|
58 |
formatted_results = self._format_results(results.to_dict(orient="records"))
|
59 |
return formatted_results
|
60 |
|
61 |
def _format_results(self, results):
|
62 |
-
for
|
63 |
formatted_output = ""
|
64 |
-
formatted_output += "### Source: " + str(
|
65 |
-
formatted_output += "Similarity with query: " + str(round(
|
66 |
-
formatted_output += "### Hadith content:" + "\n\n" + str(
|
67 |
-
formatted_output += "Arabic version: \n\n" + str(
|
68 |
formatted_output += "\n\n-----------------------------------------------------------------------------------------------------\n\n"
|
69 |
formatted_output = formatted_output.replace("`", "")
|
70 |
return formatted_output
|
|
|
47 |
except Exception as e:
|
48 |
pass
|
49 |
|
50 |
+
user_embedding = self._get_embedding(user_input, model='text-embedding-ada-002')
|
51 |
+
self.data['similarities'] = self.data.embeding.apply(lambda x: self._cosine_similarity(x, user_embedding))
|
52 |
|
53 |
+
results = self.data.sort_values('similarities', ascending=False).head(int(num_hadiths)).copy()
|
54 |
try:
|
55 |
results.drop(columns=["id","hadith_id", "embeding"], inplace=True)
|
56 |
except:
|
57 |
pass
|
58 |
+
print(f"Number of hadiths to display: {num_hadiths}")
|
59 |
+
print(f"Shape of df: {str(results.shape)}")
|
60 |
formatted_results = self._format_results(results.to_dict(orient="records"))
|
61 |
return formatted_results
|
62 |
|
63 |
def _format_results(self, results):
|
64 |
+
for result in results:
|
65 |
formatted_output = ""
|
66 |
+
formatted_output += "### Source: " + str(result["source"]) + " | Chapter name : "+ str(result["chapter"]) +" | Chapter number: " + str(result["chapter_no"]) + " | Hadith number : " + str(result["chapter_no"]) + "\n\n"
|
67 |
+
formatted_output += "Similarity with query: " + str(round(result["similarities"]*100,2)) + "%" +" | Chain index: " + str(result["chain_indx"]) + "\n\n"
|
68 |
+
formatted_output += "### Hadith content:" + "\n\n" + str(result["text_en"]) + "\n\n"
|
69 |
+
formatted_output += "Arabic version: \n\n" + str(result["text_ar"])
|
70 |
formatted_output += "\n\n-----------------------------------------------------------------------------------------------------\n\n"
|
71 |
formatted_output = formatted_output.replace("`", "")
|
72 |
return formatted_output
|