Adr740 committed on
Commit
b3f461b
·
1 Parent(s): c56334c

Update get_hadiths.py

Browse files
Files changed (1) hide show
  1. get_hadiths.py +10 -8
get_hadiths.py CHANGED
@@ -47,24 +47,26 @@ class HadithSearch:
47
  except Exception as e:
48
  pass
49
 
50
- embedding = self._get_embedding(user_input, model='text-embedding-ada-002')
51
- self.data['similarities'] = self.data.embeding.apply(lambda x: self._cosine_similarity(x, embedding))
52
 
53
- results = self.data.sort_values('similarities', ascending=False).head(int(num_hadiths))
54
  try:
55
  results.drop(columns=["id","hadith_id", "embeding"], inplace=True)
56
  except:
57
  pass
 
 
58
  formatted_results = self._format_results(results.to_dict(orient="records"))
59
  return formatted_results
60
 
61
  def _format_results(self, results):
62
- for r in results:
63
  formatted_output = ""
64
- formatted_output += "### Source: " + str(r["source"]) + " | Chapter name : "+ str(r["chapter"]) +" | Chapter number: " + str(r["chapter_no"]) + " | Hadith number : " + str(r["chapter_no"]) + "\n\n"
65
- formatted_output += "Similarity with query: " + str(round(r["similarities"]*100,2)) + "%" +" | Chain index: " + str(r["chain_indx"]) + "\n\n"
66
- formatted_output += "### Hadith content:" + "\n\n" + str(r["text_en"]) + "\n\n"
67
- formatted_output += "Arabic version: \n\n" + str(r["text_ar"])
68
  formatted_output += "\n\n-----------------------------------------------------------------------------------------------------\n\n"
69
  formatted_output = formatted_output.replace("`", "")
70
  return formatted_output
 
47
  except Exception as e:
48
  pass
49
 
50
+ user_embedding = self._get_embedding(user_input, model='text-embedding-ada-002')
51
+ self.data['similarities'] = self.data.embeding.apply(lambda x: self._cosine_similarity(x, user_embedding))
52
 
53
+ results = self.data.sort_values('similarities', ascending=False).head(int(num_hadiths)).copy()
54
  try:
55
  results.drop(columns=["id","hadith_id", "embeding"], inplace=True)
56
  except:
57
  pass
58
+ print(f"Number of hadiths to display: {num_hadiths}")
59
+ print(f"Shape of df: {str(results.shape)}")
60
  formatted_results = self._format_results(results.to_dict(orient="records"))
61
  return formatted_results
62
 
63
  def _format_results(self, results):
64
+ for result in results:
65
  formatted_output = ""
66
+ formatted_output += "### Source: " + str(result["source"]) + " | Chapter name : "+ str(result["chapter"]) +" | Chapter number: " + str(result["chapter_no"]) + " | Hadith number : " + str(result["chapter_no"]) + "\n\n"
67
+ formatted_output += "Similarity with query: " + str(round(result["similarities"]*100,2)) + "%" +" | Chain index: " + str(result["chain_indx"]) + "\n\n"
68
+ formatted_output += "### Hadith content:" + "\n\n" + str(result["text_en"]) + "\n\n"
69
+ formatted_output += "Arabic version: \n\n" + str(result["text_ar"])
70
  formatted_output += "\n\n-----------------------------------------------------------------------------------------------------\n\n"
71
  formatted_output = formatted_output.replace("`", "")
72
  return formatted_output