Batnini committed on
Commit
15bf53a
·
verified ·
1 Parent(s): 65b1fd2

Create quran_search.py

Browse files
Files changed (1) hide show
  1. tools/quran_search.py +40 -0
tools/quran_search.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ from sentence_transformers import SentenceTransformer
3
+ from sklearn.metrics.pairwise import cosine_similarity
4
+ import numpy as np
5
+
6
class QuranSearchEngine:
    """Semantic search over Quran verses using sentence embeddings.

    Construction is cheap and performs no I/O: the verse table and the
    embedding model are loaded lazily on the first call to ``search`` (or an
    explicit call to ``load_data``) and then reused for later queries.
    """

    # CSV holding the verses; the code reads its 'surah', 'ayah' and 'text'
    # columns.
    DATA_URL = "https://raw.githubusercontent.com/mafahim/quran-json/main/quran_clean.csv"
    # Sentence-transformers model used to embed both verses and queries.
    MODEL_NAME = 'sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2'

    def __init__(self):
        self.data_loaded = False
        # Populated by load_data(); kept as None until then so the attributes
        # always exist on the instance.
        self.quran_df = None
        self.model = None
        self.verse_embeddings = None

    def load_data(self):
        """Load the verse table and model and embed every verse, once.

        Subsequent calls are no-ops while ``data_loaded`` is true.
        """
        if self.data_loaded:
            return

        self.quran_df = pd.read_csv(self.DATA_URL)
        self.model = SentenceTransformer(self.MODEL_NAME)

        # Pre-compute the verse embeddings so each query only has to embed
        # the query text itself.
        self.verse_embeddings = self.model.encode(self.quran_df['text'].tolist())

        # Set the flag last: if anything above raises, the next call retries.
        self.data_loaded = True

    def search(self, query, top_k=5):
        """Return the ``top_k`` verses most similar to ``query``.

        Args:
            query: Free-text query to embed and compare against the verses.
            top_k: Maximum number of results. Values <= 0 yield an empty list.

        Returns:
            A list of dicts, best match first, each with the keys ``surah``,
            ``ayah``, ``text`` and ``similarity`` (the cosine similarity
            formatted as a string with two decimals).
        """
        # A slice of [-0:] selects *everything* and a negative top_k selects
        # an arbitrary tail, so non-positive requests are answered up front
        # (also avoiding the expensive lazy load when nothing is wanted).
        if top_k <= 0:
            return []

        self.load_data()
        query_embedding = self.model.encode([query])
        similarities = cosine_similarity(query_embedding, self.verse_embeddings)[0]

        # argsort is ascending: take the last top_k indices, then reverse so
        # the best match comes first.
        top_indices = np.argsort(similarities)[-top_k:][::-1]

        verses = self.quran_df
        return [
            {
                "surah": verses.iloc[idx]['surah'],
                "ayah": verses.iloc[idx]['ayah'],
                "text": verses.iloc[idx]['text'],
                "similarity": f"{similarities[idx]:.2f}",
            }
            for idx in top_indices
        ]