supertskone committed on
Commit
8d3e170
·
verified ·
1 Parent(s): 0c62911

Delete app/search_engine.py

Browse files
Files changed (1) hide show
  1. app/search_engine.py +0 -54
app/search_engine.py DELETED
@@ -1,54 +0,0 @@
1
- import numpy as np
2
- from typing import List, Tuple
3
-
4
- from .similarity import cosine_similarity
5
- from .vectorizer import Vectorizer
6
- import logging
7
-
8
- # Configure logging: call basicConfig at INFO level and create a module-level logger named after this module
9
- logging.basicConfig(level=logging.INFO)
10
- logger = logging.getLogger(__name__)
11
-
12
-
13
class PromptSearchEngine:
    """Find the stored prompts most similar to a free-text query.

    Two search paths are available: a Pinecone index query (reached through
    ``self.vectorizer.index``) and an in-memory cosine-similarity ranking
    over ``self.corpus_vectors``. The in-memory path also serves as the
    fallback when the Pinecone query raises.
    """

    def __init__(self):
        # NOTE(review): the vectorizer is built with init_pinecone=False while
        # most_similar() defaults to use_pinecone=True; whether
        # `self.vectorizer.index` is available in that configuration depends
        # on Vectorizer, which is not visible here - confirm.
        self.vectorizer = Vectorizer(init_pinecone=False)
        # NOTE(review): sets a private flag on the vectorizer directly;
        # presumably marks its data as already loaded - verify against
        # Vectorizer.
        self.vectorizer._data_loaded = True
        self.prompts = self.vectorizer.prompts
        # Encode the whole corpus once so cosine searches only encode the query.
        self.corpus_vectors = self.vectorizer.transform(self.prompts)
        self.index_name = self.vectorizer.pinecone_index_name

    def most_similar(self, query: str, n: int = 5, use_pinecone: bool = True) -> List[Tuple[float, str]]:
        """Return up to ``n`` ``(score, prompt_text)`` pairs for ``query``.

        Args:
            query: Text to search for.
            n: Maximum number of results. Values ``<= 0`` yield an empty list.
            use_pinecone: When true, query the Pinecone index first and fall
                back to in-memory cosine similarity if that query raises.
                When false, use in-memory cosine similarity directly.

        Returns:
            A list of ``(score, text)`` tuples. Cosine results are ordered by
            descending similarity; Pinecone results keep the order of the
            ``matches`` in the response, and matches whose metadata has no
            ``'text'`` key are dropped (so fewer than ``n`` may be returned).
        """
        # `[-n:]` with n == 0 would slice the *entire* corpus and a negative n
        # would slice an arbitrary tail, so reject non-positive n up front.
        if n <= 0:
            return []

        logger.info("Encoding query: %s", query)
        query_vector = self.vectorizer.transform([query])[0]
        # The full embedding is large; keep it out of INFO-level logs.
        logger.debug("Encoded query vector: %s", query_vector)

        if not use_pinecone:
            logger.info("I'm cosine similarity search because the use_pinecone is: %s", use_pinecone)
            logger.info("Using cosine similarity for search")
            return self._cosine_search(query_vector, n)

        logger.info("I'm doing pinecone vector search because the use_pinecone is: %s", use_pinecone)
        try:
            # Convert to a list of native Python floats before sending.
            # assumes query_vector is a numpy array - TODO confirm against
            # Vectorizer.transform
            query_vector_list = query_vector.tolist()
            search_result = self.vectorizer.index.query(
                vector=query_vector_list,
                top_k=n,
                include_metadata=True,
            )
            logger.info("Search result: %s", search_result)

            # Keep only matches that actually carry the prompt text.
            return [
                (match['score'], match['metadata']['text'])
                for match in search_result['matches']
                if 'text' in match['metadata']
            ]
        except Exception as e:
            # Deliberate best-effort: any Pinecone failure degrades to the
            # local search instead of failing the request. logger.exception
            # keeps the traceback so the failure remains diagnosable.
            logger.exception("Pinecone query failed: %s", e)
            logger.info("Falling back to cosine similarity search.")
            return self._cosine_search(query_vector, n)

    def _cosine_search(self, query_vector, n: int) -> List[Tuple[float, str]]:
        """Rank the in-memory corpus against ``query_vector``, best match first."""
        # assumes cosine_similarity returns one score per corpus vector as a
        # 1-D array - it is defined in .similarity, not visible here
        similarities = cosine_similarity(query_vector, self.corpus_vectors)
        # argsort is ascending: take the last n indices, then reverse them.
        top_n_indices = np.argsort(similarities)[-n:][::-1]
        return [(float(similarities[i]), self.prompts[i]) for i in top_n_indices]