Rohil Bansal committed on
Commit
8778311
·
1 Parent(s): 56912a0

search improved

Browse files
course_search/search_system/rag_system.py CHANGED
@@ -1,19 +1,12 @@
1
- from langchain_community.document_loaders import DataFrameLoader
2
- from langchain_community.embeddings import HuggingFaceEmbeddings
3
- from langchain_community.vectorstores import FAISS
4
- from langchain.text_splitter import RecursiveCharacterTextSplitter
5
- from langchain.chains import RetrievalQA
6
- from langchain_community.llms import HuggingFaceHub
7
  import pandas as pd
 
8
  import logging
9
  from typing import List, Dict
10
- import os
11
- from dotenv import load_dotenv
12
  from pathlib import Path
13
- import numpy as np
14
- import faiss
15
- from sentence_transformers import SentenceTransformer
16
 
 
17
  logger = logging.getLogger(__name__)
18
 
19
  class RAGSystem:
@@ -142,4 +135,41 @@ class RAGSystem:
142
 
143
  except Exception as e:
144
  logger.error(f"Error in search_courses: {str(e)}")
145
- raise
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sentence_transformers import SentenceTransformer
2
+ import numpy as np
 
 
 
 
3
  import pandas as pd
4
+ import faiss
5
  import logging
6
  from typing import List, Dict
 
 
7
  from pathlib import Path
 
 
 
8
 
9
+ logging.basicConfig(level=logging.INFO)
10
  logger = logging.getLogger(__name__)
11
 
12
  class RAGSystem:
 
135
 
136
  except Exception as e:
137
  logger.error(f"Error in search_courses: {str(e)}")
138
+ raise
139
+
140
def calculate_text_similarity(self, text1: str, text2: str) -> float:
    """
    Calculate text similarity between two strings using word overlap.

    Both inputs are converted with ``str()``, lower-cased and split on
    whitespace; the score is the Jaccard index of the two word sets
    (size of the intersection divided by size of the union).

    Args:
        text1 (str): First text string
        text2 (str): Second text string

    Returns:
        float: Similarity score between 0 and 1. 0.0 is returned when
        either input is None or a float NaN, when either input contains
        no words, or when an unexpected error occurs (the error is logged).
    """
    try:
        # A missing value must not be stringified: str(None) is "none" and
        # str(float("nan")) is "nan", which would then match those literal
        # words in the other text and produce a bogus non-zero score.
        for value in (text1, text2):
            if value is None or (isinstance(value, float) and value != value):
                return 0.0

        # Case-insensitive comparison on whitespace-separated tokens.
        words1 = set(str(text1).lower().split())
        words2 = set(str(text2).lower().split())

        if not words1 or not words2:
            return 0.0

        # Jaccard similarity. Both sets are non-empty here, so the union
        # always has at least one element and the division is safe.
        return len(words1 & words2) / len(words1 | words2)

    except Exception as e:
        # Best-effort helper: never propagate, report a zero score instead.
        logger.error(f"Error calculating text similarity: {str(e)}")
        return 0.0