Spaces:
Runtime error
Runtime error
Rohil Bansal
committed on
Commit
·
8778311
1
Parent(s):
56912a0
search improved
Browse files
course_search/search_system/rag_system.py
CHANGED
@@ -1,19 +1,12 @@
|
|
1 |
-
from
|
2 |
-
|
3 |
-
from langchain_community.vectorstores import FAISS
|
4 |
-
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
5 |
-
from langchain.chains import RetrievalQA
|
6 |
-
from langchain_community.llms import HuggingFaceHub
|
7 |
import pandas as pd
|
|
|
8 |
import logging
|
9 |
from typing import List, Dict
|
10 |
-
import os
|
11 |
-
from dotenv import load_dotenv
|
12 |
from pathlib import Path
|
13 |
-
import numpy as np
|
14 |
-
import faiss
|
15 |
-
from sentence_transformers import SentenceTransformer
|
16 |
|
|
|
17 |
logger = logging.getLogger(__name__)
|
18 |
|
19 |
class RAGSystem:
|
@@ -142,4 +135,41 @@ class RAGSystem:
|
|
142 |
|
143 |
except Exception as e:
|
144 |
logger.error(f"Error in search_courses: {str(e)}")
|
145 |
-
raise
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from sentence_transformers import SentenceTransformer
|
2 |
+
import numpy as np
|
|
|
|
|
|
|
|
|
3 |
import pandas as pd
|
4 |
+
import faiss
|
5 |
import logging
|
6 |
from typing import List, Dict
|
|
|
|
|
7 |
from pathlib import Path
|
|
|
|
|
|
|
8 |
|
9 |
+
logging.basicConfig(level=logging.INFO)
|
10 |
logger = logging.getLogger(__name__)
|
11 |
|
12 |
class RAGSystem:
|
|
|
135 |
|
136 |
except Exception as e:
|
137 |
logger.error(f"Error in search_courses: {str(e)}")
|
138 |
+
raise
|
139 |
+
|
140 |
+
def calculate_text_similarity(self, text1: str, text2: str) -> float:
    """
    Calculate the Jaccard word-overlap similarity between two texts.

    Each input is converted with ``str()``, lower-cased and split on
    whitespace; the score is ``|A & B| / |A | B|`` over the two resulting
    sets of distinct words.

    Args:
        text1 (str): First text string
        text2 (str): Second text string

    Returns:
        float: Similarity score between 0 and 1. Returns 0.0 when either
        input is missing (``None`` or a float NaN), contains no words,
        or an unexpected error occurs (the error is logged).
    """
    try:
        word_sets = []
        for value in (text1, text2):
            # A missing value carries no text. Without this guard str()
            # would turn it into the literal word "none" / "nan", which
            # could then spuriously match real text (or another missing
            # value with a perfect score).
            if value is None or (isinstance(value, float) and value != value):
                return 0.0
            word_sets.append(set(str(value).lower().split()))

        words1, words2 = word_sets
        if not words1 or not words2:
            return 0.0

        # Both sets are non-empty here, so the union is never empty and
        # the division is safe.
        return len(words1 & words2) / len(words1 | words2)

    except Exception as e:
        # Best-effort contract: a failure to compare is reported as
        # "no similarity" rather than propagated to the caller.
        logger.error(f"Error calculating text similarity: {str(e)}")
        return 0.0
|