Spaces:
Sleeping
Sleeping
Update rag.py
Browse files
rag.py
CHANGED
@@ -26,7 +26,6 @@ dataset_embeddings = similarity_model.encode(dataset_questions, convert_to_tenso
|
|
26 |
# Use absolute path for unmatched_queries.csv
|
27 |
base_dir = os.path.dirname(os.path.abspath(__file__))
|
28 |
file_path = os.path.join(base_dir, "unmatched_queries.csv")
|
29 |
-
print(f"[DEBUG] Writing to absolute path: {file_path}")
|
30 |
|
31 |
def query_groq_llm(prompt, model_name="llama3-70b-8192"):
|
32 |
try:
|
@@ -41,13 +40,30 @@ def query_groq_llm(prompt, model_name="llama3-70b-8192"):
|
|
41 |
)
|
42 |
return chat_completion.choices[0].message.content.strip()
|
43 |
except Exception as e:
|
44 |
-
print(f"
|
45 |
return ""
|
46 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
47 |
def get_best_answer(user_input):
|
48 |
user_input_lower = user_input.lower().strip()
|
49 |
|
50 |
-
#
|
51 |
if any(keyword in user_input_lower for keyword in ["fee", "fees", "charges", "semester fee"]):
|
52 |
return (
|
53 |
"💰 For complete and up-to-date fee details for this program, we recommend visiting the official University of Education fee structure page.\n"
|
@@ -61,31 +77,11 @@ def get_best_answer(user_input):
|
|
61 |
best_match_idx = similarities.argmax().item()
|
62 |
best_score = similarities[best_match_idx].item()
|
63 |
|
64 |
-
# βοΈ Log
|
65 |
if best_score < 0.65:
|
66 |
-
|
67 |
|
68 |
-
|
69 |
-
if not os.path.exists(file_path):
|
70 |
-
print(f"[DEBUG] File {file_path} does not exist. Creating file with header.")
|
71 |
-
try:
|
72 |
-
with open(file_path, mode="w", newline="", encoding="utf-8") as file:
|
73 |
-
writer = csv.writer(file)
|
74 |
-
writer.writerow(["Unmatched Queries"])
|
75 |
-
print(f"[DEBUG] Header written successfully.")
|
76 |
-
except Exception as e:
|
77 |
-
print(f"[ERROR] Failed to create file: {e}")
|
78 |
-
|
79 |
-
# Append unmatched query
|
80 |
-
try:
|
81 |
-
with open(file_path, mode="a", newline="", encoding="utf-8") as file:
|
82 |
-
writer = csv.writer(file)
|
83 |
-
writer.writerow([user_input])
|
84 |
-
print(f"[DEBUG] Query logged: {user_input}")
|
85 |
-
except Exception as e:
|
86 |
-
print(f"[ERROR] Failed to write query to CSV: {e}")
|
87 |
-
|
88 |
-
# 🧠 Construct prompt
|
89 |
if best_score >= 0.65:
|
90 |
original_answer = dataset_answers[best_match_idx]
|
91 |
prompt = f"""As an official assistant for University of Education Lahore, provide a clear response:
|
@@ -99,21 +95,19 @@ def get_best_answer(user_input):
|
|
99 |
Question: {user_input}
|
100 |
Official Answer:"""
|
101 |
|
102 |
-
#
|
103 |
llm_response = query_groq_llm(prompt)
|
104 |
|
105 |
-
#
|
106 |
if llm_response:
|
107 |
for marker in ["Improved Answer:", "Official Answer:"]:
|
108 |
if marker in llm_response:
|
109 |
-
|
110 |
-
|
111 |
-
else:
|
112 |
-
response = llm_response
|
113 |
else:
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
|
|
26 |
# Use absolute path for unmatched_queries.csv
|
27 |
base_dir = os.path.dirname(os.path.abspath(__file__))
|
28 |
file_path = os.path.join(base_dir, "unmatched_queries.csv")
|
|
|
29 |
|
30 |
def query_groq_llm(prompt, model_name="llama3-70b-8192"):
|
31 |
try:
|
|
|
40 |
)
|
41 |
return chat_completion.choices[0].message.content.strip()
|
42 |
except Exception as e:
|
43 |
+
print(f"[ERROR] Groq API: {e}")
|
44 |
return ""
|
45 |
|
46 |
+
def log_unmatched_query(query):
|
47 |
+
try:
|
48 |
+
# Create file with header if not exists
|
49 |
+
if not os.path.exists(file_path):
|
50 |
+
with open(file_path, mode="w", newline="", encoding="utf-8") as file:
|
51 |
+
writer = csv.writer(file)
|
52 |
+
writer.writerow(["Unmatched Queries"])
|
53 |
+
|
54 |
+
# Append unmatched query
|
55 |
+
with open(file_path, mode="a", newline="", encoding="utf-8") as file:
|
56 |
+
writer = csv.writer(file)
|
57 |
+
writer.writerow([query])
|
58 |
+
print(f"[DEBUG] Logged unmatched query: {query}")
|
59 |
+
|
60 |
+
except Exception as e:
|
61 |
+
print(f"[ERROR] Logging unmatched query failed: {e}")
|
62 |
+
|
63 |
def get_best_answer(user_input):
|
64 |
user_input_lower = user_input.lower().strip()
|
65 |
|
66 |
+
# 🧾 Fee-specific shortcut
|
67 |
if any(keyword in user_input_lower for keyword in ["fee", "fees", "charges", "semester fee"]):
|
68 |
return (
|
69 |
"💰 For complete and up-to-date fee details for this program, we recommend visiting the official University of Education fee structure page.\n"
|
|
|
77 |
best_match_idx = similarities.argmax().item()
|
78 |
best_score = similarities[best_match_idx].item()
|
79 |
|
80 |
+
# βοΈ Log unmatched queries
|
81 |
if best_score < 0.65:
|
82 |
+
log_unmatched_query(user_input)
|
83 |
|
84 |
+
# 🧠 Prompt for LLM
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
85 |
if best_score >= 0.65:
|
86 |
original_answer = dataset_answers[best_match_idx]
|
87 |
prompt = f"""As an official assistant for University of Education Lahore, provide a clear response:
|
|
|
95 |
Question: {user_input}
|
96 |
Official Answer:"""
|
97 |
|
98 |
+
# π Query Groq LLM
|
99 |
llm_response = query_groq_llm(prompt)
|
100 |
|
101 |
+
# βοΈ Process LLM output
|
102 |
if llm_response:
|
103 |
for marker in ["Improved Answer:", "Official Answer:"]:
|
104 |
if marker in llm_response:
|
105 |
+
return llm_response.split(marker)[-1].strip()
|
106 |
+
return llm_response
|
|
|
|
|
107 |
else:
|
108 |
+
return dataset_answers[best_match_idx] if best_score >= 0.65 else (
|
109 |
+
"For official information:\n"
|
110 |
+
"📞 +92-42-99262231-33\n"
|
111 |
+
"✉️ info@ue.edu.pk\n"
|
112 |
+
"π ue.edu.pk"
|
113 |
+
)
|