midrees2806 committed
Commit 1c1b54f · verified · 1 Parent(s): 4888b78

Update rag.py

Files changed (1):
  rag.py  +19 -3
rag.py CHANGED
@@ -8,6 +8,7 @@ from PIL import Image, ImageDraw, ImageFont
 import numpy as np
 from dotenv import load_dotenv
 import os
+import pandas as pd  # <-- Required for Excel logging
 
 # Load environment variables
 load_dotenv()
@@ -18,7 +19,7 @@ groq_client = Groq(api_key=os.getenv("GROQ_API_KEY"))
 # Load models and dataset
 similarity_model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
 
-# Load dataset (automatically using the path)
+# Load dataset
 with open('dataset.json', 'r') as f:
     dataset = json.load(f)
 
@@ -46,7 +47,7 @@ def query_groq_llm(prompt, model_name="llama3-70b-8192"):
 def get_best_answer(user_input):
     user_input_lower = user_input.lower().strip()
 
-    # 👉 Check if question is about fee
+    # 👉 Fee-specific shortcut
     if any(keyword in user_input_lower for keyword in ["fee", "fees", "charges", "semester fee"]):
         return (
             "💰 For complete and up-to-date fee details for this program, we recommend visiting the official University of Education fee structure page.\n"
@@ -54,12 +55,25 @@ def get_best_answer(user_input):
             "🔗 https://ue.edu.pk/allfeestructure.php"
         )
 
-    # 🔍 Continue with normal similarity-based logic
+    # 🔍 Similarity matching
     user_embedding = similarity_model.encode(user_input_lower, convert_to_tensor=True)
     similarities = util.pytorch_cos_sim(user_embedding, dataset_embeddings)[0]
     best_match_idx = similarities.argmax().item()
     best_score = similarities[best_match_idx].item()
 
+    # ✏️ If not matched well, log to Excel
+    if best_score < 0.65:
+        file_path = "unmatched_queries.xlsx"
+        if os.path.exists(file_path):
+            try:
+                df = pd.read_excel(file_path)
+                new_row = {"Unmatched Queries": user_input}
+                df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)
+                df.to_excel(file_path, index=False)
+            except Exception as e:
+                print(f"Error updating unmatched_queries.xlsx: {e}")
+
+    # 🧠 Prompt construction
     if best_score >= 0.65:
         original_answer = dataset_answers[best_match_idx]
         prompt = f"""As an official assistant for University of Education Lahore, provide a clear response:
@@ -73,8 +87,10 @@ def get_best_answer(user_input):
     Question: {user_input}
     Official Answer:"""
 
+    # 🧠 Query LLM
     llm_response = query_groq_llm(prompt)
 
+    # 🧾 Process LLM output
     if llm_response:
         for marker in ["Improved Answer:", "Official Answer:"]:
             if marker in llm_response:
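
For context when reading the similarity hunk: `dataset_embeddings` and `dataset_answers` are defined outside the lines this diff shows. A minimal sketch of how they are presumably derived from dataset.json — the "question"/"answer" keys are an assumption, not confirmed by the commit:

    # Hedged sketch — assumes each dataset.json record has "question" and "answer" keys
    dataset_questions = [item["question"] for item in dataset]
    dataset_answers = [item["answer"] for item in dataset]
    # Pre-encode once so each user query needs only a single cosine-similarity pass
    dataset_embeddings = similarity_model.encode(dataset_questions, convert_to_tensor=True)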
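
Note on the new logging block: as committed, it only appends when unmatched_queries.xlsx already exists, so low-score queries are silently dropped until someone creates the workbook by hand; pandas also needs openpyxl installed to read and write .xlsx files. A minimal create-if-missing variant (the helper name log_unmatched_query is hypothetical, not part of this commit):

    import os
    import pandas as pd

    def log_unmatched_query(user_input, file_path="unmatched_queries.xlsx"):
        # Start a fresh sheet on first use instead of skipping the write
        try:
            if os.path.exists(file_path):
                df = pd.read_excel(file_path)
            else:
                df = pd.DataFrame(columns=["Unmatched Queries"])
            df = pd.concat([df, pd.DataFrame([{"Unmatched Queries": user_input}])],
                           ignore_index=True)
            df.to_excel(file_path, index=False)  # both calls rely on openpyxl for .xlsx
        except Exception as e:
            print(f"Error updating {file_path}: {e}")

Since this read-modify-write reloads the whole workbook on every miss, an append-only CSV would be a lighter design choice if the log is expected to grow.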