midrees2806 committed on
Commit
cb720fe
·
verified ·
1 Parent(s): 7b27360

Update rag.py

Browse files
Files changed (1) hide show
  1. rag.py +15 -21
rag.py CHANGED
@@ -27,12 +27,6 @@ GREETINGS = [
27
  "hey there", "greetings"
28
  ]
29
 
30
- # Normalize user input for internal processing (with 'which' to 'what' replacement)
31
- def normalize_input(text):
32
- text = text.lower().strip()
33
- text = text.replace("which", "what") # Add your requested replacement
34
- return text
35
-
36
  # Load local dataset
37
  try:
38
  with open('dataset.json', 'r') as f:
@@ -43,8 +37,8 @@ except Exception as e:
43
  print(f"Error loading dataset: {e}")
44
  dataset = []
45
 
46
- # Precompute normalized dataset embeddings
47
- dataset_questions = [normalize_input(item.get("Question", "")) for item in dataset]
48
  dataset_answers = [item.get("Answer", "") for item in dataset]
49
  dataset_embeddings = similarity_model.encode(dataset_questions, convert_to_tensor=True)
50
 
@@ -82,17 +76,12 @@ def query_groq_llm(prompt, model_name="llama3-70b-8192"):
82
  print(f"Error querying Groq API: {e}")
83
  return ""
84
 
85
- # Main logic function (with hidden 'which' to 'what' replacement)
86
  def get_best_answer(user_input):
87
  if not user_input.strip():
88
  return "Please enter a valid question."
89
 
90
- # Preserve original input for display
91
- original_input = user_input
92
-
93
- # Normalize input for processing (with hidden replacement)
94
- processed_input = normalize_input(user_input)
95
- user_input_lower = processed_input # Use normalized version for processing
96
 
97
  if len(user_input_lower.split()) < 3 and not any(greet in user_input_lower for greet in GREETINGS):
98
  return "Please ask your question properly with at least 3 words."
@@ -100,7 +89,7 @@ def get_best_answer(user_input):
100
  if any(greet in user_input_lower for greet in GREETINGS):
101
  greeting_response = query_groq_llm(
102
  f"You are an official assistant for University of Education Lahore. "
103
- f"Respond to this greeting in a friendly and professional manner: {original_input}"
104
  )
105
  return greeting_response if greeting_response else "Hello! How can I assist you today?"
106
 
@@ -111,33 +100,38 @@ def get_best_answer(user_input):
111
  "🔗 https://ue.edu.pk/allfeestructure.php"
112
  )
113
 
114
- # Use normalized input for similarity matching
115
  user_embedding = similarity_model.encode(user_input_lower, convert_to_tensor=True)
116
  similarities = util.pytorch_cos_sim(user_embedding, dataset_embeddings)[0]
117
  best_match_idx = similarities.argmax().item()
118
  best_score = similarities[best_match_idx].item()
119
 
120
  if best_score < 0.65:
121
- manage_unmatched_queries(original_input) # Store original query
122
 
123
  if best_score >= 0.65:
124
  original_answer = dataset_answers[best_match_idx]
125
  prompt = f"""Name is UOE AI Assistant! You are an official assistant for the University of Education Lahore.
 
126
  Rephrase the following official answer clearly and professionally.
127
  Use structured formatting (like headings, bullet points, or numbered lists) where appropriate.
128
  DO NOT add any new or extra information. ONLY rephrase and improve the clarity and formatting of the original answer.
 
129
  ### Question:
130
- {original_input} # Show original to user
 
131
  ### Original Answer:
132
  {original_answer}
 
133
  ### Rephrased Answer:
134
  """
135
  else:
136
  prompt = f"""Name is UOE AI Assistant! As an official assistant for University of Education Lahore, provide a helpful response:
137
  Include relevant details about university policies.
138
  If unsure, direct to official channels.
 
139
  ### Question:
140
- {original_input} # Show original to user
 
141
  ### Official Answer:
142
  """
143
 
@@ -154,4 +148,4 @@ If unsure, direct to official channels.
154
  "📞 +92-42-99262231-33\n"
155
  "✉️ [email protected]\n"
156
  "🌐 https://ue.edu.pk"
157
- )
 
27
  "hey there", "greetings"
28
  ]
29
 
 
 
 
 
 
 
30
  # Load local dataset
31
  try:
32
  with open('dataset.json', 'r') as f:
 
37
  print(f"Error loading dataset: {e}")
38
  dataset = []
39
 
40
+ # Precompute embeddings
41
+ dataset_questions = [item.get("Question", "").lower().strip() for item in dataset]
42
  dataset_answers = [item.get("Answer", "") for item in dataset]
43
  dataset_embeddings = similarity_model.encode(dataset_questions, convert_to_tensor=True)
44
 
 
76
  print(f"Error querying Groq API: {e}")
77
  return ""
78
 
79
+ # Main logic function to be called from Gradio
80
  def get_best_answer(user_input):
81
  if not user_input.strip():
82
  return "Please enter a valid question."
83
 
84
+ user_input_lower = user_input.lower().strip()
 
 
 
 
 
85
 
86
  if len(user_input_lower.split()) < 3 and not any(greet in user_input_lower for greet in GREETINGS):
87
  return "Please ask your question properly with at least 3 words."
 
89
  if any(greet in user_input_lower for greet in GREETINGS):
90
  greeting_response = query_groq_llm(
91
  f"You are an official assistant for University of Education Lahore. "
92
+ f"Respond to this greeting in a friendly and professional manner: {user_input}"
93
  )
94
  return greeting_response if greeting_response else "Hello! How can I assist you today?"
95
 
 
100
  "🔗 https://ue.edu.pk/allfeestructure.php"
101
  )
102
 
 
103
  user_embedding = similarity_model.encode(user_input_lower, convert_to_tensor=True)
104
  similarities = util.pytorch_cos_sim(user_embedding, dataset_embeddings)[0]
105
  best_match_idx = similarities.argmax().item()
106
  best_score = similarities[best_match_idx].item()
107
 
108
  if best_score < 0.65:
109
+ manage_unmatched_queries(user_input)
110
 
111
  if best_score >= 0.65:
112
  original_answer = dataset_answers[best_match_idx]
113
  prompt = f"""Name is UOE AI Assistant! You are an official assistant for the University of Education Lahore.
114
+
115
  Rephrase the following official answer clearly and professionally.
116
  Use structured formatting (like headings, bullet points, or numbered lists) where appropriate.
117
  DO NOT add any new or extra information. ONLY rephrase and improve the clarity and formatting of the original answer.
118
+
119
  ### Question:
120
+ {user_input}
121
+
122
  ### Original Answer:
123
  {original_answer}
124
+
125
  ### Rephrased Answer:
126
  """
127
  else:
128
  prompt = f"""Name is UOE AI Assistant! As an official assistant for University of Education Lahore, provide a helpful response:
129
  Include relevant details about university policies.
130
  If unsure, direct to official channels.
131
+
132
  ### Question:
133
+ {user_input}
134
+
135
  ### Official Answer:
136
  """
137
 
 
148
  "📞 +92-42-99262231-33\n"
149
  "✉️ [email protected]\n"
150
  "🌐 https://ue.edu.pk"
151
+ )