import json
from sentence_transformers import SentenceTransformer, util
from groq import Groq
import datetime
import requests
from io import BytesIO
from PIL import Image, ImageDraw, ImageFont
import numpy as np
from dotenv import load_dotenv
import os
import pandas as pd
import csv

# Load environment variables
load_dotenv()

# Initialize Groq client
groq_client = Groq(api_key=os.getenv("GROQ_API_KEY"))

# Load models and dataset
similarity_model = SentenceTransformer('paraphrase-MiniLM-L6-v2')

# Load dataset
with open('dataset.json', 'r') as f:
    dataset = json.load(f)

# Precompute embeddings
dataset_questions = [item.get("input", "").lower().strip() for item in dataset]
dataset_answers = [item.get("response", "") for item in dataset]
dataset_embeddings = similarity_model.encode(dataset_questions, convert_to_tensor=True)


def query_groq_llm(prompt, model_name="llama3-70b-8192"):
    try:
        chat_completion = groq_client.chat.completions.create(
            messages=[{
                "role": "user",
                "content": prompt
            }],
            model=model_name,
            temperature=0.7,
            max_tokens=500
        )
        return chat_completion.choices[0].message.content.strip()
    except Exception as e:
        print(f"Error querying Groq API: {e}")
        return ""


def get_best_answer(user_input):
    user_input_lower = user_input.lower().strip()

    # 👉 Fee-specific shortcut
    if any(keyword in user_input_lower for keyword in ["fee", "fees", "charges", "semester fee"]):
        return (
            "💰 For complete and up-to-date fee details for this program, we recommend visiting the official University of Education fee structure page.\n"
            "You'll find comprehensive information regarding tuition, admission charges, and other applicable fees there.\n"
            "🔗 https://ue.edu.pk/allfeestructure.php"
        )

    # 🔍 Similarity matching
    user_embedding = similarity_model.encode(user_input_lower, convert_to_tensor=True)
    similarities = util.pytorch_cos_sim(user_embedding, dataset_embeddings)[0]
    best_match_idx = similarities.argmax().item()
    best_score = similarities[best_match_idx].item()

    # ✍️ If not matched well, log to CSV
    if best_score < 0.65:
        file_path = "unmatched_queries.csv"
        print(f"[DEBUG] Similarity score too low: {best_score}. Logging query to: {file_path}")

        # Check if file exists
        if not os.path.exists(file_path):
            print(f"[DEBUG] File {file_path} does not exist. Creating file with header.")
            try:
                with open(file_path, mode="w", newline="", encoding="utf-8") as file:
                    writer = csv.writer(file)
                    writer.writerow(["Unmatched Queries"])
                print("[DEBUG] Header written successfully.")
            except Exception as e:
                print(f"[ERROR] Failed to create file: {e}")

        try:
            with open(file_path, mode="a", newline="", encoding="utf-8") as file:
                writer = csv.writer(file)
                writer.writerow([user_input])
            print(f"[DEBUG] Query logged: {user_input}")
        except Exception as e:
            print(f"[ERROR] Failed to write query to CSV: {e}")

    # 🧠 Prompt construction
    if best_score >= 0.65:
        original_answer = dataset_answers[best_match_idx]
        prompt = f"""As an official assistant for University of Education Lahore, provide a clear response:
Question: {user_input}
Original Answer: {original_answer}
Improved Answer:"""
    else:
        prompt = f"""As an official assistant for University of Education Lahore, provide a helpful response:
Include relevant details about university policies. If unsure, direct to official channels.
Question: {user_input}
Official Answer:"""

    # 🧠 Query LLM
    llm_response = query_groq_llm(prompt)

    # 🧾 Process LLM output
    if llm_response:
        for marker in ["Improved Answer:", "Official Answer:"]:
            if marker in llm_response:
                response = llm_response.split(marker)[-1].strip()
                break
        else:
            # No marker found; use the raw LLM output
            response = llm_response
    else:
        response = dataset_answers[best_match_idx] if best_score >= 0.65 else """For official information:
📞 +92-42-99262231-33
✉️ info@ue.edu.pk
🌐 ue.edu.pk"""

    return response
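

# --- Usage sketch (assumption, not part of the original script) ---
# A minimal interactive loop for trying get_best_answer() from the terminal.
# The entry-point guard and the "exit"/"quit" commands are illustrative additions;
# the original source does not show how the function is invoked.
if __name__ == "__main__":
    print("University of Education assistant (type 'exit' to quit)")
    while True:
        try:
            query = input("You: ").strip()
        except (EOFError, KeyboardInterrupt):
            break
        if not query or query.lower() in {"exit", "quit"}:
            break
        print(f"Bot: {get_best_answer(query)}")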