import os
import json
import requests
from sentence_transformers import SentenceTransformer, util
from dotenv import load_dotenv

# ✅ Load .env file
load_dotenv()

# ✅ Hugging Face API Token check
HF_API_TOKEN = os.getenv("HF_API_TOKEN")
if not HF_API_TOKEN:
    raise ValueError("Error: Hugging Face API Token is missing! Please check your .env file.")

# ✅ Hugging Face Inference API endpoint (Mistral-7B-Instruct)
GPT_MODEL_API = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.2"

# ✅ Headers for API request
headers = {"Authorization": f"Bearer {HF_API_TOKEN}"}

# ✅ Load sentence transformer model for intent matching
similarity_model = SentenceTransformer('paraphrase-MiniLM-L6-v2')

# ✅ Load dataset
with open("ds.json") as f:
    dataset = json.load(f)

# ✅ Precompute embeddings for dataset questions
dataset_inputs = [item.get("input", "").lower().strip() for item in dataset]
dataset_answers = [item.get("response", "") for item in dataset]
dataset_embeddings = similarity_model.encode(dataset_inputs, convert_to_tensor=True)

# ✅ Function to detect greetings
def is_greeting(text):
    greetings = ["hello", "hi", "hey", "salam", "assalam o alaikum", "assalamu alaikum", "assalamualaikum"]
    return text.lower().strip() in greetings

# ✅ Function to get the best matching answer
def get_best_answer(user_input):
    if is_greeting(user_input):
        return "Hello! 😊 How can I assist you today with university-related information?"

    user_input_embedding = similarity_model.encode(user_input.lower().strip(), convert_to_tensor=True)
    similarities = util.pytorch_cos_sim(user_input_embedding, dataset_embeddings)[0]

    # ✅ Find the best match and its similarity score
    best_match_index = similarities.argmax().item()
    best_score = similarities[best_match_index].item()

    # ✅ Set a similarity threshold (tune as needed)
    THRESHOLD = 0.65
    if best_score < THRESHOLD:
        return (
            "I'm sorry, I couldn't find an exact answer to your question. "
            "Please try rephrasing your question for better results. "
            "You can also visit the official UOE website: https://ue.edu.pk/"
        )

    best_answer = dataset_answers[best_match_index]
    return rephrase_answer(best_answer)

# ✅ Function to rephrase the matched answer using the hosted LLM
def rephrase_answer(answer):
    prompt = (
        f"Rephrase the following university-related answer while keeping the meaning unchanged:\n\n"
        f"Original Answer: {answer}\n\n"
        f"Rephrased Answer:"
    )
    payload = {"inputs": prompt}  # ✅ Structured prompt to prevent extra details

    response = requests.post(GPT_MODEL_API, headers=headers, json=payload)

    if response.status_code == 200:
        result = response.json()
        if isinstance(result, list) and result:
            generated_text = result[0].get("generated_text", answer).strip()

            # ✅ Ensure only the rephrased answer is returned
            if "Rephrased Answer:" in generated_text:
                return generated_text.split("Rephrased Answer:")[-1].strip()
            return generated_text

    return answer  # ✅ Return original answer if API fails
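
# A minimal interactive sketch of how get_best_answer() might be exercised from
# the command line. The __main__ guard, prompt strings, and the "quit" exit
# keyword below are assumptions added for illustration, not part of the
# original chatbot code.
if __name__ == "__main__":
    print("University chatbot (type 'quit' to exit)")
    while True:
        user_input = input("You: ").strip()
        if not user_input:
            continue
        if user_input.lower() == "quit":
            break
        print("Bot:", get_best_answer(user_input))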