Spaces:
Sleeping
Sleeping
import os | |
import json | |
import requests | |
from sentence_transformers import SentenceTransformer, util | |
from dotenv import load_dotenv | |
# Load environment variables from a local .env file.
load_dotenv()

# The Hugging Face API token is mandatory: fail fast at import time when it
# is not configured, rather than on the first rephrasing request.
HF_API_TOKEN = os.getenv("HF_API_TOKEN")
if not HF_API_TOKEN:
    raise ValueError(
        "Error: Hugging Face API Token is missing! Please check your .env file."
    )

# Hugging Face inference endpoint of the model used to rephrase answers.
GPT_MODEL_API = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.2"

# Headers sent with every request to the inference endpoint.
headers = {"Authorization": f"Bearer {HF_API_TOKEN}"}

# Sentence-transformer model used to match user questions to dataset entries.
similarity_model = SentenceTransformer('paraphrase-MiniLM-L6-v2')

# Load the question/answer dataset. The encoding is given explicitly so the
# file is decoded the same way on every platform instead of depending on the
# locale's default encoding.
with open("ds.json", encoding="utf-8") as f:
    dataset = json.load(f)

# Precompute normalised questions, their answers and the question embeddings
# once, so each query only has to embed the user's own text.
dataset_inputs = [item.get("input", "").lower().strip() for item in dataset]
dataset_answers = [item.get("response", "") for item in dataset]
dataset_embeddings = similarity_model.encode(dataset_inputs, convert_to_tensor=True)
# Greetings recognised by is_greeting, stored lower-cased with single spaces.
_GREETINGS = frozenset({
    "hello",
    "hi",
    "hey",
    "salam",
    "assalam o alaikum",
    "assalamu alaikum",
    "assalamualaikum",
})

# Punctuation ignored at either end of a message ("Hi!", "hello.", "hey?").
_EDGE_PUNCTUATION = ".,!?;:"


def is_greeting(text):
    """Return True when *text* consists solely of a known greeting.

    Matching ignores letter case, surrounding whitespace, punctuation at the
    start or end of the message, and repeated whitespace between words.
    Anything that is not a string is never a greeting.

    Args:
        text: The raw user message.

    Returns:
        bool: True if the normalised message is one of the known greetings.
    """
    if not isinstance(text, str):
        return False
    # Strip the edges first, then collapse inner whitespace, so that
    # "Hello !" and "assalam  o  alaikum" both normalise to a known entry.
    words = text.lower().strip().strip(_EDGE_PUNCTUATION).split()
    return " ".join(words) in _GREETINGS
# Reply used whenever no dataset entry can be matched to the user's message.
_NO_MATCH_MESSAGE = (
    "I'm sorry, I couldn't find an exact answer to your question. "
    "You may kindly try rephrasing your question gently for better results. "
    "Also, feel free to visit the UOE official website for information: https://ue.edu.pk/"
)


def get_best_answer(user_input, *, threshold=0.65):
    """Return the chatbot reply for *user_input*.

    Greetings get a fixed welcome message. Any other message is embedded and
    compared by cosine similarity with the precomputed dataset question
    embeddings; the answer of the closest question is rephrased and returned
    when its score reaches *threshold*, otherwise a fallback message is
    returned.

    Args:
        user_input: The raw user message.
        threshold: Minimum cosine similarity for a dataset entry to count as
            a match. Keyword-only; tune as needed.

    Returns:
        str: The welcome message, the (rephrased) dataset answer, or the
        fallback message when there is nothing to match or no entry is
        similar enough.
    """
    # An empty or non-text message cannot be matched meaningfully.
    if not isinstance(user_input, str) or not user_input.strip():
        return _NO_MATCH_MESSAGE

    if is_greeting(user_input):
        # NOTE(review): the "π" below looks like a mis-decoded emoji; confirm
        # the intended character against the original file.
        return "Hello! π How can I assist you today with university-related information?"

    # Without dataset entries there is no best match to select.
    if not dataset_answers:
        return _NO_MATCH_MESSAGE

    # Normalise the query the same way the dataset questions were normalised.
    query_embedding = similarity_model.encode(
        user_input.lower().strip(), convert_to_tensor=True
    )
    similarities = util.pytorch_cos_sim(query_embedding, dataset_embeddings)[0]

    # Pick the closest dataset question and its similarity score.
    best_match_index = similarities.argmax().item()
    best_score = similarities[best_match_index].item()
    if best_score < threshold:
        return _NO_MATCH_MESSAGE

    return rephrase_answer(dataset_answers[best_match_index])
# Upper bound, in seconds, on how long a rephrasing request may take.
_REPHRASE_TIMEOUT_SECONDS = 30

# Marker that ends the prompt; the rephrased text is expected after it.
_REPHRASE_MARKER = "Rephrased Answer:"


def rephrase_answer(answer):
    """Rephrase *answer* with the hosted language model.

    The original answer is returned unchanged whenever rephrasing is not
    possible: the answer is empty or not a string, the request fails or
    times out, the API responds with a non-200 status, or the response does
    not contain usable generated text.

    Args:
        answer: The dataset answer to rephrase.

    Returns:
        The rephrased answer, or *answer* itself as a fallback.
    """
    # Nothing to rephrase: avoid asking the model to invent an answer.
    if not isinstance(answer, str) or not answer.strip():
        return answer

    # Structured prompt to discourage the model from adding extra details.
    prompt = (
        "Rephrase the following university-related answer while keeping the meaning unchanged:\n\n"
        f"Original Answer: {answer}\n\n"
        "Rephrased Answer:"
    )
    payload = {"inputs": prompt}

    try:
        # Without a timeout an unresponsive endpoint would block forever.
        response = requests.post(
            GPT_MODEL_API,
            headers=headers,
            json=payload,
            timeout=_REPHRASE_TIMEOUT_SECONDS,
        )
    except requests.RequestException:
        return answer

    if response.status_code != 200:
        return answer

    try:
        result = response.json()
    except ValueError:
        # The body was not valid JSON.
        return answer

    # A usable response is a non-empty list whose first item is a dict
    # carrying the text under "generated_text".
    if not isinstance(result, list) or not result:
        return answer
    first = result[0]
    if not isinstance(first, dict):
        return answer
    generated_text = first.get("generated_text", answer)
    if not isinstance(generated_text, str):
        return answer
    generated_text = generated_text.strip()

    # When the generated text contains the prompt marker, keep only what
    # follows its last occurrence so just the rephrased answer is returned.
    if _REPHRASE_MARKER in generated_text:
        generated_text = generated_text.split(_REPHRASE_MARKER)[-1].strip()

    # An empty generation is useless to the user; fall back to the original.
    return generated_text or answer