Spaces:
Sleeping
Sleeping
import os | |
import json | |
import requests | |
from sentence_transformers import SentenceTransformer, util | |
from dotenv import load_dotenv | |
# Read settings from a .env file (if present) into the process environment.
load_dotenv()

# Hugging Face credentials: the script refuses to start without a token.
HF_API_TOKEN = os.environ.get("HF_API_TOKEN")
if not HF_API_TOKEN:
    raise ValueError("Hugging Face API Token is missing from .env file")

# Hosted inference endpoint for the DeepSeek model, plus the bearer-auth
# header that is sent with every request to it.
DEEPSEEK_API = "https://api-inference.huggingface.co/models/deepseek-ai/deepseek-llm-7b"
headers = dict(Authorization=f"Bearer {HF_API_TOKEN}")
# Sentence-embedding model used to compare a user question with the
# questions stored in the dataset.
similarity_model = SentenceTransformer("paraphrase-MiniLM-L6-v2")

# Load the question/answer dataset. JSON text is UTF-8, so decode it
# explicitly instead of relying on the platform's default encoding.
with open("ds.json", encoding="utf-8") as f:
    dataset = json.load(f)

# Precompute, once at import time, the normalised questions, their answers
# (index-aligned with the questions) and the question embeddings.
# Each dataset item is read through "Question" / "Answer" keys; a missing key
# falls back to an empty string.
dataset_questions = [item.get("Question", "").lower().strip() for item in dataset]
dataset_answers = [item.get("Answer", "") for item in dataset]
dataset_embeddings = similarity_model.encode(dataset_questions, convert_to_tensor=True)
def query_deepseek(prompt, timeout=60):
    """Send *prompt* to the DeepSeek inference endpoint and return its text.

    This is a best-effort call: any network, HTTP or decoding problem is
    reported on stdout and turned into an empty string so the caller can
    fall back to another answer source.

    Args:
        prompt: Text sent as the ``inputs`` field of the request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        The stripped ``generated_text`` of the first result, or ``""`` when
        the request fails or the response does not have the expected shape.
    """
    payload = {
        "inputs": prompt,
        "parameters": {
            "max_new_tokens": 500,
            "temperature": 0.7,
            "do_sample": True,
        },
    }
    try:
        response = requests.post(
            DEEPSEEK_API, headers=headers, json=payload, timeout=timeout
        )
        response.raise_for_status()
        result = response.json()
    except (requests.RequestException, ValueError) as e:
        # RequestException covers connection errors, timeouts and HTTP error
        # statuses; ValueError covers a body that is not valid JSON.
        print(f"DeepSeek API error: {e}")
        return ""

    # The code expects a non-empty list whose first element is a mapping
    # carrying "generated_text"; anything else is treated as "no answer".
    if not (isinstance(result, list) and result and isinstance(result[0], dict)):
        return ""
    generated = result[0].get("generated_text")
    return generated.strip() if isinstance(generated, str) else ""
# Markers that end the prompts built below. "Improved Answer:" must be tried
# first because "Answer:" is a substring of it.
_ANSWER_MARKERS = ("Improved Answer:", "Answer:")

_NO_INFO_MESSAGE = (
    "I couldn't find specific information.\n"
    "Please visit the UOE website: https://ue.edu.pk/"
)


def _extract_answer(generated_text):
    """Return the text after the last answer marker, or the text unchanged."""
    for marker in _ANSWER_MARKERS:
        if marker in generated_text:
            return generated_text.rpartition(marker)[2].strip()
    return generated_text


def get_best_answer(user_input, *, threshold=0.65):
    """Answer *user_input* using the dataset and the DeepSeek model.

    The question is embedded and compared with the precomputed dataset
    embeddings. If the best cosine similarity reaches *threshold*, the model
    is asked to rephrase the stored answer; otherwise it is asked to answer
    the question directly.

    Args:
        user_input: The question text as typed by the user.
        threshold: Minimum cosine similarity for a dataset entry to count as
            a match. Defaults to the previously hard-coded 0.65.

    Returns:
        The model's answer when it produced non-empty text; otherwise the
        matched dataset answer; otherwise a generic message pointing to the
        university website.
    """
    matched_answer = None
    if dataset_answers:  # argmax on an empty similarity tensor would raise
        user_embedding = similarity_model.encode(
            user_input.lower().strip(), convert_to_tensor=True
        )
        similarities = util.pytorch_cos_sim(user_embedding, dataset_embeddings)[0]
        best_match_idx = similarities.argmax().item()
        if similarities[best_match_idx].item() >= threshold:
            matched_answer = dataset_answers[best_match_idx]

    if matched_answer is not None:  # Good match found
        prompt = (
            "Rephrase this university answer to be more helpful while keeping key information:\n"
            f"Question: {user_input}\n"
            f"Original Answer: {matched_answer}\n"
            "Improved Answer:"
        )
    else:  # No good match
        prompt = (
            "As a university assistant, provide a helpful response to:\n"
            f"Question: {user_input}\n"
            "Answer:"
        )

    deepseek_response = query_deepseek(prompt)
    if deepseek_response:
        # The generated text may include the prompt itself, so keep only what
        # follows the final marker. If nothing follows it, fall through to the
        # fallbacks below instead of returning an empty answer.
        answer = _extract_answer(deepseek_response)
        if answer:
            return answer

    # Prefer the stored dataset answer; never hand back an empty string.
    return matched_answer or _NO_INFO_MESSAGE