# Hell / utils.py
# midrees2806's picture
# Update utils.py
# ebf68db verified
import os
import json
import requests
from sentence_transformers import SentenceTransformer, util
from dotenv import load_dotenv
# Load environment variables before any configuration value is read.
load_dotenv()

# Hugging Face credentials: startup is aborted when the token is unset or empty,
# since every request to the inference endpoint sends it as a bearer token.
HF_API_TOKEN = os.environ.get("HF_API_TOKEN")
if HF_API_TOKEN is None or HF_API_TOKEN == "":
    raise ValueError("Hugging Face API Token is missing from .env file")

# DeepSeek model endpoint and the authorization header sent with each request.
DEEPSEEK_API = "https://api-inference.huggingface.co/models/deepseek-ai/deepseek-llm-7b"
headers = {"Authorization": "Bearer " + HF_API_TOKEN}
# Sentence-embedding model used to compare user questions with dataset questions.
similarity_model = SentenceTransformer('paraphrase-MiniLM-L6-v2')

# Load the question/answer dataset. The encoding is pinned to UTF-8 so the file
# is decoded the same way on every platform instead of depending on the
# locale's default encoding.
# NOTE(review): the code below iterates `dataset` and calls `.get` on each
# item, so ds.json is presumably a list of objects with "Question" and
# "Answer" keys; confirm against the data file.
with open("ds.json", encoding="utf-8") as f:
    dataset = json.load(f)

# Precompute embeddings once at import time so each query only has to embed
# the user's question. `or ""` keeps a JSON null "Question" from crashing on
# `.lower()`; answers are passed through unchanged. The three sequences below
# share the same index for a given dataset entry.
dataset_questions = [(item.get("Question") or "").lower().strip() for item in dataset]
dataset_answers = [item.get("Answer", "") for item in dataset]
dataset_embeddings = similarity_model.encode(dataset_questions, convert_to_tensor=True)
def query_deepseek(prompt, timeout=30):
    """Send ``prompt`` to the DeepSeek endpoint and return the generated text.

    Args:
        prompt: Text sent as the ``inputs`` field of the request payload.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        The stripped ``generated_text`` of the first result, or ``""`` when
        the request fails or the response does not have the expected shape
        (a non-empty list whose first element is a dict with a string
        ``generated_text``).
    """
    payload = {
        "inputs": prompt,
        "parameters": {
            "max_new_tokens": 500,
            "temperature": 0.7,
            "do_sample": True,
        },
    }

    # Only the network call and JSON decoding can raise here; shape problems
    # are handled explicitly below rather than through a blanket except.
    try:
        response = requests.post(
            DEEPSEEK_API, headers=headers, json=payload, timeout=timeout
        )
        response.raise_for_status()
        result = response.json()
    except (requests.RequestException, ValueError) as e:
        print(f"DeepSeek API error: {e}")
        return ""

    if not (isinstance(result, list) and result):
        return ""

    first = result[0]
    text = first.get("generated_text") if isinstance(first, dict) else None
    return text.strip() if isinstance(text, str) else ""
# Reply used when neither the dataset nor the model provides an answer.
_NO_MATCH_MESSAGE = (
    "I couldn't find specific information.\n"
    "Please visit the UOE website: https://ue.edu.pk/"
)

# Prompt labels that may appear in the model output, checked in this order.
_ANSWER_MARKERS = ("Improved Answer:", "Answer:")


def _extract_answer(generated_text):
    """Return the text after the last occurrence of the first marker found."""
    for marker in _ANSWER_MARKERS:
        if marker in generated_text:
            return generated_text.rpartition(marker)[2].strip()
    return generated_text


def get_best_answer(user_input, threshold=0.65):
    """Answer a user question using the dataset and the DeepSeek model.

    The question is embedded and compared with the precomputed dataset
    embeddings. If the best cosine similarity reaches ``threshold``, the model
    is asked to rephrase the matching dataset answer; otherwise it is asked to
    answer the question directly.

    Args:
        user_input: The question typed by the user. ``None`` or a blank string
            yields the default "no information" message without calling the
            model.
        threshold: Minimum cosine similarity for a dataset entry to count as a
            match.

    Returns:
        The model's answer with any echoed prompt removed. When the model
        returns nothing usable, the matched dataset answer (if there was a
        match) or the default "no information" message.
    """
    question = (user_input or "").strip()
    if not question:
        # Nothing to match or to ask the model about.
        return _NO_MATCH_MESSAGE

    # Find best match from dataset
    query_embedding = similarity_model.encode(question.lower(), convert_to_tensor=True)
    similarities = util.pytorch_cos_sim(query_embedding, dataset_embeddings)[0]
    best_match_idx = similarities.argmax().item()
    best_score = similarities[best_match_idx].item()

    if best_score >= threshold:  # Good match found
        original_answer = dataset_answers[best_match_idx]
        prompt = (
            "Rephrase this university answer to be more helpful while keeping key information:\n"
            f"Question: {user_input}\n"
            f"Original Answer: {original_answer}\n"
            "Improved Answer:"
        )
        fallback = original_answer
    else:  # No good match
        prompt = (
            "As a university assistant, provide a helpful response to:\n"
            f"Question: {user_input}\n"
            "Answer:"
        )
        fallback = _NO_MATCH_MESSAGE

    deepseek_response = query_deepseek(prompt)
    if deepseek_response:
        answer = _extract_answer(deepseek_response)
        # If the model only echoed the prompt, nothing follows the marker;
        # fall through to the fallback instead of returning an empty reply.
        if answer:
            return answer
    return fallback