Upload 4 files
app.py
ADDED
@@ -0,0 +1,33 @@
+import gradio as gr
+import json
+from utils import get_best_answer
+from dotenv import load_dotenv
+
+# ✅ Load environment variables
+load_dotenv()
+
+# ✅ Load dataset (note: utils.py loads its own copy, so this one is unused here)
+with open("dataset.json", "r") as f:
+    dataset = json.load(f)
+
+# ✅ Gradio UI function
+def chatbot_response(user_input):
+    try:
+        best_answer = get_best_answer(user_input)
+        return best_answer  # ✅ Displays the rephrased best answer
+    except Exception:
+        return "Oops! Something went wrong. Please try again."
+
+# ✅ Gradio interface
+iface = gr.Interface(
+    fn=chatbot_response,
+    inputs="text",
+    outputs="text",
+    title="UOE Academic Chatbot",
+    description="Hello! 👋 Welcome to **Academic Navigator**, the university information assistant for UOE.\nI can answer your university-related questions, such as admissions and scholarships."
+)
+
+# ✅ Launch app
+iface.launch(share=True)
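One note on app.py: share=True appears to have no effect when the app already runs on Spaces (the Space URL is public); it matters only for local runs. Once the Space is up, the interface can also be exercised programmatically. A minimal sketch using gradio_client, where the Space ID your-username/uoe-academic-chatbot is a hypothetical placeholder:

from gradio_client import Client

# Hypothetical Space ID -- substitute the real one
client = Client("your-username/uoe-academic-chatbot")

# A single-function gr.Interface exposes its endpoint as /predict by default
reply = client.predict("What scholarships does UOE offer?", api_name="/predict")
print(reply)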
ds.json
ADDED
The diff for this file is too large to render.
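The raw diff is not rendered here, but utils.py reads an "input" and a "response" field from every record, so the dataset is presumably a JSON array of pairs along these lines (the values are illustrative):

[
  {"input": "How do I apply for admission?", "response": "Applications are submitted through ..."},
  {"input": "What scholarships are available?", "response": "UOE offers merit and need-based ..."}
]

Note also that the uploaded file is named ds.json while both app.py and utils.py open dataset.json; one of the two presumably needs renaming for the Space to start.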
requirements.txt
ADDED
@@ -0,0 +1,7 @@
+gradio
+sentence-transformers
+torch
+transformers
+requests
+python-dotenv
+# ✅ For spelling correction using TextBlob
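The last line of requirements.txt mentions TextBlob, but the package itself is not listed and utils.py never imports it, so the comment appears to be a leftover. If spelling correction is still intended, a minimal sketch, assuming textblob is actually added to requirements.txt (correct_spelling is a hypothetical helper, not part of the uploaded code):

from textblob import TextBlob

def correct_spelling(text):
    # TextBlob's correct() fixes common misspellings word by word;
    # it is slow on long inputs, so apply it to short user queries only
    return str(TextBlob(text).correct())

get_best_answer could then call user_input = correct_spelling(user_input) before computing the embedding.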
utils.py
ADDED
@@ -0,0 +1,89 @@
+import os
+import json
+import requests
+from sentence_transformers import SentenceTransformer, util
+from dotenv import load_dotenv
+
+
+# ✅ Load .env file
+load_dotenv()
+
+# ✅ Hugging Face API token check
+HF_API_TOKEN = os.getenv("HF_API_TOKEN")
+
+if not HF_API_TOKEN:
+    raise ValueError("Error: Hugging Face API Token is missing! Please check your .env file.")
+
+# ✅ Hugging Face Inference API endpoint (Mistral-7B-Instruct)
+GPT_MODEL_API = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.2"
+
+# ✅ Headers for API request
+headers = {"Authorization": f"Bearer {HF_API_TOKEN}"}
+
+# ✅ Load sentence transformer model for intent matching
+similarity_model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
+
+# ✅ Load dataset
+with open("dataset.json") as f:
+    dataset = json.load(f)
+
+# ✅ Precompute embeddings for dataset questions
+dataset_inputs = [item.get("input", "").lower().strip() for item in dataset]
+dataset_answers = [item.get("response", "") for item in dataset]
+dataset_embeddings = similarity_model.encode(dataset_inputs, convert_to_tensor=True)
+
+# ✅ Detect greetings
+def is_greeting(text):
+    greetings = ["hello", "hi", "hey", "salam", "assalam o alaikum", "assalamu alaikum", "assalamualaikum"]
+    return text.lower().strip() in greetings
+
+# ✅ Get the best matching answer
+def get_best_answer(user_input):
+    if is_greeting(user_input):
+        return "Hello! 👋 How can I assist you today with university-related information?"
+
+    user_input_embedding = similarity_model.encode(user_input.lower().strip(), convert_to_tensor=True)
+    similarities = util.pytorch_cos_sim(user_input_embedding, dataset_embeddings)[0]
+
+    # ✅ Find the best match and its similarity score
+    best_match_index = similarities.argmax().item()
+    best_score = similarities[best_match_index].item()
+
+    # ✅ Similarity threshold (tune as needed)
+    THRESHOLD = 0.65
+
+    if best_score < THRESHOLD:
+        return (
+            "I'm sorry, I couldn't find an exact answer to your question. "
+            "Please try rephrasing it for better results. "
+            "You can also visit the official UOE website: https://ue.edu.pk/"
+        )
+
+    best_answer = dataset_answers[best_match_index]
+    return rephrase_answer(best_answer)
+
+# ✅ Rephrase an answer using the hosted model
+def rephrase_answer(answer):
+    prompt = (
+        f"Rephrase the following university-related answer while keeping the meaning unchanged:\n\n"
+        f"Original Answer: {answer}\n\n"
+        f"Rephrased Answer:"
+    )
+
+    payload = {"inputs": prompt}  # ✅ Structured prompt to prevent extra details
+
+    response = requests.post(GPT_MODEL_API, headers=headers, json=payload)
+
+    if response.status_code == 200:
+        result = response.json()
+        if isinstance(result, list) and result:
+            generated_text = result[0].get("generated_text", answer).strip()
+
+            # ✅ Ensure only the rephrased answer is returned
+            if "Rephrased Answer:" in generated_text:
+                return generated_text.split("Rephrased Answer:")[-1].strip()
+            return generated_text
+
+    return answer  # ✅ Return original answer if API fails
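A closing observation on rephrase_answer: with a bare {"inputs": prompt} payload, the text-generation endpoint usually echoes the prompt back in generated_text, which is why the code splits on "Rephrased Answer:". The Inference API also accepts a parameters object; a sketch of an alternative payload (the parameter values are illustrative, not tuned):

payload = {
    "inputs": prompt,
    "parameters": {
        "max_new_tokens": 150,      # cap the length of the rephrased answer
        "temperature": 0.7,         # mild sampling for natural paraphrases
        "return_full_text": False,  # return only the completion, not the prompt
    },
}

With return_full_text disabled, the "Rephrased Answer:" split becomes a harmless fallback rather than a necessity. Passing timeout=30 to requests.post would also keep the Gradio handler from hanging if the API stalls.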