midrees2806 committed
Commit d1ba382 · verified · 1 Parent(s): ee90c3f

Upload 4 files

Files changed (4)
  1. app.py +33 -0
  2. ds.json +0 -0
  3. requirements.txt +7 -0
  4. utils.py +89 -0
app.py ADDED
@@ -0,0 +1,33 @@
+ import gradio as gr
+ import json
+ from utils import get_best_answer
+ from dotenv import load_dotenv
+
+ # ✅ Load environment variables
+ load_dotenv()
+
+ # ✅ Load dataset
+ with open("dataset.json", "r") as f:
+     dataset = json.load(f)
+
+ # ✅ Gradio UI function
+ def chatbot_response(user_input):
+     try:
+         best_answer = get_best_answer(user_input)
+         return best_answer  # ✅ Displays the rephrased best answer
+     except Exception as e:
+         return "Oops! Something went wrong. Please try again."
+
+ # ✅ Gradio interface
+ iface = gr.Interface(
+     fn=chatbot_response,
+     inputs="text",
+     outputs="text",
+     title="UOE Academic Chatbot",
+     description="Hello! 🎓 Welcome to **Academic Navigator** – the university information assistant for UOE.\nI can answer your university-related questions, such as admissions and scholarships."
+ )
+
+ # ✅ Launch app
+ iface.launch(share=True)
+
+
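Both app.py and utils.py call load_dotenv(), and utils.py raises a ValueError at import time if HF_API_TOKEN is unset, so the token must be available before the interface can launch. A minimal pre-flight sketch, assuming HF_API_TOKEN in a local .env file is the only variable the app needs:

import os
from dotenv import load_dotenv

load_dotenv()  # picks up a .env file next to app.py, e.g. a line of the form HF_API_TOKEN=hf_...
assert os.getenv("HF_API_TOKEN"), "Set HF_API_TOKEN in .env before running app.py"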
ds.json ADDED
The diff for this file is too large to render. See raw diff
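The file's records are not rendered above. Judging from how utils.py reads them (the "input" and "response" keys), each entry presumably looks like the hypothetical sample below; note that app.py and utils.py both open "dataset.json" rather than "ds.json".

import json

# Hypothetical record shape; the real entries in ds.json are not shown above.
sample = [
    {
        "input": "What scholarships does UOE offer?",
        "response": "UOE offers merit-based and need-based scholarships; details are on the official website.",
    }
]

with open("dataset.json", "w") as f:  # filename the code actually reads; the uploaded file is ds.json
    json.dump(sample, f, indent=2)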
 
requirements.txt ADDED
@@ -0,0 +1,7 @@
+ gradio
+ sentence-transformers
+ torch
+ transformers
+ requests
+ python-dotenv
+ # ✅ For spelling correction using TextBlob
utils.py ADDED
@@ -0,0 +1,89 @@
+ import os
+ import json
+ import requests
+ from sentence_transformers import SentenceTransformer, util
+ from dotenv import load_dotenv
+
+
+ # ✅ Load .env file
+ load_dotenv()
+
+ # ✅ Hugging Face API token check
+ HF_API_TOKEN = os.getenv("HF_API_TOKEN")
+
+ if not HF_API_TOKEN:
+     raise ValueError("Error: Hugging Face API Token is missing! Please check your .env file.")
+
+ # ✅ Hugging Face GPT model API endpoint
+ GPT_MODEL_API = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.2"
+
+ # ✅ Headers for API request
+ headers = {"Authorization": f"Bearer {HF_API_TOKEN}"}
+
+ # ✅ Load sentence-transformer model for intent matching
+ similarity_model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
+
+ # ✅ Load dataset
+ with open("dataset.json") as f:
+     dataset = json.load(f)
+
+ # ✅ Precompute embeddings for dataset questions
+ dataset_inputs = [item.get("input", "").lower().strip() for item in dataset]
+ dataset_answers = [item.get("response", "") for item in dataset]
+ dataset_embeddings = similarity_model.encode(dataset_inputs, convert_to_tensor=True)
+
+ # ✅ Function to detect greetings
+ def is_greeting(text):
+     greetings = ["hello", "hi", "hey", "salam", "assalam o alaikum", "assalamu alaikum", "assalamualaikum"]
+     return text.lower().strip() in greetings
+
+ # ✅ Function to get the best matching answer
+ def get_best_answer(user_input):
+     if is_greeting(user_input):
+         return "Hello! 😊 How can I assist you today with university-related information?"
+
+     user_input_embedding = similarity_model.encode(user_input.lower().strip(), convert_to_tensor=True)
+     similarities = util.pytorch_cos_sim(user_input_embedding, dataset_embeddings)[0]
+
+     # ✅ Find the best match and its similarity score
+     best_match_index = similarities.argmax().item()
+     best_score = similarities[best_match_index].item()
+
+     # ✅ Similarity threshold (tune as needed)
+     THRESHOLD = 0.65
+
+     if best_score < THRESHOLD:
+         return (
+             "I'm sorry, I couldn't find an exact answer to your question. "
+             "You may try rephrasing your question for better results. "
+             "Also, feel free to visit the UOE official website for information: https://ue.edu.pk/"
+         )
+
+     best_answer = dataset_answers[best_match_index]
+     return rephrase_answer(best_answer)
+
+ # ✅ Function to rephrase an answer using the hosted GPT model
+ def rephrase_answer(answer):
+     prompt = (
+         f"Rephrase the following university-related answer while keeping the meaning unchanged:\n\n"
+         f"Original Answer: {answer}\n\n"
+         f"Rephrased Answer:"
+     )
+
+     payload = {"inputs": prompt}  # ✅ Structured prompt to prevent extra details
+
+     response = requests.post(GPT_MODEL_API, headers=headers, json=payload)
+
+     if response.status_code == 200:
+         result = response.json()
+         if isinstance(result, list) and result:
+             generated_text = result[0].get("generated_text", answer).strip()
+
+             # ✅ Ensure only the rephrased answer is returned
+             if "Rephrased Answer:" in generated_text:
+                 return generated_text.split("Rephrased Answer:")[-1].strip()
+             return generated_text
+
+     return answer  # ✅ Return the original answer if the API call fails
+
+
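A quick way to exercise the matcher without launching the Gradio UI, assuming dataset.json and HF_API_TOKEN are in place as utils.py requires:

from utils import get_best_answer

print(get_best_answer("hello"))  # greeting path, answered without calling the Hugging Face API
print(get_best_answer("How can I apply for admission?"))  # similarity match, then rephrased via the API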