InkeyDevelopment committed on
Commit
59d1ac8
·
verified ·
1 Parent(s): 23f5802

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +194 -0
app.py ADDED
@@ -0,0 +1,194 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
+
3
import json
import os
import re

import faiss
import numpy as np
from dotenv import load_dotenv
from flask import Flask, render_template, request, jsonify
from langchain.prompts import PromptTemplate
from langchain_groq import ChatGroq
from sentence_transformers import SentenceTransformer
12
# Populate os.environ from a local .env file (if any) before GROQ_API_KEY is
# read below.
load_dotenv()

app = Flask(__name__)

# Load Model and FAISS Index
# Both artifacts are read from paths relative to the working directory.
model = SentenceTransformer('./sentence-transformers_all-MiniLM-L6-v2')
index = faiss.read_index("faiss_index.bin")

# Groq credentials and model selection for the chat LLM.
groq_api_key = os.getenv('GROQ_API_KEY')
model_name = "llama-3.3-70b-versatile"

llm = ChatGroq(
    temperature=0,
    groq_api_key=groq_api_key,
    model_name=model_name
)

# Entries looked up by the ids that FAISS returns (see search_text).
# An explicit encoding keeps JSON decoding independent of the platform locale.
with open("metadata.json", encoding="utf-8") as f:
    metadata = json.load(f)
30
+
31
+
32
# Ordered (category, compiled patterns) table, built once at import time.
# Order matters: the first category with any matching pattern wins, so a
# phrase listed under several categories (e.g. "how are you") is reported
# under the earliest one.
_QUERY_CATEGORIES = tuple(
    (category, tuple(re.compile(pattern) for pattern in patterns))
    for category, patterns in (
        # Greetings
        ("greeting", (
            r"\bhello\b", r"\bhi\b", r"\bhey\b", r"\bhola\b", r"\bgreetings\b",
            r"\bwhat('s| is) up\b", r"\bhowdy\b", r"\bhiya\b", r"\byo\b",
            r"\bgood (morning|afternoon|evening|day|night)\b",
            r"\bhow (are|r) you\b", r"\bhow's it going\b", r"\bhow have you been\b",
            r"\bhope you are (doing )?(well|good|fine)\b", r"\bnice to meet you\b",
            r"\bpleased to meet you\b",
        )),
        # Thank-you messages
        ("thank_you", (
            r"\bthank(s| you)\b", r"\bthanks a lot\b", r"\bthanks so much\b",
            r"\bthank you very much\b", r"\bappreciate it\b", r"\bmuch obliged\b",
            r"\bgrateful\b", r"\bcheers\b",
        )),
        # Small talk
        ("small_talk", (
            r"\bhow (are|r) you\b", r"\bhow's your day\b", r"\bwhat's up\b",
            r"\bhow's it going\b", r"\bhow have you been\b", r"\bhope you are well\b",
        )),
        # Unrelated topics
        ("unrelated", (
            r"\btell me a joke\b", r"\bwho won\b", r"\bwhat is ai\b", r"\bexplain blockchain\b",
        )),
        # Goodbye messages
        ("goodbye", (
            r"\bbye\b", r"\bgoodbye\b", r"\bsee you\b", r"\bhave a nice day\b",
        )),
        # Rude or inappropriate messages
        ("rude", (
            r"\bstupid\b", r"\bdumb\b", r"\buseless\b", r"\bshut up\b",
        )),
    )
)


def categorize_query(query):
    """Classify a user query into a coarse conversational category.

    The query is lower-cased and stripped, then tested against each category's
    patterns in priority order: greeting, thank_you, small_talk, unrelated,
    goodbye, rude. Patterns are searched anywhere in the text, so a question
    that merely contains a trigger word (e.g. starts with "hi") is classified
    by that word.

    Args:
        query: The raw user text. Non-string values (including ``None``) are
            accepted and treated as having no content.

    Returns:
        ``"vague"`` when the query is not a string or is blank, the name of
        the first matching category otherwise, or ``"normal"`` when nothing
        matches.
    """
    if not isinstance(query, str):
        return "vague"

    text = query.lower().strip()
    if not text:
        return "vague"

    for category, patterns in _QUERY_CATEGORIES:
        if any(pattern.search(text) for pattern in patterns):
            return category

    return "normal"
91
+
92
+ # Function to Search for Relevant Answers
93
def search_text(query, top_k=2):
    """Return the metadata entries for the nearest FAISS hits of ``query``.

    Args:
        query: Text to embed with the module-level ``model``.
        top_k: Number of neighbours requested from the module-level ``index``.

    Returns:
        A list of ``metadata`` entries in the order the index returned them.
        The list may be shorter than ``top_k`` because ids that do not map to
        a metadata entry are skipped.
    """
    # encode(..., convert_to_numpy=True) already yields an ndarray; asarray
    # only enforces the float32 dtype, and reshape gives a (1, dim) batch.
    query_embedding = np.asarray(
        model.encode(query, convert_to_numpy=True), dtype="float32"
    ).reshape(1, -1)
    _, indices = index.search(query_embedding, top_k)

    # Negative ids are skipped (presumably FAISS's -1 padding for missing
    # neighbours - confirm against the FAISS docs). The upper bound protects
    # against an index that has more rows than metadata.
    # NOTE(review): assumes metadata is a list aligned with the index rows.
    return [
        metadata[int(idx)]
        for idx in indices[0]
        if 0 <= idx < len(metadata)
    ]
103
+
104
+ # Serve HTML Page
105
@app.route("/")
def home():
    """Render the ``index.html`` template for the root URL."""
    page = render_template("index.html")
    return page
108
+
109
# Fixed replies for query categories that never reach retrieval or the LLM.
_CANNED_REPLIES = {
    "greeting": "Hello! How can I assist you with Exelsys EasyHR?",
    "thank_you": "You're welcome! How can I assist you further?",
    "small_talk": "I'm here to assist with Exelsys EasyHR. How can I help?",
    "unrelated": "I'm here to assist with Exelsys easyHR queries only.",
    "vague": "Could you please provide more details?",
    "goodbye": "You're welcome! Have a great day!",
    "rude": "I'm here to assist you professionally.",
}


def _text_reply(text):
    """Build the JSON payload shape shared by every /query response."""
    return jsonify({"text": text, "images": []})


@app.route("/query", methods=["POST"])
def query_pdf():
    """Answer a user query posted as JSON ``{"query": "<text>"}``.

    Conversational queries (greetings, thanks, etc.) get a canned reply.
    Anything else is answered by the LLM from the top FAISS matches.

    Returns:
        A JSON response with a ``"text"`` string and an ``"images"`` list
        (currently always empty). The status is 400 when the body is not a
        JSON object with a string ``"query"``.
    """
    # silent=True yields None instead of raising on a missing or invalid JSON
    # body, so malformed requests get a clean 400 rather than a 500.
    payload = request.get_json(silent=True)
    query = payload.get("query") if isinstance(payload, dict) else None
    if not isinstance(query, str):
        return _text_reply(_CANNED_REPLIES["vague"]), 400
    if not query.strip():
        # Nothing to search for; ask for more detail instead of querying FAISS.
        return _text_reply(_CANNED_REPLIES["vague"])

    canned = _CANNED_REPLIES.get(categorize_query(query))
    if canned is not None:
        return _text_reply(canned)

    # Search for relevant PDF content using FAISS
    results = search_text(query, top_k=3)

    if not results:
        return _text_reply("No relevant results found in the PDF.")

    # Merge multiple text results
    retrieved_text = "\n\n---\n\n".join(res["text"] for res in results)
    app.logger.debug("Retrieved context: %s", retrieved_text)

    prompt_extract = PromptTemplate.from_template(
        """
        ### YOU ARE AN EXELSYS EASYHR GUIDE ASSISTANT:
        ### INSTRUCTIONS:
        - Your job is to provide step-by-step guidance for the following user query based on the provided context.
        - Base your response **only** on the retrieved context from the PDF.
        - If no relevant information is found, simply respond with: "Not found."
        - If the user greets you (e.g., "Hello", "Hi", "Good morning"), respond politely but keep it brief.
        - If the query is unrelated to Exelsys easyHR, respond with: "I'm here to assist with Exelsys easyHR queries only."
        - Provide clear and concise answers.
        - Provide all the links that inside any topic in <a> tag.


        ### USER QUERY:
        {query}

        ### CONTEXT FROM PDF:
        {retrieved_text}

        ### ANSWER:
        """
    )

    # Chain the prompt with ChatGroq
    chain_extract = prompt_extract | llm
    chat_response = chain_extract.invoke(
        {"query": query, "retrieved_text": retrieved_text}
    )

    # Convert response to string
    response_text = str(chat_response.content)
    app.logger.debug("LLM response: %s", response_text)

    # "images" stays empty for now; it is included so this branch returns the
    # same payload shape as every other branch of this endpoint.
    return _text_reply(response_text)
191
+
192
if __name__ == "__main__":
    # Only when executed as a script: listen on all interfaces, port 7860.
    app.run(port=7860, host="0.0.0.0")
194
+