Spaces:
Running
Running
Delete conversational
Browse files- conversational/Json_contracts/RTC0112011A_Project_Management_Consultancy_Services.json +0 -0
- conversational/Json_contracts/RTC0112011B_Project_Management_Consultancy_Services.json +0 -0
- conversational/Json_contracts/RTC0112011C_Project_Management_Consultancy_Services.json +0 -0
- conversational/Json_contracts/RTC0382012_Red_Line_North_UG.json +0 -0
- conversational/Json_contracts/RTC0402012_Gold_Line_UG.json +0 -3
- conversational/Json_contracts/RTC0412012_Green_Line_UG.json +0 -3
- conversational/Json_contracts/RTC0742013_Red_Line_South_Elevated_&_At-Grade.json +0 -0
- conversational/Json_contracts/RTC0822013_Green_Line_Elevated_&_At-Grade.json +0 -0
- conversational/__pycache__/utils.cpython-312.pyc +0 -0
- conversational/ada3_embeddings/RTC0112011A_Project_Management_Consultancy_Services.npy +0 -3
- conversational/ada3_embeddings/RTC0112011B_Project_Management_Consultancy_Services.npy +0 -3
- conversational/ada3_embeddings/RTC0112011C_Project_Management_Consultancy_Services.npy +0 -3
- conversational/ada3_embeddings/RTC0382012_Red_Line_North_UG.npy +0 -3
- conversational/ada3_embeddings/RTC0402012_Gold_Line_UG.npy +0 -3
- conversational/ada3_embeddings/RTC0412012_Green_Line_UG.npy +0 -3
- conversational/ada3_embeddings/RTC0742013_Red_Line_South_Elevated_&_At-Grade.npy +0 -3
- conversational/ada3_embeddings/RTC0822013_Green_Line_Elevated_&_At-Grade.npy +0 -3
- conversational/utils.py +0 -548
conversational/Json_contracts/RTC0112011A_Project_Management_Consultancy_Services.json
DELETED
The diff for this file is too large to render.
See raw diff
|
|
conversational/Json_contracts/RTC0112011B_Project_Management_Consultancy_Services.json
DELETED
The diff for this file is too large to render.
See raw diff
|
|
conversational/Json_contracts/RTC0112011C_Project_Management_Consultancy_Services.json
DELETED
The diff for this file is too large to render.
See raw diff
|
|
conversational/Json_contracts/RTC0382012_Red_Line_North_UG.json
DELETED
The diff for this file is too large to render.
See raw diff
|
|
conversational/Json_contracts/RTC0402012_Gold_Line_UG.json
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:b8fa2133c097e0d6b3098c7763227699b22bc04498eab10b1cf2cd9fcb925c7d
|
3 |
-
size 33192847
|
|
|
|
|
|
|
|
conversational/Json_contracts/RTC0412012_Green_Line_UG.json
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:291e6aac05d6519df18cedefa13788fcfe2359b88dec3309b7072896cbd75b92
|
3 |
-
size 12482176
|
|
|
|
|
|
|
|
conversational/Json_contracts/RTC0742013_Red_Line_South_Elevated_&_At-Grade.json
DELETED
The diff for this file is too large to render.
See raw diff
|
|
conversational/Json_contracts/RTC0822013_Green_Line_Elevated_&_At-Grade.json
DELETED
The diff for this file is too large to render.
See raw diff
|
|
conversational/__pycache__/utils.cpython-312.pyc
DELETED
Binary file (22.6 kB)
|
|
conversational/ada3_embeddings/RTC0112011A_Project_Management_Consultancy_Services.npy
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:1defb39d60be2cebadf7ef3609267b2e75db11428093fed65a40c60aecf1faa9
|
3 |
-
size 1142912
|
|
|
|
|
|
|
|
conversational/ada3_embeddings/RTC0112011B_Project_Management_Consultancy_Services.npy
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:8f6dc8b1baa08610c20b5982e8451de7728f85bd058c44c2c002581089f3cc40
|
3 |
-
size 921728
|
|
|
|
|
|
|
|
conversational/ada3_embeddings/RTC0112011C_Project_Management_Consultancy_Services.npy
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:ffce9253a6bc5e32af1583548a8175a49435fca7c9db71ccd97eaa42507cb88a
|
3 |
-
size 1314944
|
|
|
|
|
|
|
|
conversational/ada3_embeddings/RTC0382012_Red_Line_North_UG.npy
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:5a7e37aeeea69a9befd16cbd8f6fdef927afaae2186afeb42e36d79ef4544b5f
|
3 |
-
size 34148480
|
|
|
|
|
|
|
|
conversational/ada3_embeddings/RTC0402012_Gold_Line_UG.npy
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:b9e987d05958a433d9d8538747b0b4b904550d68a16c74de95b83ad5492aae2f
|
3 |
-
size 176222336
|
|
|
|
|
|
|
|
conversational/ada3_embeddings/RTC0412012_Green_Line_UG.npy
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:3aec4225219596fc68256fd3c5e52704f0b8b8668e3c769695d5a0c1cab3e36c
|
3 |
-
size 52113536
|
|
|
|
|
|
|
|
conversational/ada3_embeddings/RTC0742013_Red_Line_South_Elevated_&_At-Grade.npy
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:e90b8b96795c7d91b36a229b51f938208b7d7c04108e5ff2dda046ced4a02542
|
3 |
-
size 25276544
|
|
|
|
|
|
|
|
conversational/ada3_embeddings/RTC0822013_Green_Line_Elevated_&_At-Grade.npy
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:af447f13735a1ab6a81dbcd23850750de139d2f630b27893b2a552385c947711
|
3 |
-
size 41668736
|
|
|
|
|
|
|
|
conversational/utils.py
DELETED
@@ -1,548 +0,0 @@
|
|
1 |
-
from collections import defaultdict
|
2 |
-
from json_repair import repair_json
|
3 |
-
from rank_bm25 import BM25Okapi
|
4 |
-
from openai import OpenAI
|
5 |
-
from tqdm import tqdm
|
6 |
-
import numpy as np
|
7 |
-
import unicodedata
|
8 |
-
import tiktoken
|
9 |
-
import faiss
|
10 |
-
import time
|
11 |
-
import json
|
12 |
-
import os
|
13 |
-
import re
|
14 |
-
|
15 |
-
# NOTE(review): presumably set so that duplicate OpenMP runtimes pulled in by
# the numeric libraries do not abort the process -- confirm it is still needed.
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"

# <<<<< Client >>>>>
# The API key is read from the environment so that no credential lives in the
# repository. The key that used to be hard-coded on this line has been
# published and must be treated as compromised: revoke it and issue a new one.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

# NOTE(review): with no key configured the client constructor is expected to
# raise at import time -- confirm that fail-fast behaviour is acceptable.
client = OpenAI(api_key=OPENAI_API_KEY)
|
21 |
-
|
22 |
-
def generate_embeddings(text, model="text-embedding-3-small"):  # model = "deployment_name"
    """Return the embedding of ``text`` computed by the embeddings ``model``.

    The text is sent as a single-element batch through the module-level
    ``client``; the ``embedding`` of the first (only) result is returned.
    """
    reply = client.embeddings.create(input=[text], model=model)
    first_result = reply.data[0]
    return first_result.embedding
|
24 |
-
|
25 |
-
# Tokenizer: round-trip sanity check on the raw encoding, then bind ``enc`` to
# whatever encoding tiktoken associates with the "gpt-4o" model name.
enc = tiktoken.get_encoding("o200k_base")
assert enc.decode(enc.encode("hello world")) == "hello world"
enc = tiktoken.encoding_for_model("gpt-4o")

# <<<<< Initials >>>>>

# Load every contract JSON of the folder, in sorted filename order, so that
# position i of ``json_list`` always refers to the same contract file.
folder_path = "conversational/Json_contracts"
json_list = []

for filename in sorted(os.listdir(folder_path)):
    if not filename.endswith(".json"):
        continue
    full_path = os.path.join(folder_path, filename)
    with open(full_path, "r", encoding="utf-8") as f:
        data = json.load(f)
    json_list.append(data)

print(f"✅ Loaded {len(json_list)} contracts.")
|
43 |
-
|
44 |
-
def fetch_json(contract_index: int, item_index: int) -> dict | None:
|
45 |
-
try:
|
46 |
-
return json_list[contract_index][item_index]
|
47 |
-
except (IndexError, TypeError):
|
48 |
-
return None
|
49 |
-
|
50 |
-
|
51 |
-
def build_vector_of_faiss_indices_from_folder(folder_path):
    """Build one inner-product FAISS index per ``.npy`` file of a folder.

    Files are processed in sorted name order. Each embedding matrix is cast to
    float32 and L2-normalised before being added to a fresh ``IndexFlatIP``.

    Returns:
        A pair ``(indices, file_names)`` of equal length, where
        ``file_names[i]`` is the file that ``indices[i]`` was built from.
    """
    npy_names = [name for name in sorted(os.listdir(folder_path)) if name.endswith(".npy")]

    built = []
    for name in npy_names:
        vectors = np.load(os.path.join(folder_path, name)).astype(np.float32)
        # embeddings = np.load(file_path, allow_pickle=False).astype(np.float32)

        # Called for its effect on ``vectors``; the return value is not used.
        faiss.normalize_L2(vectors)

        flat_index = faiss.IndexFlatIP(vectors.shape[1])
        flat_index.add(vectors)
        built.append(flat_index)

    return built, npy_names
|
72 |
-
|
73 |
-
|
74 |
-
def normalize_text(text: str) -> str:
    """Return a cleaned, single-line, lower-case version of ``text``.

    Falsy input yields ``""``. Otherwise the text is NFKC-normalised, the
    listed invisible/format code points are removed, line and paragraph
    separators become spaces, whitespace runs collapse to one space, and the
    result is lower-cased and stripped.
    """
    if not text:
        return ""

    cleaned = unicodedata.normalize("NFKC", text)

    # Applied strictly in this order: drop invisible characters, turn line
    # breaks into spaces, then collapse every whitespace run.
    substitutions = (
        (r'[\u200b-\u200f\u202a-\u202e\u2060-\u206f]', ''),
        (r'[\r\n\u2028\u2029]+', ' '),
        (r'\s+', ' '),
    )
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)

    return cleaned.lower().strip()
|
96 |
-
|
97 |
-
def s_stripper(sent):
    """Drop one trailing ``s`` from every word of at least three characters.

    Words are obtained by whitespace splitting and re-joined with single
    spaces, so whitespace runs in the input are collapsed.
    """
    return ' '.join(
        word[:-1] if len(word) >= 3 and word.endswith('s') else word
        for word in sent.split()
    )
|
108 |
-
|
109 |
-
|
110 |
-
def tokenize(text):
    """Return the lower-cased, whitespace-split tokens of ``s_stripper(text)``."""
    stemmed = s_stripper(text)
    lowered = stemmed.lower()
    return lowered.split()
|
113 |
-
|
114 |
-
# One BM25 index per contract, in the same order as ``json_list``.
BM25_vectors = []

for contract_json in tqdm(json_list, desc="Normalizing texts"):

    # Every item gets a document, including items whose text is empty or
    # missing. Filtering those out (as was done before) shifts the document
    # positions, so the indices returned by a BM25 search no longer matched
    # the item positions in ``json_list`` that they are later used to look up.
    # An empty document has no tokens and therefore cannot match any query term.
    docs = [normalize_text(item.get("text") or "") for item in contract_json]
    tokenized_docs = [tokenize(doc) for doc in docs]

    bm25_index = BM25Okapi(tokenized_docs)
    BM25_vectors.append(bm25_index)
|
123 |
-
|
124 |
-
def check_json(input_string: str) -> bool:
    """Return True when ``input_string`` contains "json", ignoring case."""
    lowered = input_string.lower()
    return lowered.find("json") != -1
|
126 |
-
|
127 |
-
|
128 |
-
# Pre-computed embedding matrices: one FAISS index per ``.npy`` file.
embedding_path = "conversational/ada3_embeddings"

vector_of_indices, f_names = build_vector_of_faiss_indices_from_folder(embedding_path)

# Short contract names; ``contract_code_names[i]`` names contract index ``i``.
contract_code_names = [
    # 0-2
    *("PMC_A_Jacobs", "PMC_B_Hill", "PMC_C_Louis Berger"),
    # 3-5
    *("DB_Red_Line_North_UG", "DB_Gold_Line_UG", "DB_Green_Line_UG"),
    # 6-7
    *("DB_Red_Line_South_Elevated", "DB_Green_Line_Elevated"),
]
|
142 |
-
|
143 |
-
def Get_Context(final_indices: list[dict]) -> str:
    """Render the selected pages of the selected contracts as one context string.

    Args:
        final_indices: One dict per contract with keys ``"contract_index"``
            and ``"page_indices"`` (positions passed to ``fetch_json``).

    Returns:
        A header listing the contracts, then for each contract a ``#####``
        separator, its ``contract_name`` and, per page, the file name, path,
        page number and text.

    Page positions that are negative or that ``fetch_json`` cannot resolve are
    skipped instead of raising. Neighbour expansion can produce positions just
    outside the contract, and a contract left without any valid page
    contributes only its separator.
    """
    contract_names = [contract_code_names[item["contract_index"]] for item in final_indices]

    # Collected in a list and joined once; repeated ``+=`` on a growing string
    # is needlessly expensive for many long pages.
    parts = [f"Number of contracts: {len(final_indices)}\nContract-names: {contract_names}\n"]

    for contract in final_indices:
        i = contract["contract_index"]

        pages = []
        for pos in contract["page_indices"]:
            if pos < 0:
                continue
            page = fetch_json(i, pos)
            if page is not None:
                pages.append(page)

        parts.append("\n#####\n")
        if not pages:
            continue

        # The first valid page supplies the contract-level metadata.
        parts.append("contract_name: " + pages[0]["contract_name"] + "\n")

        for page in pages:
            parts.append(
                "file_name: " + page["file_name"] + "\n"
                + "path: " + page["path"] + "\n"
                + "Page Number: " + str(page["page"]) + " " + page["text"] + "\n\n"
            )

    return "".join(parts)
|
166 |
-
|
167 |
-
def Get_Faiss_indices(
    query: str,
    contract_index: list[int],
    vector_of_indices: list[faiss.IndexFlatIP],
    K: int
) -> list[dict]:
    """Return the top-``K`` nearest page positions per contract for ``query``.

    Args:
        query: Text to embed and search with.
        contract_index: Positions in ``vector_of_indices`` to search.
        vector_of_indices: One FAISS index per contract.
        K: Number of neighbours requested from each index.

    Returns:
        One dict per requested contract with ``"contract_index"`` and
        ``"page_indices"`` (an integer array, best match first).
    """
    # The query is embedded once and normalised like the indexed vectors.
    vquery = np.array(generate_embeddings(query)).reshape(1, -1).astype('float32')
    faiss.normalize_L2(vquery)

    json_index = []
    for i in contract_index:
        _, neighbours = vector_of_indices[i].search(vquery, K)
        hits = neighbours[0]
        # FAISS pads the result with -1 when an index holds fewer than K
        # vectors; those placeholders are not page positions and are dropped.
        json_index.append({"contract_index": i, "page_indices": hits[hits >= 0]})

    return json_index
|
185 |
-
|
186 |
-
def Get_BM25_indices(
    query: str,
    contract_index: list[int],
    bm25_vectors: list,
    K: int
) -> list[dict]:
    """Return the ``K`` best-scoring BM25 document positions per contract.

    Args:
        query: Search text.
        contract_index: Positions in ``bm25_vectors`` to search.
        bm25_vectors: One BM25 index per contract.
        K: Maximum number of positions returned per contract.

    Returns:
        One dict per requested contract with ``"contract_index"`` and
        ``"page_indices"`` (an integer array, highest score first).
    """
    # The indexed documents went through ``normalize_text``; the query gets the
    # same Unicode/whitespace clean-up and lower-casing so equal text yields
    # equal tokens. ``s_stripper`` is deliberately NOT applied here: the caller
    # in this module strips the message before extracting keywords, and
    # stripping twice would shorten words such as "class" a second time.
    tokens = normalize_text(query).split()

    json_index = []
    for i in contract_index:
        scores = bm25_vectors[i].get_scores(tokens)
        top_indices = np.argsort(scores)[::-1][:K]
        json_index.append({"contract_index": i, "page_indices": top_indices})

    return json_index
|
209 |
-
|
210 |
-
def merge_contracts_extended(obj1, obj2):
    """Merge two retrieval results and widen every hit to its neighbour pages.

    Args:
        obj1, obj2: Lists of dicts with ``"contract_index"`` and
            ``"page_indices"`` (any iterable of integer positions).

    Returns:
        One dict per contract, in order of first appearance, whose
        ``"page_indices"`` is the sorted, de-duplicated list of every hit
        together with the positions directly before and after it.

    Negative positions are never produced. A negative input position (a "no
    result" sentinel) is ignored entirely, and the page before position 0 is
    not added. The upper bound is unknown here, so a position one past the
    last page can still appear and must be tolerated by the consumer.
    """
    merged = defaultdict(set)

    def expand_indices(indices):
        # For each valid page, include page-1, page, page+1 (never below 0).
        expanded = set()
        for p in indices:
            p = int(p)
            if p < 0:
                continue
            expanded.update(q for q in (p - 1, p, p + 1) if q >= 0)
        return expanded

    for source in (obj1, obj2):
        for entry in source:
            merged[entry['contract_index']].update(expand_indices(entry['page_indices']))

    # Convert sets to sorted lists
    return [{'contract_index': idx, 'page_indices': sorted(pages)} for idx, pages in merged.items()]
|
233 |
-
|
234 |
-
|
235 |
-
def reciprocal_rank_fusion(bm25_indices, faiss_indices, Top_K=10, k=60):
    """Fuse two per-contract rankings by summed reciprocal rank.

    A page at zero-based position ``r`` of a ranking contributes
    ``1 / (k + r)`` to its score; contributions from both rankings are added.

    Returns:
        One dict per contract that has at least one page, with
        ``"contract_index"`` and ``"page_indices"``: an int64 array of at most
        ``Top_K`` positions, highest fused score first.
    """
    # contract -> {page -> fused score}. A contract is only created when its
    # first page is seen, so contracts without pages never appear.
    fused = defaultdict(dict)
    for ranking in (bm25_indices, faiss_indices):
        for entry in ranking:
            cid = entry['contract_index']
            for position, page in enumerate(entry['page_indices']):
                fused[cid][page] = fused[cid].get(page, 0.0) + 1 / (k + position)

    output = []
    for cid, page_scores in fused.items():
        ranked = sorted(page_scores.items(), key=lambda pair: pair[1], reverse=True)
        best = [page for page, _ in ranked[:Top_K]]
        output.append({'contract_index': cid, 'page_indices': np.array(best, dtype=np.int64)})

    return output
|
261 |
-
|
262 |
-
def chat_gpt_Agentic_RAG(messages):
    """Run one conversational turn: pick contracts, then answer from them.

    Args:
        messages: Object exposing ``contracts`` (used as a flag) and
            ``messages``, a sequence of items with ``role`` and ``content``.

    Returns:
        ``(response, json_response)``. ``json_response`` is always the first
        model reply. ``response`` is that same reply unless it contained a
        usable contract selection, in which case it is the per-contract
        answers joined by blank lines.

    Changes from the previous version, all of which raised at runtime:
      * the follow-up branch interpolated ``cxt`` before it was assigned;
      * the per-contract answer was produced inside a nested async generator
        that was never run (and needed an ``asyncio`` import the module does
        not have), so ``response_agent`` was undefined when it was appended.
        The streamed chunks are now consumed directly;
      * a reply that merely mentions "json" but holds no contract selection
        is returned unchanged instead of raising.
    The debug prints of the full history were removed.
    """
    JSON_FLAG = messages.contracts

    history = [{"role": m.role, "content": m.content} for m in messages.messages]
    original_message = history[0]['content']
    user_message = history[-1]["content"]

    if not JSON_FLAG:
        SYS_PROMPT = SYS_QRAIL_O4_plus
    else:
        # NOTE(review): nothing in this module shows where the follow-up
        # context is meant to come from. It is left empty so the turn no
        # longer crashes -- confirm the intended source and wire it in.
        cxt = ""
        SYS_PROMPT = f"""You are a helpful assistant that answers questions based on the provided context.
If you don't have enough information, ask for more details.\n context : {cxt}"""

    history_openai_format = [{"role": "system", "content": SYS_PROMPT}]
    history_openai_format.extend(history)
    history_openai_format.append({"role": "user", "content": "Query :" + user_message})

    response = call_gpt(history_openai_format)
    json_response = response

    if not check_json(response) or JSON_FLAG:
        return response, json_response

    try:
        json_result = json.loads(repair_json(response))
    except ValueError:
        return response, json_response

    selected = json_result.get("contract_indices") if isinstance(json_result, dict) else None
    if not selected:
        return response, json_response

    key_intent = call_gpt_intent(s_stripper(original_message))

    responses = []
    for contract_idx in selected:
        faiss_indices = Get_Faiss_indices(key_intent, [contract_idx], vector_of_indices, 5)
        BM25_indices = Get_BM25_indices(key_intent, [contract_idx], BM25_vectors, 10)
        final_indices = merge_contracts_extended(BM25_indices, faiss_indices)
        cxt = Get_Context(final_indices)

        # The helper yields the answer in chunks; this function returns a
        # plain string, so the chunks are concatenated here.
        responses.append("".join(call_Context_Answer_per_contract(original_message, cxt)))

    response = "\n\n".join(responses)

    return response, json_response
|
335 |
-
|
336 |
-
|
337 |
-
|
338 |
-
# <<<<< GPTs >>>>>
|
339 |
-
def call_gpt(message_text):
    """Send ``message_text`` (a chat message list) and return the reply text.

    Uses the module-level ``client`` with fixed sampling settings and returns
    the content of the first choice.
    """
    request = dict(
        model="gpt-4.1-mini",
        # model="gpt-4o",
        messages=message_text,
        temperature=0.0,
        max_tokens=1000,
        top_p=0.95,
        frequency_penalty=0,
        presence_penalty=0,
        stop=None,
    )
    reply = client.chat.completions.create(**request)
    return reply.choices[0].message.content
|
352 |
-
|
353 |
-
def call_gpt_intent(query):
    """Ask the model to reduce ``query`` to its search keywords.

    The system prompt instructs the model to drop stop words, interrogatives,
    punctuation and project-wide terms; the reply text is returned as is.
    """
    keyword_prompt = """You are a simple keyword extraction assistant.
Given a query your task is to just strip and remove all the stop words, interrogative words punctuations, and leave the rest
All queries are related to Qatar Rail Project so **stop words** will include also irrelevant and redundant words
such as , UG , Underground , elevated , Gold line , Red line , Green line , Qatar Rail , Qatar Rail Project,
PMC (Project Management Consultant),..such terms will confuse the search and should be removed.

"""

    system_turn = {"role": "system", "content": keyword_prompt}
    user_turn = {"role": "user", "content": query}

    reply = client.chat.completions.create(
        model="gpt-4.1-mini",
        messages=[system_turn, user_turn],
        temperature=0.0,
        max_tokens=200,
        top_p=0.95,
        frequency_penalty=0,
        presence_penalty=0,
        stop=None,
    )
    return reply.choices[0].message.content
|
386 |
-
|
387 |
-
def call_Context_Answer(query, context):
    """Answer ``query`` from ``context`` (one or more contracts) in one call.

    Returns the content of the first choice of a non-streaming completion.
    """
    answer_prompt = """You are “Qatar Rail AI Assistant,” a friendly and smart
assistant that helps users find information in Qatar Rail contracts. You will be prvided with a context and a question
The context will contain information about one or more contracts.
The question will be a natural language question about the context.
Your task is to answer the question using the context provided.
Do not answer the question using your own knowledge.
**Output Format**:
- nicely formatted markdown text
- Use the contract names as headers for the sections of the answer
- Use bullet points to list the information
- Use bold text to highlight important information
- Provide a brief summary of the answer at the end if it's a single contract
- Provide a comparative table if it's multiple contracts
- add references to the files and page numbers in the context where the information was found.
"""

    system_turn = {"role": "system", "content": answer_prompt}
    user_turn = {"role": "user", "content": f"Query {query} \n Context {context}"}

    reply = client.chat.completions.create(
        model="gpt-4.1-mini",
        messages=[system_turn, user_turn],
        temperature=0.0,
        max_tokens=3500,
        top_p=0.95,
        frequency_penalty=0,
        presence_penalty=0,
        stop=None,
    )
    return reply.choices[0].message.content
|
428 |
-
|
429 |
-
def call_Context_Answer_per_contract(query, context):
    """Stream the answer to ``query`` from the ``context`` of a single contract.

    This is a generator: it yields each non-``None`` text fragment of the
    streamed completion, in arrival order.
    """
    answer_prompt = """You are “Qatar Rail AI Assistant,” a friendly and smart
assistant that helps users find information in Qatar Rail contracts. You will be provided with a context and a question about
a single contract.
The question will be a natural language question about the context.
Your task is to answer the question using the context provided.
Do not answer the question using your own knowledge.unless only you were asked to provide a template notice
depending on the query intent.
If no clear answer can be found in the context, mention that the answer is not available.


**Output Format**:
- nicely formatted markdown text
- Use the contract names as headers with Bold for the sections of the answer
- Use bullet points to list the information
- Use bold text to highlight important information
- add references in bullets for , where the information was found in context
-- filenames
-- File Paths
-- page numbers
"""

    system_turn = {"role": "system", "content": answer_prompt}
    user_turn = {"role": "user", "content": f"Query {query} \n Context {context}"}

    stream = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[system_turn, user_turn],
        temperature=0.0,
        max_tokens=3500,
        top_p=0.95,
        frequency_penalty=0,
        presence_penalty=0,
        stop=None,
        stream=True,
    )

    for event in stream:
        piece = event.choices[0].delta.content
        if piece is None:
            continue
        yield piece
|
481 |
-
|
482 |
-
# return completion.choices[0].message.content
|
483 |
-
|
484 |
-
|
485 |
-
|
486 |
-
|
487 |
-
# <<<<< SYS_PROMPT >>>>>
|
488 |
-
# System prompt for the contract-selection turn. The contract list below must
# stay in step with ``contract_code_names`` (same index -> same name), because
# the indices the model emits are used directly to pick the search indexes.
# Fixed here: entries 2 and 5 were malformed (no bullet/comma, unclosed bold,
# and a name that differed from the code name), "and and", and the unclosed
# placeholder comment in the JSON template.
SYS_QRAIL_O4_plus="""You are “Qatar Rail AI Assistant,” a friendly and smart assistant that helps users find information
in Qatar Rail contracts. Use conversational language, ask brief clarifying questions when needed,
and only emit your JSON once you’re sure of the user’s intent.
Background information:
1. Know your universe of contracts: indices, names and their descriptions:
• 0,**PMC_A_Jacobs** – Project management consulting services by Jacobs Consulting
• 1,**PMC_B_Hill** – Project management consulting services by Hill International
• 2,**PMC_C_Louis Berger** – Project management consulting services by the Louis Berger Egis Rail JV
• 3,**DB_Red_Line_North_UG** – Design-Build Construction for the Red Line North (underground)
• 4,**DB_Gold_Line_UG** – Design-Build Construction for the Gold Line (underground)
• 5,**DB_Green_Line_UG** – Design-Build Construction for the Green Line (underground)
• 6,**DB_Red_Line_South_Elevated** – Design-Build Construction for the Red Line South (Elevated)
• 7,**DB_Green_Line_Elevated** – Design-Build Construction for the Green Line (Elevated)

**PMC Contracts information**:
PMC contracts define the core legal framework between the client (e.g., a government or transportation authority) and
the appointed project management consultant. These agreements govern how consultants supervise project progress,
ensure quality control, manage risks, and act on behalf of the client during project execution.
They are not directly involved in construction or design, but in ensuring that those activities are executed per plan and standards.
**DB Contracts information**:
The DB contracts form the backbone of metro infrastructure delivery, comprising detailed and voluminous documentation across all project phases
— from planning, design, and tendering, to construction and reporting. They include:
Design requirements and standards
Contractual volumes and conditions
Site investigations and reports
provisional sums
Correspondence during tender and execution
These contracts cover end-to-end execution responsibilities including design, construction, and sometimes commissioning,
reflecting a turnkey model typical in large infrastructure works.

2. At each user turn:
- You should first identify the contract type (PMC or DB) if its a PMC list to the user the 3 PMC contracts and ask
him to choose one of them.
- use the above contracts information to guess the target of the query as either PMC and DB contracts
- provide this guess to the user as a hint by saying "your query seems to be related to {PMC or DB} contracts"
if its a DB contract list to the user the 5 DB contracts and ask him to choose one or more of them.
a. Try to determine if the user means:
– A single contract
– Multiple contracts

b. If you’re confident, respond immediately with **only** the JSON:
```json
{

"contract_names": [ /* one or more identifiers */ ],
"contract_indices": [ /* their index number according to the list */ ]
}
```
c. If you’re not yet sure, ask **one** concise follow-up, using descriptions where helpful. Examples:
– “Just to confirm, are you looking for the project-management service by Jacobs or by Hill?”
– “Do you want details on the Red Line North or Red Line South construction?”
– “Would you like information on all of the DB construction contracts or a specific line?”

3. Once you’ve asked a clarification, wait for the user’s reply. Don’t ask any more questions unless it’s still ambiguous.

4. Keep your language natural and polite. You should feel like a helpful assistant, not a quizmaster.

—
Start now.

"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|