app update post reader and utils
With the reader, retriever and utils methods moved to their respective scripts, app.py is slimmed down.
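Based on the new imports, app.py now pulls the removed helper code from three auditqa modules. A minimal sketch of the assumed layout follows; the module paths and signatures come from the imports and call sites in the diff below, while the bodies are reconstructions of the code removed from app.py, not the committed module contents.

# Assumed layout after this refactor, reconstructed from the new imports and from
# the code removed from app.py below. Bodies are illustrative sketches only.

# --- auditqa/retriever.py ----------------------------------------------------
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import CrossEncoderReranker
from langchain_community.cross_encoders import HuggingFaceCrossEncoder
from qdrant_client.http import models as rest

def get_context(vectorstore, query, reports, sources, subtype, year, top_k=10):
    """Metadata-filtered similarity search followed by cross-encoder reranking,
    assembled from the retrieval code removed from chat() in this diff.
    (The app reads TOP_K from model_params.cfg; top_k=10 is a placeholder.)"""
    if len(reports) == 0:
        # no explicit report list: filter on source, subtype and year
        filter = rest.Filter(must=[
            rest.FieldCondition(key="metadata.source", match=rest.MatchValue(value=sources)),
            rest.FieldCondition(key="metadata.subtype", match=rest.MatchValue(value=subtype)),
            rest.FieldCondition(key="metadata.year", match=rest.MatchAny(any=year))])
    else:
        # otherwise restrict retrieval to the selected report filenames
        filter = rest.Filter(must=[
            rest.FieldCondition(key="metadata.filename", match=rest.MatchAny(any=reports))])
    retriever = vectorstore.as_retriever(
        search_type="similarity_score_threshold",
        search_kwargs={"score_threshold": 0.6, "k": top_k, "filter": filter})
    reranker = CrossEncoderReranker(
        model=HuggingFaceCrossEncoder(model_name="BAAI/bge-reranker-base"), top_n=3)
    return ContextualCompressionRetriever(
        base_compressor=reranker, base_retriever=retriever).invoke(query)

# --- auditqa/utils.py ---------------------------------------------------------
# save_logs, make_html_source and parse_output_llm_with_sources move here from
# app.py; get_message_template is new and only sketched here.
from langchain.schema import HumanMessage, SystemMessage

def get_message_template(endpoint_type, system_prompt, user_prompt):
    """NVIDIA's chat_completion expects OpenAI-style dicts, while the dedicated
    HF endpoint expects LangChain messages (inferred from the call sites below)."""
    if endpoint_type == 'NVIDIA':
        return [{"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt}]
    return [SystemMessage(content=system_prompt), HumanMessage(content=user_prompt)]

# --- auditqa/reader.py --------------------------------------------------------
# nvidia_client()      -> client exposing chat_completion(..., stream=True)
# dedicated_endpoint() -> chat model exposing astream(messages), presumably the
#                         HuggingFaceEndpoint/ChatHuggingFace pair removed below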
app.py CHANGED
@@ -3,50 +3,37 @@ import pandas as pd
 import logging
 import asyncio
 import os
-import re
-import json
 from uuid import uuid4
 from datetime import datetime
 from pathlib import Path
 from huggingface_hub import CommitScheduler
 from auditqa.sample_questions import QUESTIONS
 from auditqa.reports import files, report_list
-from langchain.schema import (
-    HumanMessage,
-    SystemMessage,
-)
-from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
-from langchain_community.llms import HuggingFaceEndpoint
 from auditqa.process_chunks import load_chunks, getconfig, get_local_qdrant
-from
-from
-from
-
-from qdrant_client.http import models as rest
+from auditqa.retriever import get_context
+from auditqa.reader import nvidia_client, dedicated_endpoint
+from auditqa.utils import make_html_source, parse_output_llm_with_sources, save_logs, get_message_template
+
 from dotenv import load_dotenv
 load_dotenv()
-
-#
-HF_token = os.environ["LLAMA_3_1"]
+
+# fetch tokens and model config params
 SPACES_LOG = os.environ["SPACES_LOG"]
+model_config = getconfig("model_params.cfg")
+
 # create the local logs repo
 JSON_DATASET_DIR = Path("json_dataset")
 JSON_DATASET_DIR.mkdir(parents=True, exist_ok=True)
 JSON_DATASET_PATH = JSON_DATASET_DIR / f"logs-{uuid4()}.json"

-# the logs are written to dataset repo
+# the logs are written to dataset repo periodically from local logs
 # https://huggingface.co/spaces/Wauplin/space_to_dataset_saver
 scheduler = CommitScheduler(
-
-
-
-
-
-)
-
-model_config = getconfig("model_params.cfg")
-
-
+    repo_id="GIZ/spaces_logs",
+    repo_type="dataset",
+    folder_path=JSON_DATASET_DIR,
+    path_in_repo="audit_chatbot",
+    token=SPACES_LOG )

 #### VECTOR STORE ####
 # reports contain the already created chunks from Markdown version of pdf reports
@@ -54,68 +41,11 @@ model_config = getconfig("model_params.cfg")
 # We need to create the local vectorstore collection once using load_chunks
 # vectorestore colection are stored on persistent storage so this needs to be run only once
 # hence, comment out line below when creating for first time
-#
+#vectorstores = load_chunks()
 # once the vectore embeddings are created we will use qdrant client to access these
 vectorstores = get_local_qdrant()


-
-#### FUNCTIONS ####
-# App UI and and its functionality is inspired and adapted from
-# https://huggingface.co/spaces/Ekimetrics/climate-question-answering
-
-
-def save_logs(logs) -> None:
-    """ Every interaction with app saves the log of question and answer,
-        this is to get the usage statistics of app and evaluate model performances
-    """
-    with scheduler.lock:
-        with JSON_DATASET_PATH.open("a") as f:
-            json.dump(logs, f)
-            f.write("\n")
-    logging.info("logging done")
-
-
-def make_html_source(source,i):
-    """
-    takes the text and converts it into html format for display in "source" side tab
-    """
-    meta = source.metadata
-    content = source.page_content.strip()
-
-    name = meta['filename']
-    card = f"""
-    <div class="card" id="doc{i}">
-        <div class="card-content">
-            <h2>Doc {i} - {meta['filename']} - Page {int(meta['page'])}</h2>
-            <p>{content}</p>
-        </div>
-        <div class="card-footer">
-            <span>{name}</span>
-            <a href="{meta['filename']}#page={int(meta['page'])}" target="_blank" class="pdf-link">
-                <span role="img" aria-label="Open PDF">🔗</span>
-            </a>
-        </div>
-    </div>
-    """
-
-    return card
-
-def parse_output_llm_with_sources(output):
-    # Split the content into a list of text and "[Doc X]" references
-    content_parts = re.split(r'\[(Doc\s?\d+(?:,\s?Doc\s?\d+)*)\]', output)
-    parts = []
-    for part in content_parts:
-        if part.startswith("Doc"):
-            subparts = part.split(",")
-            subparts = [subpart.lower().replace("doc","").strip() for subpart in subparts]
-            subparts = [f"""<a href="#doc{subpart}" class="a-doc-ref" target="_self"><span class='doc-ref'><sup>{subpart}</sup></span></a>""" for subpart in subparts]
-            parts.append("".join(subparts))
-        else:
-            parts.append(part)
-    content_parts = "".join(parts)
-    return content_parts
-
 def start_chat(query,history):
     history = history + [(query,None)]
     history = [tuple(x) for x in history]
@@ -141,64 +71,18 @@ async def chat(query,history,sources,reports,subtype,year):

     ##------------------------fetch collection from vectorstore------------------------------
     vectorstore = vectorstores["allreports"]
-    ##---------------------construct filter for metdata filtering---------------------------
-    if len(reports) == 0:
-        ("defining filter for:{}:{}:{}".format(sources,subtype,year))
-        filter=rest.Filter(
-            must=[rest.FieldCondition(
-                key="metadata.source",
-                match=rest.MatchValue(value=sources)
-            ),
-            rest.FieldCondition(
-                key="metadata.subtype",
-                match=rest.MatchValue(value=subtype)
-            ),
-            rest.FieldCondition(
-                key="metadata.year",
-                match=rest.MatchAny(any=year)
-            ),])
-    else:
-        print("defining filter for allreports:",reports)
-        filter=rest.Filter(
-            must=[
-            rest.FieldCondition(
-                key="metadata.filename",
-                match=rest.MatchAny(any=reports)
-            )])
-

     ##------------------------------get context----------------------------------------------
-
-
-
-    for
-    # similarity score threshold can be used to make adjustments in quality and quantity for Retriever
-    # However need to make balancing, as retrieved results are again used by Ranker to fetch best among
-    # retreived results
-    retriever = vectorstore.as_retriever(
-        search_type="similarity_score_threshold", search_kwargs={"score_threshold": 0.6,
-        "k": int(model_config.get('retriever','TOP_K')),
-        "filter":filter})
-    model = HuggingFaceCrossEncoder(model_name="BAAI/bge-reranker-base")
-    compressor = CrossEncoderReranker(model=model, top_n=3)
-    compression_retriever = ContextualCompressionRetriever(
-        base_compressor=compressor, base_retriever=retriever
-    )
-    context_retrieved = compression_retriever.invoke(question)
-    logging.info(len(context_retrieved))
-    for doc in context_retrieved:
-        logging.info(doc.metadata)
+    context_retrieved = get_context(vectorstore=vectorstore,query=query,reports=reports,
+                                    sources=sources,subtype=subtype,year=year)
+    context_retrieved_formatted = "||".join(doc.page_content for doc in context_retrieved)
+    context_retrieved_lst = [doc.page_content for doc in context_retrieved]

-    def format_docs(docs):
-        return "\n\n".join(doc.page_content for doc in docs)
-
-    context_retrieved_formatted = format_docs(context_retrieved)
-    context_retrieved_lst.append(context_retrieved_formatted)
-
     ##------------------- -------------Prompt--------------------------------------------------
     SYSTEM_PROMPT = """
     You are AuditQ&A, an AI Assistant created by Auditors and Data Scientist. You are given a question and extracted passages of the consolidated/departmental/thematic focus audit reports. Provide a clear and structured answer based on the passages/context provided and the guidelines.
     Guidelines:
+    - Passeges are provided as comma separated list of strings
     - If the passages have useful facts or numbers, use them in your answer.
    - When you use information from a passage, mention where it came from by using [Doc i] at the end of the sentence. i stands for the number of the document.
     - Do not use the sentence 'Doc i says ...' to say where information came from.
@@ -215,87 +99,59 @@ async def chat(query,history,sources,reports,subtype,year):
     Question: {question} - Explained to audit expert
     Answer in english with the passages citations:
     """.format(context = context_retrieved_lst, question=query)
+
+    ##-------------------- apply message template ------------------------------
+    messages = get_message_template(model_config.get('reader','TYPE'),SYSTEM_PROMPT,USER_PROMPT)

-
-        SystemMessage(content=SYSTEM_PROMPT),
-        HumanMessage(
-            content=USER_PROMPT
-        ),]
-
-    ##-----------------------getting inference endpoints------------------------------
-
-    # Set up the streaming callback handler
-    callback = StreamingStdOutCallbackHandler()
-
-    # Initialize the HuggingFaceEndpoint with streaming enabled
-    llm_qa = HuggingFaceEndpoint(
-        endpoint_url=model_config.get('reader', 'ENDPOINT'),
-        max_new_tokens=512,
-        repetition_penalty=1.03,
-        timeout=70,
-        huggingfacehub_api_token=HF_token,
-        streaming=True,  # Enable streaming for real-time token generation
-        callbacks=[callback]  # Add the streaming callback handler
-    )
-
-    # Create a ChatHuggingFace instance with the streaming-enabled endpoint
-    chat_model = ChatHuggingFace(llm=llm_qa)
-
-    # Prepare the HTML for displaying source documents
+    ## -----------------Prepare HTML for displaying source documents --------------
     docs_html = []
     for i, d in enumerate(context_retrieved, 1):
         docs_html.append(make_html_source(d, i))
     docs_html = "".join(docs_html)

-
+    ##-----------------------get answer from endpoints------------------------------
     answer_yet = ""
+    if model_config.get('reader','TYPE') == 'NVIDIA':
+        chat_model = nvidia_client()
+        async def process_stream():
+            nonlocal answer_yet
+            # Without nonlocal, Python would create a new local variable answer_yet inside process_stream(), instead of modifying the one from the outer scope.
+            #nonlocal answer_yet # Use the outer scope's answer_yet variable
+            # Iterate over the streaming response chunks
+            response = chat_model.chat_completion(
+                model=model_config.get("reader","NVIDIA_MODEL"),
+                messages=messages,
+                stream=True,
+                max_tokens=int(model_config.get('reader','MAX_TOKENS')),
+            )
+            for message in response:
+                token = message.choices[0].delta.content
+                if token:
+                    answer_yet += token
+                    parsed_answer = parse_output_llm_with_sources(answer_yet)
+                    history[-1] = (query, parsed_answer)
+                    yield [tuple(x) for x in history], docs_html
+
+        # Stream the response updates
+        async for update in process_stream():
+            yield update

-
-
+    else:
+        chat_model = dedicated_endpoint()
+        async def process_stream():
             # Without nonlocal, Python would create a new local variable answer_yet inside process_stream(), instead of modifying the one from the outer scope.
-
-
-
-
-
-
-
-
-
-
-
-
-
-    # #callbacks = [StreamingStdOutCallbackHandler()]
-    # llm_qa = HuggingFaceEndpoint(
-    #     endpoint_url= model_config.get('reader','ENDPOINT'),
-    #     max_new_tokens=512,
-    #     repetition_penalty=1.03,
-    #     timeout=70,
-    #     huggingfacehub_api_token=HF_token,)
-
-    # # create RAG
-    # chat_model = ChatHuggingFace(llm=llm_qa)
-
-    # ##-------------------------- get answers ---------------------------------------
-    # answer_lst = []
-    # for question, context in zip(question_lst , context_retrieved_lst):
-    #     answer = chat_model.invoke(messages)
-    #     answer_lst.append(answer.content)
-    # docs_html = []
-    # for i, d in enumerate(context_retrieved, 1):
-    #     docs_html.append(make_html_source(d, i))
-    # docs_html = "".join(docs_html)
-
-    # previous_answer = history[-1][1]
-    # previous_answer = previous_answer if previous_answer is not None else ""
-    # answer_yet = previous_answer + answer_lst[0]
-    # answer_yet = parse_output_llm_with_sources(answer_yet)
-    # history[-1] = (query,answer_yet)
-
-    # history = [tuple(x) for x in history]
-
-    # yield history,docs_html
+            nonlocal answer_yet # Use the outer scope's answer_yet variable
+            # Iterate over the streaming response chunks
+            async for chunk in chat_model.astream(messages):
+                token = chunk.content
+                answer_yet += token
+                parsed_answer = parse_output_llm_with_sources(answer_yet)
+                history[-1] = (query, parsed_answer)
+                yield [tuple(x) for x in history], docs_html
+
+        # Stream the response updates
+        async for update in process_stream():
+            yield update

     # logging the event
     try:
@@ -309,7 +165,8 @@ async def chat(query,history,sources,reports,subtype,year):
             "question":query,
             "sources":sources,
             "retriever":model_config.get('retriever','MODEL'),
-            "
+            "endpoint_type":model_config.get('reader','TYPE'),
+            "raeder":model_config.get('reader','NVIDIA_MODEL'),
             "docs":[doc.page_content for doc in context_retrieved],
             "answer": history[-1][1],
             "time": timestamp,
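For reference, the config keys the updated app.py reads through getconfig("model_params.cfg") are retriever.MODEL and retriever.TOP_K, plus reader.TYPE, reader.NVIDIA_MODEL and reader.MAX_TOKENS (the removed code also read reader.ENDPOINT, which presumably now lives behind auditqa.reader). A sketch of the expected shape of that file; the values below are placeholders, not the project's actual settings:

[retriever]
# placeholder values - the real file ships with the Space
MODEL = placeholder-retriever-model
TOP_K = 10

[reader]
# TYPE switches between the NVIDIA client and the dedicated HF endpoint
TYPE = NVIDIA
NVIDIA_MODEL = placeholder-nvidia-model-id
MAX_TOKENS = 512
# ENDPOINT is used by the dedicated-endpoint path (see the code removed above)
ENDPOINT = https://placeholder-endpoint-url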