ppsingh committed on
Commit
5a4f54c
1 Parent(s): 06bf5a5

app update post reader and utils


Once the reader, retriever and utils methods are moved into their respective scripts, app.py is slimmed down; the assumed module layout is sketched below the file list.

Files changed (1)
  1. app.py +66 -209
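For orientation, the package layout this commit appears to assume, inferred from the new import lines in app.py shown further down; anything beyond those imported module paths and names is an assumption, since the moved files themselves are not part of this diff:

```python
# Assumed auditqa/ layout after this commit (inferred from the new imports in app.py)
#
#   auditqa/
#   ├── process_chunks.py   # load_chunks, getconfig, get_local_qdrant (unchanged)
#   ├── retriever.py        # get_context(vectorstore, query, reports, sources, subtype, year)
#   ├── reader.py           # nvidia_client(), dedicated_endpoint()
#   └── utils.py            # make_html_source, parse_output_llm_with_sources,
#                           # save_logs, get_message_template
```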
app.py CHANGED
@@ -3,50 +3,37 @@ import pandas as pd
  import logging
  import asyncio
  import os
- import re
- import json
  from uuid import uuid4
  from datetime import datetime
  from pathlib import Path
  from huggingface_hub import CommitScheduler
  from auditqa.sample_questions import QUESTIONS
  from auditqa.reports import files, report_list
- from langchain.schema import (
-     HumanMessage,
-     SystemMessage,
- )
- from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
- from langchain_community.llms import HuggingFaceEndpoint
  from auditqa.process_chunks import load_chunks, getconfig, get_local_qdrant
- from langchain_community.chat_models.huggingface import ChatHuggingFace
- from langchain.retrievers import ContextualCompressionRetriever
- from langchain.retrievers.document_compressors import CrossEncoderReranker
- from langchain_community.cross_encoders import HuggingFaceCrossEncoder
- from qdrant_client.http import models as rest
  from dotenv import load_dotenv
  load_dotenv()
- # token to allow access to the Hub; this token should also be
- # valid for calls made to Inference Endpoints
- HF_token = os.environ["LLAMA_3_1"]
  SPACES_LOG = os.environ["SPACES_LOG"]

  # create the local logs repo
  JSON_DATASET_DIR = Path("json_dataset")
  JSON_DATASET_DIR.mkdir(parents=True, exist_ok=True)
  JSON_DATASET_PATH = JSON_DATASET_DIR / f"logs-{uuid4()}.json"

- # the logs are written to the dataset repo
  # https://huggingface.co/spaces/Wauplin/space_to_dataset_saver
  scheduler = CommitScheduler(
-     repo_id="GIZ/spaces_logs",
-     repo_type="dataset",
-     folder_path=JSON_DATASET_DIR,
-     path_in_repo="audit_chatbot",
-     token=SPACES_LOG
- )
-
- model_config = getconfig("model_params.cfg")
-
-

  #### VECTOR STORE ####
  # reports contain the already created chunks from the Markdown version of the pdf reports
@@ -54,68 +41,11 @@ model_config = getconfig("model_params.cfg")
  # We need to create the local vectorstore collection once using load_chunks
  # vectorstore collections are stored on persistent storage so this needs to be run only once
  # hence, uncomment the line below only when creating the collection for the first time
- # vectorstores = load_chunks()
  # once the vector embeddings are created we will use the qdrant client to access these
  vectorstores = get_local_qdrant()


-
- #### FUNCTIONS ####
- # App UI and its functionality is inspired and adapted from
- # https://huggingface.co/spaces/Ekimetrics/climate-question-answering
-
-
- def save_logs(logs) -> None:
-     """ Every interaction with the app saves a log of the question and answer;
-     this is used to get usage statistics of the app and to evaluate model performance.
-     """
-     with scheduler.lock:
-         with JSON_DATASET_PATH.open("a") as f:
-             json.dump(logs, f)
-             f.write("\n")
-     logging.info("logging done")
-
-
- def make_html_source(source,i):
-     """
-     takes the text and converts it into html format for display in the "source" side tab
-     """
-     meta = source.metadata
-     content = source.page_content.strip()
-
-     name = meta['filename']
-     card = f"""
-     <div class="card" id="doc{i}">
-         <div class="card-content">
-             <h2>Doc {i} - {meta['filename']} - Page {int(meta['page'])}</h2>
-             <p>{content}</p>
-         </div>
-         <div class="card-footer">
-             <span>{name}</span>
-             <a href="{meta['filename']}#page={int(meta['page'])}" target="_blank" class="pdf-link">
-                 <span role="img" aria-label="Open PDF">🔗</span>
-             </a>
-         </div>
-     </div>
-     """
-
-     return card
-
- def parse_output_llm_with_sources(output):
-     # Split the content into a list of text and "[Doc X]" references
-     content_parts = re.split(r'\[(Doc\s?\d+(?:,\s?Doc\s?\d+)*)\]', output)
-     parts = []
-     for part in content_parts:
-         if part.startswith("Doc"):
-             subparts = part.split(",")
-             subparts = [subpart.lower().replace("doc","").strip() for subpart in subparts]
-             subparts = [f"""<a href="#doc{subpart}" class="a-doc-ref" target="_self"><span class='doc-ref'><sup>{subpart}</sup></span></a>""" for subpart in subparts]
-             parts.append("".join(subparts))
-         else:
-             parts.append(part)
-     content_parts = "".join(parts)
-     return content_parts
-
  def start_chat(query,history):
      history = history + [(query,None)]
      history = [tuple(x) for x in history]
@@ -141,64 +71,18 @@ async def chat(query,history,sources,reports,subtype,year):

      ##------------------------fetch collection from vectorstore------------------------------
      vectorstore = vectorstores["allreports"]
-     ##---------------------construct filter for metadata filtering---------------------------
-     if len(reports) == 0:
-         ("defining filter for:{}:{}:{}".format(sources,subtype,year))
-         filter=rest.Filter(
-             must=[rest.FieldCondition(
-                     key="metadata.source",
-                     match=rest.MatchValue(value=sources)
-                 ),
-                 rest.FieldCondition(
-                     key="metadata.subtype",
-                     match=rest.MatchValue(value=subtype)
-                 ),
-                 rest.FieldCondition(
-                     key="metadata.year",
-                     match=rest.MatchAny(any=year)
-                 ),])
-     else:
-         print("defining filter for allreports:",reports)
-         filter=rest.Filter(
-             must=[
-                 rest.FieldCondition(
-                     key="metadata.filename",
-                     match=rest.MatchAny(any=reports)
-                 )])
-

      ##------------------------------get context----------------------------------------------
-     context_retrieved_lst = []
-     question_lst= [query]
-
-     for question in question_lst:
-         # the similarity score threshold can be used to adjust quality and quantity for the Retriever;
-         # however this needs balancing, as the retrieved results are again used by the Ranker to pick
-         # the best among the retrieved results
-         retriever = vectorstore.as_retriever(
-             search_type="similarity_score_threshold", search_kwargs={"score_threshold": 0.6,
-                 "k": int(model_config.get('retriever','TOP_K')),
-                 "filter":filter})
-         model = HuggingFaceCrossEncoder(model_name="BAAI/bge-reranker-base")
-         compressor = CrossEncoderReranker(model=model, top_n=3)
-         compression_retriever = ContextualCompressionRetriever(
-             base_compressor=compressor, base_retriever=retriever
-         )
-         context_retrieved = compression_retriever.invoke(question)
-         logging.info(len(context_retrieved))
-         for doc in context_retrieved:
-             logging.info(doc.metadata)

-         def format_docs(docs):
-             return "\n\n".join(doc.page_content for doc in docs)
-
-         context_retrieved_formatted = format_docs(context_retrieved)
-         context_retrieved_lst.append(context_retrieved_formatted)
-
      ##------------------- -------------Prompt--------------------------------------------------
      SYSTEM_PROMPT = """
      You are AuditQ&A, an AI Assistant created by Auditors and Data Scientist. You are given a question and extracted passages of the consolidated/departmental/thematic focus audit reports. Provide a clear and structured answer based on the passages/context provided and the guidelines.
      Guidelines:

      - If the passages have useful facts or numbers, use them in your answer.
      - When you use information from a passage, mention where it came from by using [Doc i] at the end of the sentence. i stands for the number of the document.
      - Do not use the sentence 'Doc i says ...' to say where information came from.
@@ -215,87 +99,59 @@ async def chat(query,history,sources,reports,subtype,year):
      Question: {question} - Explained to audit expert
      Answer in english with the passages citations:
      """.format(context = context_retrieved_lst, question=query)

-     messages = [
-         SystemMessage(content=SYSTEM_PROMPT),
-         HumanMessage(
-             content=USER_PROMPT
-         ),]
-
-     ##-----------------------getting inference endpoints------------------------------
-
-     # Set up the streaming callback handler
-     callback = StreamingStdOutCallbackHandler()
-
-     # Initialize the HuggingFaceEndpoint with streaming enabled
-     llm_qa = HuggingFaceEndpoint(
-         endpoint_url=model_config.get('reader', 'ENDPOINT'),
-         max_new_tokens=512,
-         repetition_penalty=1.03,
-         timeout=70,
-         huggingfacehub_api_token=HF_token,
-         streaming=True, # Enable streaming for real-time token generation
-         callbacks=[callback] # Add the streaming callback handler
-     )
-
-     # Create a ChatHuggingFace instance with the streaming-enabled endpoint
-     chat_model = ChatHuggingFace(llm=llm_qa)
-
-     # Prepare the HTML for displaying source documents
      docs_html = []
      for i, d in enumerate(context_retrieved, 1):
          docs_html.append(make_html_source(d, i))
      docs_html = "".join(docs_html)

-     # Initialize the variable to store the accumulated answer
      answer_yet = ""

-     # Define an asynchronous generator function to process the streaming response
-     async def process_stream():
          # Without nonlocal, Python would create a new local variable answer_yet inside process_stream(), instead of modifying the one from the outer scope.
-         nonlocal answer_yet # Use the outer scope's answer_yet variable
-         # Iterate over the streaming response chunks
-         async for chunk in chat_model.astream(messages):
-             token = chunk.content
-             answer_yet += token
-             parsed_answer = parse_output_llm_with_sources(answer_yet)
-             history[-1] = (query, parsed_answer)
-             yield [tuple(x) for x in history], docs_html
-
-     # Stream the response updates
-     async for update in process_stream():
-         yield update
-
-     # #callbacks = [StreamingStdOutCallbackHandler()]
-     # llm_qa = HuggingFaceEndpoint(
-     #     endpoint_url= model_config.get('reader','ENDPOINT'),
-     #     max_new_tokens=512,
-     #     repetition_penalty=1.03,
-     #     timeout=70,
-     #     huggingfacehub_api_token=HF_token,)
-
-     # # create RAG
-     # chat_model = ChatHuggingFace(llm=llm_qa)
-
-     # ##-------------------------- get answers ---------------------------------------
-     # answer_lst = []
-     # for question, context in zip(question_lst , context_retrieved_lst):
-     #     answer = chat_model.invoke(messages)
-     #     answer_lst.append(answer.content)
-     # docs_html = []
-     # for i, d in enumerate(context_retrieved, 1):
-     #     docs_html.append(make_html_source(d, i))
-     # docs_html = "".join(docs_html)
-
-     # previous_answer = history[-1][1]
-     # previous_answer = previous_answer if previous_answer is not None else ""
-     # answer_yet = previous_answer + answer_lst[0]
-     # answer_yet = parse_output_llm_with_sources(answer_yet)
-     # history[-1] = (query,answer_yet)
-
-     # history = [tuple(x) for x in history]
-
-     # yield history,docs_html

      # logging the event
      try:
@@ -309,7 +165,8 @@ async def chat(query,history,sources,reports,subtype,year):
          "question":query,
          "sources":sources,
          "retriever":model_config.get('retriever','MODEL'),
-         "raeder":model_config.get('reader','MODEL'),
          "docs":[doc.page_content for doc in context_retrieved],
          "answer": history[-1][1],
          "time": timestamp,
 
  import logging
  import asyncio
  import os
  from uuid import uuid4
  from datetime import datetime
  from pathlib import Path
  from huggingface_hub import CommitScheduler
  from auditqa.sample_questions import QUESTIONS
  from auditqa.reports import files, report_list
  from auditqa.process_chunks import load_chunks, getconfig, get_local_qdrant
+ from auditqa.retriever import get_context
+ from auditqa.reader import nvidia_client, dedicated_endpoint
+ from auditqa.utils import make_html_source, parse_output_llm_with_sources, save_logs, get_message_template
+
  from dotenv import load_dotenv
  load_dotenv()
+
+ # fetch tokens and model config params
  SPACES_LOG = os.environ["SPACES_LOG"]
+ model_config = getconfig("model_params.cfg")
+
  # create the local logs repo
  JSON_DATASET_DIR = Path("json_dataset")
  JSON_DATASET_DIR.mkdir(parents=True, exist_ok=True)
  JSON_DATASET_PATH = JSON_DATASET_DIR / f"logs-{uuid4()}.json"

+ # the logs are written to the dataset repo periodically from the local logs
  # https://huggingface.co/spaces/Wauplin/space_to_dataset_saver
  scheduler = CommitScheduler(
+     repo_id="GIZ/spaces_logs",
+     repo_type="dataset",
+     folder_path=JSON_DATASET_DIR,
+     path_in_repo="audit_chatbot",
+     token=SPACES_LOG )

  #### VECTOR STORE ####
  # reports contain the already created chunks from the Markdown version of the pdf reports

  # We need to create the local vectorstore collection once using load_chunks
  # vectorstore collections are stored on persistent storage so this needs to be run only once
  # hence, uncomment the line below only when creating the collection for the first time
+ #vectorstores = load_chunks()
  # once the vector embeddings are created we will use the qdrant client to access these
  vectorstores = get_local_qdrant()
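The comments above describe a two-step workflow: embed and persist the collection once with load_chunks(), then reopen it on every app start with get_local_qdrant(). A minimal sketch of that flow; the FIRST_RUN switch is hypothetical and the internals of both helpers are not shown in this commit:

```python
# Sketch of the one-time bootstrap vs. runtime access pattern described above (assumptions noted).
from auditqa.process_chunks import load_chunks, get_local_qdrant

FIRST_RUN = False  # hypothetical switch: set True only while the persistent collection does not exist yet

if FIRST_RUN:
    # embeds the markdown chunks and writes the collection(s) to persistent storage
    vectorstores = load_chunks()
else:
    # afterwards the existing collection is opened through the local qdrant client
    vectorstores = get_local_qdrant()

vectorstore = vectorstores["allreports"]  # the collection used by the chat() handler below
```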
  def start_chat(query,history):
      history = history + [(query,None)]
      history = [tuple(x) for x in history]
 
      ##------------------------fetch collection from vectorstore------------------------------
      vectorstore = vectorstores["allreports"]

      ##------------------------------get context----------------------------------------------
+     context_retrieved = get_context(vectorstore=vectorstore,query=query,reports=reports,
+                                     sources=sources,subtype=subtype,year=year)
+     context_retrieved_formatted = "||".join(doc.page_content for doc in context_retrieved)
+     context_retrieved_lst = [doc.page_content for doc in context_retrieved]

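get_context() is imported from auditqa.retriever, whose body is not part of this commit. The sketch below reassembles it from the filter/retriever/reranker block deleted from chat() earlier in this diff, with the signature taken from the call site above; treat it as a plausible reconstruction rather than the actual module:

```python
# auditqa/retriever.py -- hypothetical reconstruction from the code removed from app.py
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import CrossEncoderReranker
from langchain_community.cross_encoders import HuggingFaceCrossEncoder
from qdrant_client.http import models as rest

from auditqa.process_chunks import getconfig

model_config = getconfig("model_params.cfg")  # assumed to re-read the same config file


def get_context(vectorstore, query, reports, sources, subtype, year):
    """Metadata-filtered similarity search followed by cross-encoder reranking."""
    # build the qdrant metadata filter exactly as the old in-app code did
    if len(reports) == 0:
        filter = rest.Filter(
            must=[rest.FieldCondition(key="metadata.source", match=rest.MatchValue(value=sources)),
                  rest.FieldCondition(key="metadata.subtype", match=rest.MatchValue(value=subtype)),
                  rest.FieldCondition(key="metadata.year", match=rest.MatchAny(any=year))])
    else:
        filter = rest.Filter(
            must=[rest.FieldCondition(key="metadata.filename", match=rest.MatchAny(any=reports))])

    # the similarity-score threshold trades retrieval quantity for quality; the cross-encoder
    # reranker then keeps the best few of the retrieved candidates
    retriever = vectorstore.as_retriever(
        search_type="similarity_score_threshold",
        search_kwargs={"score_threshold": 0.6,
                       "k": int(model_config.get('retriever', 'TOP_K')),
                       "filter": filter})
    reranker_model = HuggingFaceCrossEncoder(model_name="BAAI/bge-reranker-base")
    compressor = CrossEncoderReranker(model=reranker_model, top_n=3)
    compression_retriever = ContextualCompressionRetriever(
        base_compressor=compressor, base_retriever=retriever)
    return compression_retriever.invoke(query)
```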

      ##------------------- -------------Prompt--------------------------------------------------
      SYSTEM_PROMPT = """
      You are AuditQ&A, an AI Assistant created by Auditors and Data Scientist. You are given a question and extracted passages of the consolidated/departmental/thematic focus audit reports. Provide a clear and structured answer based on the passages/context provided and the guidelines.
      Guidelines:
+     - Passages are provided as a comma separated list of strings
      - If the passages have useful facts or numbers, use them in your answer.
      - When you use information from a passage, mention where it came from by using [Doc i] at the end of the sentence. i stands for the number of the document.
      - Do not use the sentence 'Doc i says ...' to say where information came from.

      Question: {question} - Explained to audit expert
      Answer in english with the passages citations:
      """.format(context = context_retrieved_lst, question=query)
+
+     ##-------------------- apply message template ------------------------------
+     messages = get_message_template(model_config.get('reader','TYPE'),SYSTEM_PROMPT,USER_PROMPT)
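get_message_template() is also new to auditqa.utils and not shown here. A sketch inferred from its two call sites below (the NVIDIA branch feeds the result to chat_completion(), the dedicated-endpoint branch to astream()) and from the SystemMessage/HumanMessage construction deleted above; the exact return format is an assumption:

```python
# auditqa/utils.py -- hypothetical sketch of get_message_template
from langchain.schema import HumanMessage, SystemMessage


def get_message_template(endpoint_type, system_prompt, user_prompt):
    if endpoint_type == 'NVIDIA':
        # chat_completion() expects OpenAI-style role/content dicts
        messages = [{"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_prompt}]
    else:
        # the LangChain chat model expects message objects
        messages = [SystemMessage(content=system_prompt),
                    HumanMessage(content=user_prompt)]
    return messages
```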
 
+     ## -----------------Prepare HTML for displaying source documents --------------
      docs_html = []
      for i, d in enumerate(context_retrieved, 1):
          docs_html.append(make_html_source(d, i))
      docs_html = "".join(docs_html)

+     ##-----------------------get answer from endpoints------------------------------
      answer_yet = ""
+     if model_config.get('reader','TYPE') == 'NVIDIA':
+         chat_model = nvidia_client()
+         async def process_stream():
+             # Without nonlocal, Python would create a new local variable answer_yet inside process_stream(), instead of modifying the one from the outer scope.
+             nonlocal answer_yet  # Use the outer scope's answer_yet variable
+             response = chat_model.chat_completion(
+                 model=model_config.get("reader","NVIDIA_MODEL"),
+                 messages=messages,
+                 stream=True,
+                 max_tokens=int(model_config.get('reader','MAX_TOKENS')),
+             )
+             # Iterate over the streaming response chunks
+             for message in response:
+                 token = message.choices[0].delta.content
+                 if token:
+                     answer_yet += token
+                     parsed_answer = parse_output_llm_with_sources(answer_yet)
+                     history[-1] = (query, parsed_answer)
+                     yield [tuple(x) for x in history], docs_html
+
+         # Stream the response updates
+         async for update in process_stream():
+             yield update

+     else:
+         chat_model = dedicated_endpoint()
+         async def process_stream():
              # Without nonlocal, Python would create a new local variable answer_yet inside process_stream(), instead of modifying the one from the outer scope.
+             nonlocal answer_yet  # Use the outer scope's answer_yet variable
+             # Iterate over the streaming response chunks
+             async for chunk in chat_model.astream(messages):
+                 token = chunk.content
+                 answer_yet += token
+                 parsed_answer = parse_output_llm_with_sources(answer_yet)
+                 history[-1] = (query, parsed_answer)
+                 yield [tuple(x) for x in history], docs_html
+
+         # Stream the response updates
+         async for update in process_stream():
+             yield update
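Neither nvidia_client() nor dedicated_endpoint() appears in this commit. A sketch under stated assumptions: dedicated_endpoint() is taken to wrap the HuggingFaceEndpoint/ChatHuggingFace setup deleted above, and nvidia_client() is assumed to be a huggingface_hub.InferenceClient, since the chat_completion() call in the NVIDIA branch matches that API; the NVIDIA_ENDPOINT config key and the reuse of the LLAMA_3_1 token are likewise assumptions:

```python
# auditqa/reader.py -- hypothetical sketch of the two reader factories
import os
from huggingface_hub import InferenceClient
from langchain_community.llms import HuggingFaceEndpoint
from langchain_community.chat_models.huggingface import ChatHuggingFace
from auditqa.process_chunks import getconfig

model_config = getconfig("model_params.cfg")
HF_token = os.environ["LLAMA_3_1"]  # token for calls to the Inference Endpoints (moved out of app.py)


def nvidia_client():
    """Client for the NVIDIA-hosted model, exposing chat_completion() as used in app.py."""
    return InferenceClient(
        base_url=model_config.get('reader', 'NVIDIA_ENDPOINT'),  # assumed config key
        api_key=HF_token,
    )


def dedicated_endpoint():
    """Streaming chat model backed by the dedicated Hugging Face Inference Endpoint."""
    llm_qa = HuggingFaceEndpoint(
        endpoint_url=model_config.get('reader', 'ENDPOINT'),
        max_new_tokens=512,
        repetition_penalty=1.03,
        timeout=70,
        huggingfacehub_api_token=HF_token,
        streaming=True,
    )
    return ChatHuggingFace(llm=llm_qa)
```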

      # logging the event
      try:

          "question":query,
          "sources":sources,
          "retriever":model_config.get('retriever','MODEL'),
+         "endpoint_type":model_config.get('reader','TYPE'),
+         "reader":model_config.get('reader','NVIDIA_MODEL'),
          "docs":[doc.page_content for doc in context_retrieved],
          "answer": history[-1][1],
          "time": timestamp,