app update post reader and utils
With the reader, retriever and utils methods moved to their respective scripts, app.py is slimmed down.
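Based on the new imports, app.py now pulls the removed helper code from three auditqa modules. A minimal sketch of the assumed layout follows; the module paths and signatures come from the imports and call sites in the diff below, while the bodies are reconstructions of the code removed from app.py, not the committed module contents.

# Assumed layout after this refactor, reconstructed from the new imports and from
# the code removed from app.py below. Bodies are illustrative sketches only.

# --- auditqa/retriever.py ----------------------------------------------------
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import CrossEncoderReranker
from langchain_community.cross_encoders import HuggingFaceCrossEncoder
from qdrant_client.http import models as rest

def get_context(vectorstore, query, reports, sources, subtype, year, top_k=10):
    """Metadata-filtered similarity search followed by cross-encoder reranking,
    assembled from the retrieval code removed from chat() in this diff.
    (The app reads TOP_K from model_params.cfg; top_k=10 is a placeholder.)"""
    if len(reports) == 0:
        # no explicit report list: filter on source, subtype and year
        filter = rest.Filter(must=[
            rest.FieldCondition(key="metadata.source", match=rest.MatchValue(value=sources)),
            rest.FieldCondition(key="metadata.subtype", match=rest.MatchValue(value=subtype)),
            rest.FieldCondition(key="metadata.year", match=rest.MatchAny(any=year))])
    else:
        # otherwise restrict retrieval to the selected report filenames
        filter = rest.Filter(must=[
            rest.FieldCondition(key="metadata.filename", match=rest.MatchAny(any=reports))])
    retriever = vectorstore.as_retriever(
        search_type="similarity_score_threshold",
        search_kwargs={"score_threshold": 0.6, "k": top_k, "filter": filter})
    reranker = CrossEncoderReranker(
        model=HuggingFaceCrossEncoder(model_name="BAAI/bge-reranker-base"), top_n=3)
    return ContextualCompressionRetriever(
        base_compressor=reranker, base_retriever=retriever).invoke(query)

# --- auditqa/utils.py ---------------------------------------------------------
# save_logs, make_html_source and parse_output_llm_with_sources move here from
# app.py; get_message_template is new and only sketched here.
from langchain.schema import HumanMessage, SystemMessage

def get_message_template(endpoint_type, system_prompt, user_prompt):
    """NVIDIA's chat_completion expects OpenAI-style dicts, while the dedicated
    HF endpoint expects LangChain messages (inferred from the call sites below)."""
    if endpoint_type == 'NVIDIA':
        return [{"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt}]
    return [SystemMessage(content=system_prompt), HumanMessage(content=user_prompt)]

# --- auditqa/reader.py --------------------------------------------------------
# nvidia_client()      -> client exposing chat_completion(..., stream=True)
# dedicated_endpoint() -> chat model exposing astream(messages), presumably the
#                         HuggingFaceEndpoint/ChatHuggingFace pair removed below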
app.py CHANGED
@@ -3,50 +3,37 @@ import pandas as pd
 import logging
 import asyncio
 import os
-import re
-import json
 from uuid import uuid4
 from datetime import datetime
 from pathlib import Path
 from huggingface_hub import CommitScheduler
 from auditqa.sample_questions import QUESTIONS
 from auditqa.reports import files, report_list
-from langchain.schema import (
-    HumanMessage,
-    SystemMessage,
-)
-from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
-from langchain_community.llms import HuggingFaceEndpoint
 from auditqa.process_chunks import load_chunks, getconfig, get_local_qdrant
-from
-from
-from
-
-from qdrant_client.http import models as rest
+from auditqa.retriever import get_context
+from auditqa.reader import nvidia_client, dedicated_endpoint
+from auditqa.utils import make_html_source, parse_output_llm_with_sources, save_logs, get_message_template
+
 from dotenv import load_dotenv
 load_dotenv()
-
-#
-HF_token = os.environ["LLAMA_3_1"]
+
+# fetch tokens and model config params
 SPACES_LOG = os.environ["SPACES_LOG"]
+model_config = getconfig("model_params.cfg")
+
 # create the local logs repo
 JSON_DATASET_DIR = Path("json_dataset")
 JSON_DATASET_DIR.mkdir(parents=True, exist_ok=True)
 JSON_DATASET_PATH = JSON_DATASET_DIR / f"logs-{uuid4()}.json"

-# the logs are written to dataset repo
+# the logs are written to dataset repo periodically from local logs
 # https://huggingface.co/spaces/Wauplin/space_to_dataset_saver
 scheduler = CommitScheduler(
-
-
-
-
-
-)
-
-model_config = getconfig("model_params.cfg")
-
-
+    repo_id="GIZ/spaces_logs",
+    repo_type="dataset",
+    folder_path=JSON_DATASET_DIR,
+    path_in_repo="audit_chatbot",
+    token=SPACES_LOG )

 #### VECTOR STORE ####
 # reports contain the already created chunks from Markdown version of pdf reports
@@ -54,68 +41,11 @@ model_config = getconfig("model_params.cfg")
 # We need to create the local vectorstore collection once using load_chunks
 # vectorestore colection are stored on persistent storage so this needs to be run only once
 # hence, comment out line below when creating for first time
-#
+#vectorstores = load_chunks()
 # once the vectore embeddings are created we will use qdrant client to access these
 vectorstores = get_local_qdrant()


-
-#### FUNCTIONS ####
-# App UI and and its functionality is inspired and adapted from
-# https://huggingface.co/spaces/Ekimetrics/climate-question-answering
-
-
-def save_logs(logs) -> None:
-    """ Every interaction with app saves the log of question and answer,
-        this is to get the usage statistics of app and evaluate model performances
-    """
-    with scheduler.lock:
-        with JSON_DATASET_PATH.open("a") as f:
-            json.dump(logs, f)
-            f.write("\n")
-    logging.info("logging done")
-
-
-def make_html_source(source,i):
-    """
-    takes the text and converts it into html format for display in "source" side tab
-    """
-    meta = source.metadata
-    content = source.page_content.strip()
-
-    name = meta['filename']
-    card = f"""
-    <div class="card" id="doc{i}">
-        <div class="card-content">
-            <h2>Doc {i} - {meta['filename']} - Page {int(meta['page'])}</h2>
-            <p>{content}</p>
-        </div>
-        <div class="card-footer">
-            <span>{name}</span>
-            <a href="{meta['filename']}#page={int(meta['page'])}" target="_blank" class="pdf-link">
-                <span role="img" aria-label="Open PDF">🔗</span>
-            </a>
-        </div>
-    </div>
-    """
-
-    return card
-
-def parse_output_llm_with_sources(output):
-    # Split the content into a list of text and "[Doc X]" references
-    content_parts = re.split(r'\[(Doc\s?\d+(?:,\s?Doc\s?\d+)*)\]', output)
-    parts = []
-    for part in content_parts:
-        if part.startswith("Doc"):
-            subparts = part.split(",")
-            subparts = [subpart.lower().replace("doc","").strip() for subpart in subparts]
-            subparts = [f"""<a href="#doc{subpart}" class="a-doc-ref" target="_self"><span class='doc-ref'><sup>{subpart}</sup></span></a>""" for subpart in subparts]
-            parts.append("".join(subparts))
-        else:
-            parts.append(part)
-    content_parts = "".join(parts)
-    return content_parts
-
 def start_chat(query,history):
     history = history + [(query,None)]
     history = [tuple(x) for x in history]
@@ -141,64 +71,18 @@ async def chat(query,history,sources,reports,subtype,year):

     ##------------------------fetch collection from vectorstore------------------------------
     vectorstore = vectorstores["allreports"]
-    ##---------------------construct filter for metdata filtering---------------------------
-    if len(reports) == 0:
-        ("defining filter for:{}:{}:{}".format(sources,subtype,year))
-        filter=rest.Filter(
-            must=[rest.FieldCondition(
-                key="metadata.source",
-                match=rest.MatchValue(value=sources)
-            ),
-            rest.FieldCondition(
-                key="metadata.subtype",
-                match=rest.MatchValue(value=subtype)
-            ),
-            rest.FieldCondition(
-                key="metadata.year",
-                match=rest.MatchAny(any=year)
-            ),])
-    else:
-        print("defining filter for allreports:",reports)
-        filter=rest.Filter(
-            must=[
-            rest.FieldCondition(
-                key="metadata.filename",
-                match=rest.MatchAny(any=reports)
-            )])
-

     ##------------------------------get context----------------------------------------------
-
-
-
-    for
-    # similarity score threshold can be used to make adjustments in quality and quantity for Retriever
-    # However need to make balancing, as retrieved results are again used by Ranker to fetch best among
-    # retreived results
-    retriever = vectorstore.as_retriever(
-        search_type="similarity_score_threshold", search_kwargs={"score_threshold": 0.6,
-        "k": int(model_config.get('retriever','TOP_K')),
-        "filter":filter})
-    model = HuggingFaceCrossEncoder(model_name="BAAI/bge-reranker-base")
-    compressor = CrossEncoderReranker(model=model, top_n=3)
-    compression_retriever = ContextualCompressionRetriever(
-        base_compressor=compressor, base_retriever=retriever
-    )
-    context_retrieved = compression_retriever.invoke(question)
-    logging.info(len(context_retrieved))
-    for doc in context_retrieved:
-        logging.info(doc.metadata)
+    context_retrieved = get_context(vectorstore=vectorstore,query=query,reports=reports,
+                                    sources=sources,subtype=subtype,year=year)
+    context_retrieved_formatted = "||".join(doc.page_content for doc in context_retrieved)
+    context_retrieved_lst = [doc.page_content for doc in context_retrieved]

-    def format_docs(docs):
-        return "\n\n".join(doc.page_content for doc in docs)
-
-    context_retrieved_formatted = format_docs(context_retrieved)
-    context_retrieved_lst.append(context_retrieved_formatted)
-
     ##------------------- -------------Prompt--------------------------------------------------
     SYSTEM_PROMPT = """
     You are AuditQ&A, an AI Assistant created by Auditors and Data Scientist. You are given a question and extracted passages of the consolidated/departmental/thematic focus audit reports. Provide a clear and structured answer based on the passages/context provided and the guidelines.
     Guidelines:
+    - Passeges are provided as comma separated list of strings
     - If the passages have useful facts or numbers, use them in your answer.
    - When you use information from a passage, mention where it came from by using [Doc i] at the end of the sentence. i stands for the number of the document.
     - Do not use the sentence 'Doc i says ...' to say where information came from.
@@ -215,87 +99,59 @@ async def chat(query,history,sources,reports,subtype,year):
     Question: {question} - Explained to audit expert
     Answer in english with the passages citations:
     """.format(context = context_retrieved_lst, question=query)
+
+    ##-------------------- apply message template ------------------------------
+    messages = get_message_template(model_config.get('reader','TYPE'),SYSTEM_PROMPT,USER_PROMPT)

-
-        SystemMessage(content=SYSTEM_PROMPT),
-        HumanMessage(
-            content=USER_PROMPT
-        ),]
-
-    ##-----------------------getting inference endpoints------------------------------
-
-    # Set up the streaming callback handler
-    callback = StreamingStdOutCallbackHandler()
-
-    # Initialize the HuggingFaceEndpoint with streaming enabled
-    llm_qa = HuggingFaceEndpoint(
-        endpoint_url=model_config.get('reader', 'ENDPOINT'),
-        max_new_tokens=512,
-        repetition_penalty=1.03,
-        timeout=70,
-        huggingfacehub_api_token=HF_token,
-        streaming=True,  # Enable streaming for real-time token generation
-        callbacks=[callback]  # Add the streaming callback handler
-    )
-
-    # Create a ChatHuggingFace instance with the streaming-enabled endpoint
-    chat_model = ChatHuggingFace(llm=llm_qa)
-
-    # Prepare the HTML for displaying source documents
+    ## -----------------Prepare HTML for displaying source documents --------------
     docs_html = []
     for i, d in enumerate(context_retrieved, 1):
         docs_html.append(make_html_source(d, i))
     docs_html = "".join(docs_html)

-
+    ##-----------------------get answer from endpoints------------------------------
     answer_yet = ""
+    if model_config.get('reader','TYPE') == 'NVIDIA':
+        chat_model = nvidia_client()
+        async def process_stream():
+            nonlocal answer_yet
+            # Without nonlocal, Python would create a new local variable answer_yet inside process_stream(), instead of modifying the one from the outer scope.
+            #nonlocal answer_yet # Use the outer scope's answer_yet variable
+            # Iterate over the streaming response chunks
+            response = chat_model.chat_completion(
+                model=model_config.get("reader","NVIDIA_MODEL"),
+                messages=messages,
+                stream=True,
+                max_tokens=int(model_config.get('reader','MAX_TOKENS')),
+            )
+            for message in response:
+                token = message.choices[0].delta.content
+                if token:
+                    answer_yet += token
+                    parsed_answer = parse_output_llm_with_sources(answer_yet)
+                    history[-1] = (query, parsed_answer)
+                    yield [tuple(x) for x in history], docs_html
+
+        # Stream the response updates
+        async for update in process_stream():
+            yield update

-
-
+    else:
+        chat_model = dedicated_endpoint()
+        async def process_stream():
             # Without nonlocal, Python would create a new local variable answer_yet inside process_stream(), instead of modifying the one from the outer scope.
-
-
-
-
-
-
-
-
-
-
-
-
-
-    # #callbacks = [StreamingStdOutCallbackHandler()]
-    # llm_qa = HuggingFaceEndpoint(
-    #     endpoint_url= model_config.get('reader','ENDPOINT'),
-    #     max_new_tokens=512,
-    #     repetition_penalty=1.03,
-    #     timeout=70,
-    #     huggingfacehub_api_token=HF_token,)
-
-    # # create RAG
-    # chat_model = ChatHuggingFace(llm=llm_qa)
-
-    # ##-------------------------- get answers ---------------------------------------
-    # answer_lst = []
-    # for question, context in zip(question_lst , context_retrieved_lst):
-    #     answer = chat_model.invoke(messages)
-    #     answer_lst.append(answer.content)
-    # docs_html = []
-    # for i, d in enumerate(context_retrieved, 1):
-    #     docs_html.append(make_html_source(d, i))
-    # docs_html = "".join(docs_html)
-
-    # previous_answer = history[-1][1]
-    # previous_answer = previous_answer if previous_answer is not None else ""
-    # answer_yet = previous_answer + answer_lst[0]
-    # answer_yet = parse_output_llm_with_sources(answer_yet)
-    # history[-1] = (query,answer_yet)
-
-    # history = [tuple(x) for x in history]
-
-    # yield history,docs_html
+            nonlocal answer_yet # Use the outer scope's answer_yet variable
+            # Iterate over the streaming response chunks
+            async for chunk in chat_model.astream(messages):
+                token = chunk.content
+                answer_yet += token
+                parsed_answer = parse_output_llm_with_sources(answer_yet)
+                history[-1] = (query, parsed_answer)
+                yield [tuple(x) for x in history], docs_html
+
+        # Stream the response updates
+        async for update in process_stream():
+            yield update

     # logging the event
     try:
@@ -309,7 +165,8 @@ async def chat(query,history,sources,reports,subtype,year):
             "question":query,
             "sources":sources,
             "retriever":model_config.get('retriever','MODEL'),
-            "
+            "endpoint_type":model_config.get('reader','TYPE'),
+            "raeder":model_config.get('reader','NVIDIA_MODEL'),
             "docs":[doc.page_content for doc in context_retrieved],
             "answer": history[-1][1],
             "time": timestamp,
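For reference, the config keys the updated app.py reads through getconfig("model_params.cfg") are retriever.MODEL and retriever.TOP_K, plus reader.TYPE, reader.NVIDIA_MODEL and reader.MAX_TOKENS (the removed code also read reader.ENDPOINT, which presumably now lives behind auditqa.reader). A sketch of the expected shape of that file; the values below are placeholders, not the project's actual settings:

[retriever]
# placeholder values - the real file ships with the Space
MODEL = placeholder-retriever-model
TOP_K = 10

[reader]
# TYPE switches between the NVIDIA client and the dedicated HF endpoint
TYPE = NVIDIA
NVIDIA_MODEL = placeholder-nvidia-model-id
MAX_TOKENS = 512
# ENDPOINT is used by the dedicated-endpoint path (see the code removed above)
ENDPOINT = https://placeholder-endpoint-url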