Spaces:

Nexialog
/

ESMA-GPT

Runtime error

App Files Files Community

Afritz committed on Oct 20, 2023

Commit

743be46

1 Parent(s): 02f7718

Update new version

Browse files

Files changed (4) hide show

app.py +7 -3
assets/style.css +12 -3
config.py +10 -0
utils.py +40 -11

app.py CHANGED Viewed

@@ -34,12 +34,16 @@ with gr.Blocks(title=CFG_APP.BOT_NAME, css="assets/style.css", theme=theme) as d
             )
             state = gr.State([system_template])
             with gr.Row():
                 ask = gr.Textbox(
                     show_label=False,
                     placeholder="Ask here your question and press enter",
-                ).style(container=False)
-                ask_examples_hidden = gr.Textbox(elem_id="hidden-message")
             examples_questions = gr.Examples(
                 [*CFG_APP.DEFAULT_QUESTIONS],
@@ -53,7 +57,7 @@ with gr.Blocks(title=CFG_APP.BOT_NAME, css="assets/style.css", theme=theme) as d
     ask.submit(
         fn=chat,
-        inputs=[ask, state],
         outputs=[chatbot, state, sources_textbox],
     )
     ask.submit(lambda x: gr.update(value=""), [], [ask])

             )
             state = gr.State([system_template])
             with gr.Row():
                 ask = gr.Textbox(
                     show_label=False,
                     placeholder="Ask here your question and press enter",
+                    )
+            query_mode = gr.Radio(choices=["HYDE", "Reformulation"], elem_id="type-emb", default="HYDE", label="Query Embedding's Mode")
+            ask_examples_hidden = gr.Textbox(elem_id="hidden-message")
             examples_questions = gr.Examples(
                 [*CFG_APP.DEFAULT_QUESTIONS],
     ask.submit(
         fn=chat,
+        inputs=[ask, state, query_mode],
         outputs=[chatbot, state, sources_textbox],
     )
     ask.submit(lambda x: gr.update(value=""), [], [ask])

assets/style.css CHANGED Viewed

@@ -140,7 +140,6 @@ a {
 label>span {
-    background-color: white !important;
     color: #577b9b !important;
 }
@@ -152,7 +151,7 @@ label>span {
     left: -10px;
     width: 30px;
     height: 30px;
-    background-image: url('https://www.nexialog.com/wp-content/uploads/2021/10/cropped-icone-onglet-logo.png');
     background-color: #fff;
     background-size: cover;
     background-position: center;
@@ -181,4 +180,14 @@ label>span {
     padding: 17px 24px !important;
     text-align: justify !important;
     color: #fff !important;
-}

 label>span {
     color: #577b9b !important;
 }
     left: -10px;
     width: 30px;
     height: 30px;
+    background-image: url('data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAACnUlEQVR4AcXXA4wkQRiG4eHZtm3btm3btm3bDs+2bdvm2vPfm6Qu6XRuOz3LSp7xdH3ltCU8Za+lsA1JYLVER6HiOFiFXkgGa1QHiIvzCMQplI2uAKJMiY4A50ILwHs7bGG9eFqUQgx3A2gq74X+SAGrO5U7MQvfsAKF4XAzQD68QSDOoLbp3lAt/wxR3mMGssNmFEDTgAUQJQTTYDO7ticgEKLhwhMMRVpYDQIUwyeI8hhZzbbeipQYgNsIhmgE4xraIqk+AGJiNUQJwjCD1hsGSYfheIgQiIYXJuASRJmM8vgBUa4hdXi328yYgGdwQZSvuq4ehi0QxR9dYTVTUWIUQmEDtbESbzRBXBB4Yyb+QJTjSGx22U3DD/wMxQ+8xxXswRt8wjUInuKsboiamG19aXyBuCEQC9AIP/AZPhC4sBVxzVQeG2vgDR8YCYDgG1YhNZxoiWsIgi/2IA/iwojTwkMsFEN5VAhFRYzAc7hwFbXggBX5sB1+8MRNnNc5p3MAxcyuhOJ4ppvdX9ABuXET4qbtZocoLnZBFG+ch+AeNsED9/AFIRAY+YSSZjejBvCCKCdwGoJA+CII97EAA9Efg3SGYBRGoxkcZgIkwTGI8ge98RqCYHhClACcQRskMlqCZlvfCQEQZScqwQMCH6yFN0TDD0fRFAnCGiANrkKUH6iICvDRBKiOAZpe0fLBftRFXHf3/yG6k3ADYkIfoDzsKICV+ArR8cQGJDYbIBseQ5TP/2bt/wJo/hcD5bADHhCNrYhtNkA5PIILgiVwGgbQ7a6oh8PwxUeUdHcIcmABrqGAhWIygPY6CdEefY2XnfEpmQ52gwAVTKwmmyW8xTBAVBZ1yt2DK7oC2JAdc/EM5aPrztiJEkgXnuv8BdWTESwwR9FxAAAAAElFTkSuQmCC');
     background-color: #fff;
     background-size: cover;
     background-position: center;
     padding: 17px 24px !important;
     text-align: justify !important;
     color: #fff !important;
+}
+#chatbot{
+    height: auto !important;
+    max-height: 500px;
+}
+#type-emb label {
+    background: #ebeaea;
+}

config.py CHANGED Viewed

@@ -58,4 +58,14 @@ class CFG_APP:
         standalone question: What does UL (Unexpected Loss) stand for?
         language: English
     """
     DOC_METADATA_PATH = f"{DATA_FOLDER}/doc_metadata.json"

         standalone question: What does UL (Unexpected Loss) stand for?
         language: English
     """
+    HYDE_PROMPT = """
+        Important ! Give the output as a answer to the query followed by the detected language whatever the form of the query.
+        You must answer to the query in a short answer, 2 sentences maximum, using the right vocabulary of the context of the query. You must keep the question at the begining of the answer.
+        ---
+        query : C'est quoi les régles que les banques américaines doivent suivre ?
+        output : C'est quoi les régles que les banques américaines doivent suivre ? Les banques américaines doivent suivre un ensemble de réglementations fédérales et d'État imposées par des organismes tels que la Réserve fédérale et le Bureau de protection financière du consommateur.
+        language : French
+    """
     DOC_METADATA_PATH = f"{DATA_FOLDER}/doc_metadata.json"

utils.py CHANGED Viewed

@@ -1,4 +1,5 @@
 import json
 import openai
 import re
 from config import CFG_APP
@@ -37,6 +38,17 @@ def get_reformulation_prompt(query: str) -> list:
         }
     ]
 def make_pairs(lst):
     """From a list of even lenght, make tupple pairs
@@ -68,10 +80,10 @@ def make_html_source(paragraph, meta_doc, i):
 """
-def preprocess_message(text: str) -> str:
     return re.sub(
         r"\[doc (\d+)\]",
-        lambda match: f'<a href="#do-{match.group(1)}">{match.group(0)}</a>',
         text,
     )
@@ -96,6 +108,7 @@ def num_tokens_from_string(string: str, encoding_name: str) -> int:
 def chat(
     query: str,
     history: list,
     threshold: float = CFG_APP.THRESHOLD,
     k_total: int = CFG_APP.K_TOTAL,
 ) -> tuple:
@@ -108,13 +121,23 @@ def chat(
     Yields:
         tuple: chat gradio format, chat openai format, sources used.
     """
-    reformulated_query = openai.ChatCompletion.create(
-        model=CFG_APP.MODEL_NAME,
-        messages=get_reformulation_prompt(parse_glossary(query)),
-        temperature=0,
-        max_tokens=CFG_APP.MAX_TOKENS_REF_QUESTION,
-    )
     reformulated_query = reformulated_query["choices"][0]["message"]["content"]
     if len(reformulated_query.split("\n")) == 2:
@@ -134,6 +157,11 @@ def chat(
     messages = history + [{"role": "user", "content": query}]
     if len(sources) > 0:
         docs_string = []
         docs_html = []
@@ -150,6 +178,9 @@ def chat(
             docs_string.append(doc_content)
             docs_html.append(make_html_source(data, meta_doc, i))
         docs_string = "\n\n".join(
             [f"Query used for retrieval:\n{reformulated_query}"] + docs_string
         )
@@ -197,14 +228,12 @@ def chat(
         )
         complete_response = ""
         messages.pop()
         messages.append({"role": "assistant", "content": complete_response})
         for chunk in response:
             chunk_message = chunk["choices"][0]["delta"].get("content")
             if chunk_message:
                 complete_response += chunk_message
-                complete_response = preprocess_message(complete_response)
                 messages[-1]["content"] = complete_response
                 gradio_format = make_pairs([a["content"] for a in messages[1:]])
                 yield gradio_format, messages, docs_html

 import json
+from collections import defaultdict
 import openai
 import re
 from config import CFG_APP
         }
     ]
def get_hyde_prompt(query: str) -> list:
    """Build the chat messages that ask the model for a HyDE-style output.

    The single user message is ``CFG_APP.HYDE_PROMPT`` followed by a ``---``
    separator, the query, and an open ``output:`` slot for the model to fill.

    Args:
        query: The user question to embed in the prompt.

    Returns:
        A one-element list holding the ``{"role": "user", ...}`` message.
    """
    # The indentation inside the literals is part of the prompt text and is
    # kept exactly as it was in the original triple-quoted f-string.
    hyde_request = (
        f"{CFG_APP.HYDE_PROMPT}\n"
        "            ---\n"
        f"            query: {query}\n"
        "            output: "
    )
    user_message = {"role": "user", "content": hyde_request}
    return [user_message]
 def make_pairs(lst):
     """From a list of even lenght, make tupple pairs
 """
def preprocess_message(text: str, docs_url: dict) -> str:
    """Turn ``[doc N]`` citations in *text* into HTML links to their sources.

    The function is idempotent: a citation that is already wrapped in a link
    (i.e. immediately followed by ``</a>``) is left alone. This matters
    because ``chat`` calls it again on the whole accumulated answer for every
    streamed chunk, which would otherwise nest anchors inside anchors.

    Args:
        text: Answer text that may contain ``[doc N]`` citations.
        docs_url: Mapping from document number to its link. Keys may be the
            number as ``str`` or as ``int``. Values may be a bare URL or an
            already-built opening ``<a ...>`` tag (which is what ``chat``
            stores).

    Returns:
        *text* with every not-yet-linked, known citation wrapped in an anchor.
        Citations whose document has no link are returned unchanged instead
        of being given an empty ``href``.
    """

    def _link(match):
        citation = match.group(0)
        number = match.group(1)
        # Regex groups are always str, but the mapping may be keyed by int.
        # ``.get`` is used so a defaultdict is not filled with empty entries.
        target = docs_url.get(number) or docs_url.get(int(number))
        if not target:
            return citation
        if target.lstrip().startswith("<a "):
            # Value is already a full opening tag; do not nest it in href="".
            opening_tag = target
        else:
            opening_tag = f'<a href="{target}" target="_blank" class="pdf-link">'
        return f"{opening_tag}{citation}</a>"

    # The negative lookahead skips citations linked by a previous pass.
    return re.sub(r"\[doc (\d+)\](?!</a>)", _link, text)
 def chat(
     query: str,
     history: list,
+    query_mode : str,
     threshold: float = CFG_APP.THRESHOLD,
     k_total: int = CFG_APP.K_TOTAL,
 ) -> tuple:
     Yields:
         tuple: chat gradio format, chat openai format, sources used.
     """
+    if query_mode == 'Reformulation':
+        reformulated_query = openai.ChatCompletion.create(
+            model=CFG_APP.MODEL_NAME,
+            messages=get_reformulation_prompt(parse_glossary(query)),
+            temperature=0,
+            max_tokens=CFG_APP.MAX_TOKENS_REF_QUESTION,
+        )
+    else :
+        reformulated_query = openai.ChatCompletion.create(
+            model=CFG_APP.MODEL_NAME,
+            messages=get_hyde_prompt(parse_glossary(query)),
+            temperature=0,
+            max_tokens=CFG_APP.MAX_TOKENS_REF_QUESTION,
+        )
     reformulated_query = reformulated_query["choices"][0]["message"]["content"]
     if len(reformulated_query.split("\n")) == 2:
     messages = history + [{"role": "user", "content": query}]
+    if query_mode == 'HYDE' :
+        reformulated_query = reformulated_query.split("?")[0] + '?'
+    docs_url = defaultdict(str)
     if len(sources) > 0:
         docs_string = []
         docs_html = []
             docs_string.append(doc_content)
             docs_html.append(make_html_source(data, meta_doc, i))
+            url_doc = f'<a href="{meta_doc["url"]}#page={data["meta"]["page_number"]}" target="_blank" class="pdf-link">'
+            docs_url[i] = url_doc
         docs_string = "\n\n".join(
             [f"Query used for retrieval:\n{reformulated_query}"] + docs_string
         )
         )
         complete_response = ""
         messages.pop()
         messages.append({"role": "assistant", "content": complete_response})
         for chunk in response:
             chunk_message = chunk["choices"][0]["delta"].get("content")
             if chunk_message:
                 complete_response += chunk_message
+                complete_response = preprocess_message(complete_response, docs_url)
                 messages[-1]["content"] = complete_response
                 gradio_format = make_pairs([a["content"] for a in messages[1:]])
                 yield gradio_format, messages, docs_html