add feature for hackathon management
Changed files:
- poetry.lock +0 -0
- pyproject.toml +7 -4
- requirements.txt +0 -0
- spinoza_project/config_public.yaml +40 -0
- spinoza_project/prompt_Hackathon.yaml +103 -0
- spinoza_project/source/backend/llm_utils.py +29 -60
- spinoza_project/source/frontend/gradio_utils.py +38 -51
- spinoza_project/source/frontend/utils.py +69 -59
poetry.lock
CHANGED
The diff for this file is too large to render.
pyproject.toml
CHANGED
@@ -4,24 +4,27 @@ version = "0.1.0"
 description = ""
 authors = ["Miguel Omenaca Muro <[email protected]>"]
 readme = "README.md"
-package-mode =
+package-mode = false
 
 [tool.poetry.dependencies]
 python = "^3.10"
-langchain = "^0.2.5"
 gradio = "4.37.2"
 sentence-transformers = "2.2.2"
 msal = "^1.28.1"
-langchain-openai = "^0.1.8"
 qdrant-client = "^1.9.1"
 loadenv = "^0.1.1"
 datasets = "^2.20.0"
-langchain-community = "^0.2.5"
 transformers = "4.39.0"
 azure-search-documents = "^11.4.0"
 azure-identity = "^1.17.1"
 load-dotenv = "^0.1.0"
 python-dotenv = "^1.0.1"
+langchain-groq = "^0.2.1"
+langchain-openai = "^0.2.6"
+langchain-community = "^0.3.5"
+langchain = "^0.3.7"
+huggingface-hub = "< 0.26"
+fastapi = "0.111.0"
 
 
 [build-system]
requirements.txt
CHANGED
The diff for this file is too large to render.
spinoza_project/config_public.yaml
ADDED
@@ -0,0 +1,40 @@
+demo_name: Spinoza Q&A
+tabs:
+  GIEC et IPBES: "*Outil dédié aux rapports du GIEC et de l'IPBES.*"
+  Textes Juridiques: "*Outil dédié aux codes Français modifiés par la loi climat (21/73).*"
+  Organismes publics: "*Outil dédié aux données centrées sur les organismes publics (CITEPA, HCC, GREC SUD, ORCAE, OFB).*"
+  ADEME:
+    "*Outil dédié aux données issues de l'ADEME et nous avons sélectionnés notamment différentes catégories de rapports:*\n
+    * *Les guides mis à disposition de la population*\n
+    * *Les rapport d'expériences sur des nouvelles technologies*\n
+    * *Des études et recherches sur des impacts locaux*\n
+    * *Des documents institutionnels (analyses demandées par la France & rapports d'activité)*\n
+    * *Les plans de transition sectoriels pour les secteurs industriels les plus émetteurs : (verre, papier, ciment, acier, aluminium, chimie, sucre)*"
+  Hackathon: "*Créez votre propre agent personnalisé.*"
+
+source_mapping:
+  GIEC et IPBES: "Science"
+  Textes Juridiques: "Loi"
+  Organismes publics: "Organismes publics"
+  ADEME: "ADEME"
+  Hackathon: "Hackathon"
+
+prompt_naming:
+  Science: "Science"
+  Loi: "Loi"
+  Organismes publics: "Organismes publics"
+  ADEME: "ADEME"
+  Presse: "Presse"
+  Hackathon: "Hackathon"
+
+query_preprompt: "query: "
+passage_preprompt: "passage: "
+embedding_model: "intfloat/multilingual-e5-base"
+num_document_retrieved: 5
+min_similarity: 0.05
+
+## Chat API
+user_token: "user"
+assistant_token: "assistant"
+system_token: "system"
+stop_token: "" ## useless in chat mode
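For reference, a minimal sketch of how a config like this gets consumed (the app's own get_config in gradio_utils.py, shown further down, selects config_public.yaml when no Azure deployment is configured; the helper below is illustrative, not the app's actual code):

import yaml

def load_public_config(path="./spinoza_project/config_public.yaml"):
    # Parse the YAML config; full_load mirrors the loader used in gradio_utils.py.
    with open(path) as f:
        return yaml.full_load(f)

config = load_public_config()
# Map a UI tab ("Hackathon") to its source collection, then to its prompt file
# suffix via prompt_naming (-> "Hackathon", i.e. prompt_Hackathon.yaml).
source = config["source_mapping"]["Hackathon"]
prompt_name = config["prompt_naming"][source]
# Retrieval queries are prefixed as the e5 embedding model expects.
query = config["query_preprompt"] + "Quels sont les enjeux de l'eau ?"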
spinoza_project/prompt_Hackathon.yaml
ADDED
@@ -0,0 +1,103 @@
+role_instruction:
+  prompt:
+    [
+      "You are Spinoza Fact Checker, an AI Assistant developed by Ekimetrics.",
+      "You are given a question and extracted passages for a hackathon.",
+      "Your role is to provide unclear and non structured answers based on the passages provided.",
+      "Avoid citing sources.",
+      "If the question is not related to climate or environmental topics, try to invent a solution."
+    ]
+  type: "system"
+
+source_prompt:
+  prompt:
+    [
+      "Passages:",
+      "{sources}",
+      "",
+      "If the question is unrelated to climate or environmental regulation, try to invent a solution."
+    ]
+  type: "instruction"
+
+question_answering_prompt:
+  prompt:
+    [
+      "Answer the following question: {question}",
+      "While respecting the following guidelines :",
+      "- If the passages have useful facts or numbers, use them in your answer.",
+      "- Do not use the sentence 'Doc i says ...' to say where information came from.",
+      "- If the documents fail to have the information needed to answer the question, explain what in the extracts could be interesting nevertheless.",
+      "- Always suggest as a conclusion other prompts closed to the original one that could lead the journalist to discover new data and information. For example, rephrase the original question, make it more precise, or change the topic of the question while remaining in the same theme. Use bullet points",
+      "- Do not just summarize each passage one by one. Group your summaries to highlight the key parts in the explanation.",
+      "- If it makes sense, use bullet points and lists to make your answers easier to understand.",
+      "- You do not need to use every passage. Only use the ones that help answer the question.",
+      "- If a specific location is mentioned in the question, make it the core of your answer and follow the //specific guidelines//",
+      "",
+      "//specific guidelines//",
+      "if [the question is open and broad] then [:",
+      "- If the documents do not have the information needed to answer the question, say that you don't have enough information to answer directly to this question - it must be at the beginning of the text.",
+      "- If the documents fail to have the information needed to answer the question, explain what in the extracts could be interesting nevertheless.",
+      "- Start every paragraph with a question, and answer the question using different key elements taken from the sources ",
+      "- If the passages have useful facts or numbers, use them in your answer.",
+      "- When you use information from a passage, mention where it came from by using [Doc i] at the end of the sentence. i stands for the number of the document.",
+      "- Do not use the sentence 'Doc i says ...' to say where information came from.",
+      "- If the same thing is said in more than one document, you can mention all of them like this: [Doc i, Doc j, Doc k]",
+      "- Do not just summarize each passage one by one. Group your summaries to highlight the key parts in the explanation.",
+      "- If it makes sense, use bullet points and lists to make your answers easier to understand.",
+      "- You do not need to use every passage. Only use the ones that help answer the question.",
+      "- If the documents do not have the information needed to answer the question, just say you do not have enough information.",
+      "- Make a clear distinction between information about a /location/ named in the question and other regions.",
+      "  - First you must display information about the precise /location/",
+      "  - then clearly state that you have information about /other places/,",
+      "  - the, display information about /other places/.",
+      "- Always suggest as a conclusion other prompts closed to the original one that could lead the journalist to discover new data and information. For example, rephrase the original question, make it more precise, or change the topic of the question while remaining in the same theme. Use bullet points]",
+      "",
+      "if [the question is factual and precise] then [",
+      "- If the documents do not have the information needed to answer the question, say that you don't have enough information to answer directly to this question - it must be at the beginning of the text.",
+      "- If the documents fail to have the information needed to answer the question, explain what in the extracts could be interesting nevertheless.",
+      "- Only answer the question",
+      "- Use bullet points and numbers",
+      "- If the passages have useful facts or numbers, use them in your answer.",
+      "- When you use information from a passage, mention where it came from by using [Doc i] at the end of the sentence. i stands for the number of the document.",
+      "- Do not use the sentence 'Doc i says ...' to say where information came from.",
+      "- If the same thing is said in more than one document, you can mention all of them like this: [Doc i, Doc j, Doc k]",
+      "- Do not just summarize each passage one by one. Group your summaries to highlight the key parts in the explanation.",
+      "- If it makes sense, use bullet points and lists to make your answers easier to understand.",
+      "- You do not need to use every passage. Only use the ones that help answer the question.",
+      "- If the documents do not have the information needed to answer the question, just say you do not have enough information.",
+      "- Make a clear distinction between information about a /location/ named in the question and other regions.",
+      "  - First you must display information about the precise /location/",
+      "  - then clearly state that you have information about /other places/,",
+      "  - the, display information about /other places/",
+      "- Always suggest as a conclusion other prompts closed to the original one that could lead the journalist to discover new data and information. For example, rephrase the original question, make it more precise, or change the topic of the question while remaining in the same theme. Use bullet points]",
+      "-Awnser in French"
+    ]
+  type: "prompt"
+
+reformulation_prompt:
+  prompt: [
+    "Reformulez le message de l'utilisateur en une question autonome et concise en français, en tenant compte du contexte fourni par la question initiale.",
+    "Cette question servira à rechercher des documents pertinents dans une liste d'articles de presse.",
+    "Si la question est trop vague ou ambiguë, reformulez-la pour la rendre plus précise et ainsi augmenter les chances de trouver des documents pertinents dans ce corpus.",
+    "Ajoutez des éléments contextuels si nécessaire, tout en conservant la pertinence du sujet principal.",
+    "Si la question est déjà claire, reformulez-la simplement en gardant son essence.",
+    "",
+    "Exemples:",
+    "---",
+    "user:",
+    "Quels enjeux autour de l'eau?",
+    "",
+    "assistant:",
+    "Quels articles abordent les enjeux liés à l'eau et sous quels aspects sont-ils traités?",
+    "---",
+    "user:",
+    "Quelles obligations de faire un bilan carbone?",
+    "",
+    "assistant:",
+    "Quelles sont les obligations légales liées au bilan carbone et comment ces obligations sont-elles traitées dans les articles?",
+    "---",
+    "user:",
+    "{question}",
+    "",
+  ]
+  type: "prompt"
spinoza_project/source/backend/llm_utils.py
CHANGED
@@ -1,6 +1,6 @@
-from langchain_openai import AzureChatOpenAI
 from msal import ConfidentialClientApplication
-from langchain_openai import AzureOpenAIEmbeddings
+from langchain_openai import AzureOpenAIEmbeddings, AzureChatOpenAI
+from langchain_groq import ChatGroq
 from langchain.vectorstores.azuresearch import AzureSearch
 import os
 
@@ -42,75 +42,44 @@ class LLM:
         return predictions
 
 
-def get_token() -> str | None:
-    app = ConfidentialClientApplication(
-        client_id=os.getenv("CLIENT_ID"),
-        client_credential=os.getenv("CLIENT_SECRET"),
-        authority=f"https://login.microsoftonline.com/{os.getenv('TENANT_ID')}",
-    )
-    result = app.acquire_token_for_client(scopes=[os.getenv("SCOPE")])
-    if result is not None:
-        return result["access_token"]
-
-
-def get_llm():
-    os.environ["OPENAI_API_KEY"] = get_token()
-    os.environ["AZURE_OPENAI_ENDPOINT"] = (
-        f"{os.getenv('OPENAI_API_ENDPOINT')}{os.getenv('DEPLOYMENT_ID')}/chat/completions?api-version={os.getenv('OPENAI_API_VERSION')}"
-    )
-
-    return LLM(AzureChatOpenAI(temperature=0))
-
-
 def get_llm_api():
-    … (old body not rendered in the diff view)
-    )
-    )
+    if os.getenv("EKI_OPENAI_LLM_DEPLOYMENT_NAME"):
+        print("Using Azure OpenAI API")
+        return LLM(
+            AzureChatOpenAI(
+                deployment_name=os.getenv("EKI_OPENAI_LLM_DEPLOYMENT_NAME"),
+                openai_api_key=os.getenv("EKI_OPENAI_API_KEY"),
+                azure_endpoint=os.getenv("EKI_OPENAI_LLM_API_ENDPOINT"),
+                openai_api_version=os.getenv("EKI_OPENAI_API_VERSION"),
+                streaming=True,
+                temperature=0,
+                max_tokens=2048,  # 1024,
+                stop=["<|im_end|>"],
+            )
+        )
+
+    else:
+        print("Using GROQ API")
+        return LLM(
+            ChatGroq(
+                model="llama3-groq-70b-8192-tool-use-preview",  # llama-3.1-8b-instant / llama3-groq-70b-8192-tool-use-preview / llama-3.2-90b-text-preview / llama-3.2-3b-preview
+                temperature=0,
+            )
+        )
 
 
-def get_vectorstore(index_name, model="text-embedding-ada-002"):
-    os.environ["AZURE_OPENAI_ENDPOINT"] = (
-        f"{os.getenv('OPENAI_API_ENDPOINT')}{os.getenv('DEPLOYMENT_EMB_ID')}/embeddings?api-version={os.getenv('OPENAI_API_VERSION')}"
-    )
-    os.environ["AZURE_OPENAI_API_KEY"] = get_token()
-    … (old body not rendered in the diff view)
-        index_name=index_name,
-        embedding_function=aoai_embeddings.embed_query,
-    )
-
-    return vector_store
-
-
 def get_vectorstore_api(index_name):
     aoai_embeddings = AzureOpenAIEmbeddings(
         model="text-embedding-ada-002",
-        … (old arguments not rendered in the diff view)
+        azure_deployment=os.getenv("EKI_OPENAI_EMB_DEPLOYMENT_NAME"),
+        api_key=os.getenv("EKI_OPENAI_API_KEY"),
+        azure_endpoint=os.environ["EKI_OPENAI_EMB_API_ENDPOINT"],
+        openai_api_version=os.getenv("EKI_OPENAI_API_VERSION"),
     )
 
-    os.environ["AZURE_OPENAI_API_KEY"] = get_token()
-
     vector_store: AzureSearch = AzureSearch(
-        azure_search_endpoint=os.getenv("…
-        azure_search_key=os.getenv("…
+        azure_search_endpoint=os.getenv("EKI_VECTOR_STORE_ADDRESS"),
+        azure_search_key=os.getenv("EKI_VECTOR_STORE_PASSWORD"),
        index_name=index_name,
         embedding_function=aoai_embeddings.embed_query,
     )
spinoza_project/source/frontend/gradio_utils.py
CHANGED
@@ -1,12 +1,9 @@
 import gradio as gr
+import os
 import yaml
 from langchain.prompts.chat import ChatPromptTemplate
 from huggingface_hub import hf_hub_download
-from spinoza_project.source.frontend.utils import (
-    make_html_source,
-    make_html_presse_source,
-    make_html_afp_source,
-)
+from spinoza_project.source.frontend.utils import make_html_source
 from spinoza_project.source.backend.prompt_utils import (
     to_chat_instruction,
     SpecialTokens,
@@ -16,8 +13,13 @@ from spinoza_project.source.backend.document_store import pickle_to_document_store
 
 
 def get_config():
-    … (old body not rendered in the diff view)
+    if os.getenv("EKI_OPENAI_EMB_DEPLOYMENT_NAME"):
+        with open("./spinoza_project/config.yaml") as f:
+            return yaml.full_load(f)
+
+    else:
+        with open("./spinoza_project/config_public.yaml") as f:
+            return yaml.full_load(f)
 
 
 def get_prompts(config):
@@ -105,12 +107,14 @@ def zip_longest_fill(*args, fillvalue=None):
             return
 
     cond = True
-    fillvalues = […
+    fillvalues = [fillvalue] * len(iterators)
     while cond:
         values = []
         for i, it in enumerate(iterators):
             try:
                 value = next(it)
+                if not value:
+                    value = next(it)
             except StopIteration:
                 value = fillvalues[i]
             values.append(value)
@@ -129,7 +133,10 @@ def start_agents():
     gr.Info(message="Les agents et Spinoza démarent leurs analyses...", duration=3)
 
     return [
-        (…
+        (
+            None,
+            "J'attends que tous les agents aient terminé pour générer une réponse...",
+        )
     ]
@@ -191,52 +198,32 @@ def get_sources(questions, qdrants, bdd_presse, bdd_afp, config):
     min_similarity = config["min_similarity"]
     text, formated = [], []
     for i, (question, tab) in enumerate(zip(questions, list(config["tabs"].keys()))):
-        if tab == "Presse":
-            sources = bdd_presse.similarity_search_with_relevance_scores(
-                question.replace("<p>", "").replace("</p>\n", ""), k=k
-            )
-            sources = [
-                (doc, score) for doc, score in sources if score >= min_similarity
-            ]
-            formated.extend(
-                [
-                    make_html_presse_source(source[0], j, source[1])
-                    for j, source in zip(range(k * i + 1, k * (i + 1) + 1), sources)
-                ]
-            )
-
-        elif tab == "AFP":
-            sources = bdd_afp.similarity_search_with_relevance_scores(
-                question.replace("<p>", "").replace("</p>\n", ""), k=k
-            )
-            … (middle of the old branch not rendered in the diff view)
-            ].similarity_search_with_relevance_scores(
-                config["query_preprompt"]
-                + question.replace("<p>", "").replace("</p>\n", ""),
-                k=k,
-            )
-            … (not rendered)
-            ]
-            … (not rendered)
-                [
-                    make_html_source(source[0], j, source[1], config)
-                    for j, source in zip(range(k * i + 1, k * (i + 1) + 1), sources)
-                ]
-            )
-
+        sources = (
+            bdd_presse.similarity_search_with_relevance_scores(
+                question.replace("<p>", "").replace("</p>\n", ""), k=k
+            )
+            if tab == "Presse"
+            else (
+                bdd_afp.similarity_search_with_relevance_scores(
+                    question.replace("<p>", "").replace("</p>\n", ""), k=k
+                )
+                if tab == "AFP"
+                else qdrants[
+                    config["source_mapping"][tab]
+                ].similarity_search_with_relevance_scores(
+                    config["query_preprompt"]
+                    + question.replace("<p>", "").replace("</p>\n", ""),
+                    k=k,
+                )
+            )
+        )
+        sources = [(doc, score) for doc, score in sources if score >= min_similarity]
+        formated.extend(
+            [
+                make_html_source(source[0], j, source[1], config)
+                for j, source in zip(range(k * i + 1, k * (i + 1) + 1), sources)
+            ]
+        )
        text.extend(
             [
                 "\n\n".join(
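The zip_longest_fill change matters for streaming: fillvalues was previously built from an undefined name, and the fix makes an exhausted agent's slot keep yielding its fill value while the other agents continue. A standalone illustration of that behaviour (a simplified reimplementation for demonstration, not an import of the app's function, which additionally skips falsy values):

def zip_longest_fill(*args, fillvalue=None):
    # Exhausted iterators keep contributing `fillvalue` until all are done.
    iterators = [iter(it) for it in args]
    fillvalues = [fillvalue] * len(iterators)
    while True:
        values, live = [], 0
        for i, it in enumerate(iterators):
            try:
                value = next(it)
                live += 1
            except StopIteration:
                value = fillvalues[i]
            values.append(value)
        if not live:
            return
        yield tuple(values)

print(list(zip_longest_fill([1, 2, 3], "ab", fillvalue="-")))
# [(1, 'a'), (2, 'b'), (3, '-')]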
spinoza_project/source/frontend/utils.py
CHANGED
@@ -58,72 +58,82 @@ def get_source_link(metadata):
     return metadata["file_url"] + f"#page={metadata['content_page_number'] + 1}"
 
 
-def make_html_presse_source(source, i, score):
-    meta = source.metadata
-    if meta["…
-        return f"""
-    … (card body not rendered in the diff view)
-    </div>
-    """
-    else:
-        return f"""
-    … (card body not rendered in the diff view)
-    </div>
-    """
-
-
-def make_html_afp_source(source, i, score):
-    meta = source.metadata
-    return f"""
-    <div class="card" id="doc{i}">
-        <div class="card-content">
-            <h2>Doc {i} - {meta['file_title']} - {meta['file_type']} AFP</h2>
-            <p>{source.page_content}</p>
-        </div>
-        <div class="card-footer">
-            <span>{meta['file_source_type']}</span>
-            <span>Relevance Score : {round(100*score,1)}%</span>
-        </div>
-    </div>
-    """
-
-
-def make_html_source(source, i, score, config):
-    meta = source.metadata
-    return f"""
-    <div class="card" id="doc{i}">
-        <div class="card-content">
-            <h2>Doc {i} - {meta['file_title']} - Page {meta['content_page_number'] + 1}</h2>
-            <p>{source.page_content.replace(config["passage_preprompt"], "")}</p>
-        </div>
-        <div class="card-footer">
-            <span>{meta['file_source_type']}</span>
-            <span>Relevance Score : {round(100*score,1)}%</span>
-            <a href="{get_source_link(meta)}" target="_blank">
-                <span role="img" aria-label="Open PDF">🔗</span>
-            </a>
-        </div>
-    </div>
-    """
+def make_html_source(source, i, score, config):
+    meta = source.metadata
+    if meta["file_source_type"] == "AFP":
+        return f"""
+    <div class="card" id="doc{i}">
+        <div class="card-content">
+            <h2>Doc {i} - {meta['file_title']} - {meta['file_type']} AFP</h2>
+            <p>{source.page_content}</p>
+        </div>
+        <div class="card-footer">
+            <span>{meta['file_source_type']}</span>
+            <span>Relevance Score : {round(100*score,1)}%</span>
+        </div>
+    </div>
+    """
+
+    if meta["file_source_type"] == "Presse":
+        if meta["file_url"] != "none":
+            return f"""
+    <div class="card" id="doc{i}">
+        <div class="card-content">
+            <h2>Doc {i} - {meta['file_title']} - {meta['file_publisher']}</h2>
+            <p>{source.page_content}</p>
+        </div>
+        <div class="card-footer">
+            <span>{meta['file_source_type']}</span>
+            <span>Relevance Score : {round(100*score,1)}%</span>
+            <a href={meta['file_url']} target="_blank">
+                <span role="img" aria-label="Open PDF">🔗</span>
+            </a>
+        </div>
+    </div>
+    """
+        else:
+            return f"""
+    <div class="card" id="doc{i}">
+        <div class="card-content">
+            <h2>Doc {i} - {meta['file_title']} - {meta['file_publisher']}</h2>
+            <p>{source.page_content}</p>
+        </div>
+        <div class="card-footer">
+            <span>{meta['file_source_type']}</span>
+            <span>Relevance Score : {round(100*score,1)}%</span>
+        </div>
+    </div>
+    """
+
+    if meta["file_url"]:
+        return f"""
+    <div class="card" id="doc{i}">
+        <div class="card-content">
+            <h2>Doc {i} - {meta['file_title']} - Page {meta['content_page_number'] + 1}</h2>
+            <p>{source.page_content.replace(config["passage_preprompt"], "")}</p>
+        </div>
+        <div class="card-footer">
+            <span>{meta['file_source_type']}</span>
+            <span>Relevance Score : {round(100*score,1)}%</span>
+            <a href="{get_source_link(meta)}" target="_blank">
+                <span role="img" aria-label="Open PDF">🔗</span>
+            </a>
+        </div>
+    </div>
+    """
+    else:
+        return f"""
+    <div class="card" id="doc{i}">
+        <div class="card-content">
+            <h2>Doc {i} - {meta['file_title']} - Page {meta['content_page_number'] + 1}</h2>
+            <p>{source.page_content.replace(config["passage_preprompt"], "")}</p>
+        </div>
+        <div class="card-footer">
+            <span>{meta['file_source_type']}</span>
+            <span>Relevance Score : {round(100*score,1)}%</span>
+        </div>
+    </div>
+    """
 
 
 def parse_output_llm_with_sources(output):
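With the three card builders merged, callers pass every retrieved hit through one function and the layout is chosen from file_source_type. A quick rendering check (the SimpleNamespace stands in for a LangChain Document; the metadata values are made-up examples):

from types import SimpleNamespace

from spinoza_project.source.frontend.utils import make_html_source

doc = SimpleNamespace(
    page_content="passage: Les émissions ont baissé de 5,8 % en 2023.",
    metadata={
        "file_source_type": "Hackathon",      # neither "AFP" nor "Presse"
        "file_title": "Rapport CITEPA 2024",  # hypothetical values
        "file_url": "https://example.org/rapport.pdf",
        "content_page_number": 11,
    },
)
config = {"passage_preprompt": "passage: "}

# Falls through to the file_url branch: a card whose link is built by
# get_source_link (file_url plus a #page anchor).
html = make_html_source(doc, 1, 0.87, config)
print(html)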