Commit ae465d3 by momenaca · 1 Parent(s): 648fe8a

add feature for hackathon management
poetry.lock CHANGED
The diff for this file is too large to render. See raw diff
 
pyproject.toml CHANGED
@@ -4,24 +4,27 @@ version = "0.1.0"
 description = ""
 authors = ["Miguel Omenaca Muro <[email protected]>"]
 readme = "README.md"
-package-mode = true
+package-mode = false

 [tool.poetry.dependencies]
 python = "^3.10"
-langchain = "^0.2.5"
 gradio = "4.37.2"
 sentence-transformers = "2.2.2"
 msal = "^1.28.1"
-langchain-openai = "^0.1.8"
 qdrant-client = "^1.9.1"
 loadenv = "^0.1.1"
 datasets = "^2.20.0"
-langchain-community = "^0.2.5"
 transformers = "4.39.0"
 azure-search-documents = "^11.4.0"
 azure-identity = "^1.17.1"
 load-dotenv = "^0.1.0"
 python-dotenv = "^1.0.1"
+langchain-groq = "^0.2.1"
+langchain-openai = "^0.2.6"
+langchain-community = "^0.3.5"
+langchain = "^0.3.7"
+huggingface-hub = "< 0.26"
+fastapi = "0.111.0"

 [build-system]
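
The dependency changes move the whole langchain stack to the 0.3.x line, add langchain-groq for the public fallback, and flip package-mode to false so Poetry only manages dependencies instead of building the project as a package. A minimal sketch to confirm the versions Poetry actually resolved, assuming the environment was created with poetry install:

from importlib.metadata import version

# Print the installed version of each langchain package pinned above.
for pkg in ["langchain", "langchain-openai", "langchain-community", "langchain-groq"]:
    print(pkg, version(pkg))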
requirements.txt CHANGED
The diff for this file is too large to render. See raw diff
 
spinoza_project/config_public.yaml ADDED
@@ -0,0 +1,40 @@
+demo_name: Spinoza Q&A
+tabs:
+  GIEC et IPBES: "*Outil dédié aux rapports du GIEC et de l'IPBES.*"
+  Textes Juridiques: "*Outil dédié aux codes Français modifiés par la loi climat (21/73).*"
+  Organismes publics: "*Outil dédié aux données centrées sur les organismes publics (CITEPA, HCC, GREC SUD, ORCAE, OFB).*"
+  ADEME:
+    "*Outil dédié aux données issues de l'ADEME, dont nous avons sélectionné notamment différentes catégories de rapports :*\n
+    * *Les guides mis à disposition de la population*\n
+    * *Les rapports d'expérience sur des nouvelles technologies*\n
+    * *Des études et recherches sur des impacts locaux*\n
+    * *Des documents institutionnels (analyses demandées par la France & rapports d'activité)*\n
+    * *Les plans de transition sectoriels pour les secteurs industriels les plus émetteurs (verre, papier, ciment, acier, aluminium, chimie, sucre)*"
+  Hackathon: "*Créez votre propre agent personnalisé.*"
+
+source_mapping:
+  GIEC et IPBES: "Science"
+  Textes Juridiques: "Loi"
+  Organismes publics: "Organismes publics"
+  ADEME: "ADEME"
+  Hackathon: "Hackathon"
+
+prompt_naming:
+  Science: "Science"
+  Loi: "Loi"
+  Organismes publics: "Organismes publics"
+  ADEME: "ADEME"
+  Presse: "Presse"
+  Hackathon: "Hackathon"
+
+query_preprompt: "query: "
+passage_preprompt: "passage: "
+embedding_model: "intfloat/multilingual-e5-base"
+num_document_retrieved: 5
+min_similarity: 0.05
+
+## Chat API
+user_token: "user"
+assistant_token: "assistant"
+system_token: "system"
+stop_token: "" ## useless in chat mode
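
This public config mirrors the private config.yaml but adds the Hackathon tab. A minimal sketch of how the frontend consumes it, assuming the file path shown above (the actual loading happens in get_config in gradio_utils.py, patched below):

import yaml

# Load the public config and resolve a tab name to its source and prompt,
# the same lookups the Gradio frontend performs.
with open("./spinoza_project/config_public.yaml") as f:
    config = yaml.full_load(f)

tab = "Hackathon"
source = config["source_mapping"][tab]    # -> "Hackathon"
prompt = config["prompt_naming"][source]  # -> "Hackathon"
query = config["query_preprompt"] + "Quels enjeux autour de l'eau ?"
print(source, prompt, query)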
spinoza_project/prompt_Hackathon.yaml ADDED
@@ -0,0 +1,103 @@
+role_instruction:
+  prompt:
+    [
+      "You are Spinoza Fact Checker, an AI Assistant developed by Ekimetrics.",
+      "You are given a question and extracted passages for a hackathon.",
+      "Your role is to provide unclear and unstructured answers based on the passages provided.",
+      "Avoid citing sources.",
+      "If the question is not related to climate or environmental topics, try to invent a solution."
+    ]
+  type: "system"
+
+source_prompt:
+  prompt:
+    [
+      "Passages:",
+      "{sources}",
+      "",
+      "If the question is unrelated to climate or environmental regulation, try to invent a solution."
+    ]
+  type: "instruction"
+
+question_answering_prompt:
+  prompt:
+    [
+      "Answer the following question: {question}",
+      "while respecting the following guidelines:",
+      "- If the passages contain useful facts or numbers, use them in your answer.",
+      "- Do not use the sentence 'Doc i says ...' to state where information came from.",
+      "- If the documents lack the information needed to answer the question, explain what in the extracts could nevertheless be interesting.",
+      "- As a conclusion, always suggest other prompts close to the original one that could lead the journalist to discover new data and information. For example, rephrase the original question, make it more precise, or change the topic of the question while remaining in the same theme. Use bullet points.",
+      "- Do not just summarize each passage one by one. Group your summaries to highlight the key parts in the explanation.",
+      "- If it makes sense, use bullet points and lists to make your answers easier to understand.",
+      "- You do not need to use every passage. Only use the ones that help answer the question.",
+      "- If a specific location is mentioned in the question, make it the core of your answer and follow the //specific guidelines//.",
+      "",
+      "//specific guidelines//",
+      "if [the question is open and broad] then [:",
+      "- If the documents do not have the information needed to answer the question, say that you do not have enough information to answer this question directly - this must come at the beginning of the text.",
+      "- If the documents lack the information needed to answer the question, explain what in the extracts could nevertheless be interesting.",
+      "- Start every paragraph with a question, and answer it using different key elements taken from the sources.",
+      "- If the passages contain useful facts or numbers, use them in your answer.",
+      "- When you use information from a passage, mention where it came from by adding [Doc i] at the end of the sentence, where i is the number of the document.",
+      "- Do not use the sentence 'Doc i says ...' to state where information came from.",
+      "- If the same thing is said in more than one document, you can mention all of them like this: [Doc i, Doc j, Doc k].",
+      "- Do not just summarize each passage one by one. Group your summaries to highlight the key parts in the explanation.",
+      "- If it makes sense, use bullet points and lists to make your answers easier to understand.",
+      "- You do not need to use every passage. Only use the ones that help answer the question.",
+      "- If the documents do not have the information needed to answer the question, just say you do not have enough information.",
+      "- Make a clear distinction between information about a /location/ named in the question and other regions:",
+      "  - first, display information about the precise /location/,",
+      "  - then clearly state that you have information about /other places/,",
+      "  - then display information about /other places/.",
+      "- As a conclusion, always suggest other prompts close to the original one that could lead the journalist to discover new data and information. For example, rephrase the original question, make it more precise, or change the topic of the question while remaining in the same theme. Use bullet points.]",
+      "",
+      "if [the question is factual and precise] then [",
+      "- If the documents do not have the information needed to answer the question, say that you do not have enough information to answer this question directly - this must come at the beginning of the text.",
+      "- If the documents lack the information needed to answer the question, explain what in the extracts could nevertheless be interesting.",
+      "- Only answer the question.",
+      "- Use bullet points and numbers.",
+      "- If the passages contain useful facts or numbers, use them in your answer.",
+      "- When you use information from a passage, mention where it came from by adding [Doc i] at the end of the sentence, where i is the number of the document.",
+      "- Do not use the sentence 'Doc i says ...' to state where information came from.",
+      "- If the same thing is said in more than one document, you can mention all of them like this: [Doc i, Doc j, Doc k].",
+      "- Do not just summarize each passage one by one. Group your summaries to highlight the key parts in the explanation.",
+      "- If it makes sense, use bullet points and lists to make your answers easier to understand.",
+      "- You do not need to use every passage. Only use the ones that help answer the question.",
+      "- If the documents do not have the information needed to answer the question, just say you do not have enough information.",
+      "- Make a clear distinction between information about a /location/ named in the question and other regions:",
+      "  - first, display information about the precise /location/,",
+      "  - then clearly state that you have information about /other places/,",
+      "  - then display information about /other places/.",
+      "- As a conclusion, always suggest other prompts close to the original one that could lead the journalist to discover new data and information. For example, rephrase the original question, make it more precise, or change the topic of the question while remaining in the same theme. Use bullet points.]",
+      "- Answer in French."
+    ]
+  type: "prompt"
+
+reformulation_prompt:
+  prompt: [
+    "Reformulez le message de l'utilisateur en une question autonome et concise en français, en tenant compte du contexte fourni par la question initiale.",
+    "Cette question servira à rechercher des documents pertinents dans une liste d'articles de presse.",
+    "Si la question est trop vague ou ambiguë, reformulez-la pour la rendre plus précise et ainsi augmenter les chances de trouver des documents pertinents dans ce corpus.",
+    "Ajoutez des éléments contextuels si nécessaire, tout en conservant la pertinence du sujet principal.",
+    "Si la question est déjà claire, reformulez-la simplement en gardant son essence.",
+    "",
+    "Exemples:",
+    "---",
+    "user:",
+    "Quels enjeux autour de l'eau?",
+    "",
+    "assistant:",
+    "Quels articles abordent les enjeux liés à l'eau et sous quels aspects sont-ils traités?",
+    "---",
+    "user:",
+    "Quelles obligations de faire un bilan carbone?",
+    "",
+    "assistant:",
+    "Quelles sont les obligations légales liées au bilan carbone et comment ces obligations sont-elles traitées dans les articles?",
+    "---",
+    "user:",
+    "{question}",
+    "",
+  ]
+  type: "prompt"
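
Each section carries a prompt (a list of lines) and a type, with {sources} and {question} left as placeholders. A minimal sketch of the assembly, assuming the lines are simply newline-joined (the repository's actual wiring goes through to_chat_instruction and SpecialTokens in prompt_utils.py, which this diff does not show):

import yaml
from langchain.prompts.chat import ChatPromptTemplate

with open("./spinoza_project/prompt_Hackathon.yaml") as f:
    prompts = yaml.full_load(f)

# Newline-join each section; {sources} and {question} stay as template slots.
system = "\n".join(prompts["role_instruction"]["prompt"])
user = "\n".join(
    prompts["source_prompt"]["prompt"] + prompts["question_answering_prompt"]["prompt"]
)

template = ChatPromptTemplate.from_messages([("system", system), ("user", user)])
messages = template.format_messages(
    sources="[Doc 1] Extrait d'un rapport ADEME...",
    question="Quels enjeux autour de l'eau ?",
)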
spinoza_project/source/backend/llm_utils.py CHANGED
@@ -1,6 +1,6 @@
-from langchain_openai import AzureChatOpenAI
 from msal import ConfidentialClientApplication
-from langchain_openai import AzureOpenAIEmbeddings
+from langchain_openai import AzureOpenAIEmbeddings, AzureChatOpenAI
+from langchain_groq import ChatGroq
 from langchain.vectorstores.azuresearch import AzureSearch
 import os
@@ -42,75 +42,44 @@ class LLM:
         return predictions


-def get_token() -> str | None:
-    app = ConfidentialClientApplication(
-        client_id=os.getenv("CLIENT_ID"),
-        client_credential=os.getenv("CLIENT_SECRET"),
-        authority=f"https://login.microsoftonline.com/{os.getenv('TENANT_ID')}",
-    )
-    result = app.acquire_token_for_client(scopes=[os.getenv("SCOPE")])
-    if result is not None:
-        return result["access_token"]
-
-
-def get_llm():
-    os.environ["OPENAI_API_KEY"] = get_token()
-    os.environ["AZURE_OPENAI_ENDPOINT"] = (
-        f"{os.getenv('OPENAI_API_ENDPOINT')}{os.getenv('DEPLOYMENT_ID')}/chat/completions?api-version={os.getenv('OPENAI_API_VERSION')}"
-    )
-
-    return LLM(AzureChatOpenAI(temperature=0))
-
-
 def get_llm_api():
-    return LLM(
-        AzureChatOpenAI(
-            deployment_name=os.getenv("DEPLOYMENT_NAME"),
-            openai_api_key=os.getenv("OPENAI_API_KEY"),
-            azure_endpoint=os.getenv("OPENAI_API_BASE"),
-            openai_api_version=os.getenv("OPENAI_API_VERSION"),
-            streaming=True,
-            temperature=0,
-            max_tokens=2048,  # 1024,
-            stop=["<|im_end|>"],
+    if os.getenv("EKI_OPENAI_LLM_DEPLOYMENT_NAME"):
+        print("Using Azure OpenAI API")
+        return LLM(
+            AzureChatOpenAI(
+                deployment_name=os.getenv("EKI_OPENAI_LLM_DEPLOYMENT_NAME"),
+                openai_api_key=os.getenv("EKI_OPENAI_API_KEY"),
+                azure_endpoint=os.getenv("EKI_OPENAI_LLM_API_ENDPOINT"),
+                openai_api_version=os.getenv("EKI_OPENAI_API_VERSION"),
+                streaming=True,
+                temperature=0,
+                max_tokens=2048,  # 1024,
+                stop=["<|im_end|>"],
+            )
         )
-    )
-
-
-def get_vectorstore(index_name, model="text-embedding-ada-002"):
-    os.environ["AZURE_OPENAI_ENDPOINT"] = (
-        f"{os.getenv('OPENAI_API_ENDPOINT')}{os.getenv('DEPLOYMENT_EMB_ID')}/embeddings?api-version={os.getenv('OPENAI_API_VERSION')}"
-    )
-    os.environ["AZURE_OPENAI_API_KEY"] = get_token()
-
-    aoai_embeddings = AzureOpenAIEmbeddings(
-        azure_deployment=model,
-        openai_api_version=os.getenv("OPENAI_API_VERSION"),
-    )
-
-    vector_store: AzureSearch = AzureSearch(
-        azure_search_endpoint=os.getenv("VECTOR_STORE_ADDRESS"),
-        azure_search_key=os.getenv("VECTOR_STORE_PASSWORD"),
-        index_name=index_name,
-        embedding_function=aoai_embeddings.embed_query,
-    )
-
-    return vector_store
+    else:
+        print("Using GROQ API")
+        return LLM(
+            ChatGroq(
+                model="llama3-groq-70b-8192-tool-use-preview",  # llama-3.1-8b-instant / llama3-groq-70b-8192-tool-use-preview / llama-3.2-90b-text-preview / llama-3.2-3b-preview
+                temperature=0,
+            )
+        )


 def get_vectorstore_api(index_name):
     aoai_embeddings = AzureOpenAIEmbeddings(
         model="text-embedding-ada-002",
-        api_key=os.getenv("OPENAI_API_KEY"),
-        azure_endpoint=os.environ["AZURE_ENDPOINT_API"],
-        openai_api_version=os.getenv("OPENAI_API_VERSION"),
+        azure_deployment=os.getenv("EKI_OPENAI_EMB_DEPLOYMENT_NAME"),
+        api_key=os.getenv("EKI_OPENAI_API_KEY"),
+        azure_endpoint=os.environ["EKI_OPENAI_EMB_API_ENDPOINT"],
+        openai_api_version=os.getenv("EKI_OPENAI_API_VERSION"),
    )

-    os.environ["AZURE_OPENAI_API_KEY"] = get_token()
-
     vector_store: AzureSearch = AzureSearch(
-        azure_search_endpoint=os.getenv("VECTOR_STORE_ADDRESS"),
-        azure_search_key=os.getenv("VECTOR_STORE_PASSWORD"),
+        azure_search_endpoint=os.getenv("EKI_VECTOR_STORE_ADDRESS"),
+        azure_search_key=os.getenv("EKI_VECTOR_STORE_PASSWORD"),
         index_name=index_name,
         embedding_function=aoai_embeddings.embed_query,
     )
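
get_token and the duplicate get_llm/get_vectorstore helpers are gone; get_llm_api now picks Azure OpenAI when the EKI_* deployment variables are present and falls back to Groq otherwise, which is what lets a public deployment run without Ekimetrics credentials. A minimal usage sketch of the fallback path (the key value is a hypothetical placeholder; ChatGroq reads GROQ_API_KEY from the environment):

import os

from spinoza_project.source.backend.llm_utils import get_llm_api

# With no EKI_OPENAI_LLM_DEPLOYMENT_NAME set, the Groq branch is taken.
os.environ.pop("EKI_OPENAI_LLM_DEPLOYMENT_NAME", None)
os.environ.setdefault("GROQ_API_KEY", "gsk_...")  # hypothetical placeholder

llm = get_llm_api()  # prints "Using GROQ API"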
spinoza_project/source/frontend/gradio_utils.py CHANGED
@@ -1,12 +1,9 @@
 import gradio as gr
+import os
 import yaml
 from langchain.prompts.chat import ChatPromptTemplate
 from huggingface_hub import hf_hub_download
-from spinoza_project.source.frontend.utils import (
-    make_html_source,
-    make_html_presse_source,
-    make_html_afp_source,
-)
+from spinoza_project.source.frontend.utils import make_html_source
 from spinoza_project.source.backend.prompt_utils import (
     to_chat_instruction,
     SpecialTokens,
@@ -16,8 +13,13 @@ from spinoza_project.source.backend.document_store import pickle_to_document_sto


 def get_config():
-    with open("./spinoza_project/config.yaml") as f:
-        return yaml.full_load(f)
+    if os.getenv("EKI_OPENAI_EMB_DEPLOYMENT_NAME"):
+        with open("./spinoza_project/config.yaml") as f:
+            return yaml.full_load(f)
+
+    else:
+        with open("./spinoza_project/config_public.yaml") as f:
+            return yaml.full_load(f)


 def get_prompts(config):
@@ -105,12 +107,14 @@ def zip_longest_fill(*args, fillvalue=None):
         return

     cond = True
-    fillvalues = [None] * len(iterators)
+    fillvalues = [fillvalue] * len(iterators)
     while cond:
         values = []
         for i, it in enumerate(iterators):
             try:
                 value = next(it)
+                if not value:
+                    value = next(it)
             except StopIteration:
                 value = fillvalues[i]
             values.append(value)
@@ -129,7 +133,10 @@ def start_agents():
     gr.Info(message="Les agents et Spinoza démarent leurs analyses...", duration=3)

     return [
-        (None, "J'attends que tous les agents aient terminé pour générer une réponse...")
+        (
+            None,
+            "J'attends que tous les agents aient terminé pour générer une réponse...",
+        )
     ]
@@ -191,52 +198,32 @@ def get_sources(questions, qdrants, bdd_presse, bdd_afp, config):
     min_similarity = config["min_similarity"]
     text, formated = [], []
     for i, (question, tab) in enumerate(zip(questions, list(config["tabs"].keys()))):
-        if tab == "Presse":
-            sources = bdd_presse.similarity_search_with_relevance_scores(
-                question.replace("<p>", "").replace("</p>\n", ""), k=k
-            )
-            sources = [
-                (doc, score) for doc, score in sources if score >= min_similarity
-            ]
-            formated.extend(
-                [
-                    make_html_presse_source(source[0], j, source[1])
-                    for j, source in zip(range(k * i + 1, k * (i + 1) + 1), sources)
-                ]
-            )
-
-        elif tab == "AFP":
-            sources = bdd_afp.similarity_search_with_relevance_scores(
-                question.replace("<p>", "").replace("</p>\n", ""), k=k
-            )
-            sources = [
-                (doc, score) for doc, score in sources if score >= min_similarity
-            ]
-            formated.extend(
-                [
-                    make_html_afp_source(source[0], j, source[1])
-                    for j, source in zip(range(k * i + 1, k * (i + 1) + 1), sources)
-                ]
-            )
-
-        else:
-            sources = qdrants[
-                config["source_mapping"][tab]
-            ].similarity_search_with_relevance_scores(
-                config["query_preprompt"]
-                + question.replace("<p>", "").replace("</p>\n", ""),
-                k=k,
-            )
-            sources = [
-                (doc, score) for doc, score in sources if score >= min_similarity
-            ]
-            formated.extend(
-                [
-                    make_html_source(source[0], j, source[1], config)
-                    for j, source in zip(range(k * i + 1, k * (i + 1) + 1), sources)
-                ]
-            )
-
+        sources = (
+            bdd_presse.similarity_search_with_relevance_scores(
+                question.replace("<p>", "").replace("</p>\n", ""), k=k
+            )
+            if tab == "Presse"
+            else (
+                bdd_afp.similarity_search_with_relevance_scores(
+                    question.replace("<p>", "").replace("</p>\n", ""), k=k
+                )
+                if tab == "AFP"
+                else qdrants[
+                    config["source_mapping"][tab]
+                ].similarity_search_with_relevance_scores(
+                    config["query_preprompt"]
+                    + question.replace("<p>", "").replace("</p>\n", ""),
+                    k=k,
+                )
+            )
+        )
+        sources = [(doc, score) for doc, score in sources if score >= min_similarity]
+        formated.extend(
+            [
+                make_html_source(source[0], j, source[1], config)
+                for j, source in zip(range(k * i + 1, k * (i + 1) + 1), sources)
+            ]
+        )
         text.extend(
             [
                 "\n\n".join(
spinoza_project/source/frontend/utils.py CHANGED
@@ -58,72 +58,82 @@ def get_source_link(metadata):
     return metadata["file_url"] + f"#page={metadata['content_page_number'] + 1}"


-def make_html_presse_source(source, i, score):
-    meta = source.metadata
-    if meta["file_url"] != "none":
-        return f"""
-    <div class="card" id="doc{i}">
-        <div class="card-content">
-            <h2>Doc {i} - {meta['file_title']} - {meta['file_publisher']}</h2>
-            <p>{source.page_content}</p>
-        </div>
-        <div class="card-footer">
-            <span>{meta['file_source_type']}</span>
-            <span>Relevance Score : {round(100*score,1)}%</span>
-            <a href={meta['file_url']} target="_blank">
-                <span role="img" aria-label="Open PDF">🔗</span>
-            </a>
-        </div>
-    </div>
-    """
-    else:
-        return f"""
-    <div class="card" id="doc{i}">
-        <div class="card-content">
-            <h2>Doc {i} - {meta['file_title']} - {meta['file_publisher']}</h2>
-            <p>{source.page_content}</p>
-        </div>
-        <div class="card-footer">
-            <span>{meta['file_source_type']}</span>
-            <span>Relevance Score : {round(100*score,1)}%</span>
-        </div>
-    </div>
-    """
-
-
-def make_html_afp_source(source, i, score):
-    meta = source.metadata
-    return f"""
-    <div class="card" id="doc{i}">
-        <div class="card-content">
-            <h2>Doc {i} - {meta['file_title']} - {meta['file_type']} AFP</h2>
-            <p>{source.page_content}</p>
-        </div>
-        <div class="card-footer">
-            <span>{meta['file_source_type']}</span>
-            <span>Relevance Score : {round(100*score,1)}%</span>
-        </div>
-    </div>
-    """
-
-
-def make_html_source(source, i, score, config):
-    meta = source.metadata
-    return f"""
-    <div class="card" id="doc{i}">
-        <div class="card-content">
-            <h2>Doc {i} - {meta['file_title']} - Page {meta['content_page_number'] + 1}</h2>
-            <p>{source.page_content.replace(config["passage_preprompt"], "")}</p>
-        </div>
-        <div class="card-footer">
-            <span>{meta['file_source_type']}</span>
-            <span>Relevance Score : {round(100*score,1)}%</span>
-            <a href="{get_source_link(meta)}" target="_blank">
-                <span role="img" aria-label="Open PDF">🔗</span>
-            </a>
-        </div>
-    </div>
-    """
+def make_html_source(source, i, score, config):
+    meta = source.metadata
+    if meta["file_source_type"] == "AFP":
+        return f"""
+    <div class="card" id="doc{i}">
+        <div class="card-content">
+            <h2>Doc {i} - {meta['file_title']} - {meta['file_type']} AFP</h2>
+            <p>{source.page_content}</p>
+        </div>
+        <div class="card-footer">
+            <span>{meta['file_source_type']}</span>
+            <span>Relevance Score : {round(100*score,1)}%</span>
+        </div>
+    </div>
+    """
+
+    if meta["file_source_type"] == "Presse":
+        if meta["file_url"] != "none":
+            return f"""
+    <div class="card" id="doc{i}">
+        <div class="card-content">
+            <h2>Doc {i} - {meta['file_title']} - {meta['file_publisher']}</h2>
+            <p>{source.page_content}</p>
+        </div>
+        <div class="card-footer">
+            <span>{meta['file_source_type']}</span>
+            <span>Relevance Score : {round(100*score,1)}%</span>
+            <a href={meta['file_url']} target="_blank">
+                <span role="img" aria-label="Open PDF">🔗</span>
+            </a>
+        </div>
+    </div>
+    """
+        else:
+            return f"""
+    <div class="card" id="doc{i}">
+        <div class="card-content">
+            <h2>Doc {i} - {meta['file_title']} - {meta['file_publisher']}</h2>
+            <p>{source.page_content}</p>
+        </div>
+        <div class="card-footer">
+            <span>{meta['file_source_type']}</span>
+            <span>Relevance Score : {round(100*score,1)}%</span>
+        </div>
+    </div>
+    """
+
+    if meta["file_url"]:
+        return f"""
+    <div class="card" id="doc{i}">
+        <div class="card-content">
+            <h2>Doc {i} - {meta['file_title']} - Page {meta['content_page_number'] + 1}</h2>
+            <p>{source.page_content.replace(config["passage_preprompt"], "")}</p>
+        </div>
+        <div class="card-footer">
+            <span>{meta['file_source_type']}</span>
+            <span>Relevance Score : {round(100*score,1)}%</span>
+            <a href="{get_source_link(meta)}" target="_blank">
+                <span role="img" aria-label="Open PDF">🔗</span>
+            </a>
+        </div>
+    </div>
+    """
+    else:
+        return f"""
+    <div class="card" id="doc{i}">
+        <div class="card-content">
+            <h2>Doc {i} - {meta['file_title']} - Page {meta['content_page_number'] + 1}</h2>
+            <p>{source.page_content.replace(config["passage_preprompt"], "")}</p>
+        </div>
+        <div class="card-footer">
+            <span>{meta['file_source_type']}</span>
+            <span>Relevance Score : {round(100*score,1)}%</span>
+        </div>
+    </div>
+    """


 def parse_output_llm_with_sources(output):
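
The three per-source HTML helpers collapse into one make_html_source that branches on file_source_type, which is why get_sources above no longer needs per-tab formatting. A minimal rendering sketch with a stub document (the stub stands in for a retrieved LangChain Document; the metadata keys follow those used in the function):

from dataclasses import dataclass, field

from spinoza_project.source.frontend.utils import make_html_source


@dataclass
class StubDoc:
    # Minimal stand-in for a retrieved LangChain Document.
    page_content: str = "passage: Les émissions ont baissé de 5 % en 2023."
    metadata: dict = field(
        default_factory=lambda: {
            "file_source_type": "AFP",
            "file_title": "Bilan carbone 2023",
            "file_type": "Dépêche",
        }
    )


config = {"passage_preprompt": "passage: "}
html = make_html_source(StubDoc(), 1, 0.87, config)  # AFP branch, no link
print(html)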