abdelom committed on
Commit
229825e
·
verified ·
1 Parent(s): 04659e4

Update pages/1_Chatbot_FR.py

Browse files
Files changed (1) hide show
  1. pages/1_Chatbot_FR.py +77 -77
pages/1_Chatbot_FR.py CHANGED
@@ -193,54 +193,54 @@ def predict_class(text, max_length=500):
193
  # 3. CLASSIFICATION DATASET & VECTOR STORE
194
  ##############################
195
 
196
- @st.cache_data(show_spinner=False)
197
- def load_classification_dataset():
198
- """
199
- Loads the classification Q&A dataset from the Excel file and returns a DataFrame.
200
- """
201
- df = pd.read_excel("Classification dataset - Q&A.xlsx", sheet_name="Fr")
202
- return df
203
-
204
- @st.cache_resource(show_spinner=False)
205
- def load_classification_vectorstore(persist_dir: str = "./chroma_db_class_fr"):
206
- """
207
- Builds (and persists) a Chroma vector store from the classification Q&A dataset.
208
- Each document contains the answer (Réponse) with metadata including the class ("Classe").
209
- """
210
- df = load_classification_dataset()
211
- # Create documents using the "Réponse" as content and include metadata.
212
- from langchain.schema import Document
213
- documents = []
214
- for _, row in df.iterrows():
215
- documents.append(
216
- Document(
217
- page_content=row["Réponse"],
218
- metadata={
219
- "id": row["ID"],
220
- "Classe": row["Classe"],
221
- "Question": row["Question"]
222
- }
223
- )
224
- )
225
- from langchain.embeddings import HuggingFaceEmbeddings
226
- from langchain.vectorstores import Chroma
227
- embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
228
- vectorstore = Chroma.from_documents(documents, embedding_model, persist_directory=persist_dir)
229
- vectorstore.persist()
230
- return vectorstore
231
-
232
- def load_existing_classification_vectorstore(persist_dir: str = "./chroma_db_class_fr"):
233
- """
234
- Loads an existing Chroma vector store for the classification dataset.
235
- """
236
- from langchain.embeddings import HuggingFaceEmbeddings
237
- from langchain.vectorstores import Chroma
238
- embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
239
- vectorstore = Chroma(
240
- persist_directory=persist_dir,
241
- embedding_function=embedding_model
242
- )
243
- return vectorstore
244
 
245
  ##############################
246
  # 4. PROMPT & LLM FR SETUP
@@ -394,37 +394,37 @@ def main():
394
  st.write(f"**Classe prédite :** {predicted_label}")
395
 
396
  # --- Retrieve final answer using the classification vector store ---
397
- if predicted_label != "Autre":
398
- # Build or load the classification vector store if not already in session_state.
399
- if "class_retriever" not in st.session_state:
400
- # Either create new or load existing
401
- try:
402
- # Attempt to load an existing vector store.
403
- vectorstore_class = load_existing_classification_vectorstore("./chroma_db_class_fr")
404
- except Exception:
405
- # If not found, create it.
406
- vectorstore_class = load_classification_vectorstore("./chroma_db_class_fr")
407
- st.session_state["class_retriever"] = vectorstore_class.as_retriever(
408
- search_type="mmr",
409
- search_kwargs={"k": 1, "lambda_mult": 0.5}
410
- )
411
- # Retrieve the final answer with a metadata filter.
412
- # (Assumes the underlying retriever supports a filter parameter.)
413
- final_docs = st.session_state["class_retriever"].get_relevant_documents(
414
- response_fr, filter={"Classe": predicted_label}
415
- )
416
- if final_docs:
417
- final_answer = final_docs[0].page_content
418
- else:
419
- final_answer = response_fr # fallback if no document found
420
- else:
421
- final_answer = ("Je n'ai pas d'information précise à ce sujet. "
422
- "Souhaitez-vous que je vous mette en contact avec un agent Inwi ?")
423
 
424
- st.write("**Réponse finale :**")
425
- st.write(final_answer)
426
  else:
427
  st.write("Aucun contexte trouvé pour cette question. Essayez autre chose.")
428
 
429
  if __name__ == "__main__":
430
- main()
 
193
  # 3. CLASSIFICATION DATASET & VECTOR STORE
194
  ##############################
195
 
196
+ # @st.cache_data(show_spinner=False)
197
+ # def load_classification_dataset():
198
+ # """
199
+ # Loads the classification Q&A dataset from the Excel file and returns a DataFrame.
200
+ # """
201
+ # df = pd.read_excel("Classification dataset - Q&A.xlsx", sheet_name="Fr")
202
+ # return df
203
+
204
+ # @st.cache_resource(show_spinner=False)
205
+ # def load_classification_vectorstore(persist_dir: str = "./chroma_db_class_fr"):
206
+ # """
207
+ # Builds (and persists) a Chroma vector store from the classification Q&A dataset.
208
+ # Each document contains the answer (Réponse) with metadata including the class ("Classe").
209
+ # """
210
+ # df = load_classification_dataset()
211
+ # # Create documents using the "Réponse" as content and include metadata.
212
+ # from langchain.schema import Document
213
+ # documents = []
214
+ # for _, row in df.iterrows():
215
+ # documents.append(
216
+ # Document(
217
+ # page_content=row["Réponse"],
218
+ # metadata={
219
+ # "id": row["ID"],
220
+ # "Classe": row["Classe"],
221
+ # "Question": row["Question"]
222
+ # }
223
+ # )
224
+ # )
225
+ # from langchain.embeddings import HuggingFaceEmbeddings
226
+ # from langchain.vectorstores import Chroma
227
+ # embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
228
+ # vectorstore = Chroma.from_documents(documents, embedding_model, persist_directory=persist_dir)
229
+ # vectorstore.persist()
230
+ # return vectorstore
231
+
232
+ # def load_existing_classification_vectorstore(persist_dir: str = "./chroma_db_class_fr"):
233
+ # """
234
+ # Loads an existing Chroma vector store for the classification dataset.
235
+ # """
236
+ # from langchain.embeddings import HuggingFaceEmbeddings
237
+ # from langchain.vectorstores import Chroma
238
+ # embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
239
+ # vectorstore = Chroma(
240
+ # persist_directory=persist_dir,
241
+ # embedding_function=embedding_model
242
+ # )
243
+ # return vectorstore
244
 
245
  ##############################
246
  # 4. PROMPT & LLM FR SETUP
 
394
  st.write(f"**Classe prédite :** {predicted_label}")
395
 
396
  # --- Retrieve final answer using the classification vector store ---
397
+ # if predicted_label != "Autre":
398
+ # # Build or load the classification vector store if not already in session_state.
399
+ # if "class_retriever" not in st.session_state:
400
+ # # Either create new or load existing
401
+ # try:
402
+ # # Attempt to load an existing vector store.
403
+ # vectorstore_class = load_existing_classification_vectorstore("./chroma_db_class_fr")
404
+ # except Exception:
405
+ # # If not found, create it.
406
+ # vectorstore_class = load_classification_vectorstore("./chroma_db_class_fr")
407
+ # st.session_state["class_retriever"] = vectorstore_class.as_retriever(
408
+ # search_type="mmr",
409
+ # search_kwargs={"k": 1, "lambda_mult": 0.5}
410
+ # )
411
+ # # Retrieve the final answer with a metadata filter.
412
+ # # (Assumes the underlying retriever supports a filter parameter.)
413
+ # final_docs = st.session_state["class_retriever"].get_relevant_documents(
414
+ # response_fr, filter={"Classe": predicted_label}
415
+ # )
416
+ # if final_docs:
417
+ # final_answer = final_docs[0].page_content
418
+ # else:
419
+ # final_answer = response_fr # fallback if no document found
420
+ # else:
421
+ # final_answer = ("Je n'ai pas d'information précise à ce sujet. "
422
+ # "Souhaitez-vous que je vous mette en contact avec un agent Inwi ?")
423
 
424
+ # st.write("**Réponse finale :**")
425
+ # st.write(final_answer)
426
  else:
427
  st.write("Aucun contexte trouvé pour cette question. Essayez autre chose.")
428
 
429
  if __name__ == "__main__":
430
+ main()