eliot-hub committed on
Commit
bc7d8a5
·
1 Parent(s): 982050e
Files changed (2) hide show
  1. app.py +1 -1
  2. tools.py +11 -3
app.py CHANGED
@@ -57,7 +57,7 @@ with gr.Blocks(theme=theme, js=js_func, title="Dataltist", fill_height=True) as
57
  gr.Markdown("# Dataltist Chatbot 🚀")
58
  chatbot = gr.Chatbot(show_copy_button=True, show_share_button=False, type="messages", scale=1)
59
  msg = gr.Textbox(lines=1, show_label=False, placeholder="Posez vos questions sur l'assurance") # submit_btn=True
60
- clear = gr.ClearButton([msg, chatbot], value="Effacer 🗑")
61
  config = {"configurable": {"thread_id": "1"}}
62
 
63
 
 
57
  gr.Markdown("# Dataltist Chatbot 🚀")
58
  chatbot = gr.Chatbot(show_copy_button=True, show_share_button=False, type="messages", scale=1)
59
  msg = gr.Textbox(lines=1, show_label=False, placeholder="Posez vos questions sur l'assurance") # submit_btn=True
60
+ # clear = gr.ClearButton([msg, chatbot], value="Effacer 🗑")
61
  config = {"configurable": {"thread_id": "1"}}
62
 
63
 
tools.py CHANGED
@@ -18,7 +18,8 @@ import chromadb
18
  from typing import List
19
  from datasets import load_dataset
20
  from langchain_huggingface import HuggingFaceEmbeddings
21
-
 
22
 
23
  load_dotenv()
24
  # Global params
@@ -40,8 +41,15 @@ memoires_ds = load_dataset("eliot-hub/memoires_vec_800", split="data", token=HF_
40
  batched_ds = memoires_ds.batch(batch_size=41000)
41
  client = chromadb.Client()
42
  collection = client.get_or_create_collection(name="embeddings_mxbai")
43
-
44
-
 
 
 
 
 
 
 
45
 
46
  llm_4o = ChatOpenAI(model="gpt-4o-mini", api_key=OPENAI_API_KEY, temperature=0)
47
 
 
18
  from typing import List
19
  from datasets import load_dataset
20
  from langchain_huggingface import HuggingFaceEmbeddings
21
+ from tqdm import tqdm
22
+ import datetime
23
 
24
  load_dotenv()
25
  # Global params
 
41
  batched_ds = memoires_ds.batch(batch_size=41000)
42
  client = chromadb.Client()
43
  collection = client.get_or_create_collection(name="embeddings_mxbai")
44
+ for batch in tqdm(batched_ds, desc=f"[{datetime.now().strftime('%H:%M:%S')}] Processing dataset batches"):
45
+ collection.add(
46
+ ids=batch["ids"],
47
+ # metadatas=batch["metadatas"],
48
+ documents=batch["documents"],
49
+ embeddings=batch["embeddings"],
50
+ )
51
+ print("DB done")
52
+ del memoires_ds, batched_ds
53
 
54
  llm_4o = ChatOpenAI(model="gpt-4o-mini", api_key=OPENAI_API_KEY, temperature=0)
55