Lrosado committed on
Commit
be8635e
·
verified ·
1 Parent(s): 2075358

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -6
app.py CHANGED
@@ -2,6 +2,7 @@
2
  ## Setup
3
  # Import the necessary Libraries
4
  import os
 
5
  import uuid
6
  import json
7
  import gradio as gr
@@ -25,18 +26,34 @@ client = OpenAI(
25
  # Define the embedding model and the vectorstore
26
  #embedding_model = SentenceTransformerEmbeddings(model_name='thenlper/gte-small')
27
  #embedding_model = HuggingFaceEmbeddings(model_name='thenlper/gte-small')
28
- embedding_model = OpenAIEmbeddings(model="text-embedding-ada-001", openai_api_key="sk-[REDACTED — live secret was committed here; revoke this key and read it from the OPENAI_API_KEY environment variable instead]")
29
 
 
 
30
 
31
- # Load the persisted vectorDB
32
- collection_name = '10k_embeddings'
 
33
 
34
- tenkdb = Chroma(
 
 
 
35
  collection_name="10k_embeddings",
36
- persist_directory='./reports_db',
37
- embedding_function=embedding_model
38
  )
39
 
 
 
 
 
 
 
 
 
 
 
 
40
  # Prepare the logging functionality
41
  log_file = Path("logs/") / f"data_{uuid.uuid4()}.json"
42
  log_folder = log_file.parent
 
2
  ## Setup
3
  # Import the necessary Libraries
4
  import os
5
+ import shutil
6
  import uuid
7
  import json
8
  import gradio as gr
 
26
  # Define the embedding model and the vectorstore
27
  #embedding_model = SentenceTransformerEmbeddings(model_name='thenlper/gte-small')
28
  #embedding_model = HuggingFaceEmbeddings(model_name='thenlper/gte-small')
29
+ embedding_model = OpenAIEmbeddings(model="text-embedding-ada-002", openai_api_key="sk-[REDACTED — live secret was committed here; revoke this key and read it from the OPENAI_API_KEY environment variable instead]")
30
 
31
+ # Define database path
32
+ db_path = "./10kdb"
33
 
34
+ # Delete the existing ChromaDB database if dimensions don't match
35
+ if os.path.exists(db_path):
36
+ shutil.rmtree(db_path) # Removes the old database
37
 
38
+ # Create the vector database with 1536-dimensional embeddings
39
+ vectorstore = Chroma.from_documents(
40
+ documents=report_chunks, # List of text chunks
41
+ embedding=embedding_model,
42
  collection_name="10k_embeddings",
43
+ persist_directory=db_path # Path where ChromaDB is stored
 
44
  )
45
 
46
+ print("ChromaDB has been successfully created with 1536-dimensional embeddings.")
47
+
48
+ # Load the persisted vectorDB
49
+ #collection_name = '10k_embeddings'
50
+
51
+ #tenkdb = Chroma(
52
+ # collection_name="10k_embeddings",
53
+ # persist_directory='./reports_db',
54
+ # embedding_function=embedding_model
55
+ #)
56
+
57
  # Prepare the logging functionality
58
  log_file = Path("logs/") / f"data_{uuid.uuid4()}.json"
59
  log_folder = log_file.parent