Spaces:

Lrosado
/

10k_reports

Sleeping

Lrosado committed on Mar 2

Commit

be8635e

verified ·

1 Parent(s): 2075358

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -2,6 +2,7 @@
 ## Setup
 # Import the necessary Libraries
 import os
 import uuid
 import json
 import gradio as gr
@@ -25,18 +26,34 @@ client = OpenAI(
 # Define the embedding model and the vectorstore
 #embedding_model = SentenceTransformerEmbeddings(model_name='thenlper/gte-small')
 #embedding_model = HuggingFaceEmbeddings(model_name='thenlper/gte-small')
-embedding_model = OpenAIEmbeddings(model="text-embedding-ada-001", openai_api_key="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")
-# Load the persisted vectorDB
-collection_name = '10k_embeddings'
-tenkdb = Chroma(
     collection_name="10k_embeddings",
-    persist_directory='./reports_db',
-    embedding_function=embedding_model
 )
 # Prepare the logging functionality
 log_file = Path("logs/") / f"data_{uuid.uuid4()}.json"
 log_folder = log_file.parent

 ## Setup
 # Import the necessary Libraries
 import os
+import shutil
 import uuid
 import json
 import gradio as gr
 # Define the embedding model and the vectorstore
 #embedding_model = SentenceTransformerEmbeddings(model_name='thenlper/gte-small')
 #embedding_model = HuggingFaceEmbeddings(model_name='thenlper/gte-small')
+embedding_model = OpenAIEmbeddings(model="text-embedding-ada-002", openai_api_key="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")
+# Define database path
+db_path = "./10kdb"
+# Delete any existing ChromaDB database unconditionally so it is rebuilt from scratch with the current embedding model
+if os.path.exists(db_path):
+    shutil.rmtree(db_path)  # Removes the old database
+# Create the vector database with 1536-dimensional embeddings
+vectorstore = Chroma.from_documents(
+    documents=report_chunks,  # List of text chunks
+    embedding=embedding_model,
     collection_name="10k_embeddings",
+    persist_directory=db_path  # Path where ChromaDB is stored
 )
+print("ChromaDB has been successfully created with 1536-dimensional embeddings.")
+# Load the persisted vectorDB
+#collection_name = '10k_embeddings'
+#tenkdb = Chroma(
+#    collection_name="10k_embeddings",
+#    persist_directory='./reports_db',
+#    embedding_function=embedding_model
+#)
 # Prepare the logging functionality
 log_file = Path("logs/") / f"data_{uuid.uuid4()}.json"
 log_folder = log_file.parent