Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -2,6 +2,7 @@
|
|
2 |
## Setup
|
3 |
# Import the necessary Libraries
|
4 |
import os
|
|
|
5 |
import uuid
|
6 |
import json
|
7 |
import gradio as gr
|
@@ -25,18 +26,34 @@ client = OpenAI(
|
|
25 |
# Define the embedding model and the vectorstore
|
26 |
#embedding_model = SentenceTransformerEmbeddings(model_name='thenlper/gte-small')
|
27 |
#embedding_model = HuggingFaceEmbeddings(model_name='thenlper/gte-small')
|
28 |
-
embedding_model = OpenAIEmbeddings(model="text-embedding-ada-
|
29 |
|
|
|
|
|
30 |
|
31 |
-
#
|
32 |
-
|
|
|
33 |
|
34 |
-
|
|
|
|
|
|
|
35 |
collection_name="10k_embeddings",
|
36 |
-
persist_directory=
|
37 |
-
embedding_function=embedding_model
|
38 |
)
|
39 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
40 |
# Prepare the logging functionality
|
41 |
log_file = Path("logs/") / f"data_{uuid.uuid4()}.json"
|
42 |
log_folder = log_file.parent
|
|
|
2 |
## Setup
|
3 |
# Import the necessary Libraries
|
4 |
import os
|
5 |
+
import shutil
|
6 |
import uuid
|
7 |
import json
|
8 |
import gradio as gr
|
|
|
26 |
# Define the embedding model and the vectorstore
|
27 |
#embedding_model = SentenceTransformerEmbeddings(model_name='thenlper/gte-small')
|
28 |
#embedding_model = HuggingFaceEmbeddings(model_name='thenlper/gte-small')
|
29 |
+
# Read the OpenAI key from the environment (e.g. a Space secret) instead of hard-coding it; a missing variable fails fast with KeyError.
embedding_model = OpenAIEmbeddings(model="text-embedding-ada-002", openai_api_key=os.environ["OPENAI_API_KEY"])
|
30 |
|
31 |
+
# Define database path
|
32 |
+
db_path = "./10kdb"
|
33 |
|
34 |
+
# Delete any existing ChromaDB database unconditionally (no dimension check is performed) so it is rebuilt from scratch with the current embedding model
|
35 |
+
if os.path.exists(db_path):
|
36 |
+
shutil.rmtree(db_path) # Removes the old database
|
37 |
|
38 |
+
# Create the vector database with 1536-dimensional embeddings
|
39 |
+
vectorstore = Chroma.from_documents(
|
40 |
+
documents=report_chunks, # List of text chunks
|
41 |
+
embedding=embedding_model,
|
42 |
collection_name="10k_embeddings",
|
43 |
+
persist_directory=db_path # Path where ChromaDB is stored
|
|
|
44 |
)
|
45 |
|
46 |
+
print("ChromaDB has been successfully created with 1536-dimensional embeddings.")
|
47 |
+
|
48 |
+
# Load the persisted vectorDB
|
49 |
+
#collection_name = '10k_embeddings'
|
50 |
+
|
51 |
+
#tenkdb = Chroma(
|
52 |
+
# collection_name="10k_embeddings",
|
53 |
+
# persist_directory='./reports_db',
|
54 |
+
# embedding_function=embedding_model
|
55 |
+
#)
|
56 |
+
|
57 |
# Prepare the logging functionality
|
58 |
log_file = Path("logs/") / f"data_{uuid.uuid4()}.json"
|
59 |
log_folder = log_file.parent
|