Spaces:
Running
Running
Update database.py
Browse files - database.py +6 -6
database.py
CHANGED
@@ -7,14 +7,14 @@ import numpy as np
|
|
7 |
from datasets import Dataset, load_dataset
|
8 |
from transformers import AutoTokenizer, AutoModel
|
9 |
import torch
|
10 |
-
import
|
11 |
-
dotenv.load()
|
12 |
|
|
|
|
|
13 |
|
14 |
-
# User-configurable variables
|
15 |
DB_NAME = "python_programs" # ChromaDB collection name
|
16 |
HF_DATASET_NAME = "python_program_vectors" # Hugging Face Dataset name
|
17 |
-
HF_KEY = os.getenv("HF_KEY") # Replace with your Hugging Face API token
|
18 |
PERSIST_DIR = "./chroma_data" # Directory for persistent storage (optional)
|
19 |
USE_GPU = False # Default to CPU, set to True for GPU if available
|
20 |
|
@@ -190,7 +190,7 @@ def generate_semantic_vector(description, use_gpu=USE_GPU):
|
|
190 |
vector = vector[:6]
|
191 |
return vector
|
192 |
|
193 |
-
def save_chromadb_to_hf(dataset_name=HF_DATASET_NAME, token=HF_KEY):
|
194 |
"""Save ChromaDB data to Hugging Face Dataset."""
|
195 |
client = init_chromadb()
|
196 |
collection = create_collection(client)
|
@@ -211,7 +211,7 @@ def save_chromadb_to_hf(dataset_name=HF_DATASET_NAME, token=HF_KEY):
|
|
211 |
dataset.push_to_hub(dataset_name, token=token)
|
212 |
print(f"Dataset pushed to Hugging Face Hub as {dataset_name}")
|
213 |
|
214 |
-
def load_chromadb_from_hf(dataset_name=HF_DATASET_NAME, token=HF_KEY):
|
215 |
"""Load ChromaDB data from Hugging Face Dataset, handle empty dataset."""
|
216 |
try:
|
217 |
dataset = load_dataset(dataset_name, split="train", token=token)
|
|
|
7 |
from datasets import Dataset, load_dataset
|
8 |
from transformers import AutoTokenizer, AutoModel
|
9 |
import torch
|
10 |
+
from dotenv import load_dotenv
|
|
|
11 |
|
12 |
+
# Load environment variables
|
13 |
+
load_dotenv()
|
14 |
|
15 |
+
# User-configurable variables (no HF_KEY hardcoded here)
|
16 |
DB_NAME = "python_programs" # ChromaDB collection name
|
17 |
HF_DATASET_NAME = "python_program_vectors" # Hugging Face Dataset name
|
|
|
18 |
PERSIST_DIR = "./chroma_data" # Directory for persistent storage (optional)
|
19 |
USE_GPU = False # Default to CPU, set to True for GPU if available
|
20 |
|
|
|
190 |
vector = vector[:6]
|
191 |
return vector
|
192 |
|
193 |
+
def save_chromadb_to_hf(dataset_name=HF_DATASET_NAME, token=os.getenv("HF_KEY")):
|
194 |
"""Save ChromaDB data to Hugging Face Dataset."""
|
195 |
client = init_chromadb()
|
196 |
collection = create_collection(client)
|
|
|
211 |
dataset.push_to_hub(dataset_name, token=token)
|
212 |
print(f"Dataset pushed to Hugging Face Hub as {dataset_name}")
|
213 |
|
214 |
+
def load_chromadb_from_hf(dataset_name=HF_DATASET_NAME, token=os.getenv("HF_KEY")):
|
215 |
"""Load ChromaDB data from Hugging Face Dataset, handle empty dataset."""
|
216 |
try:
|
217 |
dataset = load_dataset(dataset_name, split="train", token=token)
|