Spaces:
Running
Running
Update database.py
Browse files - database.py +5 -4
database.py
CHANGED
@@ -260,9 +260,9 @@ def save_chromadb_to_hf(dataset_name=HF_DATASET_NAME, token=os.getenv("HF_KEY"))
|
|
260 |
dataset = Dataset.from_dict(data)
|
261 |
logger.info(f"Created Hugging Face Dataset with {len(data['code'])} entries")
|
262 |
|
263 |
-
# Push to Hugging Face Hub
|
264 |
-
dataset.push_to_hub(dataset_name, token=token)
|
265 |
-
logger.info(f"Dataset pushed to Hugging Face Hub as {dataset_name}")
|
266 |
# Verify push (optional, could check dataset on Hub)
|
267 |
logger.info(f"Verified Hugging Face dataset push with {len(dataset)} entries")
|
268 |
except Exception as e:
|
@@ -291,4 +291,5 @@ def load_chromadb_from_hf(dataset_name=HF_DATASET_NAME, token=os.getenv("HF_KEY"
|
|
291 |
|
292 |
if __name__ == '__main__':
|
293 |
client = load_chromadb_from_hf()
|
294 |
-
|
|
|
|
260 |
dataset = Dataset.from_dict(data)
|
261 |
logger.info(f"Created Hugging Face Dataset with {len(data['code'])} entries")
|
262 |
|
263 |
+
# Push to Hugging Face Hub, overwriting existing dataset
|
264 |
+
dataset.push_to_hub(dataset_name, token=token, exist_ok=True)
|
265 |
+
logger.info(f"Dataset pushed to Hugging Face Hub as {dataset_name}, overwriting existing dataset")
|
266 |
# Verify push (optional, could check dataset on Hub)
|
267 |
logger.info(f"Verified Hugging Face dataset push with {len(dataset)} entries")
|
268 |
except Exception as e:
|
|
|
291 |
|
292 |
if __name__ == '__main__':
|
293 |
client = load_chromadb_from_hf()
|
294 |
+
collection = create_collection(client, DB_NAME)
|
295 |
+
logger.info(f"Database initialized or loaded from Hugging Face Hub, contains {collection.count()} entries")
|