broadfield-dev committed on
Commit
17dfbee
·
verified ·
1 Parent(s): 0b25cd8

Delete save_to_hf.py

Browse files
Files changed (1) hide show
  1. save_to_hf.py +0 -26
save_to_hf.py DELETED
@@ -1,26 +0,0 @@
1
- # save_to_hf.py
2
- from datasets import Dataset
3
- import chromadb
4
- from database import init_chromadb, create_collection
5
-
6
- def save_chromadb_to_hf(dataset_name="python_program_vectors"):
7
- client = init_chromadb()
8
- collection = create_collection(client)
9
-
10
- # Fetch all data from ChromaDB
11
- results = collection.get(include=["documents", "metadatas", "embeddings"])
12
- data = {
13
- "code": results["documents"],
14
- "sequence": [meta["sequence"] for meta in results["metadatas"]],
15
- "vectors": results["embeddings"]
16
- }
17
-
18
- # Create a Hugging Face Dataset
19
- dataset = Dataset.from_dict(data)
20
-
21
- # Push to Hugging Face Hub
22
- dataset.push_to_hub(dataset_name, token="YOUR_HUGGINGFACE_TOKEN")
23
- print(f"Dataset pushed to Hugging Face Hub as {dataset_name}")
24
-
25
- if __name__ == "__main__":
26
- save_chromadb_to_hf()