File size: 847 Bytes
90e461b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
# save_to_hf.py
import os

from datasets import Dataset
import chromadb

from database import init_chromadb, create_collection

def save_chromadb_to_hf(dataset_name="python_program_vectors", token=None):
    """Export the ChromaDB collection to a dataset on the Hugging Face Hub.

    The collection is obtained through ``init_chromadb`` and
    ``create_collection``. Its documents, metadata and embeddings are fetched
    in one call, assembled into a ``Dataset`` with the columns ``code``,
    ``sequence`` and ``vectors``, and pushed to the Hub.

    Args:
        dataset_name: Repository name the dataset is pushed to.
        token: Hugging Face access token. When omitted, the ``HF_TOKEN``
            environment variable is used; if that is unset or empty too,
            ``None`` is handed to ``push_to_hub`` so the library can use the
            credentials saved by a previous login.

    Raises:
        KeyError: If a metadata mapping has no ``"sequence"`` entry.
    """
    # Credentials must never live in source code: resolve the token at call
    # time from the caller or the environment instead of a literal string.
    if token is None:
        token = os.environ.get("HF_TOKEN") or None

    client = init_chromadb()
    collection = create_collection(client)

    # Fetch all data from ChromaDB
    results = collection.get(include=["documents", "metadatas", "embeddings"])
    data = {
        "code": results["documents"],
        # Only the "sequence" field of each metadata record is exported.
        "sequence": [meta["sequence"] for meta in results["metadatas"]],
        "vectors": results["embeddings"],
    }

    # Create a Hugging Face Dataset
    dataset = Dataset.from_dict(data)

    # Push to Hugging Face Hub
    dataset.push_to_hub(dataset_name, token=token)
    print(f"Dataset pushed to Hugging Face Hub as {dataset_name}")

# Script entry point: when this file is executed directly (not imported),
# run the export with the default dataset name.
if __name__ == "__main__":
    save_chromadb_to_hf()