Spaces:

broadfield-dev
/

parse_py

Sleeping

parse_py / save_to_hf.py

Create save_to_hf.py

90e461b verified 4 months ago

847 Bytes

	# save_to_hf.py
	from datasets import Dataset
	import chromadb
	from database import init_chromadb, create_collection

	def save_chromadb_to_hf(dataset_name="python_program_vectors", token=None):
	    """Export the ChromaDB collection to a Hugging Face Hub dataset.

	    Reads every record from the collection returned by
	    ``create_collection(init_chromadb())`` and pushes a dataset with three
	    columns: ``code`` (the stored documents), ``sequence`` (the
	    ``"sequence"`` entry of each record's metadata) and ``vectors`` (the
	    stored embeddings).

	    Args:
	        dataset_name: Repository name to push the dataset to on the Hub.
	        token: Hugging Face access token. Leave as ``None`` to let the Hub
	            client use the credentials saved by a prior login; never
	            hard-code a real token in this file.

	    Raises:
	        KeyError: If a record's metadata has no ``"sequence"`` key.
	    """
	    client = init_chromadb()
	    collection = create_collection(client)

	    # Fetch all data from ChromaDB; embeddings must be requested explicitly
	    # via ``include``.
	    results = collection.get(include=["documents", "metadatas", "embeddings"])
	    data = {
	        "code": results["documents"],
	        "sequence": [meta["sequence"] for meta in results["metadatas"]],
	        "vectors": results["embeddings"],
	    }

	    # Create a Hugging Face Dataset
	    dataset = Dataset.from_dict(data)

	    # Push to Hugging Face Hub. The token is forwarded as given instead of
	    # a placeholder string, which could never authenticate and would take
	    # precedence over locally saved credentials.
	    dataset.push_to_hub(dataset_name, token=token)
	    print(f"Dataset pushed to Hugging Face Hub as {dataset_name}")

	# Script entry point: export the collection using the default dataset name
	# when this file is executed directly rather than imported.
	if __name__ == "__main__":
	    save_chromadb_to_hf()