Spaces:
Running
Running
Delete save_to_hf.py
Browse files- save_to_hf.py +0 -26
save_to_hf.py
DELETED
@@ -1,26 +0,0 @@
|
|
1 |
-
# save_to_hf.py
|
2 |
-
from datasets import Dataset
|
3 |
-
import chromadb
|
4 |
-
from database import init_chromadb, create_collection
|
5 |
-
|
6 |
-
def save_chromadb_to_hf(dataset_name="python_program_vectors", token=None):
    """Export the ChromaDB collection to a Hugging Face Hub dataset.

    Reads the documents, metadatas and embeddings returned by
    ``collection.get`` and uploads them as a dataset with three columns:
    ``code`` (the documents), ``sequence`` (the ``"sequence"`` entry of each
    metadata record) and ``vectors`` (the embeddings).

    Args:
        dataset_name: Repository name passed to ``Dataset.push_to_hub``.
        token: Hugging Face access token forwarded to ``push_to_hub``.
            Defaults to ``None``; in that case credential resolution is left
            to the ``datasets`` / ``huggingface_hub`` libraries (presumably
            the cached ``huggingface-cli login`` token or the ``HF_TOKEN``
            environment variable -- confirm against the installed versions).

    Raises:
        KeyError: If a metadata record has no ``"sequence"`` entry.
    """
    # NOTE(review): init_chromadb / create_collection come from the project's
    # `database` module; their exact behaviour is not visible from this file.
    client = init_chromadb()
    collection = create_collection(client)

    # Fetch all data from ChromaDB
    results = collection.get(include=["documents", "metadatas", "embeddings"])
    data = {
        "code": results["documents"],
        "sequence": [meta["sequence"] for meta in results["metadatas"]],
        "vectors": results["embeddings"],
    }

    # Create a Hugging Face Dataset
    dataset = Dataset.from_dict(data)

    # Push to Hugging Face Hub. The token is supplied by the caller (or
    # resolved by the library when None) instead of being a literal in the
    # source, so no secret ever has to be committed to make this work.
    dataset.push_to_hub(dataset_name, token=token)
    print(f"Dataset pushed to Hugging Face Hub as {dataset_name}")
|
24 |
-
|
25 |
-
# Run the export only when this file is executed as a script, not on import.
if __name__ == "__main__":
    save_chromadb_to_hf()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|