Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -4,7 +4,7 @@ import os
|
|
4 |
import time
|
5 |
import json
|
6 |
from pathlib import Path
|
7 |
-
from huggingface_hub import CommitScheduler
|
8 |
from uuid import uuid4
|
9 |
from datasets import load_dataset
|
10 |
import shutil
|
@@ -21,19 +21,20 @@ scheduler = CommitScheduler(
|
|
21 |
|
22 |
# Loading dataset from HuggingFace -------------------------------------------------------------------------------------
|
23 |
def download_dataset_file(dataset_id, local_dir):
|
24 |
-
dataset = load_dataset(dataset_id)
|
25 |
-
cache_file_info = dataset.cache_files
|
26 |
-
print(f"Cache File Info: {cache_file_info}\n")
|
27 |
-
|
28 |
-
filename = cache_file_info['test'][0]['filename']
|
29 |
-
snapshot_id = filename.split('/')[-2]
|
30 |
-
dataset_name = "350016z--Taiwanese_dataset" # change there
|
31 |
-
snapshot_path = os.path.join("/home/user/.cache/huggingface/hub", "datasets--"+dataset_name, "snapshots", snapshot_id)
|
|
|
|
|
32 |
contents = os.listdir(snapshot_path)
|
33 |
|
34 |
print("---------------------------------------")
|
35 |
print(contents)
|
36 |
-
print(os.listdir("/home/user/.cache/huggingface/hub/datasets--350016z--Taiwanese_dataset/snapshots/"))
|
37 |
print("---------------------------------------")
|
38 |
|
39 |
for file_name in contents:
|
|
|
4 |
import time
|
5 |
import json
|
6 |
from pathlib import Path
|
7 |
+
from huggingface_hub import CommitScheduler, snapshot_download
|
8 |
from uuid import uuid4
|
9 |
from datasets import load_dataset
|
10 |
import shutil
|
|
|
21 |
|
22 |
# Loading dataset from HuggingFace -------------------------------------------------------------------------------------
|
23 |
def download_dataset_file(dataset_id, local_dir):
|
24 |
+
# dataset = load_dataset(dataset_id)
|
25 |
+
# cache_file_info = dataset.cache_files
|
26 |
+
# print(f"Cache File Info: {cache_file_info}\n")
|
27 |
+
|
28 |
+
# filename = cache_file_info['test'][0]['filename']
|
29 |
+
# snapshot_id = filename.split('/')[-2]
|
30 |
+
# dataset_name = "350016z--Taiwanese_dataset" # change there
|
31 |
+
# snapshot_path = os.path.join("/home/user/.cache/huggingface/hub", "datasets--"+dataset_name, "snapshots", snapshot_id)
|
32 |
+
# contents = os.listdir(snapshot_path)
|
33 |
+
snapshot_path = snapshot_download(repo_id=dataset_id, repo_type="dataset")
|
34 |
contents = os.listdir(snapshot_path)
|
35 |
|
36 |
print("---------------------------------------")
|
37 |
print(contents)
|
|
|
38 |
print("---------------------------------------")
|
39 |
|
40 |
for file_name in contents:
|