Spaces:

350016z
/

TranslationError_Gradio

Sleeping

350016z committed on Feb 24

Commit

88ed347

verified ·

1 Parent(s): 847e429

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -4,7 +4,7 @@ import os
 import time
 import json
 from pathlib import Path
-from huggingface_hub import CommitScheduler
 from uuid import uuid4
 from datasets import load_dataset
 import shutil
@@ -21,19 +21,20 @@ scheduler = CommitScheduler(
 # Loading dataset from HuggingFace -------------------------------------------------------------------------------------
 def download_dataset_file(dataset_id, local_dir):
-    dataset = load_dataset(dataset_id)
-    cache_file_info = dataset.cache_files
-    print(f"Cache File Info: {cache_file_info}\n")
-    filename = cache_file_info['test'][0]['filename']
-    snapshot_id = filename.split('/')[-2]
-    dataset_name = "350016z--Taiwanese_dataset" # change there
-    snapshot_path = os.path.join("/home/user/.cache/huggingface/hub", "datasets--"+dataset_name, "snapshots", snapshot_id)
     contents = os.listdir(snapshot_path)
     print("---------------------------------------")
     print(contents)
-    print(os.listdir("/home/user/.cache/huggingface/hub/datasets--350016z--Taiwanese_dataset/snapshots/"))
     print("---------------------------------------")
     for file_name in contents:

 import time
 import json
 from pathlib import Path
+from huggingface_hub import CommitScheduler, snapshot_download
 from uuid import uuid4
 from datasets import load_dataset
 import shutil
 # Loading dataset from HuggingFace -------------------------------------------------------------------------------------
 def download_dataset_file(dataset_id, local_dir):
+    # dataset = load_dataset(dataset_id)
+    # cache_file_info = dataset.cache_files
+    # print(f"Cache File Info: {cache_file_info}\n")
+    # filename = cache_file_info['test'][0]['filename']
+    # snapshot_id = filename.split('/')[-2]
+    # dataset_name = "350016z--Taiwanese_dataset" # change there
+    # snapshot_path = os.path.join("/home/user/.cache/huggingface/hub", "datasets--"+dataset_name, "snapshots", snapshot_id)
+    # contents = os.listdir(snapshot_path)
+    snapshot_path = snapshot_download(repo_id=dataset_id, repo_type="dataset")
     contents = os.listdir(snapshot_path)
     print("---------------------------------------")
     print(contents)
     print("---------------------------------------")
     for file_name in contents: