Spaces:

350016z
/

TranslationError_Gradio

Sleeping

350016z committed on Jan 19

Commit

f60033e

verified ·

1 Parent(s): b5f571a

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -22,9 +22,15 @@ def download_dataset_file(dataset_id, local_dir):
     dataset = load_dataset(dataset_id)
     cache_file_info = dataset.cache_files
     print(f"Cache File Info: {cache_file_info}\n")
-    source_file = "/home/user/.cache/huggingface/hub/datasets--350016z--flores_plus_Taiwanese/snapshots/9a8fe738903c9ab08620db4553c582376bcdd64f"
     contents = os.listdir(source_file)
     print("Contents of snapshot path:")
     print(contents)
@@ -33,20 +39,22 @@ def download_dataset_file(dataset_id, local_dir):
     for file_name in contents:
         print("Checking file: ", file_name)
         if file_name.endswith(".csv"):
-            print("Found CSV file: ", file_name)
             local_file_path = os.path.join(local_dir, file_name)
-            shutil.move(source_file, local_file_path)
     return local_file_path
 DATASET_ID = "350016z/flores_plus_Taiwanese"
 data_path = "test.csv"
 current_dir = os.getcwd()
 data_path = download_dataset_file(DATASET_ID, current_dir)
 print(f"Data path: {data_path}")
 csv_files = [f for f in os.listdir(current_dir) if f.endswith('.csv')]
 # data_path = "test.csv"
 # current_dir = os.path.dirname(os.path.abspath(data_path))

     dataset = load_dataset(dataset_id)
     cache_file_info = dataset.cache_files
     print(f"Cache File Info: {cache_file_info}\n")
+    filename = cache_file_info['test'][0]['filename']
+    snapshot_id = filename.split('/')[-2]
+    dataset_name = filename.split('/')[-5]
+    dataset_name = dataset_name.replace('___', '--')
+    base_path = os.path.join('/home/user/.cache/huggingface/hub', 'datasets--' + dataset_name)
+    snapshot_path = os.path.join(base_path, "snapshots", snapshot_id)
+    print(f"snapshot_path: {snapshot_path}")
     contents = os.listdir(source_file)
     print("Contents of snapshot path:")
     print(contents)
     for file_name in contents:
         print("Checking file: ", file_name)
         if file_name.endswith(".csv"):
+            source_file_path = os.path.join(snapshot_path, file_name)
             local_file_path = os.path.join(local_dir, file_name)
+            shutil.move(source_file_path, local_file_path)
     return local_file_path
 DATASET_ID = "350016z/flores_plus_Taiwanese"
 data_path = "test.csv"
 current_dir = os.getcwd()
 data_path = download_dataset_file(DATASET_ID, current_dir)
 print(f"Data path: {data_path}")
 csv_files = [f for f in os.listdir(current_dir) if f.endswith('.csv')]
+print(f"CSV Files: {csv_files}")
 # data_path = "test.csv"
 # current_dir = os.path.dirname(os.path.abspath(data_path))