350016z committed on
Commit
f60033e
·
verified ·
1 Parent(s): b5f571a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -5
app.py CHANGED
@@ -22,9 +22,15 @@ def download_dataset_file(dataset_id, local_dir):
22
  dataset = load_dataset(dataset_id)
23
  cache_file_info = dataset.cache_files
24
  print(f"Cache File Info: {cache_file_info}\n")
25
-
26
 
27
- source_file = "/home/user/.cache/huggingface/hub/datasets--350016z--flores_plus_Taiwanese/snapshots/9a8fe738903c9ab08620db4553c582376bcdd64f"
 
 
 
 
 
 
 
28
  contents = os.listdir(source_file)
29
  print("Contents of snapshot path:")
30
  print(contents)
@@ -33,20 +39,22 @@ def download_dataset_file(dataset_id, local_dir):
33
  for file_name in contents:
34
  print("Checking file: ", file_name)
35
  if file_name.endswith(".csv"):
36
- print("Found CSV file: ", file_name)
37
  local_file_path = os.path.join(local_dir, file_name)
38
- shutil.move(source_file, local_file_path)
39
 
40
  return local_file_path
41
 
42
  DATASET_ID = "350016z/flores_plus_Taiwanese"
43
  data_path = "test.csv"
44
  current_dir = os.getcwd()
 
45
  data_path = download_dataset_file(DATASET_ID, current_dir)
46
  print(f"Data path: {data_path}")
47
 
48
-
49
  csv_files = [f for f in os.listdir(current_dir) if f.endswith('.csv')]
 
 
50
 
51
  # data_path = "test.csv"
52
  # current_dir = os.path.dirname(os.path.abspath(data_path))
 
22
  dataset = load_dataset(dataset_id)
23
  cache_file_info = dataset.cache_files
24
  print(f"Cache File Info: {cache_file_info}\n")
 
25
 
26
+ filename = cache_file_info['test'][0]['filename']
27
+ snapshot_id = filename.split('/')[-2]
28
+ dataset_name = filename.split('/')[-5]
29
+ dataset_name = dataset_name.replace('___', '--')
30
+ base_path = os.path.join('/home/user/.cache/huggingface/hub', 'datasets--' + dataset_name)
31
+ snapshot_path = os.path.join(base_path, "snapshots", snapshot_id)
32
+ print(f"snapshot_path: {snapshot_path}")
33
+
34
  contents = os.listdir(source_file)
35
  print("Contents of snapshot path:")
36
  print(contents)
 
39
  for file_name in contents:
40
  print("Checking file: ", file_name)
41
  if file_name.endswith(".csv"):
42
+ source_file_path = os.path.join(snapshot_path, file_name)
43
  local_file_path = os.path.join(local_dir, file_name)
44
+ shutil.move(source_file_path, local_file_path)
45
 
46
  return local_file_path
47
 
48
  DATASET_ID = "350016z/flores_plus_Taiwanese"
49
  data_path = "test.csv"
50
  current_dir = os.getcwd()
51
+
52
  data_path = download_dataset_file(DATASET_ID, current_dir)
53
  print(f"Data path: {data_path}")
54
 
 
55
  csv_files = [f for f in os.listdir(current_dir) if f.endswith('.csv')]
56
+ print(f"CSV Files: {csv_files}")
57
+
58
 
59
  # data_path = "test.csv"
60
  # current_dir = os.path.dirname(os.path.abspath(data_path))