dragg2 committed on
Commit
150a1f4
·
verified ·
1 Parent(s): 657589a

Create sync_data.sh

Browse files
Files changed (1) hide show
  1. sync_data.sh +91 -0
sync_data.sh ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash

# Backups need both the Hugging Face token and the target dataset id.
# When both are present, expose the token through the environment so that no
# interactive login is required; otherwise hand control straight to the
# application and skip all backup functionality.
if [[ -n "${HF_TOKEN}" && -n "${DATASET_ID}" ]]; then
  export HUGGING_FACE_HUB_TOKEN="${HF_TOKEN}"
else
  echo "Starting without backup functionality - missing HF_TOKEN or DATASET_ID"
  exec python main.py
fi
11
+
12
#######################################
# Periodically archive /app/data and upload the archive to the Hugging Face
# dataset repository. Loops forever; meant to be started as a background job.
# Globals:
#   DATASET_ID     (read) id of the dataset repository receiving the backups
#   SYNC_INTERVAL  (read) seconds to wait between runs; defaults to 7200
# Arguments:
#   None
# Outputs:
#   Progress and failure messages on stdout.
# Returns:
#   Does not return under normal operation.
#######################################
sync_data() {
  local timestamp backup_file work_dir archive tar_status interval

  while true; do
    echo "Starting sync process at $(date)"

    timestamp=$(date +%Y%m%d_%H%M%S)
    backup_file="backup_${timestamp}.tar.gz"

    # Use a private scratch directory instead of a predictable path in /tmp.
    if work_dir=$(mktemp -d); then
      archive="${work_dir}/${backup_file}"

      # -C archives /app/data without changing this process's working
      # directory, and without silently archiving the wrong directory when
      # /app is missing.
      tar_status=0
      tar -czf "${archive}" -C /app data/ || tar_status=$?

      # GNU tar exits with 1 when a file changed while it was being read; a
      # log file inside data/ that is still being appended to can trigger
      # that, and the archive is still usable. Anything above 1, or an
      # empty/missing archive, is a real failure: skip the upload.
      if (( tar_status > 1 )) || [[ ! -s "${archive}" ]]; then
        echo "Backup failed: could not create archive (tar exit status ${tar_status})"
      else
        # Values are passed as arguments rather than spliced into the Python
        # source, so quotes in DATASET_ID cannot break or alter the script.
        python3 - "${archive}" "${backup_file}" "${DATASET_ID}" <<'PY'
import sys

archive_path, name_in_repo, dataset_id = sys.argv[1:4]
try:
    from huggingface_hub import HfApi

    HfApi().upload_file(
        path_or_fileobj=archive_path,
        path_in_repo=name_in_repo,
        repo_id=dataset_id,
        repo_type="dataset",
    )
    print("Backup completed successfully")
except Exception as e:
    # Best effort: report the failure and let the next cycle retry.
    print(f"Backup failed: {e}")
PY
      fi

      # Remove the temporary archive together with its scratch directory.
      rm -rf -- "${work_dir}"
    else
      echo "Backup failed: could not create temporary directory"
    fi

    # Wait for the configured interval before the next run.
    interval=${SYNC_INTERVAL:-7200}
    echo "Next sync in ${interval} seconds..."
    sleep "${interval}"
  done
}
50
+
51
#######################################
# Download the newest backup_*.tar.gz from the Hugging Face dataset
# repository and extract it into /app.
# Globals:
#   DATASET_ID  (read) id of the dataset repository holding the backups
# Arguments:
#   None
# Outputs:
#   Status messages on stdout ("Restored from ...", "No backup found",
#   "Restore failed: ...").
# Returns:
#   Exit status of the python3 invocation; restore errors are reported as a
#   message and do not produce a non-zero status.
#######################################
restore_latest() {
  echo "Attempting to restore latest backup..."

  # The dataset id is passed as an argument rather than spliced into the
  # Python source, so quotes in its value cannot break or alter the script.
  python3 - "${DATASET_ID}" <<'PY'
import os
import subprocess
import sys

dataset_id = sys.argv[1]
try:
    from huggingface_hub import HfApi

    api = HfApi()
    files = api.list_repo_files(dataset_id, repo_type="dataset")
    backup_files = [
        f for f in files if f.startswith("backup_") and f.endswith(".tar.gz")
    ]

    if backup_files:
        # Names embed a zero-padded YYYYmmdd_HHMMSS timestamp, so the
        # lexicographic maximum is the most recent backup.
        latest = max(backup_files)
        local_path = api.hf_hub_download(
            repo_id=dataset_id,
            filename=latest,
            repo_type="dataset",
            local_dir="/tmp",
        )
        try:
            # Argument list instead of a shell string: the file name comes
            # from the remote listing and must not be interpreted by a shell.
            # check=True turns a failed extraction into "Restore failed".
            subprocess.run(["tar", "-xzf", local_path, "-C", "/app"], check=True)
        finally:
            os.remove(local_path)
        print(f"Restored from {latest}")
    else:
        print("No backup found")
except Exception as e:
    # Best effort: the application still starts without restored data.
    print(f"Restore failed: {e}")
PY
}
80
+
81
# Main program.

# Try to restore before the logger is attached: extraction may replace
# /app/data/backup.log, which would leave an already-running tee appending to
# a file that is no longer visible. The captured messages are replayed into
# the log below.
restore_output=$(restore_latest 2>&1)

# tee cannot open the log if the data directory does not exist yet (for
# example on a first start with nothing to restore).
mkdir -p /app/data

# Mirror all further stdout/stderr into the log via process substitution
# instead of wrapping everything in a pipeline. With a pipeline the background
# sync job keeps the pipe open, so tee never sees EOF and the script hangs
# after the application exits; the application's exit status is also replaced
# by tee's. Here the final exec replaces this shell, so the application
# receives signals directly and its exit status is the script's exit status.
exec > >(tee -a /app/data/backup.log) 2>&1
printf '%s\n' "${restore_output}"

# Start the periodic sync in the background.
sync_data &

# Start the main application.
exec python main.py