hxger committed on
Commit
d74c57a
·
verified ·
1 Parent(s): 9e598ae

Update sync_data.sh

Browse files
Files changed (1) hide show
  1. sync_data.sh +58 -57
sync_data.sh CHANGED
@@ -2,129 +2,130 @@
2
 
3
  # 检查环境变量
4
  if [[ -z "$HF_TOKEN" ]] || [[ -z "$DATASET_ID" ]]; then
5
- echo "缺少HF_TOKEN或DATASET_ID环境变量,无法启用备份功能"
6
  exit 0
7
  fi
8
 
9
- # 激活虚拟环境
10
- source /opt/venv/bin/activate
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
  # 上传备份
13
  upload_backup() {
14
- file_path="$1"
15
- file_name="$2"
16
- token="$HF_TOKEN"
17
- repo_id="$DATASET_ID"
18
-
19
- python3 -c "
20
- from huggingface_hub import HfApi
21
  import sys
22
  import os
23
  def manage_backups(api, repo_id, max_files=10):
24
- files = api.list_repo_files(repo_id=repo_id, repo_type=\"dataset\")
25
- backup_files = [f for f in files if f.startswith(\"linkwarden_backup_\") and f.endswith(\".tar.gz\")]
26
  backup_files.sort()
27
 
28
  if len(backup_files) >= max_files:
29
  files_to_delete = backup_files[:(len(backup_files) - max_files + 1)]
30
  for file_to_delete in files_to_delete:
31
  try:
32
- api.delete_file(path_in_repo=file_to_delete, repo_id=repo_id, repo_type=\"dataset\")
33
- print(f\"已删除旧备份: {file_to_delete}\")
34
  except Exception as e:
35
- print(f\"删除 {file_to_delete} 时出错: {str(e)}\")
36
- api = HfApi(token=\"$token\")
37
  try:
38
  api.upload_file(
39
- path_or_fileobj=\"$file_path\",
40
- path_in_repo=\"$file_name\",
41
- repo_id=\"$repo_id\",
42
- repo_type=\"dataset\"
43
  )
44
- print(f\"成功上传 $file_name\")
45
 
46
- manage_backups(api, \"$repo_id\")
47
  except Exception as e:
48
- print(f\"上传文件时出错: {str(e)}\")
49
  "
50
  }
51
 
52
#######################################
# Download the newest backup archive from the Hugging Face dataset repo and
# unpack it into /app/data.
# Globals:   HF_TOKEN (read), DATASET_ID (read)
# Arguments: none
# Outputs:   progress and error messages on stdout
# Returns:   exit status of the Python helper (errors are reported, not raised)
#######################################
download_latest_backup() {
  local data_dir="/app/data"

  # Credentials travel through the environment and the target directory
  # through argv, so no value is pasted into the Python source text.
  HF_TOKEN="$HF_TOKEN" DATASET_ID="$DATASET_ID" \
    python3 -c '
import os
import sys
import tarfile
import tempfile
from huggingface_hub import HfApi

data_dir = sys.argv[1]
repo_id = os.environ["DATASET_ID"]
api = HfApi(token=os.environ["HF_TOKEN"])
try:
    files = api.list_repo_files(repo_id=repo_id, repo_type="dataset")
    backup_files = [
        f for f in files
        if f.startswith("linkwarden_backup_") and f.endswith(".tar.gz")
    ]

    if not backup_files:
        print("未找到备份文件")
        sys.exit()

    # Names embed a sortable timestamp, so the last one is the newest.
    latest_backup = sorted(backup_files)[-1]

    with tempfile.TemporaryDirectory() as temp_dir:
        filepath = api.hf_hub_download(
            repo_id=repo_id,
            filename=latest_backup,
            repo_type="dataset",
            local_dir=temp_dir,
        )

        if filepath and os.path.exists(filepath):
            # NOTE(review): extractall trusts member paths in the archive;
            # consider filter="data" where the tarfile filter argument exists.
            with tarfile.open(filepath, "r:gz") as tar:
                tar.extractall(data_dir)
            print(f"成功从 {latest_backup} 恢复备份")

except Exception as e:
    print(f"下载备份时出错: {str(e)}")
' "$data_dir"
}
91
 
92
#######################################
# Loop forever: archive /app/data, upload the archive, then sleep.
# Globals:   SYNC_INTERVAL (read; set to 7200 seconds when unset or empty)
# Arguments: none
# Outputs:   progress messages on stdout, archive failures on stderr
# Returns:   never returns (infinite loop)
#######################################
sync_data() {
  local data_dir="/app/data"
  local timestamp backup_file archive

  while true; do
    echo "开始同步过程,时间: $(date)"

    if [[ -d "$data_dir" ]]; then
      timestamp=$(date +%Y%m%d_%H%M%S)
      backup_file="linkwarden_backup_${timestamp}.tar.gz"
      archive="/tmp/${backup_file}"

      # Compress the data directory.
      tar -czf "$archive" -C "$data_dir" .

      # Only hand a real archive to the uploader; a missing or empty file
      # means tar could not produce anything usable.
      if [[ -s "$archive" ]]; then
        echo "正在上传备份到HuggingFace..."
        upload_backup "$archive" "$backup_file"
      else
        echo "创建备份压缩包失败,跳过本次上传" >&2
      fi

      rm -f -- "$archive"
    else
      echo "数据目录不存在,等待下次同步..."
    fi

    SYNC_INTERVAL=${SYNC_INTERVAL:-7200}
    echo "下次同步将在 ${SYNC_INTERVAL} 秒后进行..."
    sleep "$SYNC_INTERVAL"
  done
}
117
 
118
# Pick the action from the first command-line argument; anything other than
# "download" or "sync" prints the usage line and exits with status 1.
if [[ "$1" == "download" ]]; then
  download_latest_backup
elif [[ "$1" == "sync" ]]; then
  sync_data
else
  echo "用法: $0 {download|sync}"
  exit 1
fi
 
2
 
3
  # 检查环境变量
4
  if [[ -z "$HF_TOKEN" ]] || [[ -z "$DATASET_ID" ]]; then
5
+ echo "未启用备份功能 - 缺少HF_TOKEN或DATASET_ID环境变量"
6
  exit 0
7
  fi
8
 
9
+ # 创建临时目录
10
+ TEMP_DIR="/tmp/sillytavern_backup"
11
+ mkdir -p $TEMP_DIR
12
+
13
+ # 安装python和huggingface_hub
14
+ if ! command -v python3 &> /dev/null; then
15
+ echo "正在安装Python..."
16
+ apk add --no-cache python3 py3-pip
17
+ fi
18
+
19
+ if ! python3 -c "import huggingface_hub" &> /dev/null; then
20
+ echo "正在安装huggingface_hub..."
21
+ pip3 install --no-cache-dir huggingface_hub
22
+ fi
23
 
24
  # 上传备份
25
  upload_backup() {
 
 
 
 
 
 
 
26
  import sys
27
  import os
28
  def manage_backups(api, repo_id, max_files=10):
29
+ files = api.list_repo_files(repo_id=repo_id, repo_type='dataset')
30
+ backup_files = [f for f in files if f.startswith('sillytavern_backup_') and f.endswith('.tar.gz')]
31
  backup_files.sort()
32
 
33
  if len(backup_files) >= max_files:
34
  files_to_delete = backup_files[:(len(backup_files) - max_files + 1)]
35
  for file_to_delete in files_to_delete:
36
  try:
37
+ api.delete_file(path_in_repo=file_to_delete, repo_id=repo_id, repo_type='dataset')
38
+ print(f'已删除旧备份: {file_to_delete}')
39
  except Exception as e:
40
+ print(f'删除 {file_to_delete} 时出错: {str(e)}')
41
+ api = HfApi(token='$token')
42
  try:
43
  api.upload_file(
44
+ path_or_fileobj='$file_path',
45
+ path_in_repo='$file_name',
46
+ repo_id='$repo_id',
47
+ repo_type='dataset'
48
  )
49
+ print(f'成功上传 $file_name')
50
 
51
+ manage_backups(api, '$repo_id')
52
  except Exception as e:
53
+ print(f'上传文件时出错: {str(e)}')
54
  "
55
  }
56
 
 
57
  download_latest_backup() {
58
  token="$HF_TOKEN"
59
  repo_id="$DATASET_ID"
60
+ data_dir="/home/node/app/data"
61
+
62
+ # 确保数据目录存在
63
+ mkdir -p $data_dir
64
 
65
  python3 -c "
66
  from huggingface_hub import HfApi
 
67
  import os
68
  import tarfile
69
  import tempfile
70
+ api = HfApi(token='$token')
71
  try:
72
+ files = api.list_repo_files(repo_id='$repo_id', repo_type='dataset')
73
+ backup_files = [f for f in files if f.startswith('sillytavern_backup_') and f.endswith('.tar.gz')]
74
 
75
  if not backup_files:
76
+ print('未找到备份文件')
77
  sys.exit()
78
 
79
  latest_backup = sorted(backup_files)[-1]
80
 
81
  with tempfile.TemporaryDirectory() as temp_dir:
82
  filepath = api.hf_hub_download(
83
+ repo_id='$repo_id',
84
  filename=latest_backup,
85
+ repo_type='dataset',
86
  local_dir=temp_dir
87
  )
88
 
89
  if filepath and os.path.exists(filepath):
90
+ with tarfile.open(filepath, 'r:gz') as tar:
91
+ tar.extractall('$data_dir')
92
+ print(f'成功从 {latest_backup} 恢复备份')
93
 
94
  except Exception as e:
95
+ print(f'下载备份时出错: {str(e)}')
96
  "
97
  }
98
 
99
# On first start, restore the newest backup before any syncing begins.
printf '%s\n' "正在从HuggingFace下载最新备份..."
download_latest_backup
102
+
103
#######################################
# Loop forever: archive /home/node/app/data, upload the archive, then sleep.
# Globals:   TEMP_DIR (read; staging directory, /tmp when unset or empty)
#            SYNC_INTERVAL (read; set to 3600 seconds when unset or empty)
# Arguments: none
# Outputs:   progress messages on stdout, archive failures on stderr
# Returns:   never returns (infinite loop)
#######################################
sync_data() {
  local data_dir="/home/node/app/data"
  local staging_dir="${TEMP_DIR:-/tmp}"
  local timestamp backup_file archive

  while true; do
    echo "开始同步进程,时间: $(date)"

    if [[ -d "$data_dir" ]]; then
      timestamp=$(date +%Y%m%d_%H%M%S)
      backup_file="sillytavern_backup_${timestamp}.tar.gz"
      archive="${staging_dir}/${backup_file}"

      # Compress the data directory.
      tar -czf "$archive" -C "$data_dir" .

      # Only hand a real archive to the uploader; a missing or empty file
      # means tar could not produce anything usable.
      if [[ -s "$archive" ]]; then
        echo "正在上传备份到HuggingFace..."
        upload_backup "$archive" "$backup_file"
      else
        echo "创建备份压缩包失败,跳过本次上传" >&2
      fi

      rm -f -- "$archive"
    else
      echo "数据目录尚不存在,等待下次同步..."
    fi

    SYNC_INTERVAL=${SYNC_INTERVAL:-3600}
    echo "下次同步将在 ${SYNC_INTERVAL} 秒后进行..."
    sleep "$SYNC_INTERVAL"
  done
}
129
 
130
# Hand control to the periodic backup loop.
sync_data