hxger committed on
Commit
9e598ae
·
verified ·
1 Parent(s): 437ef8f

Update sync_data.sh

Browse files
Files changed (1) hide show
  1. sync_data.sh +48 -58
sync_data.sh CHANGED
@@ -2,24 +2,12 @@
2
 
3
  # 检查环境变量
4
  if [[ -z "$HF_TOKEN" ]] || [[ -z "$DATASET_ID" ]]; then
5
- echo "未启用备份功能 - 缺少HF_TOKEN或DATASET_ID环境变量"
6
  exit 0
7
  fi
8
 
9
- # 创建临时目录
10
- TEMP_DIR="/tmp/sillytavern_backup"
11
- mkdir -p $TEMP_DIR
12
-
13
- # 安装python和huggingface_hub
14
- if ! command -v python3 &> /dev/null; then
15
- echo "正在安装Python..."
16
- apk add --no-cache python3 py3-pip
17
- fi
18
-
19
- if ! python3 -c "import huggingface_hub" &> /dev/null; then
20
- echo "正在安装huggingface_hub..."
21
- pip3 install --no-cache-dir huggingface_hub
22
- fi
23
 
24
  # 上传备份
25
  upload_backup() {
@@ -33,31 +21,31 @@ from huggingface_hub import HfApi
33
  import sys
34
  import os
35
  def manage_backups(api, repo_id, max_files=10):
36
- files = api.list_repo_files(repo_id=repo_id, repo_type='dataset')
37
- backup_files = [f for f in files if f.startswith('sillytavern_backup_') and f.endswith('.tar.gz')]
38
  backup_files.sort()
39
 
40
  if len(backup_files) >= max_files:
41
  files_to_delete = backup_files[:(len(backup_files) - max_files + 1)]
42
  for file_to_delete in files_to_delete:
43
  try:
44
- api.delete_file(path_in_repo=file_to_delete, repo_id=repo_id, repo_type='dataset')
45
- print(f'已删除旧备份: {file_to_delete}')
46
  except Exception as e:
47
- print(f'删除 {file_to_delete} 时出错: {str(e)}')
48
- api = HfApi(token='$token')
49
  try:
50
  api.upload_file(
51
- path_or_fileobj='$file_path',
52
- path_in_repo='$file_name',
53
- repo_id='$repo_id',
54
- repo_type='dataset'
55
  )
56
- print(f'成功上传 $file_name')
57
 
58
- manage_backups(api, '$repo_id')
59
  except Exception as e:
60
- print(f'上传文件时出错: {str(e)}')
61
  "
62
  }
63
 
@@ -65,10 +53,6 @@ except Exception as e:
65
  download_latest_backup() {
66
  token="$HF_TOKEN"
67
  repo_id="$DATASET_ID"
68
- data_dir="/home/node/app/data"
69
-
70
- # 确保数据目录存在
71
- mkdir -p $data_dir
72
 
73
  python3 -c "
74
  from huggingface_hub import HfApi
@@ -76,65 +60,71 @@ import sys
76
  import os
77
  import tarfile
78
  import tempfile
79
- api = HfApi(token='$token')
80
  try:
81
- files = api.list_repo_files(repo_id='$repo_id', repo_type='dataset')
82
- backup_files = [f for f in files if f.startswith('sillytavern_backup_') and f.endswith('.tar.gz')]
83
 
84
  if not backup_files:
85
- print('未找到备份文件')
86
  sys.exit()
87
 
88
  latest_backup = sorted(backup_files)[-1]
89
 
90
  with tempfile.TemporaryDirectory() as temp_dir:
91
  filepath = api.hf_hub_download(
92
- repo_id='$repo_id',
93
  filename=latest_backup,
94
- repo_type='dataset',
95
  local_dir=temp_dir
96
  )
97
 
98
  if filepath and os.path.exists(filepath):
99
- with tarfile.open(filepath, 'r:gz') as tar:
100
- tar.extractall('$data_dir')
101
- print(f'成功从 {latest_backup} 恢复备份')
102
 
103
  except Exception as e:
104
- print(f'下载备份时出错: {str(e)}')
105
  "
106
  }
107
 
108
- # 首次启动时下载最新备份
109
- echo "正在从HuggingFace下载最新备份..."
110
- download_latest_backup
111
-
112
- # 同步函数
113
  sync_data() {
114
  while true; do
115
- echo "开始同步进程,时间: $(date)"
116
 
117
- data_dir="/home/node/app/data"
118
- if [ -d "$data_dir" ]; then
119
  timestamp=$(date +%Y%m%d_%H%M%S)
120
- backup_file="sillytavern_backup_${timestamp}.tar.gz"
121
 
122
  # 压缩数据目录
123
- tar -czf "${TEMP_DIR}/${backup_file}" -C "$data_dir" .
124
 
125
  echo "正在上传备份到HuggingFace..."
126
- upload_backup "${TEMP_DIR}/${backup_file}" "${backup_file}"
127
 
128
- rm -f "${TEMP_DIR}/${backup_file}"
129
  else
130
- echo "数据目录尚不存在,等待下次同步..."
131
  fi
132
 
133
- SYNC_INTERVAL=${SYNC_INTERVAL:-3600}
134
  echo "下次同步将在 ${SYNC_INTERVAL} 秒后进行..."
135
  sleep $SYNC_INTERVAL
136
  done
137
  }
138
 
139
- # 启动同步进程
140
- sync_data
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
  # 检查环境变量
4
  if [[ -z "$HF_TOKEN" ]] || [[ -z "$DATASET_ID" ]]; then
5
+ echo "缺少HF_TOKEN或DATASET_ID环境变量,无法启用备份功能"
6
  exit 0
7
  fi
8
 
9
+ # 激活虚拟环境
10
+ source /opt/venv/bin/activate
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
  # 上传备份
13
  upload_backup() {
 
21
  import sys
22
  import os
23
  def manage_backups(api, repo_id, max_files=10):
24
+ files = api.list_repo_files(repo_id=repo_id, repo_type=\"dataset\")
25
+ backup_files = [f for f in files if f.startswith(\"linkwarden_backup_\") and f.endswith(\".tar.gz\")]
26
  backup_files.sort()
27
 
28
  if len(backup_files) >= max_files:
29
  files_to_delete = backup_files[:(len(backup_files) - max_files + 1)]
30
  for file_to_delete in files_to_delete:
31
  try:
32
+ api.delete_file(path_in_repo=file_to_delete, repo_id=repo_id, repo_type=\"dataset\")
33
+ print(f\"已删除旧备份: {file_to_delete}\")
34
  except Exception as e:
35
+ print(f\"删除 {file_to_delete} 时出错: {str(e)}\")
36
+ api = HfApi(token=\"$token\")
37
  try:
38
  api.upload_file(
39
+ path_or_fileobj=\"$file_path\",
40
+ path_in_repo=\"$file_name\",
41
+ repo_id=\"$repo_id\",
42
+ repo_type=\"dataset\"
43
  )
44
+ print(f\"成功上传 $file_name\")
45
 
46
+ manage_backups(api, \"$repo_id\")
47
  except Exception as e:
48
+ print(f\"上传文件时出错: {str(e)}\")
49
  "
50
  }
51
 
 
53
  download_latest_backup() {
54
  token="$HF_TOKEN"
55
  repo_id="$DATASET_ID"
 
 
 
 
56
 
57
  python3 -c "
58
  from huggingface_hub import HfApi
 
60
  import os
61
  import tarfile
62
  import tempfile
63
+ api = HfApi(token=\"$token\")
64
  try:
65
+ files = api.list_repo_files(repo_id=\"$repo_id\", repo_type=\"dataset\")
66
+ backup_files = [f for f in files if f.startswith(\"linkwarden_backup_\") and f.endswith(\".tar.gz\")]
67
 
68
  if not backup_files:
69
+ print(\"未找到备份文件\")
70
  sys.exit()
71
 
72
  latest_backup = sorted(backup_files)[-1]
73
 
74
  with tempfile.TemporaryDirectory() as temp_dir:
75
  filepath = api.hf_hub_download(
76
+ repo_id=\"$repo_id\",
77
  filename=latest_backup,
78
+ repo_type=\"dataset\",
79
  local_dir=temp_dir
80
  )
81
 
82
  if filepath and os.path.exists(filepath):
83
+ with tarfile.open(filepath, \"r:gz\") as tar:
84
+ tar.extractall(\"/app/data\")
85
+ print(f\"成功从 {latest_backup} 恢复备份\")
86
 
87
  except Exception as e:
88
+ print(f\"下载备份时出错: {str(e)}\")
89
  "
90
  }
91
 
92
+ # 同步数据
 
 
 
 
93
  sync_data() {
94
  while true; do
95
+ echo "开始同步过程,时间: $(date)"
96
 
97
+ if [ -d /app/data ]; then
 
98
  timestamp=$(date +%Y%m%d_%H%M%S)
99
+ backup_file="linkwarden_backup_${timestamp}.tar.gz"
100
 
101
  # 压缩数据目录
102
+ tar -czf "/tmp/${backup_file}" -C /app/data .
103
 
104
  echo "正在上传备份到HuggingFace..."
105
+ upload_backup "/tmp/${backup_file}" "${backup_file}"
106
 
107
+ rm -f "/tmp/${backup_file}"
108
  else
109
+ echo "数据目录不存在,等待下次同步..."
110
  fi
111
 
112
+ SYNC_INTERVAL=${SYNC_INTERVAL:-7200}
113
  echo "下次同步将在 ${SYNC_INTERVAL} 秒后进行..."
114
  sleep $SYNC_INTERVAL
115
  done
116
  }
117
 
118
+ # 根据命令行参数执行不同操作
119
+ case "$1" in
120
+ download)
121
+ download_latest_backup
122
+ ;;
123
+ sync)
124
+ sync_data
125
+ ;;
126
+ *)
127
+ echo "用法: $0 {download|sync}"
128
+ exit 1
129
+ ;;
130
+ esac