lexlepty committed on
Commit
533f9c6
·
verified ·
1 Parent(s): 3a72bb2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -19
app.py CHANGED
@@ -124,35 +124,56 @@ def download_file(filepath):
124
  try:
125
  url = f"https://{Config.PROXY_DOMAIN}/datasets/{Config.HF_DATASET_ID}/resolve/{Config.HF_BRANCH}/{filepath}"
126
 
127
- response = requests.get(
128
- url,
 
129
  headers={'Authorization': f'Bearer {Config.HF_TOKEN}'},
130
- stream=True
131
  )
132
 
133
- if response.ok:
134
- filename = os.path.basename(filepath)
135
- encoded_filename = urllib.parse.quote(filename.encode('utf-8'))
 
 
 
136
 
137
- # Explicit content type handling for TXT files
138
- content_type = response.headers.get('content-type')
139
- if filename.lower().endswith('.txt'):
140
- content_type = 'text/plain; charset=utf-8'
 
 
 
 
 
 
141
 
142
- return Response(
143
- response.iter_content(chunk_size=1048576),
144
- headers={
 
 
145
  'Content-Disposition': f'attachment; filename*=UTF-8\'\'{encoded_filename}',
146
- 'Content-Type': content_type or 'application/octet-stream',
147
- 'Content-Length': response.headers.get('content-length'),
148
- 'Accept-Ranges': 'bytes'
 
 
 
149
  }
150
- )
151
-
 
 
 
 
 
 
 
152
  return jsonify({'error': 'File not found'}), 404
153
 
154
  except Exception as e:
155
- # Add logging for better error tracking
156
  print(f"Download error for {filepath}: {str(e)}")
157
  return jsonify({'error': str(e)}), 500
158
 
 
124
  try:
125
  url = f"https://{Config.PROXY_DOMAIN}/datasets/{Config.HF_DATASET_ID}/resolve/{Config.HF_BRANCH}/{filepath}"
126
 
127
+ # 先发送 HEAD 请求获取文件信息
128
+ head_response = requests.head(
129
+ url,
130
  headers={'Authorization': f'Bearer {Config.HF_TOKEN}'},
131
+ allow_redirects=True
132
  )
133
 
134
+ if head_response.ok:
135
+ # 获取文件基本信息
136
+ content_type = head_response.headers.get('content-type', 'application/octet-stream')
137
+ content_length = head_response.headers.get('content-length')
138
+ last_modified = head_response.headers.get('last-modified')
139
+ etag = head_response.headers.get('etag')
140
 
141
+ # 如果是txt文件但没有指定字符集,设置为text/plain
142
+ if filepath.lower().endswith('.txt') and 'charset' not in content_type:
143
+ content_type = 'text/plain'
144
+
145
+ # 获取文件内容
146
+ response = requests.get(
147
+ url,
148
+ headers={'Authorization': f'Bearer {Config.HF_TOKEN}'},
149
+ stream=True
150
+ )
151
 
152
+ if response.ok:
153
+ filename = os.path.basename(filepath)
154
+ encoded_filename = urllib.parse.quote(filename.encode('utf-8'))
155
+
156
+ headers = {
157
  'Content-Disposition': f'attachment; filename*=UTF-8\'\'{encoded_filename}',
158
+ 'Content-Type': content_type,
159
+ 'Content-Length': content_length,
160
+ 'Accept-Ranges': 'bytes',
161
+ 'Cache-Control': 'no-cache',
162
+ 'Last-Modified': last_modified,
163
+ 'ETag': etag
164
  }
165
+
166
+ # 移除为None的header
167
+ headers = {k: v for k, v in headers.items() if v is not None}
168
+
169
+ return Response(
170
+ response.iter_content(chunk_size=1048576),
171
+ headers=headers
172
+ )
173
+
174
  return jsonify({'error': 'File not found'}), 404
175
 
176
  except Exception as e:
 
177
  print(f"Download error for {filepath}: {str(e)}")
178
  return jsonify({'error': str(e)}), 500
179