CloudVaultclass

Sleeping

App Files Community

lexlepty committed on Jan 3

Commit

533f9c6

verified ·

1 Parent(s): 3a72bb2

Update app.py

Browse files

Files changed (1) hide show

app.py +40 -19

app.py CHANGED Viewed

@@ -124,35 +124,56 @@ def download_file(filepath):
     try:
         url = f"https://{Config.PROXY_DOMAIN}/datasets/{Config.HF_DATASET_ID}/resolve/{Config.HF_BRANCH}/{filepath}"
-        response = requests.get(
-            url,
             headers={'Authorization': f'Bearer {Config.HF_TOKEN}'},
-            stream=True
         )
-        if response.ok:
-            filename = os.path.basename(filepath)
-            encoded_filename = urllib.parse.quote(filename.encode('utf-8'))
-            # Explicit content type handling for TXT files
-            content_type = response.headers.get('content-type')
-            if filename.lower().endswith('.txt'):
-                content_type = 'text/plain; charset=utf-8'
-            return Response(
-                response.iter_content(chunk_size=1048576),
-                headers={
                     'Content-Disposition': f'attachment; filename*=UTF-8\'\'{encoded_filename}',
-                    'Content-Type': content_type or 'application/octet-stream',
-                    'Content-Length': response.headers.get('content-length'),
-                    'Accept-Ranges': 'bytes'
                 }
-            )
         return jsonify({'error': 'File not found'}), 404
     except Exception as e:
-        # Add logging for better error tracking
         print(f"Download error for {filepath}: {str(e)}")
         return jsonify({'error': str(e)}), 500

     try:
         url = f"https://{Config.PROXY_DOMAIN}/datasets/{Config.HF_DATASET_ID}/resolve/{Config.HF_BRANCH}/{filepath}"
+        # 先发送 HEAD 请求获取文件信息
+        head_response = requests.head(
+            url,
             headers={'Authorization': f'Bearer {Config.HF_TOKEN}'},
+            allow_redirects=True
         )
+        if head_response.ok:
+            # 获取文件基本信息
+            content_type = head_response.headers.get('content-type', 'application/octet-stream')
+            content_length = head_response.headers.get('content-length')
+            last_modified = head_response.headers.get('last-modified')
+            etag = head_response.headers.get('etag')
+            # 如果是txt文件但没有指定字符集,设置为text/plain
+            if filepath.lower().endswith('.txt') and 'charset' not in content_type:
+                content_type = 'text/plain'
+            # 获取文件内容
+            response = requests.get(
+                url,
+                headers={'Authorization': f'Bearer {Config.HF_TOKEN}'},
+                stream=True
+            )
+            if response.ok:
+                filename = os.path.basename(filepath)
+                encoded_filename = urllib.parse.quote(filename.encode('utf-8'))
+                headers = {
                     'Content-Disposition': f'attachment; filename*=UTF-8\'\'{encoded_filename}',
+                    'Content-Type': content_type,
+                    'Content-Length': content_length,
+                    'Accept-Ranges': 'bytes',
+                    'Cache-Control': 'no-cache',
+                    'Last-Modified': last_modified,
+                    'ETag': etag
                 }
+                # 移除为None的header
+                headers = {k: v for k, v in headers.items() if v is not None}
+                return Response(
+                    response.iter_content(chunk_size=1048576),
+                    headers=headers
+                )
         return jsonify({'error': 'File not found'}), 404
     except Exception as e:
         print(f"Download error for {filepath}: {str(e)}")
         return jsonify({'error': str(e)}), 500