Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -80,14 +80,24 @@ def get_hf_files(repo, name, path=""):
|
|
80 |
processed_files.extend(get_hf_files(repo, name, dir_part))
|
81 |
continue
|
82 |
|
83 |
-
# Fetch raw file content
|
84 |
raw_url = f"https://huggingface.co/spaces/{repo}/{name}/raw/main/{file_path}"
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
91 |
continue
|
92 |
|
93 |
# Process file
|
@@ -147,9 +157,11 @@ def process_file_content(file_info, owner, repo, is_hf=False):
|
|
147 |
response = requests.get(file_url, timeout=10)
|
148 |
response.raise_for_status()
|
149 |
|
150 |
-
# Ensure we get raw content, not HTML
|
151 |
if response.headers.get('Content-Type', '').startswith('text/html'):
|
152 |
raise Exception(f"Received HTML instead of raw content for {file_path}: {response.text[:100]}...")
|
|
|
|
|
153 |
|
154 |
content_raw = response.content
|
155 |
size = len(content_raw)
|
|
|
80 |
processed_files.extend(get_hf_files(repo, name, dir_part))
|
81 |
continue
|
82 |
|
83 |
+
# Fetch raw file content with authentication if needed (optional token)
|
84 |
raw_url = f"https://huggingface.co/spaces/{repo}/{name}/raw/main/{file_path}"
|
85 |
+
try:
|
86 |
+
response = requests.get(raw_url, timeout=10)
|
87 |
+
response.raise_for_status()
|
88 |
+
|
89 |
+
# Ensure we get raw content, not HTML
|
90 |
+
if response.headers.get('Content-Type', '').startswith('text/html'):
|
91 |
+
print(f"Warning: Received HTML instead of raw content for {file_path}: {response.text[:100]}...")
|
92 |
+
continue
|
93 |
+
|
94 |
+
# Check if the response is a valid file (non-HTML, non-JSON)
|
95 |
+
if not response.headers.get('Content-Type', '').startswith(('text/plain', 'application/octet-stream', 'text/')):
|
96 |
+
print(f"Unexpected content type for {file_path}: {response.headers.get('Content-Type', '')}")
|
97 |
+
continue
|
98 |
+
|
99 |
+
except requests.exceptions.RequestException as e:
|
100 |
+
print(f"Error downloading {file_path} from {raw_url}: {str(e)}")
|
101 |
continue
|
102 |
|
103 |
# Process file
|
|
|
157 |
response = requests.get(file_url, timeout=10)
|
158 |
response.raise_for_status()
|
159 |
|
160 |
+
# Ensure we get raw content, not HTML or JSON
|
161 |
if response.headers.get('Content-Type', '').startswith('text/html'):
|
162 |
raise Exception(f"Received HTML instead of raw content for {file_path}: {response.text[:100]}...")
|
163 |
+
if response.headers.get('Content-Type', '').startswith('application/json'):
|
164 |
+
raise Exception(f"Received JSON instead of raw content for {file_path}: {response.text[:100]}...")
|
165 |
|
166 |
content_raw = response.content
|
167 |
size = len(content_raw)
|