broadfield-dev committed on
Commit
0a9dfc8
·
verified ·
1 Parent(s): 398bf5b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -8
app.py CHANGED
@@ -80,14 +80,24 @@ def get_hf_files(repo, name, path=""):
80
  processed_files.extend(get_hf_files(repo, name, dir_part))
81
  continue
82
 
83
- # Fetch raw file content
84
  raw_url = f"https://huggingface.co/spaces/{repo}/{name}/raw/main/{file_path}"
85
- response = requests.get(raw_url, timeout=10)
86
- response.raise_for_status()
87
-
88
- # Ensure we get raw content, not HTML
89
- if response.headers.get('Content-Type', '').startswith('text/html'):
90
- print(f"Received HTML instead of raw content for {file_path}: {response.text[:100]}...")
 
 
 
 
 
 
 
 
 
 
91
  continue
92
 
93
  # Process file
@@ -147,9 +157,11 @@ def process_file_content(file_info, owner, repo, is_hf=False):
147
  response = requests.get(file_url, timeout=10)
148
  response.raise_for_status()
149
 
150
- # Ensure we get raw content, not HTML
151
  if response.headers.get('Content-Type', '').startswith('text/html'):
152
  raise Exception(f"Received HTML instead of raw content for {file_path}: {response.text[:100]}...")
 
 
153
 
154
  content_raw = response.content
155
  size = len(content_raw)
 
80
  processed_files.extend(get_hf_files(repo, name, dir_part))
81
  continue
82
 
83
+ # Fetch raw file content (plain GET; no authentication token is sent)
84
  raw_url = f"https://huggingface.co/spaces/{repo}/{name}/raw/main/{file_path}"
85
+ try:
86
+ response = requests.get(raw_url, timeout=10)
87
+ response.raise_for_status()
88
+
89
+ # Ensure we get raw content, not HTML
90
+ if response.headers.get('Content-Type', '').startswith('text/html'):
91
+ print(f"Warning: Received HTML instead of raw content for {file_path}: {response.text[:100]}...")
92
+ continue
93
+
94
+ # Check if the response is a valid file (non-HTML, non-JSON)
95
+ if not response.headers.get('Content-Type', '').startswith(('text/plain', 'application/octet-stream', 'text/')):
96
+ print(f"Unexpected content type for {file_path}: {response.headers.get('Content-Type', '')}")
97
+ continue
98
+
99
+ except requests.exceptions.RequestException as e:
100
+ print(f"Error downloading {file_path} from {raw_url}: {str(e)}")
101
  continue
102
 
103
  # Process file
 
157
  response = requests.get(file_url, timeout=10)
158
  response.raise_for_status()
159
 
160
+ # Ensure we get raw content, not HTML or JSON
161
  if response.headers.get('Content-Type', '').startswith('text/html'):
162
  raise Exception(f"Received HTML instead of raw content for {file_path}: {response.text[:100]}...")
163
+ if response.headers.get('Content-Type', '').startswith('application/json'):
164
+ raise Exception(f"Received JSON instead of raw content for {file_path}: {response.text[:100]}...")
165
 
166
  content_raw = response.content
167
  size = len(content_raw)