Spaces:

broadfield-dev
/

repo_to_md

Running

App Files Community

broadfield-dev committed on Feb 26

Commit

0a9dfc8

verified ·

1 Parent(s): 398bf5b

Update app.py

Browse files

Files changed (1) hide show

app.py +20 -8

app.py CHANGED Viewed

@@ -80,14 +80,24 @@ def get_hf_files(repo, name, path=""):
                     processed_files.extend(get_hf_files(repo, name, dir_part))
                     continue
-            # Fetch raw file content
             raw_url = f"https://huggingface.co/spaces/{repo}/{name}/raw/main/{file_path}"
-            response = requests.get(raw_url, timeout=10)
-            response.raise_for_status()
-            # Ensure we get raw content, not HTML
-            if response.headers.get('Content-Type', '').startswith('text/html'):
-                print(f"Received HTML instead of raw content for {file_path}: {response.text[:100]}...")
                 continue
             # Process file
@@ -147,9 +157,11 @@ def process_file_content(file_info, owner, repo, is_hf=False):
             response = requests.get(file_url, timeout=10)
             response.raise_for_status()
-            # Ensure we get raw content, not HTML
             if response.headers.get('Content-Type', '').startswith('text/html'):
                 raise Exception(f"Received HTML instead of raw content for {file_path}: {response.text[:100]}...")
             content_raw = response.content
             size = len(content_raw)

                     processed_files.extend(get_hf_files(repo, name, dir_part))
                     continue
+            # Fetch raw file content with authentication if needed (optional token)
             raw_url = f"https://huggingface.co/spaces/{repo}/{name}/raw/main/{file_path}"
+            try:
+                response = requests.get(raw_url, timeout=10)
+                response.raise_for_status()
+                # Ensure we get raw content, not HTML
+                if response.headers.get('Content-Type', '').startswith('text/html'):
+                    print(f"Warning: Received HTML instead of raw content for {file_path}: {response.text[:100]}...")
+                    continue
+                # Check if the response is a valid file (non-HTML, non-JSON)
+                if not response.headers.get('Content-Type', '').startswith(('text/plain', 'application/octet-stream', 'text/')):
+                    print(f"Unexpected content type for {file_path}: {response.headers.get('Content-Type', '')}")
+                    continue
+            except requests.exceptions.RequestException as e:
+                print(f"Error downloading {file_path} from {raw_url}: {str(e)}")
                 continue
             # Process file
             response = requests.get(file_url, timeout=10)
             response.raise_for_status()
+            # Ensure we get raw content, not HTML or JSON
             if response.headers.get('Content-Type', '').startswith('text/html'):
                 raise Exception(f"Received HTML instead of raw content for {file_path}: {response.text[:100]}...")
+            if response.headers.get('Content-Type', '').startswith('application/json'):
+                raise Exception(f"Received JSON instead of raw content for {file_path}: {response.text[:100]}...")
             content_raw = response.content
             size = len(content_raw)