Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -31,84 +31,92 @@ def generate_file_tree(paths):
|
|
31 |
def get_all_files(owner, repo, path="", is_hf=False):
|
32 |
"""Recursively fetch all files from a repository."""
|
33 |
if is_hf:
|
34 |
-
|
35 |
-
api_url = f"https://huggingface.co/spaces/{owner}/{repo}/tree/main/{path}".rstrip('/')
|
36 |
else:
|
37 |
api_url = f"{GITHUB_API}{owner}/{repo}/contents/{path}".rstrip('/')
|
38 |
|
39 |
try:
|
40 |
-
response = requests.get(api_url)
|
41 |
-
print(response.content)
|
42 |
response = requests.get(api_url, headers={"Accept": "application/json"})
|
43 |
-
print(str(response.json()))
|
44 |
response.raise_for_status()
|
45 |
-
items = response.json()
|
46 |
|
47 |
-
#
|
48 |
-
if
|
49 |
-
|
|
|
|
|
|
|
|
|
50 |
|
51 |
files = []
|
52 |
for item in items:
|
53 |
-
if item
|
54 |
files.append(item)
|
55 |
-
elif item
|
56 |
-
|
57 |
-
|
|
|
58 |
return files
|
59 |
|
60 |
-
except
|
|
|
61 |
return None
|
62 |
|
|
|
63 |
def get_hf_files(repo, name, path=""):
|
64 |
api = HfApi()
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
# Create base directory if it doesn't exist
|
70 |
-
if not os.path.exists(name):
|
71 |
-
os.makedirs(name)
|
72 |
-
|
73 |
-
for file_path in file_list:
|
74 |
-
# Handle nested directories
|
75 |
-
if "/" in file_path:
|
76 |
-
# Split into directory and remainder
|
77 |
-
dir_part, file_part = file_path.split("/", 1)
|
78 |
-
# Ensure directory exists
|
79 |
-
dir_path = os.path.join(name, dir_part)
|
80 |
-
if not os.path.exists(dir_path):
|
81 |
-
os.makedirs(dir_path)
|
82 |
-
# Recursively handle subdirectories if needed
|
83 |
-
if "/" in file_part:
|
84 |
-
processed_files.extend(get_hf_files(repo, name, dir_part))
|
85 |
-
continue
|
86 |
-
|
87 |
-
# Safely split filename into prefix and extension
|
88 |
-
filename = os.path.basename(file_path)
|
89 |
-
if "." in filename:
|
90 |
-
pf, sf = filename.rsplit(".", 1) # Safely split on last period
|
91 |
-
f_name = f"{pf}.{sf}"
|
92 |
-
else:
|
93 |
-
pf = filename
|
94 |
-
sf = ""
|
95 |
-
f_name = pf
|
96 |
|
97 |
-
|
98 |
-
|
99 |
-
# Ensure subdirectory exists for nested files
|
100 |
-
os.makedirs(os.path.dirname(local_path), exist_ok=True)
|
101 |
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
109 |
|
110 |
-
|
111 |
-
|
|
|
|
|
|
|
|
|
112 |
|
113 |
def get_repo_contents(url):
|
114 |
"""Parse URL and fetch repository contents."""
|
|
|
31 |
def get_all_files(owner, repo, path="", is_hf=False):
    """Recursively fetch all files from a repository.

    Args:
        owner: Repository owner (user or organization).
        repo: Repository / Space name.
        path: Sub-path to list; empty string lists the repository root.
        is_hf: When True, query the Hugging Face Spaces API instead of GitHub.

    Returns:
        A list of file-entry dicts as returned by the remote API, or None on
        any request failure or non-JSON response.
    """
    # Choose the listing endpoint for either backend; rstrip keeps the URL
    # clean when path is empty.
    if is_hf:
        api_url = f"https://huggingface.co/api/spaces/{owner}/{repo}/tree/main/{path}".rstrip('/')
    else:
        api_url = f"{GITHUB_API}{owner}/{repo}/contents/{path}".rstrip('/')

    try:
        response = requests.get(api_url, headers={"Accept": "application/json"})
        response.raise_for_status()

        # Guard: a 200 can still be an HTML error/landing page.
        content_type = response.headers.get('Content-Type', '')
        if not content_type.startswith('application/json'):
            print(f"Received non-JSON response: {response.text[:100]}...")
            return None
        items = response.json()

        files = []
        for entry in items:
            if not isinstance(entry, dict):
                continue  # skip malformed listing entries
            kind = entry.get('type')
            if kind == 'file':
                files.append(entry)
            elif kind == 'dir':
                # Descend into subdirectories; ignore empty/failed listings.
                nested = get_all_files(owner, repo, entry['path'], is_hf)
                if nested:
                    files.extend(nested)
        return files

    except requests.exceptions.RequestException as e:
        print(f"Error fetching repository contents: {str(e)}")
        return None
+
|
65 |
def get_hf_files(repo, name, path=""):
    """Download every file of the Hugging Face Space ``{repo}/{name}`` into a
    local directory named ``name``, mirroring the repo's directory layout.

    Args:
        repo: Owner (user or organization) of the Space.
        name: Space name; also used as the local destination directory.
        path: Unused; kept for backward compatibility with existing callers.

    Returns:
        A list of ``{"path": <repo-relative path>}`` dicts, one per file
        written locally, or ``[]`` if anything fails.
    """
    api = HfApi()
    try:
        # list_repo_files already returns the FULL recursive listing
        # (repo-relative paths like "a/b/c.txt"), so no per-directory
        # recursion is needed. The previous implementation recursed via
        # get_hf_files(repo, name, dir_part) for nested paths, which
        # re-listed the whole repo each time and recursed without bound
        # for paths nested two or more levels deep.
        file_list = api.list_repo_files(repo_id=f'{repo}/{name}', repo_type="space")
        print(f"Files in {repo}/{name}: {file_list}")
        processed_files = []

        os.makedirs(name, exist_ok=True)

        for file_path in file_list:
            # Fetch raw file bytes; the /raw/ endpoint serves content as-is.
            raw_url = f"https://huggingface.co/spaces/{repo}/{name}/raw/main/{file_path}"
            response = requests.get(raw_url, timeout=10)
            response.raise_for_status()

            # Guard: a 200 can still be an HTML error/login page — skip it
            # rather than saving markup as file content.
            if response.headers.get('Content-Type', '').startswith('text/html'):
                print(f"Received HTML instead of raw content for {file_path}: {response.text[:100]}...")
                continue

            # Mirror the repo-relative path locally, creating any nested
            # directories in one call.
            local_path = os.path.join(name, file_path)
            os.makedirs(os.path.dirname(local_path), exist_ok=True)

            with open(local_path, 'wb') as file:
                file.write(response.content)

            processed_files.append({"path": file_path})

        print(f"Processed files: {processed_files}")
        return processed_files

    except Exception as e:
        # Broad catch keeps this best-effort: any failure yields an empty list.
        print(f"Error processing Hugging Face files: {str(e)}")
        return []
|
120 |
|
121 |
def get_repo_contents(url):
|
122 |
"""Parse URL and fetch repository contents."""
|