broadfield-dev committed on
Commit
95c57d3
·
verified ·
1 Parent(s): e342171

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +66 -58
app.py CHANGED
@@ -31,84 +31,92 @@ def generate_file_tree(paths):
31
  def get_all_files(owner, repo, path="", is_hf=False):
32
  """Recursively fetch all files from a repository."""
33
  if is_hf:
34
- # Attempt to fetch file list from Hugging Face Space (publicly accessible files)
35
- api_url = f"https://huggingface.co/spaces/{owner}/{repo}/tree/main/{path}".rstrip('/')
36
  else:
37
  api_url = f"{GITHUB_API}{owner}/{repo}/contents/{path}".rstrip('/')
38
 
39
  try:
40
- response = requests.get(api_url)
41
- print(response.content)
42
  response = requests.get(api_url, headers={"Accept": "application/json"})
43
- print(str(response.json()))
44
  response.raise_for_status()
45
- items = response.json()
46
 
47
- # Hugging Face might not return JSON in the same format; adjust if HTML is returned
48
- if isinstance(items, str): # If response isn’t JSON, it’s likely HTML
49
- return None # Fallback to error handling
 
 
 
 
50
 
51
  files = []
52
  for item in items:
53
- if item['type'] == 'file':
54
  files.append(item)
55
- elif item['type'] == 'dir':
56
- files.extend(get_all_files(owner, repo, item['path'], is_hf))
57
- print(files)
 
58
  return files
59
 
60
- except Exception as e:
 
61
  return None
62
 
 
63
  def get_hf_files(repo, name, path=""):
64
  api = HfApi()
65
- file_list = api.list_repo_files(repo_id=f'{repo}/{name}', repo_type="space")
66
- print(f"Files in {repo}/{name}: {file_list}")
67
- processed_files = []
68
-
69
- # Create base directory if it doesn't exist
70
- if not os.path.exists(name):
71
- os.makedirs(name)
72
-
73
- for file_path in file_list:
74
- # Handle nested directories
75
- if "/" in file_path:
76
- # Split into directory and remainder
77
- dir_part, file_part = file_path.split("/", 1)
78
- # Ensure directory exists
79
- dir_path = os.path.join(name, dir_part)
80
- if not os.path.exists(dir_path):
81
- os.makedirs(dir_path)
82
- # Recursively handle subdirectories if needed
83
- if "/" in file_part:
84
- processed_files.extend(get_hf_files(repo, name, dir_part))
85
- continue
86
-
87
- # Safely split filename into prefix and extension
88
- filename = os.path.basename(file_path)
89
- if "." in filename:
90
- pf, sf = filename.rsplit(".", 1) # Safely split on last period
91
- f_name = f"{pf}.{sf}"
92
- else:
93
- pf = filename
94
- sf = ""
95
- f_name = pf
96
 
97
- # Construct full local path
98
- local_path = os.path.join(name, file_path)
99
- # Ensure subdirectory exists for nested files
100
- os.makedirs(os.path.dirname(local_path), exist_ok=True)
101
 
102
- # Download file content
103
- r = requests.get(f'https://huggingface.co/spaces/{repo}/{name}/raw/main/{file_path}')
104
- print(f"Downloading: {file_path}")
105
- with open(local_path, 'wb') as file:
106
- file.write(r.content)
107
-
108
- processed_files.append({"path": file_path})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
 
110
- print(f"Processed files: {processed_files}")
111
- return processed_files
 
 
 
 
112
 
113
  def get_repo_contents(url):
114
  """Parse URL and fetch repository contents."""
 
31
  def get_all_files(owner, repo, path="", is_hf=False):
32
  """Recursively fetch all files from a repository."""
33
  if is_hf:
34
+ api_url = f"https://huggingface.co/api/spaces/{owner}/{repo}/tree/main/{path}".rstrip('/')
 
35
  else:
36
  api_url = f"{GITHUB_API}{owner}/{repo}/contents/{path}".rstrip('/')
37
 
38
  try:
 
 
39
  response = requests.get(api_url, headers={"Accept": "application/json"})
 
40
  response.raise_for_status()
 
41
 
42
+ # Check if the response is JSON
43
+ if response.headers.get('Content-Type', '').startswith('application/json'):
44
+ items = response.json()
45
+ else:
46
+ # If not JSON, it might be HTML (e.g., error page)
47
+ print(f"Received non-JSON response: {response.text[:100]}...")
48
+ return None
49
 
50
  files = []
51
  for item in items:
52
+ if isinstance(item, dict) and item.get('type') == 'file':
53
  files.append(item)
54
+ elif isinstance(item, dict) and item.get('type') == 'dir':
55
+ sub_files = get_all_files(owner, repo, item['path'], is_hf)
56
+ if sub_files:
57
+ files.extend(sub_files)
58
  return files
59
 
60
+ except requests.exceptions.RequestException as e:
61
+ print(f"Error fetching repository contents: {str(e)}")
62
  return None
63
 
64
+
65
  def get_hf_files(repo, name, path=""):
66
  api = HfApi()
67
+ try:
68
+ file_list = api.list_repo_files(repo_id=f'{repo}/{name}', repo_type="space")
69
+ print(f"Files in {repo}/{name}: {file_list}")
70
+ processed_files = []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
 
72
+ if not os.path.exists(name):
73
+ os.makedirs(name)
 
 
74
 
75
+ for file_path in file_list:
76
+ # Handle nested directories
77
+ if "/" in file_path:
78
+ dir_part, file_part = file_path.split("/", 1)
79
+ dir_path = os.path.join(name, dir_part)
80
+ if not os.path.exists(dir_path):
81
+ os.makedirs(dir_path)
82
+ if "/" in file_part:
83
+ processed_files.extend(get_hf_files(repo, name, dir_part))
84
+ continue
85
+
86
+ # Fetch raw file content
87
+ raw_url = f"https://huggingface.co/spaces/{repo}/{name}/raw/main/{file_path}"
88
+ response = requests.get(raw_url, timeout=10)
89
+ response.raise_for_status()
90
+
91
+ # Ensure we get raw content, not HTML
92
+ if response.headers.get('Content-Type', '').startswith('text/html'):
93
+ print(f"Received HTML instead of raw content for {file_path}: {response.text[:100]}...")
94
+ continue
95
+
96
+ # Process file
97
+ filename = os.path.basename(file_path)
98
+ if "." in filename:
99
+ pf, sf = filename.rsplit(".", 1)
100
+ f_name = f"{pf}.{sf}"
101
+ else:
102
+ pf = filename
103
+ sf = ""
104
+ f_name = pf
105
+
106
+ local_path = os.path.join(name, file_path)
107
+ os.makedirs(os.path.dirname(local_path), exist_ok=True)
108
+
109
+ with open(local_path, 'wb') as file:
110
+ file.write(response.content)
111
+
112
+ processed_files.append({"path": file_path})
113
 
114
+ print(f"Processed files: {processed_files}")
115
+ return processed_files
116
+
117
+ except Exception as e:
118
+ print(f"Error processing Hugging Face files: {str(e)}")
119
+ return []
120
 
121
  def get_repo_contents(url):
122
  """Parse URL and fetch repository contents."""