broadfield-dev committed on
Commit
d6d177a
·
verified ·
1 Parent(s): a8175e6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -19
app.py CHANGED
@@ -25,15 +25,20 @@ def generate_file_tree(paths):
25
  def get_all_files(owner, repo, path="", is_hf=False):
26
  """Recursively fetch all files from a repository."""
27
  if is_hf:
28
- api_url = f"https://huggingface.co/api/spaces/{owner}/{repo}/tree/main/{path}".rstrip('/')
 
29
  else:
30
  api_url = f"{GITHUB_API}{owner}/{repo}/contents/{path}".rstrip('/')
31
 
32
  try:
33
- response = requests.get(api_url)
34
  response.raise_for_status()
35
  items = response.json()
36
 
 
 
 
 
37
  files = []
38
  for item in items:
39
  if item['type'] == 'file':
@@ -50,9 +55,15 @@ def get_repo_contents(url):
50
  if "huggingface.co" in url:
51
  parts = url.rstrip('/').split('/')
52
  owner, repo = parts[-2], parts[-1]
53
- files = get_all_files(owner, repo, "", True)
54
- if files is None:
55
- raise Exception("Failed to fetch Hugging Face Space contents")
 
 
 
 
 
 
56
  return owner, repo, files, True
57
  else: # Assume GitHub URL
58
  parts = url.rstrip('/').split('/')
@@ -71,7 +82,7 @@ def process_file_content(file_info, owner, repo, is_hf=False):
71
 
72
  try:
73
  if is_hf:
74
- file_url = f"https://huggingface.co/spaces/{owner}/{repo}/resolve/main/{file_path}"
75
  else:
76
  file_url = f"{GITHUB_API}{owner}/{repo}/contents/{file_path}"
77
 
@@ -200,31 +211,36 @@ def index():
200
 
201
  @app.route('/process', methods=['POST'])
202
  def process():
 
 
 
203
  if 'files[]' in request.files:
204
  files = request.files.getlist('files[]')
205
  if not files:
206
- return jsonify({'error': 'No files uploaded'}), 400
 
207
 
208
  markdown_content = create_markdown_document(files=files)
209
- html_content = markdown.markdown(markdown_content)
210
- filename = "uploaded_files_summary.md"
 
211
  else:
212
  repo_url = request.json.get('repo_url')
213
  if not repo_url:
214
- return jsonify({'error': 'Please provide a repository URL or upload files'}), 400
 
215
 
216
  markdown_content = create_markdown_document(repo_url)
217
- html_content = markdown.markdown(markdown_content)
218
- owner, repo, _, is_hf = get_repo_contents(repo_url)
219
  if not owner:
220
- return jsonify({'error': markdown_content}), 400
221
- filename = f"{owner}_{repo}_summary.md"
 
 
 
 
222
 
223
- return jsonify({
224
- 'markdown': markdown_content,
225
- 'html': html_content,
226
- 'filename': filename
227
- })
228
 
229
  @app.route('/download', methods=['POST'])
230
  def download():
 
25
  def get_all_files(owner, repo, path="", is_hf=False):
26
  """Recursively fetch all files from a repository."""
27
  if is_hf:
28
+ # Attempt to fetch file list from Hugging Face Space (publicly accessible files)
29
+ api_url = f"https://huggingface.co/spaces/{owner}/{repo}/tree/main/{path}".rstrip('/')
30
  else:
31
  api_url = f"{GITHUB_API}{owner}/{repo}/contents/{path}".rstrip('/')
32
 
33
  try:
34
+ response = requests.get(api_url, headers={"Accept": "application/json"})
35
  response.raise_for_status()
36
  items = response.json()
37
 
38
+ # Hugging Face might not return JSON in the same format; adjust if HTML is returned
39
+ if isinstance(items, str): # If response isn’t JSON, it’s likely HTML
40
+ return None # Fallback to error handling
41
+
42
  files = []
43
  for item in items:
44
  if item['type'] == 'file':
 
55
  if "huggingface.co" in url:
56
  parts = url.rstrip('/').split('/')
57
  owner, repo = parts[-2], parts[-1]
58
+ # Fallback approach: manually fetch known files or use a simpler file list
59
+ # For now, assume a flat structure and fetch known files directly
60
+ # This is a workaround until a proper API token or endpoint is confirmed
61
+ known_files = [
62
+ {'path': 'app.py', 'type': 'file'},
63
+ {'path': 'README.md', 'type': 'file'}
64
+ # Add more known paths or implement HTML scraping if needed
65
+ ]
66
+ files = get_all_files(owner, repo, "", True) or known_files
67
  return owner, repo, files, True
68
  else: # Assume GitHub URL
69
  parts = url.rstrip('/').split('/')
 
82
 
83
  try:
84
  if is_hf:
85
+ file_url = f"https://huggingface.co/spaces/{owner}/{repo}/raw/main/{file_path}"
86
  else:
87
  file_url = f"{GITHUB_API}{owner}/{repo}/contents/{file_path}"
88
 
 
211
 
212
  @app.route('/process', methods=['POST'])
213
  def process():
214
+ # Ensure consistent response structure
215
+ response_data = {'markdown': '', 'html': '', 'filename': '', 'error': None}
216
+
217
  if 'files[]' in request.files:
218
  files = request.files.getlist('files[]')
219
  if not files:
220
+ response_data['error'] = 'No files uploaded'
221
+ return jsonify(response_data), 400
222
 
223
  markdown_content = create_markdown_document(files=files)
224
+ response_data['markdown'] = markdown_content
225
+ response_data['html'] = markdown.markdown(markdown_content)
226
+ response_data['filename'] = "uploaded_files_summary.md"
227
  else:
228
  repo_url = request.json.get('repo_url')
229
  if not repo_url:
230
+ response_data['error'] = 'Please provide a repository URL or upload files'
231
+ return jsonify(response_data), 400
232
 
233
  markdown_content = create_markdown_document(repo_url)
234
+ owner, repo, contents, is_hf = get_repo_contents(repo_url)
 
235
  if not owner:
236
+ response_data['error'] = markdown_content # Error message from get_repo_contents
237
+ return jsonify(response_data), 400
238
+
239
+ response_data['markdown'] = markdown_content
240
+ response_data['html'] = markdown.markdown(markdown_content)
241
+ response_data['filename'] = f"{owner}_{repo}_summary.md"
242
 
243
+ return jsonify(response_data)
 
 
 
 
244
 
245
  @app.route('/download', methods=['POST'])
246
  def download():