broadfield-dev committed on
Commit
e342171
·
verified ·
1 Parent(s): ba14112

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +285 -1
app.py CHANGED
@@ -1,3 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  def get_hf_files(repo, name, path=""):
2
  api = HfApi()
3
  file_list = api.list_repo_files(repo_id=f'{repo}/{name}', repo_type="space")
@@ -46,4 +108,226 @@ def get_hf_files(repo, name, path=""):
46
  processed_files.append({"path": file_path})
47
 
48
  print(f"Processed files: {processed_files}")
49
- return processed_files
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Flask, render_template, request, jsonify, send_file
2
+ from huggingface_hub import HfApi
3
+ import requests
4
+ import base64
5
+ import markdown
6
+ import json
7
+ import mimetypes
8
+ import os
9
+ import io
10
+ import uuid
11
+ from pathlib import Path
12
+
13
+ app = Flask(__name__)
14
+
15
+ GITHUB_API = "https://api.github.com/repos/"
16
+ #HF_API = "https://huggingface.co/api/spaces/"
17
+
18
def generate_file_tree(paths):
    """Render a flat, indented listing of file paths under a root marker.

    Args:
        paths: Iterable of '/'-separated file path strings.

    Returns:
        A string that starts with the "📁 Root" line, followed by one
        "📄 <basename>" line per path (in sorted order), each indented by
        one space per directory level, and terminated by two newlines.
        Only the final path component is shown; directories themselves do
        not get a line of their own.
    """
    print("generating file tree")
    tree = ["📁 Root"]
    for path in sorted(paths):
        parts = path.split('/')
        # Depth is the number of parent components in the path.
        indent = " " * (len(parts) - 1)
        tree.append(f"{indent}📄 {parts[-1]}")
    print("generating file tree - Complete")

    return "\n".join(tree) + "\n\n"
30
+
31
def get_all_files(owner, repo, path="", is_hf=False):
    """Recursively collect the file entries of a repository.

    Args:
        owner: Account or organisation name.
        repo: Repository (or Space) name.
        path: Sub-path to list; the empty string lists the root.
        is_hf: When true, query the Hugging Face Space tree URL instead of
            the GitHub contents API.

    Returns:
        A list of the listing's item dicts whose 'type' is 'file'
        (sub-directories are expanded recursively via their 'path'), or
        None if any request, decode step or nested listing fails.
    """
    if is_hf:
        # NOTE(review): this is the Space's browsable tree URL; it may answer
        # with HTML rather than JSON, in which case the JSON decode below
        # fails and None is returned - confirm the intended endpoint.
        api_url = f"https://huggingface.co/spaces/{owner}/{repo}/tree/main/{path}".rstrip('/')
    else:
        api_url = f"{GITHUB_API}{owner}/{repo}/contents/{path}".rstrip('/')

    try:
        # One request only: fetching each listing twice doubled the traffic
        # for every directory visited.
        response = requests.get(
            api_url,
            headers={"Accept": "application/json"},
            timeout=30,
        )
        # Check the status before touching the body so HTTP errors are not
        # masked by a JSON decoding failure.
        response.raise_for_status()
        items = response.json()

        # Anything other than a list (a JSON string, or a single object) is
        # not a directory listing.
        if not isinstance(items, list):
            return None

        files = []
        for item in items:
            if item['type'] == 'file':
                files.append(item)
            elif item['type'] == 'dir':
                nested = get_all_files(owner, repo, item['path'], is_hf)
                if nested is None:
                    # Propagate the failure explicitly instead of relying on
                    # list.extend(None) raising inside the try block.
                    return None
                files.extend(nested)
        return files

    except Exception as e:
        print(f"Failed to list {api_url}: {e}")
        return None
62
+
63
  def get_hf_files(repo, name, path=""):
64
  api = HfApi()
65
  file_list = api.list_repo_files(repo_id=f'{repo}/{name}', repo_type="space")
 
108
  processed_files.append({"path": file_path})
109
 
110
  print(f"Processed files: {processed_files}")
111
+ return processed_files
112
+
113
def get_repo_contents(url):
    """Parse a repository URL and fetch its file listing.

    The owner and repository names are taken from the last two
    '/'-separated components of ``url``. URLs containing "huggingface.co"
    are listed through ``get_hf_files``; everything else is treated as a
    GitHub repository and listed through ``get_all_files``.

    Args:
        url: Repository or Space URL ending in '<owner>/<repo>'.

    Returns:
        ``(owner, repo, files, is_hf)`` on success. On any failure the tuple
        is ``(None, None, "Error fetching repo contents: <reason>", False)``,
        so callers can detect errors by the third element being a string.
    """
    try:
        parts = url.rstrip('/').split('/')
        if len(parts) < 2 or not parts[-2] or not parts[-1]:
            raise ValueError("URL must end with '<owner>/<repo>'")
        owner, repo = parts[-2], parts[-1]

        if "huggingface.co" in url:
            files = get_hf_files(owner, repo)
            return owner, repo, files, True

        # Assume a GitHub URL. Clone URLs carry a '.git' suffix that is not
        # part of the repository name used by the API.
        if repo.endswith('.git'):
            repo = repo[:-len('.git')]
        files = get_all_files(owner, repo, "", False)
        if files is None:
            raise Exception("Failed to fetch GitHub repository contents")
        return owner, repo, files, False
    except Exception as e:
        return None, None, f"Error fetching repo contents: {str(e)}", False
139
+
140
def _render_file_markdown(file_path, content_raw, size):
    """Format one file's raw bytes as a markdown section.

    Text files (by guessed MIME type or a known extension) are emitted in a
    fenced code block; JSON is pretty-printed when it parses. Everything
    else, including text that is not valid UTF-8, becomes a
    "[Binary file - <size> bytes]" placeholder.
    """
    # Lower-case the extension so 'DATA.JSON' is recognised like 'data.json'.
    file_extension = file_path.rsplit('.', 1)[-1].lower() if '.' in file_path else ''
    mime_type, _ = mimetypes.guess_type(file_path)
    is_text = (mime_type and mime_type.startswith('text')) or file_extension in [
        'py', 'md', 'txt', 'js', 'html', 'css', 'json',
    ]
    binary_section = f"### File: {file_path}\n[Binary file - {size} bytes]\n\n"

    if not is_text:
        return binary_section
    try:
        text_content = content_raw.decode('utf-8')
    except UnicodeDecodeError:
        return binary_section

    if file_extension == 'json':
        try:
            # ensure_ascii=False keeps non-ASCII characters readable instead
            # of rewriting them as \uXXXX escapes.
            formatted_json = json.dumps(json.loads(text_content), indent=2, ensure_ascii=False)
        except json.JSONDecodeError:
            return f"### File: {file_path}\n```json\n{text_content}\n```\n[Note: Invalid JSON format]\n\n"
        return f"### File: {file_path}\n```json\n{formatted_json}\n```\n\n"

    return f"### File: {file_path}\n```{file_extension or 'text'}\n{text_content}\n```\n\n"


def process_file_content(file_info, owner, repo, is_hf=False):
    """Download one repository file and return it as a markdown section.

    Args:
        file_info: Mapping with at least a 'path' key.
        owner: Account or organisation name.
        repo: Repository (or Space) name.
        is_hf: When true, fetch the raw file from the Hugging Face Space;
            otherwise fetch it through the GitHub contents API, whose JSON
            'content' field is base64-decoded.

    Returns:
        A markdown string starting with "### File: <path>". Download or
        decoding failures are reported inside that string rather than
        raised.

    Raises:
        KeyError: If ``file_info`` has no 'path' entry.
    """
    from urllib.parse import quote

    file_path = file_info['path']

    try:
        # Quote the path so characters such as '#', '?' or spaces are not
        # interpreted as URL syntax.
        quoted_path = quote(file_path)
        if is_hf:
            file_url = f"https://huggingface.co/spaces/{owner}/{repo}/raw/main/{quoted_path}"
        else:
            file_url = f"{GITHUB_API}{owner}/{repo}/contents/{quoted_path}"

        # A timeout keeps one stalled download from hanging the whole run.
        response = requests.get(file_url, timeout=30)
        response.raise_for_status()

        if is_hf:
            content_raw = response.content
            return _render_file_markdown(file_path, content_raw, len(content_raw))

        # GitHub
        data = response.json()
        if 'content' not in data:
            return f"### File: {file_path}\n[No content available]\n\n"
        return _render_file_markdown(file_path, base64.b64decode(data['content']), data['size'])
    except Exception as e:
        return f"### File: {file_path}\n[Error fetching file content: {str(e)}]\n\n"
208
+
209
def process_uploaded_file(file):
    """Format one uploaded file as a markdown section.

    Args:
        file: Upload object exposing a ``filename`` attribute and a
            ``read()`` method returning the file's bytes.

    Returns:
        A markdown string starting with "### File: <filename>". Text files
        (by guessed MIME type or a known extension) are placed in a fenced
        code block, with JSON pretty-printed when it parses; other files,
        and text that is not valid UTF-8, become a
        "[Binary file - <size> bytes]" placeholder. Processing errors are
        reported inside the returned string rather than raised.
    """
    content = ""
    # An upload part may arrive without a name; fall back to a placeholder
    # instead of failing on None before the try block is entered.
    filename = file.filename or "unnamed"
    # Lower-case the extension so 'DATA.JSON' is recognised like 'data.json'.
    file_extension = filename.rsplit('.', 1)[-1].lower() if '.' in filename else ''

    try:
        content_raw = file.read()  # Read file content into memory
        size = len(content_raw)  # Size in bytes

        mime_type, _ = mimetypes.guess_type(filename)
        is_text = (mime_type and mime_type.startswith('text')) or file_extension in [
            'py', 'md', 'txt', 'js', 'html', 'css', 'json',
        ]

        if is_text:
            try:
                text_content = content_raw.decode('utf-8')
                if file_extension == 'json':
                    try:
                        json_data = json.loads(text_content)
                        # ensure_ascii=False keeps non-ASCII characters
                        # readable instead of emitting \uXXXX escapes.
                        formatted_json = json.dumps(json_data, indent=2, ensure_ascii=False)
                        content = f"### File: {filename}\n```json\n{formatted_json}\n```\n\n"
                    except json.JSONDecodeError:
                        content = f"### File: {filename}\n```json\n{text_content}\n```\n[Note: Invalid JSON format]\n\n"
                else:
                    content = f"### File: {filename}\n```{file_extension or 'text'}\n{text_content}\n```\n\n"
            except UnicodeDecodeError:
                content = f"### File: {filename}\n[Binary file - {size} bytes]\n\n"
        else:
            content = f"### File: {filename}\n[Binary file - {size} bytes]\n\n"
    except Exception as e:
        content = f"### File: {filename}\n[Error processing file: {str(e)}]\n\n"

    return content
242
+
243
def create_markdown_document(url=None, files=None):
    """Build one markdown document from a repository URL or uploaded files.

    Args:
        url: Repository or Space URL. When truthy it takes precedence over
            ``files``.
        files: Iterable of upload objects (each with a ``filename``), used
            only when ``url`` is falsy. ``None`` is treated as no files.

    Returns:
        The assembled markdown: a title, a "File Structure" tree, then one
        section per file. If the repository listing fails, the string
        "Error: <message>" is returned instead.
    """
    if url:
        owner, repo, contents, is_hf = get_repo_contents(url)

        if isinstance(contents, str):  # Error case
            return f"Error: {contents}"

        sections = [
            f"# {'Space' if is_hf else 'Repository'}: {owner}/{repo}\n\n",
            "## File Structure\n```\n",
            generate_file_tree([item['path'] for item in contents]),
            "```\n\n",
            f"Below are the contents of all files in the {'space' if is_hf else 'repository'}:\n\n",
        ]
        sections.extend(
            process_file_content(item, owner, repo, is_hf) for item in contents
        )
    else:  # Handle uploaded files
        # Materialise once: the uploads are walked twice (tree, then bodies),
        # and a missing argument must not raise.
        uploads = list(files or [])
        sections = [
            "# Uploaded Files\n\n",
            "## File Structure\n```\n",
            generate_file_tree([file.filename for file in uploads]),
            "```\n\n",
            "Below are the contents of all uploaded files:\n\n",
        ]
        sections.extend(process_uploaded_file(file) for file in uploads)

    # Join once instead of growing the string with repeated '+='.
    return "".join(sections)
269
+
270
@app.route('/')
def index():
    """Serve the landing page rendered from the 'index.html' template."""
    return render_template('index.html')
273
+
274
@app.route('/process', methods=['POST'])
def process():
    """Convert uploaded files or a repository URL into a markdown summary.

    Accepts either a multipart upload under the 'files[]' field or a JSON
    body with a 'repo_url' key.

    Returns:
        A JSON object with the keys 'markdown', 'html', 'filename' and
        'error'. Responds with HTTP 400 and a populated 'error' when no
        input was supplied or the repository could not be fetched.
    """
    # Ensure consistent response structure
    response_data = {'markdown': '', 'html': '', 'filename': '', 'error': None}

    if 'files[]' in request.files:
        # Drop parts without a filename (e.g. an empty file input).
        files = [f for f in request.files.getlist('files[]') if f.filename]
        if not files:
            response_data['error'] = 'No files uploaded'
            return jsonify(response_data), 400

        markdown_content = create_markdown_document(files=files)
        response_data['markdown'] = "```markdown\n" + markdown_content + "\n```"
        response_data['html'] = markdown.markdown(markdown_content)
        response_data['filename'] = "uploaded_files_summary.md"
        return jsonify(response_data)

    # silent=True: a missing or malformed JSON body yields None rather than
    # an exception, so the request is answered with the 400 below.
    payload = request.get_json(silent=True)
    repo_url = payload.get('repo_url') if isinstance(payload, dict) else None
    if not repo_url or not isinstance(repo_url, str):
        response_data['error'] = 'Please provide a repository URL or upload files'
        return jsonify(response_data), 400

    # Build the document once. Listing the repository a second time just to
    # learn owner/repo repeated every listing request.
    markdown_content = create_markdown_document(repo_url)
    if markdown_content.startswith("Error: "):
        # create_markdown_document reports fetch failures as "Error: ...".
        response_data['error'] = markdown_content
        return jsonify(response_data), 400

    # Same owner/repo rule the fetch applies: the last two URL components.
    parts = repo_url.rstrip('/').split('/')
    owner, repo = parts[-2], parts[-1]
    if "huggingface.co" not in repo_url and repo.endswith('.git'):
        repo = repo[:-len('.git')]

    response_data['markdown'] = markdown_content
    response_data['html'] = markdown.markdown(markdown_content)
    response_data['filename'] = f"{owner}_{repo}_summary.md"

    return jsonify(response_data)
306
+
307
@app.route('/download', methods=['POST'])
def download():
    """Return posted markdown text as a downloadable attachment.

    Expects a JSON body with a 'markdown' string and an optional 'filename'.

    Returns:
        The markdown as a 'text/markdown' attachment, or HTTP 400 with a
        JSON 'error' when the body carries no markdown string.
    """
    # silent=True: a missing or malformed JSON body yields None instead of
    # raising, so it is answered with the 400 below.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        payload = {}

    markdown_content = payload.get('markdown')
    if not isinstance(markdown_content, str):
        return jsonify({'error': 'No markdown content provided'}), 400

    # Fall back to a generic name when the client did not send one.
    filename = payload.get('filename') or 'summary.md'

    buffer = io.BytesIO(markdown_content.encode('utf-8'))

    return send_file(
        buffer,
        as_attachment=True,
        download_name=filename,
        mimetype='text/markdown',
    )
322
# Copy the page template into Flask's 'templates' directory at import time so
# render_template('index.html') can find it.
# Explicit UTF-8 avoids depending on the platform's default encoding.
with open("html_template.html", "r", encoding="utf-8") as f:
    html_template = f.read()

# exist_ok avoids the check-then-create race of exists() + makedirs().
os.makedirs('templates', exist_ok=True)
with open('templates/index.html', 'w', encoding="utf-8") as f:
    f.write(html_template)

if __name__ == '__main__':
    # The interactive debugger allows arbitrary code execution, so it must
    # not be on by default while listening on all interfaces. Opt in with
    # FLASK_DEBUG=1 for local development.
    app.run(host="0.0.0.0", port=7860, debug=os.environ.get("FLASK_DEBUG") == "1")