broadfield-dev committed on
Commit
23b82dc
·
verified ·
1 Parent(s): 6197a2f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +85 -339
app.py CHANGED
@@ -1,360 +1,106 @@
1
  from flask import Flask, render_template, request, jsonify, send_file
2
- from huggingface_hub import HfApi
3
- import requests
4
- import base64
5
  import markdown
6
- import json
7
- import mimetypes
8
  import os
9
- import io
10
- from pathlib import Path
11
 
12
- app = Flask(__name__)
13
-
14
- GITHUB_API = "https://api.github.com/repos/"
15
-
16
- def generate_file_tree(paths):
17
- """Generate a simple file tree from a list of paths."""
18
- print("generating file tree")
19
- tree = ["📁 Root"]
20
- sorted_paths = sorted(paths)
21
- for path in sorted_paths:
22
- parts = path.split('/')
23
- indent = " " * (len(parts) - 1)
24
- tree.append(f"{indent}📄 {parts[-1]}")
25
- print("generating file tree - Complete")
26
-
27
- return "\n".join(tree) + "\n\n"
28
-
29
- def get_all_files(owner, repo, path="", is_hf=False):
30
- """Recursively fetch all files from a repository."""
31
- if is_hf:
32
- api_url = f"https://huggingface.co/api/spaces/{owner}/{repo}/tree/main/{path}".rstrip('/')
33
- else:
34
- api_url = f"{GITHUB_API}{owner}/{repo}/contents/{path}".rstrip('/')
35
-
36
  try:
37
- response = requests.get(api_url, headers={"Accept": "application/json"}, timeout=10)
38
- response.raise_for_status()
39
-
40
- # Check if the response is JSON
41
- if response.headers.get('Content-Type', '').startswith('application/json'):
42
- items = response.json()
 
 
 
 
 
 
 
 
 
43
  else:
44
- print(f"Received non-JSON response from {api_url}: {response.text[:100]}...")
45
- return None
46
-
47
- files = []
48
- for item in items:
49
- if isinstance(item, dict) and item.get('type') == 'file':
50
- files.append(item)
51
- elif isinstance(item, dict) and item.get('type') == 'dir':
52
- sub_files = get_all_files(owner, repo, item['path'], is_hf)
53
- if sub_files:
54
- files.extend(sub_files)
55
- return files
56
-
57
- except requests.exceptions.RequestException as e:
58
- print(f"Error fetching repository contents from {api_url}: {str(e)}")
59
- return None
60
-
61
- def get_hf_files(repo, name):
62
- """Fetch all files from a Hugging Face Space with robust error handling."""
63
- api = HfApi(token=os.getenv('HF_TOKEN'))
64
- try:
65
- # Use HfApi to list files, which is more reliable for Spaces
66
- file_list = api.list_repo_files(repo_id=f'{repo}/{name}', repo_type="space")
67
- print(f"Files in {repo}/{name}: {file_list}")
68
- processed_files = []
69
-
70
- if not os.path.exists(name):
71
- os.makedirs(name)
72
-
73
- for file_path in file_list:
74
- # Fetch raw file content with strict validation
75
- raw_url = f"https://huggingface.co/spaces/{repo}/{name}/raw/main/{file_path}"
76
- try:
77
- response = requests.get(raw_url, timeout=10)
78
- response.raise_for_status()
79
-
80
- # Ensure we get raw content, not HTML or JSON
81
- content_type = response.headers.get('Content-Type', '').lower()
82
- if content_type.startswith('text/html'):
83
- print(f"Warning: Received HTML instead of raw content for {file_path}: {response.text[:100]}...")
84
- continue
85
- if content_type.startswith('application/json'):
86
- print(f"Warning: Received JSON instead of raw content for {file_path}: {response.text[:100]}...")
87
- continue
88
-
89
- # Verify it's a valid file (e.g., text/plain or binary)
90
- if not content_type.startswith(('text/plain', 'application/octet-stream', 'text/')) and 'text/' not in content_type:
91
- print(f"Unexpected content type for {file_path}: {content_type}")
92
- continue
93
-
94
- except requests.exceptions.RequestException as e:
95
- print(f"Error downloading {file_path} from {raw_url}: {str(e)}")
96
- continue
97
-
98
- # Process file
99
- filename = os.path.basename(file_path)
100
- if "." in filename:
101
- pf, sf = filename.rsplit(".", 1)
102
- f_name = f"{pf}.{sf}"
103
  else:
104
- pf = filename
105
- sf = ""
106
- f_name = pf
107
-
108
- local_path = os.path.join(name, file_path)
109
- os.makedirs(os.path.dirname(local_path), exist_ok=True)
110
-
111
- with open(local_path, 'wb') as file:
112
- file.write(response.content)
113
-
114
- processed_files.append({"path": file_path})
115
 
116
- print(f"Processed files: {processed_files}")
117
- return processed_files
118
-
119
- except Exception as e:
120
- print(f"Error processing Hugging Face files for {repo}/{name}: {str(e)}")
121
- return []
122
 
123
- def get_repo_contents(url):
124
- """Parse URL and fetch repository contents with robust error handling."""
125
- try:
126
- if "huggingface.co" in url.lower():
127
- parts = url.rstrip('/').split('/')
128
- owner, repo = parts[-2], parts[-1]
129
- # Ensure the Space exists and is accessible
130
- try:
131
- api = HfApi()
132
- api.list_repo_files(repo_id=f'{owner}/{repo}', repo_type="space") # Pre-check
133
- except Exception as e:
134
- raise Exception("HfApi Error")
135
 
136
- files = get_hf_files(owner, repo)
137
- if not files: # Empty list is valid, but check for errors
138
- raise Exception("No files found in the Hugging Face Space")
139
- return owner, repo, files, True
140
- else: # Assume GitHub URL
141
- parts = url.rstrip('/').split('/')
142
- owner, repo = parts[-2], parts[-1]
143
- files = get_all_files(owner, repo, "", False)
144
- if files is None:
145
- raise Exception("Failed to fetch GitHub repository contents")
146
- return owner, repo, files, False
147
- except Exception as e:
148
- print(f"Error processing URL {url}: {str(e)}")
149
- return None, None, f"Error fetching repo contents: {str(e)}", False
150
 
151
- def process_file_content(file_info, owner, repo, is_hf=False):
152
- """Process individual file content from a repository."""
153
- content = ""
154
- file_path = file_info['path']
155
-
156
- try:
157
- if is_hf:
158
- file_url = f"https://huggingface.co/spaces/{owner}/{repo}/raw/main/{file_path}"
159
- response = requests.get(file_url, timeout=10)
160
- response.raise_for_status()
161
-
162
- # Ensure we get raw content, not HTML or JSON
163
- content_type = response.headers.get('Content-Type', '').lower()
164
- if content_type.startswith('text/html'):
165
- raise Exception(f"Received HTML instead of raw content for {file_path}: {response.text[:100]}...")
166
- if content_type.startswith('application/json'):
167
- raise Exception(f"Received JSON instead of raw content for {file_path}: {response.text[:100]}...")
168
-
169
- content_raw = response.content
170
- size = len(content_raw)
171
- file_extension = file_path.split('.')[-1] if '.' in file_path else ''
172
- mime_type, _ = mimetypes.guess_type(file_path)
173
- is_text = (mime_type and mime_type.startswith('text')) or file_extension in ['py', 'md', 'txt', 'js', 'html', 'css', 'json'] or "Dockerfile" in file_path
174
-
175
- if is_text:
176
- try:
177
- text_content = content_raw.decode('utf-8')
178
- if file_extension == 'json':
179
- try:
180
- json_data = json.loads(text_content)
181
- formatted_json = json.dumps(json_data, indent=2)
182
- content = f"### File: {file_path}\n```json\n{formatted_json}\n```\n\n"
183
- except json.JSONDecodeError:
184
- content = f"### File: {file_path}\n```json\n{text_content}\n```\n[Note: Invalid JSON format]\n\n"
185
- else:
186
- content = f"### File: {file_path}\n```{file_extension or 'text'}\n{text_content}\n```\n\n"
187
- except UnicodeDecodeError:
188
- content = f"### File: {file_path}\n[Binary file - {size} bytes]\n\n"
189
- else:
190
- content = f"### File: {file_path}\n[Binary file - {size} bytes]\n\n"
191
- else: # GitHub
192
- file_url = f"{GITHUB_API}{owner}/{repo}/contents/{file_path}"
193
- response = requests.get(file_url, headers={"Accept": "application/json"}, timeout=10)
194
- response.raise_for_status()
195
-
196
- # Ensure we get JSON, not HTML
197
- if response.headers.get('Content-Type', '').startswith('text/html'):
198
- raise Exception(f"Received HTML instead of JSON for {file_path}: {response.text[:100]}...")
199
-
200
- data = response.json()
201
- if 'content' in data:
202
- content_raw = base64.b64decode(data['content'])
203
- size = data['size']
204
- file_extension = file_path.split('.')[-1] if '.' in file_path else ''
205
- mime_type, _ = mimetypes.guess_type(file_path)
206
- is_text = (mime_type and mime_type.startswith('text')) or file_extension in ['py', 'md', 'txt', 'js', 'html', 'css', 'json']
207
 
208
- if is_text:
209
- try:
210
- text_content = content_raw.decode('utf-8')
211
- if file_extension == 'json':
212
- try:
213
- json_data = json.loads(text_content)
214
- formatted_json = json.dumps(json_data, indent=2)
215
- content = f"### File: {file_path}\n```json\n{formatted_json}\n```\n\n"
216
- except json.JSONDecodeError:
217
- content = f"### File: {file_path}\n```json\n{text_content}\n```\n[Note: Invalid JSON format]\n\n"
218
- else:
219
- content = f"### File: {file_path}\n```{file_extension or 'text'}\n{text_content}\n```\n\n"
220
- except UnicodeDecodeError:
221
- content = f"### File: {file_path}\n[Binary file - {size} bytes]\n\n"
222
- else:
223
- content = f"### File: {file_path}\n[Binary file - {size} bytes]\n\n"
224
  else:
225
- content = f"### File: {file_path}\n[No content available]\n\n"
226
- except Exception as e:
227
- content = f"### File: {file_path}\n[Error fetching file content: {str(e)}]\n\n"
228
-
229
- return content
230
-
231
- def process_uploaded_file(file):
232
- """Process uploaded file content."""
233
- content = ""
234
- filename = file.filename
235
- file_extension = filename.split('.')[-1] if '.' in filename else ''
236
-
237
- try:
238
- content_raw = file.read() # Read file content into memory
239
- size = len(content_raw) # Compute size in bytes
240
 
241
- mime_type, _ = mimetypes.guess_type(filename)
242
- is_text = (mime_type and mime_type.startswith('text')) or file_extension in ['py', 'md', 'txt', 'js', 'html', 'css', 'json'] or "Dockerfile" in file_path
 
243
 
244
- if is_text:
245
- try:
246
- text_content = content_raw.decode('utf-8')
247
- if file_extension == 'json':
248
- try:
249
- json_data = json.loads(text_content)
250
- formatted_json = json.dumps(json_data, indent=2)
251
- content = f"### File: {filename}\n```json\n{formatted_json}\n```\n\n"
252
- except json.JSONDecodeError:
253
- content = f"### File: {filename}\n```json\n{text_content}\n```\n[Note: Invalid JSON format]\n\n"
254
- else:
255
- content = f"### File: {filename}\n```{file_extension or 'text'}\n{text_content}\n```\n\n"
256
- except UnicodeDecodeError:
257
- content = f"### File: {filename}\n[Binary file - {size} bytes]\n\n"
258
- else:
259
- content = f"### File: {filename}\n[Binary file - {size} bytes]\n\n"
260
- except Exception as e:
261
- content = f"### File: {filename}\n[Error processing file: {str(e)}]\n\n"
262
-
263
- return content
264
 
265
- def create_markdown_document(url=None, files=None):
266
- """Create markdown document from repo contents or uploaded files."""
267
- if url:
268
- owner, repo, contents, is_hf = get_repo_contents(url)
269
-
270
- if isinstance(contents, str): # Error case
271
- return f"Error: {contents}"
272
 
273
- markdown_content = f"# {'Space' if is_hf else 'Repository'}: {owner}/{repo}\n\n"
274
- markdown_content += "## File Structure\n```\n"
275
- markdown_content += generate_file_tree([item['path'] for item in contents])
276
- markdown_content += "```\n\n"
277
- markdown_content += f"Below are the contents of all files in the {'space' if is_hf else 'repository'}:\n\n"
278
 
279
- for item in contents:
280
- markdown_content += process_file_content(item, owner, repo, is_hf)
281
- else: # Handle uploaded files
282
- markdown_content = "# Uploaded Files\n\n"
283
- markdown_content += "## File Structure\n```\n"
284
- markdown_content += generate_file_tree([file.filename for file in files])
285
- markdown_content += "```\n\n"
286
- markdown_content += "Below are the contents of all uploaded files:\n\n"
287
- for file in files:
288
- markdown_content += process_uploaded_file(file)
289
-
290
- return markdown_content
291
-
292
- @app.route('/')
293
- def index():
294
- return render_template('index.html')
295
-
296
- @app.route('/process', methods=['POST'])
297
- def process():
298
- # Ensure consistent response structure as JSON, even for errors
299
- response_data = {'markdown': '', 'html': '', 'filename': '', 'error': None}
300
-
301
- try:
302
- if 'files[]' in request.files:
303
- files = request.files.getlist('files[]')
304
- if not files:
305
- response_data['error'] = 'No files uploaded'
306
- return jsonify(response_data), 400
307
-
308
- markdown_content = create_markdown_document(files=files)
309
- response_data['markdown'] = "```markdown\n" + markdown_content + "\n```"
310
- response_data['html'] = markdown.markdown(markdown_content)
311
- response_data['filename'] = "uploaded_files_summary.md"
312
- else:
313
- repo_url = request.json.get('repo_url', '').strip()
314
- if not repo_url:
315
- response_data['error'] = 'Please provide a repository URL or upload files'
316
- return jsonify(response_data), 400
317
-
318
- markdown_content = create_markdown_document(repo_url)
319
- owner, repo, contents, is_hf = get_repo_contents(repo_url)
320
- if not owner:
321
- response_data['error'] = markdown_content
322
- return jsonify(response_data), 400
323
-
324
- response_data['markdown'] = markdown_content
325
- response_data['html'] = markdown.markdown(markdown_content)
326
- response_data['filename'] = f"{owner}_{repo}_summary.md"
327
-
328
- except Exception as e:
329
- response_data['error'] = f"Server error processing request: {str(e)}"
330
- return jsonify(response_data), 500
331
-
332
- return jsonify(response_data)
333
-
334
- @app.route('/download', methods=['POST'])
335
- def download():
336
- markdown_content = request.json.get('markdown', '')
337
- filename = request.json.get('filename', 'document.md')
338
-
339
- buffer = io.BytesIO()
340
- buffer.write(markdown_content.encode('utf-8'))
341
- buffer.seek(0)
342
-
343
- return send_file(
344
- buffer,
345
- as_attachment=True,
346
- download_name=filename,
347
- mimetype='text/markdown'
348
- )
349
-
350
- with open("html_template.html", "r") as f:
351
- html_template = f.read()
352
- f.close()
353
 
354
- if not os.path.exists('templates'):
355
- os.makedirs('templates')
356
- with open('templates/index.html', 'w') as f:
357
- f.write(html_template)
358
 
359
  if __name__ == '__main__':
360
- app.run(host="0.0.0.0", port=7860, debug=True)
 
1
  from flask import Flask, render_template, request, jsonify, send_file
2
+ from .core import create_markdown_document
 
 
3
  import markdown
 
 
4
  import os
5
+ import pkg_resources
6
+ import sys
7
 
8
+ def find_template_path():
9
+ """Find the templates directory, either from installed package or source."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  try:
11
+ # Try to use pkg_resources for installed packages
12
+ template_path = pkg_resources.resource_filename("repo_to_md", "templates")
13
+ except Exception as e:
14
+ # Fallback for running from source (development)
15
+ current_dir = os.path.dirname(os.path.abspath(__file__))
16
+ template_path = os.path.join(current_dir, "../templates")
17
+ template_path = os.path.abspath(template_path)
18
+
19
+ # Check if the template path exists
20
+ if not os.path.exists(template_path):
21
+ # Try an alternative path within the package directory
22
+ package_dir = os.path.dirname(os.path.abspath(__file__))
23
+ alternative_path = os.path.join(package_dir, "templates")
24
+ if os.path.exists(alternative_path):
25
+ template_path = alternative_path
26
  else:
27
+ # One last attempt: check if installed but in a different structure
28
+ site_packages = os.path.join(os.path.dirname(sys.executable), "site-packages")
29
+ installed_path = os.path.join(site_packages, "repo_to_md", "templates")
30
+ if os.path.exists(installed_path):
31
+ template_path = installed_path
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  else:
33
+ raise FileNotFoundError(
34
+ f"Template directory not found at: {template_path}, {alternative_path}, or {installed_path}"
35
+ )
 
 
 
 
 
 
 
 
36
 
37
+ return template_path
 
 
 
 
 
38
 
39
+ def run_demo(host="0.0.0.0", port=7860, debug=True):
40
+ # Get the correct template path
41
+ template_path = find_template_path()
 
 
 
 
 
 
 
 
 
42
 
43
+ # Create Flask app with the resolved template folder
44
+ app = Flask(__name__, template_folder=template_path)
 
 
 
 
 
 
 
 
 
 
 
 
45
 
46
+ @app.route('/')
47
+ def index():
48
+ return render_template('index.html')
49
+
50
+ @app.route('/process', methods=['POST'])
51
+ def process():
52
+ response_data = {'markdown': '', 'html': '', 'filename': '', 'error': None}
53
+
54
+ try:
55
+ if 'files[]' in request.files:
56
+ files = request.files.getlist('files[]')
57
+ if not files:
58
+ response_data['error'] = 'No files uploaded'
59
+ return jsonify(response_data), 400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
 
61
+ markdown_content = create_markdown_document(files=files)
62
+ response_data['markdown'] = markdown_content
63
+ response_data['html'] = markdown.markdown(markdown_content)
64
+ response_data['filename'] = "uploaded_files_summary.md"
 
 
 
 
 
 
 
 
 
 
 
 
65
  else:
66
+ repo_url = request.json.get('repo_url', '').strip()
67
+ if not repo_url:
68
+ response_data['error'] = 'Please provide a repository URL or upload files'
69
+ return jsonify(response_data), 400
70
+
71
+ markdown_content = create_markdown_document(repo_url)
72
+ if markdown_content.startswith("Error:"):
73
+ response_data['error'] = markdown_content
74
+ return jsonify(response_data), 400
75
+
76
+ response_data['markdown'] = markdown_content
77
+ response_data['html'] = markdown.markdown(markdown_content)
78
+ owner, repo = repo_url.rstrip('/').split('/')[-2:]
79
+ response_data['filename'] = f"{owner}_{repo}_summary.md"
 
80
 
81
+ except Exception as e:
82
+ response_data['error'] = f"Server error processing request: {str(e)}"
83
+ return jsonify(response_data), 500
84
 
85
+ return jsonify(response_data)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
 
87
+ @app.route('/download', methods=['POST'])
88
+ def download():
89
+ markdown_content = request.json.get('markdown', '')
90
+ filename = request.json.get('filename', 'document.md')
 
 
 
91
 
92
+ buffer = io.BytesIO()
93
+ buffer.write(markdown_content.encode('utf-8'))
94
+ buffer.seek(0)
 
 
95
 
96
+ return send_file(
97
+ buffer,
98
+ as_attachment=True,
99
+ download_name=filename,
100
+ mimetype='text/markdown'
101
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
 
103
+ app.run(host=host, port=port, debug=debug)
 
 
 
104
 
105
  if __name__ == '__main__':
106
+ run_demo()