broadfield-dev committed on
Commit
b42fffe
·
verified ·
1 Parent(s): 540d342

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +221 -75
app.py CHANGED
@@ -1,4 +1,4 @@
1
- from flask import Flask, render_template, request, jsonify
2
  import requests
3
  import base64
4
  import markdown
@@ -6,70 +6,132 @@ from bs4 import BeautifulSoup
6
  import os
7
  import mimetypes
8
  import json
 
 
9
 
10
  app = Flask(__name__)
11
 
12
  GITHUB_API = "https://api.github.com/repos/"
 
13
 
14
- def get_repo_contents(repo_url):
15
- """Extract contents from GitHub repo URL"""
 
 
 
 
 
 
 
 
 
 
 
 
16
  try:
17
- parts = repo_url.rstrip('/').split('/')
18
- owner, repo = parts[-2], parts[-1]
19
- api_url = f"{GITHUB_API}{owner}/{repo}/contents"
20
- response = requests.get(api_url)
21
- response.raise_for_status()
22
- return owner, repo, response.json()
 
 
 
 
 
 
 
 
23
  except Exception as e:
24
- return None, None, str(e)
25
 
26
- def process_file_content(file_info, owner, repo):
27
  """Process individual file content"""
28
  content = ""
29
- file_path = file_info['path']
30
 
31
- if file_info['type'] == 'file':
32
- file_url = f"{GITHUB_API}{owner}/{repo}/contents/{file_path}"
 
 
 
 
33
  file_response = requests.get(file_url)
34
- file_data = file_response.json()
35
 
36
- if 'content' in file_data:
37
- file_extension = file_path.split('.')[-1] if '.' in file_path else ''
38
- mime_type, _ = mimetypes.guess_type(file_path)
39
- is_text = (mime_type and mime_type.startswith('text')) or file_extension in ['py', 'md', 'txt', 'js', 'html', 'css', 'json']
40
-
41
- if is_text:
 
 
 
 
 
 
 
 
42
  try:
43
- decoded_content = base64.b64decode(file_data['content']).decode('utf-8')
44
- # Special handling for JSON files
45
- if file_extension == 'json':
46
- try:
47
- json_data = json.loads(decoded_content)
48
- formatted_json = json.dumps(json_data, indent=2)
49
- content = f"### File: {file_path}\n```json\n{formatted_json}\n```\n\n"
50
- except json.JSONDecodeError:
51
- content = f"### File: {file_path}\n```json\n{decoded_content}\n```\n[Note: Invalid JSON format]\n\n"
52
- else:
53
- content = f"### File: {file_path}\n```{(file_extension if file_extension else 'text')}\n{decoded_content}\n```\n\n"
54
- except UnicodeDecodeError:
55
- content = f"### File: {file_path}\n[Text content could not be decoded - possibly corrupted or non-UTF-8 text]\n\n"
56
  else:
57
- content = f"### File: {file_path}\n[Binary file - {file_data['size']} bytes]\n\n"
 
 
58
 
59
  return content
60
 
61
- def create_markdown_document(repo_url):
62
- """Create markdown document from repo contents"""
63
- owner, repo, contents = get_repo_contents(repo_url)
64
-
65
- if isinstance(contents, str):
66
- return f"Error: {contents}"
67
 
68
- markdown_content = f"# Repository: {owner}/{repo}\n\n"
69
- markdown_content += "Below are the contents of all files in the repository:\n\n"
 
 
 
 
 
 
 
 
 
 
 
70
 
71
- for item in contents:
72
- markdown_content += process_file_content(item, owner, repo)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
 
74
  return markdown_content
75
 
@@ -78,24 +140,52 @@ def index():
78
  return render_template('index.html')
79
 
80
  @app.route('/process', methods=['POST'])
81
- def process_repo():
82
- repo_url = request.json.get('repo_url')
83
- if not repo_url:
84
- return jsonify({'error': 'Please provide a repository URL'}), 400
85
-
86
- markdown_content = create_markdown_document(repo_url)
87
- html_content = markdown.markdown(markdown_content)
 
 
 
 
 
 
 
 
 
 
 
88
 
89
  return jsonify({
90
  'markdown': markdown_content,
91
- 'html': html_content
 
92
  })
93
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
  html_template = """
95
  <!DOCTYPE html>
96
  <html>
97
  <head>
98
- <title>GitHub Repo to Markdown</title>
99
  <style>
100
  body {
101
  font-family: Arial, sans-serif;
@@ -118,6 +208,7 @@ html_template = """
118
  color: white;
119
  border: none;
120
  cursor: pointer;
 
121
  }
122
  button:hover {
123
  background-color: #45a049;
@@ -146,10 +237,15 @@ html_template = """
146
  </head>
147
  <body>
148
  <div class="container">
149
- <h1>GitHub Repository to Markdown Converter</h1>
150
- <p>Enter a GitHub repository URL (e.g., https://github.com/username/repository)</p>
151
- <input type="text" id="repoUrl" style="width: 100%; padding: 8px;" placeholder="Enter GitHub repository URL">
152
- <button onclick="processRepo()">Convert to Markdown</button>
 
 
 
 
 
153
  <div id="spinner" class="spinner"></div>
154
 
155
  <h2>Markdown Output:</h2>
@@ -158,41 +254,91 @@ html_template = """
158
  <h2>Preview:</h2>
159
  <div id="output"></div>
160
  </div>
161
-
162
  <script>
 
 
 
163
  async function processRepo() {
164
  const repoUrl = document.getElementById('repoUrl').value;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
165
  const spinner = document.getElementById('spinner');
166
- const button = document.querySelector('button');
167
 
168
- // Show spinner, disable button
169
  spinner.style.display = 'block';
170
- button.disabled = true;
171
 
172
  try {
173
- const response = await fetch('/process', {
174
  method: 'POST',
175
- headers: {
176
- 'Content-Type': 'application/json',
177
- },
178
- body: JSON.stringify({ repo_url: repoUrl })
179
- });
180
 
181
- const data = await response.json();
 
182
 
183
- if (data.error) {
184
- alert(data.error);
185
  return;
186
  }
187
 
188
- document.getElementById('markdownOutput').value = data.markdown;
189
- document.getElementById('output').innerHTML = data.html;
 
 
 
190
  } catch (error) {
191
  alert('An error occurred: ' + error.message);
192
  } finally {
193
- // Hide spinner, enable button
194
  spinner.style.display = 'none';
195
- button.disabled = false;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
196
  }
197
  }
198
  </script>
 
1
+ from flask import Flask, render_template, request, jsonify, send_file
2
  import requests
3
  import base64
4
  import markdown
 
6
  import os
7
  import mimetypes
8
  import json
9
+ from io import BytesIO
10
+ from pathlib import Path
11
 
12
  app = Flask(__name__)
13
 
14
  GITHUB_API = "https://api.github.com/repos/"
15
+ HF_API = "https://huggingface.co/api/spaces/"
16
 
17
def generate_file_tree(contents, is_hf=False):
    """Render a plain-text tree listing of the files in ``contents``.

    Args:
        contents: Iterable of file entries. An entry is either a dict with a
            ``'path'`` key or a bare path string; anything else is ignored.
            A bare string passed as ``contents`` itself is treated as empty.
        is_hf: Accepted for call-site symmetry. Entries are recognised by
            their type, so the flag does not change the output.

    Returns:
        A root line followed by one line per file (sorted by path, indented
        one step per directory level), terminated by a blank line.
    """
    if isinstance(contents, str):
        # A string here is an error message, not a listing; do not iterate
        # it character by character.
        contents = []

    paths = []
    for item in contents:
        if isinstance(item, str):
            # String entries used to be dropped, which left listings made
            # only of path strings with an empty tree.
            paths.append(item)
        elif isinstance(item, dict) and 'path' in item:
            paths.append(item['path'])

    tree = ["📁 Root"]
    for path in sorted(paths):
        parts = path.split('/')
        indent = " " * (len(parts) - 1)
        tree.append(f"{indent}📄 {parts[-1]}")

    return "\n".join(tree) + "\n\n"
28
+
29
def get_repo_contents(url):
    """Fetch the top-level file listing of a GitHub repo or Hugging Face Space.

    The owner and repository names are the last two path segments of ``url``.
    Query strings, fragments, a trailing slash and a trailing ``.git`` are
    ignored. Deep links (for example ``.../tree/main``) are not understood:
    their last two segments are still used as owner and repository.

    Args:
        url: Repository or Space URL, with or without a scheme.

    Returns:
        ``(owner, repo, contents, is_hf)`` on success, where ``contents`` is
        the decoded API listing and ``is_hf`` tells which service was used.
        On any failure ``(None, None, message, False)`` with ``message`` a
        string describing the problem; this function does not raise.
    """
    from urllib.parse import urlparse

    request_timeout = 10  # seconds; requests waits indefinitely without one

    try:
        # urlparse only fills in the host when the input has a "//" prefix.
        parsed = urlparse(url if '://' in url else f"//{url}")
        host = (parsed.hostname or '').lower()
        # Match on the host, not on the whole URL, so a GitHub repository
        # whose name merely contains "huggingface.co" is not misrouted.
        is_hf = host == 'huggingface.co' or host.endswith('.huggingface.co')

        segments = [part for part in parsed.path.split('/') if part]
        if len(segments) < 2:
            raise ValueError(f"Could not find an owner and repository name in URL: {url!r}")
        owner, repo = segments[-2], segments[-1]
        if repo.endswith('.git'):
            repo = repo[:-len('.git')]

        if is_hf:
            # NOTE(review): endpoint path and the "files" key are kept from
            # the previous revision - confirm against the Hugging Face Hub API.
            api_url = f"{HF_API}{owner}/{repo}/files"
        else:
            api_url = f"{GITHUB_API}{owner}/{repo}/contents"

        response = requests.get(api_url, timeout=request_timeout)
        response.raise_for_status()
        payload = response.json()
        contents = payload["files"] if is_hf else payload
        return owner, repo, contents, is_hf
    except Exception as e:
        # Boundary of the "never raises" contract: callers detect failure by
        # checking whether the third element is a string.
        return None, None, str(e), False
48
 
49
def process_file_content(file_info, owner, repo, is_hf=False):
    """Download one listed file and render it as a markdown section.

    Args:
        file_info: Either a bare path string, or a dict with ``'path'`` and
            ``'type'`` keys. Dicts whose ``'type'`` is not ``'file'`` and
            entries of any other shape produce no output.
        owner: Owner (user or organisation) of the repository or Space.
        repo: Repository or Space name.
        is_hf: When true the file is read from the Hugging Face raw-file URL,
            otherwise from the GitHub contents API (base64 payload).

    Returns:
        A markdown fragment starting with ``### File: <path>``. Text files
        are placed in a fenced code block (JSON is pretty-printed when it
        parses). Files not recognised as text yield a ``[Binary file]``
        placeholder. A failed download yields an ``[Error fetching file]``
        placeholder. Skipped entries yield an empty string.
    """
    from urllib.parse import quote

    # Recognise the entry by its type rather than trusting is_hf, so a path
    # string or a dict works for either service.
    if isinstance(file_info, str):
        file_path = file_info
    elif isinstance(file_info, dict) and file_info.get('type') == 'file' and 'path' in file_info:
        file_path = file_info['path']
    else:
        return ""

    # Classify from the name alone, before any network traffic: binary files
    # are never rendered, so downloading them is wasted work.
    file_name = file_path.rsplit('/', 1)[-1]
    file_extension = file_name.rsplit('.', 1)[-1] if '.' in file_name else ''
    mime_type, _ = mimetypes.guess_type(file_path)
    text_extensions = ('py', 'md', 'txt', 'js', 'html', 'css', 'json')
    is_text = bool(mime_type and mime_type.startswith('text')) or file_extension in text_extensions
    if not is_text:
        return f"### File: {file_path}\n[Binary file]\n\n"

    # Percent-encode spaces, '#', '?' and similar; '/' is left untouched.
    quoted_path = quote(file_path)
    if is_hf:
        file_url = f"https://huggingface.co/spaces/{owner}/{repo}/raw/main/{quoted_path}"
    else:
        file_url = f"{GITHUB_API}{owner}/{repo}/contents/{quoted_path}"

    try:
        file_response = requests.get(file_url, timeout=10)
        # Without this an HTTP error page would be embedded as file content.
        file_response.raise_for_status()
        if is_hf:
            content_raw = file_response.text
        else:
            file_data = file_response.json()
            if not isinstance(file_data, dict) or 'content' not in file_data:
                return f"### File: {file_path}\n[No content available]\n\n"
            content_raw = base64.b64decode(file_data['content']).decode('utf-8', errors='ignore')
    except (requests.RequestException, ValueError) as e:
        # ValueError also covers malformed JSON and invalid base64 payloads.
        return f"### File: {file_path}\n[Error fetching file: {e}]\n\n"

    if file_extension == 'json':
        try:
            formatted_json = json.dumps(json.loads(content_raw), indent=2)
        except json.JSONDecodeError:
            return f"### File: {file_path}\n```json\n{content_raw}\n```\n[Note: Invalid JSON format]\n\n"
        return f"### File: {file_path}\n```json\n{formatted_json}\n```\n\n"

    return f"### File: {file_path}\n```{(file_extension if file_extension else 'text')}\n{content_raw}\n```\n\n"
88
 
89
def process_uploaded_file(file):
    """Render one uploaded file as a markdown section.

    Args:
        file: Upload object exposing a ``filename`` attribute and a
            ``read()`` method that returns bytes.

    Returns:
        A markdown fragment starting with ``### File: <name>``. The bytes
        are decoded as UTF-8 with undecodable bytes dropped and placed in a
        fenced code block labelled with the file extension (``text`` when
        there is none). JSON is pretty-printed when it parses. Any error
        while reading or formatting yields an ``[Error processing file]``
        placeholder instead of raising.
    """
    # An upload part can arrive without a name; the extension lookup below
    # would raise TypeError on None.
    filename = file.filename or "unnamed"
    file_extension = filename.rsplit('.', 1)[-1] if '.' in filename else ''

    try:
        content_raw = file.read().decode('utf-8', errors='ignore')
        if file_extension == 'json':
            try:
                formatted_json = json.dumps(json.loads(content_raw), indent=2)
                content = f"### File: {filename}\n```json\n{formatted_json}\n```\n\n"
            except json.JSONDecodeError:
                content = f"### File: {filename}\n```json\n{content_raw}\n```\n[Note: Invalid JSON format]\n\n"
        else:
            content = f"### File: {filename}\n```{(file_extension if file_extension else 'text')}\n{content_raw}\n```\n\n"
    except Exception as e:
        # Deliberately broad: one unreadable upload must not abort the whole
        # document, so the failure is reported inline.
        content = f"### File: {filename}\n[Error processing file: {str(e)}]\n\n"

    return content
110
+
111
def create_markdown_document(url=None, files=None):
    """Build the markdown summary for a repository URL or uploaded files.

    Args:
        url: Repository or Space URL. When truthy it takes precedence over
            ``files``.
        files: Iterable of upload objects (each with a ``filename``
            attribute), used only when ``url`` is falsy.

    Returns:
        The markdown document as a string. When the listing cannot be
        fetched, or when neither ``url`` nor ``files`` is supplied, the
        return value is a message starting with ``"Error: "``.
    """
    if url:
        owner, repo, contents, is_hf = get_repo_contents(url)

        # get_repo_contents signals failure by returning a message string
        # in place of the listing.
        if isinstance(contents, str):
            return f"Error: {contents}"

        sections = [
            f"# {'Space' if is_hf else 'Repository'}: {owner}/{repo}\n\n",
            "## File Structure\n```\n",
            generate_file_tree(contents, is_hf),
            "```\n\n",
            f"Below are the contents of all files in the {'space' if is_hf else 'repository'}:\n\n",
        ]
        sections.extend(process_file_content(item, owner, repo, is_hf) for item in contents)
        return "".join(sections)

    if files is None:
        # Previously this fell through to iterating None and raised TypeError.
        return "Error: No repository URL or files provided"

    # Materialise once: the uploads are walked twice (listing, then content).
    uploads = list(files)
    listing = "\n".join([f" 📄 {file.filename}" for file in uploads])
    sections = [
        "# Uploaded Files\n\n",
        "## File Structure\n```\n",
        "📁 Uploads\n" + listing + "\n",
        "```\n\n",
        "Below are the contents of all uploaded files:\n\n",
    ]
    sections.extend(process_uploaded_file(file) for file in uploads)
    return "".join(sections)
137
 
 
140
  return render_template('index.html')
141
 
142
  @app.route('/process', methods=['POST'])
143
def process():
    """Handle POST /process: convert uploaded files or a repo URL to markdown.

    Accepts either multipart form data with one or more ``files[]`` parts,
    or a JSON body of the form ``{"repo_url": "..."}``.

    Returns:
        A JSON response with ``markdown``, ``html`` and ``filename`` keys on
        success. On failure a JSON ``{"error": ...}`` body with status 400
        (bad or missing input) or 502 (the repository listing could not be
        produced).
    """
    if 'files[]' in request.files:
        # Drop nameless parts so a form submitted with no file selected is
        # rejected rather than summarised.
        files = [f for f in request.files.getlist('files[]') if f.filename]
        if not files:
            return jsonify({'error': 'No files uploaded'}), 400

        markdown_content = create_markdown_document(files=files)
        filename = "uploaded_files_summary.md"
    else:
        # silent=True: a missing or malformed JSON body becomes None here
        # instead of aborting the request inside Flask.
        payload = request.get_json(silent=True)
        repo_url = payload.get('repo_url') if isinstance(payload, dict) else None
        if not repo_url or not isinstance(repo_url, str):
            return jsonify({'error': 'Please provide a repository URL or upload files'}), 400

        markdown_content = create_markdown_document(repo_url)
        if markdown_content.startswith("Error: "):
            # Report the failure as an error rather than as a successful
            # document named "None_None_summary.md".
            return jsonify({'error': markdown_content}), 502

        # Derive the download name from the URL itself; fetching the listing
        # a second time just for owner/repo doubled the API traffic.
        parts = [p for p in repo_url.split('?')[0].split('#')[0].split('/') if p]
        if len(parts) >= 2:
            filename = f"{parts[-2]}_{parts[-1]}_summary.md"
        else:
            filename = "repository_summary.md"

    # NOTE(review): the client assigns this HTML to innerHTML and the markdown
    # embeds untrusted file content - consider sanitising the rendered HTML.
    html_content = markdown.markdown(markdown_content)

    return jsonify({
        'markdown': markdown_content,
        'html': html_content,
        'filename': filename
    })
167
 
168
@app.route('/download', methods=['POST'])
def download():
    """Handle POST /download: return posted markdown as a file attachment.

    Expects a JSON body ``{"markdown": "...", "filename": "..."}``.

    Returns:
        The markdown as a ``text/markdown`` attachment. Responds with a JSON
        ``{"error": ...}`` body and status 400 when ``markdown`` is missing
        or is not a string.
    """
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        payload = {}

    markdown_content = payload.get('markdown')
    if not isinstance(markdown_content, str):
        # Previously a missing field reached .encode() and raised
        # AttributeError.
        return jsonify({'error': 'No markdown content provided'}), 400

    # The name comes from the client: keep only its final path component and
    # fall back to a fixed name when nothing usable is left.
    requested_name = str(payload.get('filename') or '').replace('\\', '/')
    filename = os.path.basename(requested_name) or 'summary.md'

    buffer = BytesIO(markdown_content.encode('utf-8'))

    return send_file(
        buffer,
        as_attachment=True,
        download_name=filename,
        mimetype='text/markdown'
    )
183
+
184
  html_template = """
185
  <!DOCTYPE html>
186
  <html>
187
  <head>
188
+ <title>Repo & Files to Markdown</title>
189
  <style>
190
  body {
191
  font-family: Arial, sans-serif;
 
208
  color: white;
209
  border: none;
210
  cursor: pointer;
211
+ margin: 5px;
212
  }
213
  button:hover {
214
  background-color: #45a049;
 
237
  </head>
238
  <body>
239
  <div class="container">
240
+ <h1>Repository & Files to Markdown Converter</h1>
241
+ <p>Enter a GitHub/Hugging Face Space URL or upload files</p>
242
+ <input type="text" id="repoUrl" style="width: 100%; padding: 8px;" placeholder="Enter GitHub or Hugging Face Space URL">
243
+ <p>OR</p>
244
+ <input type="file" id="fileInput" multiple style="margin: 10px 0;">
245
+ <br>
246
+ <button onclick="processRepo()">Convert URL</button>
247
+ <button onclick="processFiles()">Convert Files</button>
248
+ <button id="downloadBtn" style="display: none;" onclick="downloadMarkdown()">Download .md</button>
249
  <div id="spinner" class="spinner"></div>
250
 
251
  <h2>Markdown Output:</h2>
 
254
  <h2>Preview:</h2>
255
  <div id="output"></div>
256
  </div>
 
257
  <script>
258
+ let currentMarkdown = '';
259
+ let currentFilename = '';
260
+
261
  async function processRepo() {
262
  const repoUrl = document.getElementById('repoUrl').value;
263
+ await processContent('/process', { repo_url: repoUrl });
264
+ }
265
+
266
+ async function processFiles() {
267
+ const files = document.getElementById('fileInput').files;
268
+ if (files.length === 0) {
269
+ alert('Please select at least one file');
270
+ return;
271
+ }
272
+
273
+ const formData = new FormData();
274
+ for (let file of files) {
275
+ formData.append('files[]', file);
276
+ }
277
+
278
+ await processContent('/process', formData, false);
279
+ }
280
+
281
+ async function processContent(url, data, isJson = true) {
282
  const spinner = document.getElementById('spinner');
283
+ const buttons = document.querySelectorAll('button');
284
 
 
285
  spinner.style.display = 'block';
286
+ buttons.forEach(btn => btn.disabled = true);
287
 
288
  try {
289
+ const options = {
290
  method: 'POST',
291
+ ...(isJson ? {
292
+ headers: { 'Content-Type': 'application/json' },
293
+ body: JSON.stringify(data)
294
+ } : { body: data })
295
+ };
296
 
297
+ const response = await fetch(url, options);
298
+ const result = await response.json();
299
 
300
+ if (result.error) {
301
+ alert(result.error);
302
  return;
303
  }
304
 
305
+ currentMarkdown = result.markdown;
306
+ currentFilename = result.filename;
307
+ document.getElementById('markdownOutput').value = result.markdown;
308
+ document.getElementById('output').innerHTML = result.html;
309
+ document.getElementById('downloadBtn').style.display = 'inline-block';
310
  } catch (error) {
311
  alert('An error occurred: ' + error.message);
312
  } finally {
 
313
  spinner.style.display = 'none';
314
+ buttons.forEach(btn => btn.disabled = false);
315
+ }
316
+ }
317
+
318
+ async function downloadMarkdown() {
319
+ try {
320
+ const response = await fetch('/download', {
321
+ method: 'POST',
322
+ headers: {
323
+ 'Content-Type': 'application/json',
324
+ },
325
+ body: JSON.stringify({
326
+ markdown: currentMarkdown,
327
+ filename: currentFilename
328
+ })
329
+ });
330
+
331
+ const blob = await response.blob();
332
+ const url = window.URL.createObjectURL(blob);
333
+ const a = document.createElement('a');
334
+ a.href = url;
335
+ a.download = currentFilename;
336
+ document.body.appendChild(a);
337
+ a.click();
338
+ a.remove();
339
+ window.URL.revokeObjectURL(url);
340
+ } catch (error) {
341
+ alert('Error downloading file: ' + error.message);
342
  }
343
  }
344
  </script>