broadfield-dev committed on
Commit
a8175e6
·
verified ·
1 Parent(s): b42fffe

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +128 -67
app.py CHANGED
@@ -2,132 +2,191 @@ from flask import Flask, render_template, request, jsonify, send_file
2
  import requests
3
  import base64
4
  import markdown
5
- from bs4 import BeautifulSoup
6
- import os
7
- import mimetypes
8
  import json
9
- from io import BytesIO
10
- from pathlib import Path
 
11
 
12
  app = Flask(__name__)
13
 
14
  GITHUB_API = "https://api.github.com/repos/"
15
  HF_API = "https://huggingface.co/api/spaces/"
16
 
17
- def generate_file_tree(contents, is_hf=False):
18
- """Generate a file structure tree"""
19
  tree = ["📁 Root"]
20
- paths = sorted([item['path'] for item in contents if isinstance(item, dict) and 'path' in item])
21
-
22
- for path in paths:
23
  parts = path.split('/')
24
  indent = " " * (len(parts) - 1)
25
  tree.append(f"{indent}📄 {parts[-1]}")
26
-
27
  return "\n".join(tree) + "\n\n"
28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  def get_repo_contents(url):
30
- """Extract contents from GitHub or Hugging Face URL"""
31
  try:
32
  if "huggingface.co" in url:
33
  parts = url.rstrip('/').split('/')
34
  owner, repo = parts[-2], parts[-1]
35
- api_url = f"{HF_API}{owner}/{repo}/files"
36
- response = requests.get(api_url)
37
- response.raise_for_status()
38
- return owner, repo, response.json()["files"], True
39
- else:
40
  parts = url.rstrip('/').split('/')
41
  owner, repo = parts[-2], parts[-1]
42
- api_url = f"{GITHUB_API}{owner}/{repo}/contents"
43
- response = requests.get(api_url)
44
- response.raise_for_status()
45
- return owner, repo, response.json(), False
46
  except Exception as e:
47
- return None, None, str(e), False
48
 
49
  def process_file_content(file_info, owner, repo, is_hf=False):
50
- """Process individual file content"""
51
  content = ""
52
- file_path = file_info['path'] if not is_hf else file_info
53
 
54
- if isinstance(file_info, dict) and 'type' in file_info and file_info['type'] == 'file' or isinstance(file_info, str):
55
  if is_hf:
56
- file_url = f"https://huggingface.co/spaces/{owner}/{repo}/raw/main/{file_path}"
57
  else:
58
  file_url = f"{GITHUB_API}{owner}/{repo}/contents/{file_path}"
59
 
60
- file_response = requests.get(file_url)
 
61
 
62
  if is_hf:
63
- content_raw = file_response.text
64
- else:
65
- file_data = file_response.json()
66
- if 'content' not in file_data:
67
- return f"### File: {file_path}\n[No content available]\n\n"
68
- content_raw = base64.b64decode(file_data['content']).decode('utf-8', errors='ignore')
69
-
70
- file_extension = file_path.split('.')[-1] if '.' in file_path else ''
71
- mime_type, _ = mimetypes.guess_type(file_path)
72
- is_text = (mime_type and mime_type.startswith('text')) or file_extension in ['py', 'md', 'txt', 'js', 'html', 'css', 'json']
73
-
74
- if is_text:
75
- if file_extension == 'json':
76
  try:
77
- json_data = json.loads(content_raw)
78
- formatted_json = json.dumps(json_data, indent=2)
79
- content = f"### File: {file_path}\n```json\n{formatted_json}\n```\n\n"
80
- except json.JSONDecodeError:
81
- content = f"### File: {file_path}\n```json\n{content_raw}\n```\n[Note: Invalid JSON format]\n\n"
 
 
 
 
 
 
 
82
  else:
83
- content = f"### File: {file_path}\n```{(file_extension if file_extension else 'text')}\n{content_raw}\n```\n\n"
84
- else:
85
- content = f"### File: {file_path}\n[Binary file]\n\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
 
87
  return content
88
 
89
  def process_uploaded_file(file):
90
- """Process uploaded file content"""
91
  content = ""
92
  filename = file.filename
93
  file_extension = filename.split('.')[-1] if '.' in filename else ''
94
 
95
  try:
96
- content_raw = file.read().decode('utf-8', errors='ignore')
97
- if file_extension == 'json':
 
 
 
 
 
98
  try:
99
- json_data = json.loads(content_raw)
100
- formatted_json = json.dumps(json_data, indent=2)
101
- content = f"### File: {filename}\n```json\n{formatted_json}\n```\n\n"
102
- except json.JSONDecodeError:
103
- content = f"### File: {filename}\n```json\n{content_raw}\n```\n[Note: Invalid JSON format]\n\n"
 
 
 
 
 
 
 
104
  else:
105
- content = f"### File: {filename}\n```{(file_extension if file_extension else 'text')}\n{content_raw}\n```\n\n"
106
  except Exception as e:
107
  content = f"### File: {filename}\n[Error processing file: {str(e)}]\n\n"
108
 
109
  return content
110
 
111
  def create_markdown_document(url=None, files=None):
112
- """Create markdown document from repo contents or uploaded files"""
113
  if url:
114
  owner, repo, contents, is_hf = get_repo_contents(url)
115
 
116
- if isinstance(contents, str):
117
  return f"Error: {contents}"
118
 
119
  markdown_content = f"# {'Space' if is_hf else 'Repository'}: {owner}/{repo}\n\n"
120
  markdown_content += "## File Structure\n```\n"
121
- markdown_content += generate_file_tree(contents, is_hf)
122
  markdown_content += "```\n\n"
123
  markdown_content += f"Below are the contents of all files in the {'space' if is_hf else 'repository'}:\n\n"
124
 
125
  for item in contents:
126
  markdown_content += process_file_content(item, owner, repo, is_hf)
127
- else:
128
  markdown_content = "# Uploaded Files\n\n"
129
  markdown_content += "## File Structure\n```\n"
130
- markdown_content += "📁 Uploads\n" + "\n".join([f" 📄 {file.filename}" for file in files]) + "\n"
131
  markdown_content += "```\n\n"
132
  markdown_content += "Below are the contents of all uploaded files:\n\n"
133
  for file in files:
@@ -157,6 +216,8 @@ def process():
157
  markdown_content = create_markdown_document(repo_url)
158
  html_content = markdown.markdown(markdown_content)
159
  owner, repo, _, is_hf = get_repo_contents(repo_url)
 
 
160
  filename = f"{owner}_{repo}_summary.md"
161
 
162
  return jsonify({
@@ -170,7 +231,7 @@ def download():
170
  markdown_content = request.json.get('markdown')
171
  filename = request.json.get('filename')
172
 
173
- buffer = BytesIO()
174
  buffer.write(markdown_content.encode('utf-8'))
175
  buffer.seek(0)
176
 
@@ -238,10 +299,10 @@ html_template = """
238
  <body>
239
  <div class="container">
240
  <h1>Repository & Files to Markdown Converter</h1>
241
- <p>Enter a GitHub/Hugging Face Space URL or upload files</p>
242
  <input type="text" id="repoUrl" style="width: 100%; padding: 8px;" placeholder="Enter GitHub or Hugging Face Space URL">
243
- <p>OR</p>
244
- <input type="file" id="fileInput" multiple style="margin: 10px 0;">
245
  <br>
246
  <button onclick="processRepo()">Convert URL</button>
247
  <button onclick="processFiles()">Convert Files</button>
@@ -266,7 +327,7 @@ html_template = """
266
  async function processFiles() {
267
  const files = document.getElementById('fileInput').files;
268
  if (files.length === 0) {
269
- alert('Please select at least one file');
270
  return;
271
  }
272
 
 
2
  import requests
3
  import base64
4
  import markdown
 
 
 
5
  import json
6
+ import mimetypes
7
+ import os
8
+ import io
9
 
10
  app = Flask(__name__)
11
 
12
  GITHUB_API = "https://api.github.com/repos/"
13
  HF_API = "https://huggingface.co/api/spaces/"
14
 
15
def generate_file_tree(paths):
    """Generate a simple text file tree from a list of '/'-separated paths.

    Each directory is listed once (📁) immediately before its contents, and
    each file (📄) is indented by one space per directory level, so files with
    the same name in different folders stay distinguishable.

    Args:
        paths: Iterable of path strings such as "src/app.py". Empty or None
            entries are skipped.

    Returns:
        The tree as a newline-joined string that starts with "📁 Root" and
        ends with a blank line.
    """
    tree = ["📁 Root"]
    seen_dirs = set()

    # Lexicographic order keeps every entry that shares a directory prefix
    # contiguous, so a folder line can be emitted the first time it is seen.
    for path in sorted(p for p in paths if p):
        parts = [part for part in path.split('/') if part]
        if not parts:
            continue

        for depth in range(1, len(parts)):
            directory = '/'.join(parts[:depth])
            if directory not in seen_dirs:
                seen_dirs.add(directory)
                tree.append(f"{' ' * (depth - 1)}📁 {parts[depth - 1]}")

        indent = " " * (len(parts) - 1)
        tree.append(f"{indent}📄 {parts[-1]}")

    return "\n".join(tree) + "\n\n"
24
 
25
def get_all_files(owner, repo, path="", is_hf=False, timeout=30):
    """Recursively fetch the metadata of every file in a repository.

    Args:
        owner: Account or organisation name.
        repo: Repository (or Space) name.
        path: Directory inside the repository to list; "" lists the root.
        is_hf: True to query the Hugging Face Spaces tree API, False to query
            the GitHub contents API.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        A list of the file entries (dicts) returned by the API, or None if any
        listing request fails or returns an unexpected payload.
    """
    if is_hf:
        api_url = f"{HF_API}{owner}/{repo}/tree/main/{path}".rstrip('/')
    else:
        api_url = f"{GITHUB_API}{owner}/{repo}/contents/{path}".rstrip('/')

    try:
        response = requests.get(api_url, timeout=timeout)
        response.raise_for_status()
        items = response.json()
    except (requests.RequestException, ValueError):
        return None

    # Both APIs answer a directory listing with a JSON array; anything else
    # (an error object, a single-file object) cannot be walked.
    if not isinstance(items, list):
        return None

    files = []
    for item in items:
        if not isinstance(item, dict):
            return None
        item_type = item.get('type')
        if item_type == 'file':
            files.append(item)
        # GitHub labels folders 'dir'; the Hugging Face tree API labels them
        # 'directory'. Accept both so Space subfolders are traversed too.
        elif item_type in ('dir', 'directory'):
            nested = get_all_files(owner, repo, item.get('path', ''), is_hf, timeout)
            if nested is None:
                # Fail the whole walk instead of silently returning a partial tree.
                return None
            files.extend(nested)
    return files
46
+
47
def get_repo_contents(url):
    """Parse a repository URL and fetch the list of files it contains.

    A URL containing "huggingface.co" is treated as a Hugging Face Space;
    anything else is assumed to be a GitHub repository. The owner and
    repository names are taken from the last two '/'-separated segments.

    Args:
        url: Repository URL, e.g. "https://github.com/owner/repo".

    Returns:
        A 4-tuple ``(owner, repo, contents, is_hf)``. On success ``contents``
        is the list produced by ``get_all_files``. On failure the tuple is
        ``(None, None, "<error message>", False)``.
    """
    try:
        is_hf = "huggingface.co" in url
        parts = url.strip().rstrip('/').split('/')
        if len(parts) < 2 or not parts[-2] or not parts[-1]:
            raise ValueError("URL must end with <owner>/<repo>")

        owner, repo = parts[-2], parts[-1]
        # GitHub clone URLs end in ".git"; the API expects the bare repo name.
        if not is_hf and repo.endswith('.git'):
            repo = repo[:-len('.git')]

        files = get_all_files(owner, repo, "", is_hf)
        if files is None:
            if is_hf:
                raise RuntimeError("Failed to fetch Hugging Face Space contents")
            raise RuntimeError("Failed to fetch GitHub repository contents")
        return owner, repo, files, is_hf
    except Exception as e:
        # Callers detect failure by checking for a string in the third slot.
        return None, None, f"Error fetching repo contents: {str(e)}", False
66
 
67
def _format_repo_file_section(file_path, raw_bytes, size):
    """Render one repository file's bytes as a markdown section."""
    file_extension = file_path.split('.')[-1] if '.' in file_path else ''
    ext_key = file_extension.lower()  # match "README.MD" like "README.md"
    mime_type, _ = mimetypes.guess_type(file_path)
    is_text = bool(mime_type and mime_type.startswith('text')) or ext_key in (
        'py', 'md', 'txt', 'js', 'html', 'css', 'json')

    binary_section = f"### File: {file_path}\n[Binary file - {size} bytes]\n\n"
    if not is_text:
        return binary_section

    try:
        # 'utf-8-sig' drops a leading BOM, which would otherwise make
        # json.loads reject an otherwise valid document.
        text_content = raw_bytes.decode('utf-8-sig')
    except UnicodeDecodeError:
        return binary_section

    if ext_key == 'json':
        try:
            # ensure_ascii=False keeps non-ASCII text readable instead of \uXXXX.
            formatted_json = json.dumps(json.loads(text_content), indent=2, ensure_ascii=False)
        except json.JSONDecodeError:
            return f"### File: {file_path}\n```json\n{text_content}\n```\n[Note: Invalid JSON format]\n\n"
        return f"### File: {file_path}\n```json\n{formatted_json}\n```\n\n"

    return f"### File: {file_path}\n```{file_extension or 'text'}\n{text_content}\n```\n\n"


def process_file_content(file_info, owner, repo, is_hf=False):
    """Download one repository file and render it as a markdown section.

    Args:
        file_info: File entry dict from the listing API; only its 'path' key
            is read.
        owner: Account or organisation name.
        repo: Repository (or Space) name.
        is_hf: True to download from the Hugging Face Space, False to use the
            GitHub contents API.

    Returns:
        A markdown string headed "### File: <path>" followed by a fenced code
        block for text files, a "[Binary file - N bytes]" note for others,
        "[No content available]" when GitHub returns no inline content, or an
        "[Error fetching file content: ...]" note if anything fails.
    """
    from urllib.parse import quote

    file_path = file_info['path']

    try:
        # Percent-encode spaces, '#', '?' etc. while keeping '/' separators.
        encoded_path = quote(file_path)
        if is_hf:
            file_url = f"https://huggingface.co/spaces/{owner}/{repo}/resolve/main/{encoded_path}"
        else:
            file_url = f"{GITHUB_API}{owner}/{repo}/contents/{encoded_path}"

        response = requests.get(file_url, timeout=30)
        response.raise_for_status()

        if is_hf:
            content_raw = response.content
            return _format_repo_file_section(file_path, content_raw, len(content_raw))

        data = response.json()
        # GitHub answers large files with encoding "none" and an empty
        # 'content' string; treat that the same as a missing payload.
        if not isinstance(data, dict) or 'content' not in data or data.get('encoding') == 'none':
            return f"### File: {file_path}\n[No content available]\n\n"

        content_raw = base64.b64decode(data['content'])
        return _format_repo_file_section(file_path, content_raw, data.get('size', len(content_raw)))
    except Exception as e:
        # Best effort: one unreadable file must not abort the whole document.
        return f"### File: {file_path}\n[Error fetching file content: {str(e)}]\n\n"
135
 
136
def process_uploaded_file(file):
    """Render one uploaded file as a markdown section.

    Args:
        file: Upload object exposing a ``filename`` attribute and a ``read()``
            method that returns the file's bytes.

    Returns:
        A markdown string headed "### File: <filename>" followed by a fenced
        code block for text files (JSON is re-indented when valid), a
        "[Binary file - N bytes]" note for non-text or non-UTF-8 data, or an
        "[Error processing file: ...]" note if reading fails.
    """
    # An upload can arrive without a name; fall back to a placeholder instead
    # of crashing on None.split().
    filename = file.filename or "unnamed"
    file_extension = filename.split('.')[-1] if '.' in filename else ''
    ext_key = file_extension.lower()  # match "NOTES.TXT" like "notes.txt"

    try:
        content_raw = file.read()  # whole file is held in memory
        size = len(content_raw)
        binary_section = f"### File: {filename}\n[Binary file - {size} bytes]\n\n"

        mime_type, _ = mimetypes.guess_type(filename)
        is_text = bool(mime_type and mime_type.startswith('text')) or ext_key in (
            'py', 'md', 'txt', 'js', 'html', 'css', 'json')
        if not is_text:
            return binary_section

        try:
            # 'utf-8-sig' drops a leading BOM, which would otherwise make
            # json.loads reject an otherwise valid document.
            text_content = content_raw.decode('utf-8-sig')
        except UnicodeDecodeError:
            return binary_section

        if ext_key == 'json':
            try:
                # ensure_ascii=False keeps non-ASCII text readable instead of \uXXXX.
                formatted_json = json.dumps(json.loads(text_content), indent=2, ensure_ascii=False)
            except json.JSONDecodeError:
                return f"### File: {filename}\n```json\n{text_content}\n```\n[Note: Invalid JSON format]\n\n"
            return f"### File: {filename}\n```json\n{formatted_json}\n```\n\n"

        return f"### File: {filename}\n```{file_extension or 'text'}\n{text_content}\n```\n\n"
    except Exception as e:
        # Best effort: one bad upload must not abort the whole document.
        return f"### File: {filename}\n[Error processing file: {str(e)}]\n\n"
169
 
170
  def create_markdown_document(url=None, files=None):
171
+ """Create markdown document from repo contents or uploaded files."""
172
  if url:
173
  owner, repo, contents, is_hf = get_repo_contents(url)
174
 
175
+ if isinstance(contents, str): # Error case
176
  return f"Error: {contents}"
177
 
178
  markdown_content = f"# {'Space' if is_hf else 'Repository'}: {owner}/{repo}\n\n"
179
  markdown_content += "## File Structure\n```\n"
180
+ markdown_content += generate_file_tree([item['path'] for item in contents])
181
  markdown_content += "```\n\n"
182
  markdown_content += f"Below are the contents of all files in the {'space' if is_hf else 'repository'}:\n\n"
183
 
184
  for item in contents:
185
  markdown_content += process_file_content(item, owner, repo, is_hf)
186
+ else: # Handle uploaded files
187
  markdown_content = "# Uploaded Files\n\n"
188
  markdown_content += "## File Structure\n```\n"
189
+ markdown_content += generate_file_tree([file.filename for file in files])
190
  markdown_content += "```\n\n"
191
  markdown_content += "Below are the contents of all uploaded files:\n\n"
192
  for file in files:
 
216
  markdown_content = create_markdown_document(repo_url)
217
  html_content = markdown.markdown(markdown_content)
218
  owner, repo, _, is_hf = get_repo_contents(repo_url)
219
+ if not owner:
220
+ return jsonify({'error': markdown_content}), 400
221
  filename = f"{owner}_{repo}_summary.md"
222
 
223
  return jsonify({
 
231
  markdown_content = request.json.get('markdown')
232
  filename = request.json.get('filename')
233
 
234
+ buffer = io.BytesIO()
235
  buffer.write(markdown_content.encode('utf-8'))
236
  buffer.seek(0)
237
 
 
299
  <body>
300
  <div class="container">
301
  <h1>Repository & Files to Markdown Converter</h1>
302
+ <p>Enter a GitHub/Hugging Face Space URL (e.g., https://huggingface.co/spaces/username/space)</p>
303
  <input type="text" id="repoUrl" style="width: 100%; padding: 8px;" placeholder="Enter GitHub or Hugging Face Space URL">
304
+ <p>OR upload files (select multiple files or a folder - folder upload supported in Chrome)</p>
305
+ <input type="file" id="fileInput" multiple webkitdirectory style="margin: 10px 0;">
306
  <br>
307
  <button onclick="processRepo()">Convert URL</button>
308
  <button onclick="processFiles()">Convert Files</button>
 
327
  async function processFiles() {
328
  const files = document.getElementById('fileInput').files;
329
  if (files.length === 0) {
330
+ alert('Please select at least one file or folder');
331
  return;
332
  }
333