Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -25,15 +25,20 @@ def generate_file_tree(paths):
|
|
25 |
def get_all_files(owner, repo, path="", is_hf=False):
|
26 |
"""Recursively fetch all files from a repository."""
|
27 |
if is_hf:
|
28 |
-
|
|
|
29 |
else:
|
30 |
api_url = f"{GITHUB_API}{owner}/{repo}/contents/{path}".rstrip('/')
|
31 |
|
32 |
try:
|
33 |
-
response = requests.get(api_url)
|
34 |
response.raise_for_status()
|
35 |
items = response.json()
|
36 |
|
|
|
|
|
|
|
|
|
37 |
files = []
|
38 |
for item in items:
|
39 |
if item['type'] == 'file':
|
@@ -50,9 +55,15 @@ def get_repo_contents(url):
|
|
50 |
if "huggingface.co" in url:
|
51 |
parts = url.rstrip('/').split('/')
|
52 |
owner, repo = parts[-2], parts[-1]
|
53 |
-
files
|
54 |
-
|
55 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
56 |
return owner, repo, files, True
|
57 |
else: # Assume GitHub URL
|
58 |
parts = url.rstrip('/').split('/')
|
@@ -71,7 +82,7 @@ def process_file_content(file_info, owner, repo, is_hf=False):
|
|
71 |
|
72 |
try:
|
73 |
if is_hf:
|
74 |
-
file_url = f"https://huggingface.co/spaces/{owner}/{repo}/
|
75 |
else:
|
76 |
file_url = f"{GITHUB_API}{owner}/{repo}/contents/{file_path}"
|
77 |
|
@@ -200,31 +211,36 @@ def index():
|
|
200 |
|
201 |
@app.route('/process', methods=['POST'])
|
202 |
def process():
|
|
|
|
|
|
|
203 |
if 'files[]' in request.files:
|
204 |
files = request.files.getlist('files[]')
|
205 |
if not files:
|
206 |
-
|
|
|
207 |
|
208 |
markdown_content = create_markdown_document(files=files)
|
209 |
-
|
210 |
-
|
|
|
211 |
else:
|
212 |
repo_url = request.json.get('repo_url')
|
213 |
if not repo_url:
|
214 |
-
|
|
|
215 |
|
216 |
markdown_content = create_markdown_document(repo_url)
|
217 |
-
|
218 |
-
owner, repo, _, is_hf = get_repo_contents(repo_url)
|
219 |
if not owner:
|
220 |
-
|
221 |
-
|
|
|
|
|
|
|
|
|
222 |
|
223 |
-
return jsonify(
|
224 |
-
'markdown': markdown_content,
|
225 |
-
'html': html_content,
|
226 |
-
'filename': filename
|
227 |
-
})
|
228 |
|
229 |
@app.route('/download', methods=['POST'])
|
230 |
def download():
|
|
|
25 |
def get_all_files(owner, repo, path="", is_hf=False):
|
26 |
"""Recursively fetch all files from a repository."""
|
27 |
if is_hf:
|
28 |
+
# Attempt to fetch file list from Hugging Face Space (publicly accessible files)
|
29 |
+
api_url = f"https://huggingface.co/spaces/{owner}/{repo}/tree/main/{path}".rstrip('/')
|
30 |
else:
|
31 |
api_url = f"{GITHUB_API}{owner}/{repo}/contents/{path}".rstrip('/')
|
32 |
|
33 |
try:
|
34 |
+
response = requests.get(api_url, headers={"Accept": "application/json"})
|
35 |
response.raise_for_status()
|
36 |
items = response.json()
|
37 |
|
38 |
+
# Hugging Face might not return JSON in the same format; adjust if HTML is returned
|
39 |
+
if isinstance(items, str): # If response isn’t JSON, it’s likely HTML
|
40 |
+
return None # Fallback to error handling
|
41 |
+
|
42 |
files = []
|
43 |
for item in items:
|
44 |
if item['type'] == 'file':
|
|
|
55 |
if "huggingface.co" in url:
|
56 |
parts = url.rstrip('/').split('/')
|
57 |
owner, repo = parts[-2], parts[-1]
|
58 |
+
# Fallback approach: manually fetch known files or use a simpler file list
|
59 |
+
# For now, assume a flat structure and fetch known files directly
|
60 |
+
# This is a workaround until a proper API token or endpoint is confirmed
|
61 |
+
known_files = [
|
62 |
+
{'path': 'app.py', 'type': 'file'},
|
63 |
+
{'path': 'README.md', 'type': 'file'}
|
64 |
+
# Add more known paths or implement HTML scraping if needed
|
65 |
+
]
|
66 |
+
files = get_all_files(owner, repo, "", True) or known_files
|
67 |
return owner, repo, files, True
|
68 |
else: # Assume GitHub URL
|
69 |
parts = url.rstrip('/').split('/')
|
|
|
82 |
|
83 |
try:
|
84 |
if is_hf:
|
85 |
+
file_url = f"https://huggingface.co/spaces/{owner}/{repo}/raw/main/{file_path}"
|
86 |
else:
|
87 |
file_url = f"{GITHUB_API}{owner}/{repo}/contents/{file_path}"
|
88 |
|
|
|
211 |
|
212 |
@app.route('/process', methods=['POST'])
def process():
    """Build a Markdown summary from uploaded files or from a repository URL.

    The request either carries uploads under the ``files[]`` form field or a
    JSON body with a ``repo_url`` key. Every response, success or failure, is
    a JSON object with the same four keys: ``markdown``, ``html``,
    ``filename`` and ``error``.

    Returns:
        The JSON response on success, or a ``(response, 400)`` tuple when no
        files / no URL were supplied or when no owner could be resolved for
        the given URL.
    """
    # One payload shape for every exit path so the client can always read
    # the same keys.
    response_data = {'markdown': '', 'html': '', 'filename': '', 'error': None}

    if 'files[]' in request.files:
        files = request.files.getlist('files[]')
        if not files:
            response_data['error'] = 'No files uploaded'
            return jsonify(response_data), 400

        markdown_content = create_markdown_document(files=files)
        response_data['markdown'] = markdown_content
        response_data['html'] = markdown.markdown(markdown_content)
        response_data['filename'] = "uploaded_files_summary.md"
    else:
        # A body that is missing, not JSON, or not a JSON object must end in
        # the 400 below rather than an unhandled error, so parse leniently
        # and only call .get() on a real mapping.
        payload = request.get_json(silent=True)
        repo_url = payload.get('repo_url') if isinstance(payload, dict) else None
        if not repo_url:
            response_data['error'] = 'Please provide a repository URL or upload files'
            return jsonify(response_data), 400

        markdown_content = create_markdown_document(repo_url)
        # Only owner and repo are needed here; the file list and the
        # Hugging Face flag returned alongside them are ignored.
        owner, repo, _, _ = get_repo_contents(repo_url)
        if not owner:
            # NOTE(review): presumably create_markdown_document returns the
            # error text when the URL cannot be resolved - confirm.
            response_data['error'] = markdown_content
            return jsonify(response_data), 400

        response_data['markdown'] = markdown_content
        response_data['html'] = markdown.markdown(markdown_content)
        response_data['filename'] = f"{owner}_{repo}_summary.md"

    return jsonify(response_data)
|
|
|
|
|
|
|
|
|
244 |
|
245 |
@app.route('/download', methods=['POST'])
|
246 |
def download():
|