Spaces:

broadfield-dev
/

repo_to_md

Running

App Files Files Community

broadfield-dev committed on Feb 25

Commit

b42fffe

verified ·

1 Parent(s): 540d342

Update app.py

Browse files

Files changed (1) hide show

app.py +221 -75

app.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from flask import Flask, render_template, request, jsonify
 import requests
 import base64
 import markdown
@@ -6,70 +6,132 @@ from bs4 import BeautifulSoup
 import os
 import mimetypes
 import json
 app = Flask(__name__)
 GITHUB_API = "https://api.github.com/repos/"
-def get_repo_contents(repo_url):
-    """Extract contents from GitHub repo URL"""
     try:
-        parts = repo_url.rstrip('/').split('/')
-        owner, repo = parts[-2], parts[-1]
-        api_url = f"{GITHUB_API}{owner}/{repo}/contents"
-        response = requests.get(api_url)
-        response.raise_for_status()
-        return owner, repo, response.json()
     except Exception as e:
-        return None, None, str(e)
-def process_file_content(file_info, owner, repo):
     """Process individual file content"""
     content = ""
-    file_path = file_info['path']
-    if file_info['type'] == 'file':
-        file_url = f"{GITHUB_API}{owner}/{repo}/contents/{file_path}"
         file_response = requests.get(file_url)
-        file_data = file_response.json()
-        if 'content' in file_data:
-            file_extension = file_path.split('.')[-1] if '.' in file_path else ''
-            mime_type, _ = mimetypes.guess_type(file_path)
-            is_text = (mime_type and mime_type.startswith('text')) or file_extension in ['py', 'md', 'txt', 'js', 'html', 'css', 'json']
-            if is_text:
                 try:
-                    decoded_content = base64.b64decode(file_data['content']).decode('utf-8')
-                    # Special handling for JSON files
-                    if file_extension == 'json':
-                        try:
-                            json_data = json.loads(decoded_content)
-                            formatted_json = json.dumps(json_data, indent=2)
-                            content = f"### File: {file_path}\n```json\n{formatted_json}\n```\n\n"
-                        except json.JSONDecodeError:
-                            content = f"### File: {file_path}\n```json\n{decoded_content}\n```\n[Note: Invalid JSON format]\n\n"
-                    else:
-                        content = f"### File: {file_path}\n```{(file_extension if file_extension else 'text')}\n{decoded_content}\n```\n\n"
-                except UnicodeDecodeError:
-                    content = f"### File: {file_path}\n[Text content could not be decoded - possibly corrupted or non-UTF-8 text]\n\n"
             else:
-                content = f"### File: {file_path}\n[Binary file - {file_data['size']} bytes]\n\n"
     return content
-def create_markdown_document(repo_url):
-    """Create markdown document from repo contents"""
-    owner, repo, contents = get_repo_contents(repo_url)
-    if isinstance(contents, str):
-        return f"Error: {contents}"
-    markdown_content = f"# Repository: {owner}/{repo}\n\n"
-    markdown_content += "Below are the contents of all files in the repository:\n\n"
-    for item in contents:
-        markdown_content += process_file_content(item, owner, repo)
     return markdown_content
@@ -78,24 +140,52 @@ def index():
     return render_template('index.html')
 @app.route('/process', methods=['POST'])
-def process_repo():
-    repo_url = request.json.get('repo_url')
-    if not repo_url:
-        return jsonify({'error': 'Please provide a repository URL'}), 400
-    markdown_content = create_markdown_document(repo_url)
-    html_content = markdown.markdown(markdown_content)
     return jsonify({
         'markdown': markdown_content,
-        'html': html_content
     })
 html_template = """
 <!DOCTYPE html>
 <html>
 <head>
-    <title>GitHub Repo to Markdown</title>
     <style>
         body {
             font-family: Arial, sans-serif;
@@ -118,6 +208,7 @@ html_template = """
             color: white;
             border: none;
             cursor: pointer;
         }
         button:hover {
             background-color: #45a049;
@@ -146,10 +237,15 @@ html_template = """
 </head>
 <body>
     <div class="container">
-        <h1>GitHub Repository to Markdown Converter</h1>
-        <p>Enter a GitHub repository URL (e.g., https://github.com/username/repository)</p>
-        <input type="text" id="repoUrl" style="width: 100%; padding: 8px;" placeholder="Enter GitHub repository URL">
-        <button onclick="processRepo()">Convert to Markdown</button>
         <div id="spinner" class="spinner"></div>
         <h2>Markdown Output:</h2>
@@ -158,41 +254,91 @@ html_template = """
         <h2>Preview:</h2>
         <div id="output"></div>
     </div>
     <script>
         async function processRepo() {
             const repoUrl = document.getElementById('repoUrl').value;
             const spinner = document.getElementById('spinner');
-            const button = document.querySelector('button');
-            // Show spinner, disable button
             spinner.style.display = 'block';
-            button.disabled = true;
             try {
-                const response = await fetch('/process', {
                     method: 'POST',
-                    headers: {
-                        'Content-Type': 'application/json',
-                    },
-                    body: JSON.stringify({ repo_url: repoUrl })
-                });
-                const data = await response.json();
-                if (data.error) {
-                    alert(data.error);
                     return;
                 }
-                document.getElementById('markdownOutput').value = data.markdown;
-                document.getElementById('output').innerHTML = data.html;
             } catch (error) {
                 alert('An error occurred: ' + error.message);
             } finally {
-                // Hide spinner, enable button
                 spinner.style.display = 'none';
-                button.disabled = false;
             }
         }
     </script>

+from flask import Flask, render_template, request, jsonify, send_file
 import requests
 import base64
 import markdown
 import os
 import mimetypes
 import json
+from io import BytesIO
+from pathlib import Path
 app = Flask(__name__)
 GITHUB_API = "https://api.github.com/repos/"
+HF_API = "https://huggingface.co/api/spaces/"
def generate_file_tree(contents, is_hf=False):
    """Render a plain-text listing of the files in ``contents``.

    Args:
        contents: Iterable of listing entries. Dict entries contribute their
            ``'path'`` value; entries without a ``'path'`` key are ignored.
            When ``is_hf`` is true, plain string entries are also accepted
            and used as paths directly.
        is_hf: Whether the listing came from a Hugging Face Space, whose
            entries are consumed elsewhere in this file as bare path strings.

    Returns:
        A string starting with a root line, followed by one line per file
        (sorted by full path), ending with a blank line. Each file line shows
        only the final path component, indented two spaces per parent
        directory; directories themselves are not listed.
    """
    paths = []
    for item in contents:
        if isinstance(item, dict):
            if 'path' in item:
                paths.append(item['path'])
        elif is_hf and isinstance(item, str):
            # Strings are only trusted as paths for HF listings; for other
            # sources a stray string (e.g. a dict key) is not a file path.
            paths.append(item)

    tree = ["📁 Root"]
    for path in sorted(paths):
        parts = path.split('/')
        indent = "  " * (len(parts) - 1)
        tree.append(f"{indent}📄 {parts[-1]}")
    return "\n".join(tree) + "\n\n"
def get_repo_contents(url):
    """Fetch the file listing for a GitHub repository or Hugging Face Space.

    The last two ``/``-separated segments of ``url`` are taken as the owner
    and the repository/space name. URLs containing ``huggingface.co`` are
    queried through ``HF_API``; everything else goes through ``GITHUB_API``.

    Args:
        url: Repository or Space URL.

    Returns:
        A 4-tuple ``(owner, repo, contents, is_hf)``. On success ``contents``
        is the decoded JSON listing (for Hugging Face, the value under its
        ``"files"`` key). On any failure the tuple is
        ``(None, None, <error message string>, False)``; callers detect the
        failure by checking whether ``contents`` is a string.
    """
    try:
        is_hf = "huggingface.co" in url
        parts = url.rstrip('/').split('/')
        owner, repo = parts[-2], parts[-1]
        if is_hf:
            api_url = f"{HF_API}{owner}/{repo}/files"
        else:
            api_url = f"{GITHUB_API}{owner}/{repo}/contents"
        # Without a timeout a stalled upstream would block this request
        # handler indefinitely; a timeout surfaces as the error tuple below.
        response = requests.get(api_url, timeout=30)
        response.raise_for_status()
        listing = response.json()
        return owner, repo, (listing["files"] if is_hf else listing), is_hf
    except Exception as e:
        # Deliberately broad: every failure mode is reported to the caller
        # as an error string rather than raised.
        return None, None, str(e), False
def process_file_content(file_info, owner, repo, is_hf=False):
    """Fetch one listed file and render it as a markdown section.

    Args:
        file_info: A listing entry. Dict entries are processed only when
            their ``'type'`` is ``'file'`` and their ``'path'`` is used;
            string entries are used as the path directly.
        owner: Repository or Space owner.
        repo: Repository or Space name.
        is_hf: When true, the raw file is downloaded from the Hugging Face
            Space; otherwise the GitHub contents endpoint is queried and its
            ``'content'`` field is base64-decoded.

    Returns:
        A markdown section for the file: a fenced code block for text files
        (JSON is pretty-printed when it parses), a ``[Binary file]`` note for
        other files, a ``[No content available]`` note when the content could
        not be retrieved, or an empty string for entries that are not files.
    """
    is_file_entry = isinstance(file_info, dict) and file_info.get('type') == 'file'
    if not (is_file_entry or isinstance(file_info, str)):
        return ""

    file_path = file_info['path'] if isinstance(file_info, dict) else file_info
    unavailable = f"### File: {file_path}\n[No content available]\n\n"

    if is_hf:
        file_url = f"https://huggingface.co/spaces/{owner}/{repo}/raw/main/{file_path}"
    else:
        file_url = f"{GITHUB_API}{owner}/{repo}/contents/{file_path}"
    # A timeout keeps one stalled download from hanging the whole conversion.
    file_response = requests.get(file_url, timeout=30)

    if is_hf:
        # Without this check an HTTP error page would be embedded as if it
        # were the file's content.
        if not file_response.ok:
            return unavailable
        content_raw = file_response.text
    else:
        try:
            file_data = file_response.json()
        except ValueError:
            return unavailable
        if not isinstance(file_data, dict) or 'content' not in file_data:
            return unavailable
        content_raw = base64.b64decode(file_data['content']).decode('utf-8', errors='ignore')

    # Take the extension from the final path component only, so a dot in a
    # directory name does not leak into the code-fence language tag.
    base_name = file_path.rsplit('/', 1)[-1]
    file_extension = base_name.rsplit('.', 1)[-1] if '.' in base_name else ''
    mime_type, _ = mimetypes.guess_type(file_path)
    is_text = (mime_type and mime_type.startswith('text')) or file_extension in (
        'py', 'md', 'txt', 'js', 'html', 'css', 'json'
    )
    if not is_text:
        return f"### File: {file_path}\n[Binary file]\n\n"

    if file_extension == 'json':
        try:
            formatted_json = json.dumps(json.loads(content_raw), indent=2)
        except json.JSONDecodeError:
            return f"### File: {file_path}\n```json\n{content_raw}\n```\n[Note: Invalid JSON format]\n\n"
        return f"### File: {file_path}\n```json\n{formatted_json}\n```\n\n"

    return f"### File: {file_path}\n```{(file_extension if file_extension else 'text')}\n{content_raw}\n```\n\n"
def process_uploaded_file(file):
    """Render one uploaded file as a markdown section.

    ``file`` must expose ``filename`` and a ``read()`` method returning bytes.
    The bytes are decoded as UTF-8 with undecodable sequences dropped. Files
    whose name ends in ``.json`` are pretty-printed when they parse and are
    emitted verbatim with a note when they do not; all other files are
    emitted in a code fence tagged with their extension (``text`` if none).
    Any exception raised while reading or formatting is reported inside the
    returned section instead of propagating.
    """
    filename = file.filename
    ext = filename.rsplit('.', 1)[1] if '.' in filename else ''
    try:
        text = file.read().decode('utf-8', errors='ignore')
        if ext != 'json':
            return f"### File: {filename}\n```{(ext if ext else 'text')}\n{text}\n```\n\n"
        try:
            pretty = json.dumps(json.loads(text), indent=2)
        except json.JSONDecodeError:
            return f"### File: {filename}\n```json\n{text}\n```\n[Note: Invalid JSON format]\n\n"
        return f"### File: {filename}\n```json\n{pretty}\n```\n\n"
    except Exception as e:
        return f"### File: {filename}\n[Error processing file: {str(e)}]\n\n"
def create_markdown_document(url=None, files=None):
    """Build a markdown document from a repository URL or uploaded files.

    Args:
        url: GitHub or Hugging Face Space URL. When truthy it takes
            precedence over ``files``.
        files: Iterable of uploaded file objects exposing ``filename``; used
            only when ``url`` is falsy. ``None`` is treated as no files.

    Returns:
        The markdown document as a string: a title, a file-structure block,
        and one section per file. If the repository listing could not be
        fetched, the string ``"Error: <message>"`` is returned instead.
    """
    if url:
        owner, repo, contents, is_hf = get_repo_contents(url)
        # get_repo_contents signals failure by returning the error text in
        # place of the listing.
        if isinstance(contents, str):
            return f"Error: {contents}"
        sections = [
            f"# {'Space' if is_hf else 'Repository'}: {owner}/{repo}\n\n",
            "## File Structure\n```\n",
            generate_file_tree(contents, is_hf),
            "```\n\n",
            f"Below are the contents of all files in the {'space' if is_hf else 'repository'}:\n\n",
        ]
        sections.extend(
            process_file_content(item, owner, repo, is_hf) for item in contents
        )
    else:
        # Materialize once: the files are walked twice (listing, then
        # contents), and None must behave like an empty upload.
        files = list(files) if files is not None else []
        listing = "\n".join([f"  📄 {file.filename}" for file in files])
        sections = [
            "# Uploaded Files\n\n",
            "## File Structure\n```\n",
            "📁 Uploads\n" + listing + "\n",
            "```\n\n",
            "Below are the contents of all uploaded files:\n\n",
        ]
        sections.extend(process_uploaded_file(file) for file in files)
    return "".join(sections)
     return render_template('index.html')
 @app.route('/process', methods=['POST'])
def process():
    """Convert uploaded files or a repository URL into markdown and HTML.

    Multipart requests carrying ``files[]`` are treated as uploads; any other
    request is expected to carry a JSON body with a ``repo_url`` string.

    Returns:
        A JSON response with ``markdown``, ``html`` and a suggested download
        ``filename``, or a JSON ``error`` with status 400 when no usable
        input was supplied.
    """
    if 'files[]' in request.files:
        files = request.files.getlist('files[]')
        if not files:
            return jsonify({'error': 'No files uploaded'}), 400
        markdown_content = create_markdown_document(files=files)
        filename = "uploaded_files_summary.md"
    else:
        # silent=True makes a missing or non-JSON body come back as None, so
        # the request gets this handler's own 400 message below.
        payload = request.get_json(silent=True)
        repo_url = payload.get('repo_url') if isinstance(payload, dict) else None
        if not repo_url or not isinstance(repo_url, str):
            return jsonify({'error': 'Please provide a repository URL or upload files'}), 400
        markdown_content = create_markdown_document(repo_url)
        # Derive the name from the URL itself rather than fetching the
        # listing a second time just to learn owner and repo.
        parts = repo_url.rstrip('/').split('/')
        if len(parts) >= 2:
            filename = f"{parts[-2]}_{parts[-1]}_summary.md"
        else:
            filename = "repository_summary.md"
    html_content = markdown.markdown(markdown_content)
    return jsonify({
        'markdown': markdown_content,
        'html': html_content,
        'filename': filename
    })
@app.route('/download', methods=['POST'])
def download():
    """Return posted markdown text as a downloadable ``.md`` attachment.

    Expects a JSON body with a ``markdown`` string and an optional
    ``filename``. Responds with status 400 and a JSON ``error`` when the
    markdown text is missing or not a string.
    """
    # silent=True: a missing or non-JSON body yields None rather than an
    # error raised while parsing.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        payload = {}
    markdown_content = payload.get('markdown')
    if not isinstance(markdown_content, str):
        return jsonify({'error': 'No markdown content provided'}), 400
    filename = payload.get('filename') or 'summary.md'
    buffer = BytesIO(markdown_content.encode('utf-8'))
    return send_file(
        buffer,
        as_attachment=True,
        download_name=filename,
        mimetype='text/markdown'
    )
 html_template = """
 <!DOCTYPE html>
 <html>
 <head>
+    <title>Repo & Files to Markdown</title>
     <style>
         body {
             font-family: Arial, sans-serif;
             color: white;
             border: none;
             cursor: pointer;
+            margin: 5px;
         }
         button:hover {
             background-color: #45a049;
 </head>
 <body>
     <div class="container">
+        <h1>Repository & Files to Markdown Converter</h1>
+        <p>Enter a GitHub/Hugging Face Space URL or upload files</p>
+        <input type="text" id="repoUrl" style="width: 100%; padding: 8px;" placeholder="Enter GitHub or Hugging Face Space URL">
+        <p>OR</p>
+        <input type="file" id="fileInput" multiple style="margin: 10px 0;">
+        <br>
+        <button onclick="processRepo()">Convert URL</button>
+        <button onclick="processFiles()">Convert Files</button>
+        <button id="downloadBtn" style="display: none;" onclick="downloadMarkdown()">Download .md</button>
         <div id="spinner" class="spinner"></div>
         <h2>Markdown Output:</h2>
         <h2>Preview:</h2>
         <div id="output"></div>
     </div>
     <script>
+        let currentMarkdown = '';
+        let currentFilename = '';
         async function processRepo() {
             const repoUrl = document.getElementById('repoUrl').value;
+            await processContent('/process', { repo_url: repoUrl });
+        }
+        async function processFiles() {
+            const files = document.getElementById('fileInput').files;
+            if (files.length === 0) {
+                alert('Please select at least one file');
+                return;
+            }
+            const formData = new FormData();
+            for (let file of files) {
+                formData.append('files[]', file);
+            }
+            await processContent('/process', formData, false);
+        }
+        async function processContent(url, data, isJson = true) {
             const spinner = document.getElementById('spinner');
+            const buttons = document.querySelectorAll('button');
             spinner.style.display = 'block';
+            buttons.forEach(btn => btn.disabled = true);
             try {
+                const options = {
                     method: 'POST',
+                    ...(isJson ? {
+                        headers: { 'Content-Type': 'application/json' },
+                        body: JSON.stringify(data)
+                    } : { body: data })
+                };
+                const response = await fetch(url, options);
+                const result = await response.json();
+                if (result.error) {
+                    alert(result.error);
                     return;
                 }
+                currentMarkdown = result.markdown;
+                currentFilename = result.filename;
+                document.getElementById('markdownOutput').value = result.markdown;
+                document.getElementById('output').innerHTML = result.html;
+                document.getElementById('downloadBtn').style.display = 'inline-block';
             } catch (error) {
                 alert('An error occurred: ' + error.message);
             } finally {
                 spinner.style.display = 'none';
+                buttons.forEach(btn => btn.disabled = false);
+            }
+        }
+        async function downloadMarkdown() {
+            try {
+                const response = await fetch('/download', {
+                    method: 'POST',
+                    headers: {
+                        'Content-Type': 'application/json',
+                    },
+                    body: JSON.stringify({
+                        markdown: currentMarkdown,
+                        filename: currentFilename
+                    })
+                });
+                const blob = await response.blob();
+                const url = window.URL.createObjectURL(blob);
+                const a = document.createElement('a');
+                a.href = url;
+                a.download = currentFilename;
+                document.body.appendChild(a);
+                a.click();
+                a.remove();
+                window.URL.revokeObjectURL(url);
+            } catch (error) {
+                alert('Error downloading file: ' + error.message);
             }
         }
     </script>