File size: 7,234 Bytes
29a56dd
0c835d8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7f71c49
29a56dd
 
0c835d8
428bf45
 
 
 
 
7f71c49
d0d9535
0c835d8
 
29a56dd
d0d9535
7de3bfe
7f71c49
0c835d8
 
 
 
428bf45
 
0c835d8
428bf45
0c835d8
 
428bf45
0c835d8
428bf45
0c835d8
428bf45
 
 
0c835d8
428bf45
 
29a56dd
428bf45
0c835d8
 
428bf45
0c835d8
 
 
29a56dd
 
428bf45
29a56dd
0c835d8
de80961
 
428bf45
0c835d8
428bf45
 
 
 
0c835d8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
428bf45
 
 
0c835d8
 
428bf45
 
0c835d8
 
 
 
 
428bf45
 
 
0c835d8
 
 
 
 
 
 
 
 
 
 
 
 
428bf45
 
 
0c835d8
428bf45
 
 
0c835d8
428bf45
0c835d8
 
 
29a56dd
428bf45
0c835d8
 
 
428bf45
 
 
 
0c835d8
428bf45
 
 
0c835d8
 
 
428bf45
 
 
 
0c835d8
428bf45
0c835d8
 
 
 
428bf45
0c835d8
29a56dd
0c835d8
29a56dd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
"""
Streamlit Universal File-Format Converter
----------------------------------------
A Streamlit app for Hugging Face Spaces that **actually converts** file
contents across a wide array of formats, leveraging local libraries
(no API keys needed):

• **Images** via Pillow (JPEG, PNG, GIF, BMP, TIFF, ICO, WebP)
• **Text & markup** via pypandoc (MD, HTML, LaTeX, DOCX, PDF, etc.)
• **Office docs** via unoconv + LibreOffice headless (PDF, DOCX, PPTX, XLSX)
• **Audio/video** via ffmpeg-python (MP3, WAV, MP4, AVI, MKV, MOV, etc.)
• **MIME detection** via python-magic

Disallowed uploads: `.exe`, `.bin`
All outputs are streamed into a ZIP for download.

Created 2025-05-22 • v3
"""
from __future__ import annotations

# Set up a writable Streamlit home BEFORE importing streamlit
import os, pathlib
# Default HOME and STREAMLIT_HOME to locations under /tmp (values already
# present in the environment win), then make sure the Streamlit directory
# exists. This has to run before streamlit is imported further down.
os.environ.setdefault("HOME", "/tmp")
pathlib.Path(
    os.environ.setdefault("STREAMLIT_HOME", "/tmp/.streamlit")
).mkdir(parents=True, exist_ok=True)

import io
import zipfile
import tempfile
import subprocess
from datetime import datetime
from pathlib import Path

import streamlit as st
from PIL import Image
import pypandoc
import ffmpeg
import magic  # python-magic for mime detection

# -----------------------------------------------------------------------------
# Supported extensions
# -----------------------------------------------------------------------------
# Extensions grouped by the converter family that handles them.
IMAGE_EXTS = {
    ".jpg", ".jpeg", ".png", ".gif",
    ".bmp", ".tiff", ".ico", ".webp",
}
TEXT_EXTS = {
    ".txt", ".md", ".csv", ".json",
    ".xml", ".html", ".css", ".js",
}
MEDIA_EXTS = {
    ".mp3", ".wav",
    ".mp4", ".avi", ".mkv", ".mov",
}
DOC_EXTS = {
    ".pdf", ".doc", ".docx",
    ".ppt", ".pptx",
    ".xls", ".xlsx",
    ".odt", ".ods",
}

# Every extension offered as a conversion target, in alphabetical order.
ALLOWED_TARGET_EXTS = sorted(
    set().union(IMAGE_EXTS, TEXT_EXTS, MEDIA_EXTS, DOC_EXTS)
)
# Uploads carrying one of these extensions are refused.
DISALLOWED_SOURCE_EXTS = {".exe", ".bin"}

# -----------------------------------------------------------------------------
# UI elements
# -----------------------------------------------------------------------------
def sidebar_target_extension() -> str:
    """Render the sidebar settings and return the selected target extension.

    The sidebar shows a free-text filter and a select box. Only extensions
    from ``ALLOWED_TARGET_EXTS`` that contain the lower-cased filter text are
    offered; if nothing matches, an error is shown and the full list is
    offered instead. ``.pdf`` is pre-selected whenever it is among the
    offered choices, otherwise the first entry is.

    Returns:
        The value returned by the select box (one of the offered extensions).
    """
    sidebar = st.sidebar
    sidebar.header("Settings")

    needle = sidebar.text_input("Filter extensions… (optional)").lower()
    matches = list(filter(lambda ext: needle in ext, ALLOWED_TARGET_EXTS))
    if not matches:
        sidebar.error("No extension matches that filter.")
        matches = ALLOWED_TARGET_EXTS

    # Pre-select PDF when it survived the filter; fall back to the first item.
    try:
        default_index = matches.index(".pdf")
    except ValueError:
        default_index = 0

    return sidebar.selectbox(
        "Target extension for **all** files", matches, index=default_index
    )

def uploader():
    """Show the upload widget and return whatever ``st.file_uploader`` yields.

    The widget is created with ``type=None`` (no extension filter at the
    widget level) and ``accept_multiple_files=True``.
    """
    widget_options = {"type": None, "accept_multiple_files": True}
    return st.file_uploader("Upload files to convert", **widget_options)

# -----------------------------------------------------------------------------
# Conversion functions
# -----------------------------------------------------------------------------
def convert_image(data: bytes, target_ext: str) -> bytes:
    """Re-encode an image into the format implied by ``target_ext``.

    Args:
        data: Raw bytes of the source image, in any format Pillow can open.
        target_ext: Target extension including the leading dot (for example
            ``".png"``); matched case-insensitively.

    Returns:
        The encoded image bytes in the target format.

    Raises:
        KeyError: If ``target_ext`` is not one of the supported image
            extensions. Raised before any decoding is attempted.
        OSError: Propagated from Pillow when the data cannot be decoded or
            encoded.
    """
    pillow_formats = {
        ".jpg": "JPEG", ".jpeg": "JPEG", ".png": "PNG", ".gif": "GIF",
        ".bmp": "BMP", ".tiff": "TIFF", ".ico": "ICO", ".webp": "WEBP",
    }
    # Resolve the format first so an unsupported target fails fast.
    fmt = pillow_formats[target_ext.lower()]

    buf = io.BytesIO()
    with Image.open(io.BytesIO(data)) as source:
        frame = source
        # Pillow's JPEG writer rejects modes with alpha or a palette (RGBA,
        # LA, P, ...), which made e.g. PNG -> JPG fail outright. Convert those
        # to RGB; modes the writer accepts are left untouched.
        if fmt == "JPEG" and frame.mode not in ("RGB", "L", "CMYK"):
            frame = frame.convert("RGB")
        frame.save(buf, format=fmt)
    return buf.getvalue()


def convert_text_markup(data: bytes, orig_ext: str, target_ext: str) -> bytes:
    """Convert a text/markup document with pandoc (via pypandoc).

    Args:
        data: Raw bytes of the source document; decoded as UTF-8 with
            undecodable bytes dropped.
        orig_ext: Source extension including the leading dot (may be empty).
        target_ext: Target extension including the leading dot.

    Returns:
        The bytes of the document pandoc produced.

    Raises:
        RuntimeError: Raised by pypandoc when pandoc rejects the formats or
            the conversion fails.
        OSError: If pypandoc cannot locate a pandoc executable.
    """
    # File extensions are not always pandoc format names: plain text has no
    # dedicated reader (markdown is the closest match and is also used when
    # there is no extension), and the plain-text writer is called "plain".
    reader_aliases = {"": "markdown", "txt": "markdown", "htm": "html"}
    writer_aliases = {"txt": "plain", "htm": "html"}

    source_key = orig_ext.lstrip(".").lower()
    target_key = target_ext.lstrip(".").lower()
    source_format = reader_aliases.get(source_key, source_key)
    target_format = writer_aliases.get(target_key, target_key)

    text = data.decode("utf-8", errors="ignore")

    # Always write through a temporary file: pypandoc refuses binary targets
    # (pdf, docx, odt, ...) without an ``outputfile``, and reading the file
    # back yields the exact bytes for text targets as well.
    with tempfile.TemporaryDirectory() as work_dir:
        out_path = Path(work_dir) / f"output.{target_key}"
        pypandoc.convert_text(
            text,
            to=target_format,
            format=source_format,
            outputfile=str(out_path),
        )
        return out_path.read_bytes()


def convert_office(temp_dir: str, data: bytes, orig_ext: str, target_ext: str) -> bytes:
    """Convert a document by shelling out to ``unoconv``.

    The input bytes are written to ``input<orig_ext>`` inside ``temp_dir``,
    ``unoconv`` is asked to produce ``output<target_ext>`` in the same
    directory, and that file's contents are returned.

    Args:
        temp_dir: Existing directory used for the intermediate files.
        data: Raw bytes of the source document.
        orig_ext: Source extension including the leading dot.
        target_ext: Target extension including the leading dot.

    Returns:
        The bytes of the converted document.

    Raises:
        subprocess.CalledProcessError: If ``unoconv`` exits with a non-zero
            status.
        OSError: If ``unoconv`` cannot be started or the output file cannot
            be read.
    """
    workdir = Path(temp_dir)
    source_file = workdir / ("input" + orig_ext)
    result_file = workdir / ("output" + target_ext)
    source_file.write_bytes(data)

    command = [
        "unoconv",
        "-f", target_ext.lstrip('.'),
        "-o", str(result_file),
        str(source_file),
    ]
    subprocess.run(command, check=True)
    return result_file.read_bytes()


def convert_media(data: bytes, target_ext: str) -> bytes:
    """Transcode audio/video data with ffmpeg into the target container.

    The data is staged in temporary files rather than pipes. Several muxers
    (mp4, mov) need seekable output, some inputs need seekable input, and a
    file path lets ffmpeg choose the muxer from the extension, which avoids
    passing names such as ``mkv`` that are not ffmpeg format names.

    Args:
        data: Raw bytes of the source media.
        target_ext: Target extension including the leading dot.

    Returns:
        The bytes of the transcoded file.

    Raises:
        RuntimeError: If ffmpeg exits with an error; the message carries the
            tail of ffmpeg's stderr output. Earlier versions ignored the exit
            status and returned empty or partial output instead.
    """
    with tempfile.TemporaryDirectory() as work_dir:
        in_path = Path(work_dir) / "input"
        out_path = Path(work_dir) / f"output{target_ext}"
        in_path.write_bytes(data)

        try:
            (
                ffmpeg.input(str(in_path))
                      .output(str(out_path))
                      .run(capture_stdout=True, capture_stderr=True, overwrite_output=True)
            )
        except ffmpeg.Error as exc:
            detail = (exc.stderr or b"").decode("utf-8", errors="replace").strip()
            # ffmpeg's stderr is long; the actual error is at the end.
            raise RuntimeError(f"ffmpeg failed: {detail[-500:]}") from exc

        return out_path.read_bytes()


def convert_file(file: st.runtime.uploaded_file_manager.UploadedFile, target_ext: str) -> tuple[bytes, str]:
    """Convert one uploaded file, choosing a converter by extension and MIME type.

    Converters are tried in a fixed order; the first matching rule wins:

    1. image -> image (both extensions are image extensions),
    2. text/markup (MIME ``text/*`` or a text extension) when the extension
       actually changes,
    3. office/doc (source or target is a document extension),
    4. audio/video (MIME ``audio/*``/``video/*`` or a media extension) when
       the extension actually changes.

    If no rule matches, or the chosen converter raises, the original bytes are
    returned unchanged (a warning is shown in the latter case).

    Args:
        file: Uploaded file object exposing ``name`` and ``read()``.
        target_ext: Target extension including the leading dot.

    Returns:
        A ``(data, note)`` tuple, where ``note`` describes what was done.

    Raises:
        ValueError: If the source extension is disallowed.
    """
    source_ext = Path(file.name).suffix.lower()
    payload = file.read()

    if source_ext in DISALLOWED_SOURCE_EXTS:
        raise ValueError(f"Disallowed: {source_ext}")

    sniffed = magic.from_buffer(payload, mime=True) or ''

    try:
        if source_ext in IMAGE_EXTS and target_ext in IMAGE_EXTS:
            converted = convert_image(payload, target_ext)
            return converted, "image converted"

        if (sniffed.startswith('text/') or source_ext in TEXT_EXTS) and source_ext != target_ext:
            converted = convert_text_markup(payload, source_ext, target_ext)
            return converted, "text/markup converted"

        if source_ext in DOC_EXTS or target_ext in DOC_EXTS:
            with tempfile.TemporaryDirectory() as scratch:
                converted = convert_office(scratch, payload, source_ext, target_ext)
                return converted, "office/doc converted"

        if (sniffed.startswith(('audio/', 'video/')) or source_ext in MEDIA_EXTS) and source_ext != target_ext:
            converted = convert_media(payload, target_ext)
            return converted, "media converted"
    except Exception as exc:
        # Best effort: report the failure and hand back the untouched bytes.
        st.warning(f"⚠️ Conversion failed for {file.name}: {exc}. Falling back to rename.")

    # Nothing applied (or the converter failed): keep the content as-is.
    return payload, "renamed only"

# -----------------------------------------------------------------------------
# ZIP packaging
# -----------------------------------------------------------------------------
def package_zip(files: list[st.runtime.uploaded_file_manager.UploadedFile], target_ext: str) -> io.BytesIO:
    """Convert every uploaded file and bundle the results into an in-memory ZIP.

    Files with a disallowed extension are skipped with a warning. Each
    remaining file is passed to ``convert_file`` and stored under its original
    stem with ``target_ext`` as the suffix. When several uploads would map to
    the same archive name (for example ``a.txt`` and ``a.md`` both becoming
    ``a.pdf``), later ones get a ``_1``, ``_2``, ... suffix so no entry
    shadows another on extraction.

    Args:
        files: Uploaded files to convert.
        target_ext: Target extension including the leading dot.

    Returns:
        A ``BytesIO`` positioned at offset 0 containing the ZIP archive.
    """
    buf = io.BytesIO()
    # Names are compared case-insensitively so the archive also extracts
    # cleanly on case-insensitive file systems.
    used_names: set[str] = set()
    with zipfile.ZipFile(buf, 'w', zipfile.ZIP_DEFLATED) as zf:
        for file in files:
            name = Path(file.name)
            if name.suffix.lower() in DISALLOWED_SOURCE_EXTS:
                st.warning(f"Skipping disallowed file: {name.name}")
                continue
            data, note = convert_file(file, target_ext)

            renamed = name.with_suffix(target_ext)
            out_name = renamed.name
            counter = 1
            while out_name.casefold() in used_names:
                out_name = f"{renamed.stem}_{counter}{renamed.suffix}"
                counter += 1
            used_names.add(out_name.casefold())

            zf.writestr(out_name, data)
            # Source and output names used to be glued together without a
            # separator, which made the message unreadable.
            st.success(f"{note}: {name.name} → {out_name}")
    buf.seek(0)
    return buf

# -----------------------------------------------------------------------------
# Main
# -----------------------------------------------------------------------------

def main():
    """Build the page: settings sidebar, uploader, and the convert/download flow.

    When files are uploaded and the convert button is pressed, every file is
    converted to the selected extension, packed into a ZIP, and offered
    through a download button. The archive name carries a UTC timestamp.
    """
    # Local import: only this function needs it, and the file's existing
    # ``from datetime import datetime`` line stays untouched.
    from datetime import timezone

    st.set_page_config("Universal Converter", page_icon="🔄", layout="centered")
    st.title("🔄 Universal File-Format Converter")
    st.write("Upload files of any format; choose a new extension; download a ZIP of converted files.")

    target_ext = sidebar_target_extension()
    files = uploader()

    if files and st.button("Convert & Download 🚀"):
        zip_buf = package_zip(files, target_ext)
        # datetime.utcnow() is deprecated (Python 3.12+); an aware UTC
        # timestamp formats to the same string.
        ts = datetime.now(timezone.utc).strftime('%Y%m%dT%H%M%SZ')
        st.download_button("⬇️ Download ZIP", zip_buf,
                            file_name=f"converted_{ts}.zip",
                            mime='application/zip')

    st.caption("© 2025 Universal Converter • Streamlit • Hugging Face Spaces")

# Build the page only when this file is executed as the main module.
if __name__ == "__main__":
    main()