# NOTE: file-viewer residue (Space status, file size, commit hashes, line-number gutter) removed; it was not part of the module.
"""
Streamlit Universal File-Format Converter
----------------------------------------
A Streamlit app for Hugging Face Spaces that **actually converts** file
contents across a wide array of formats, leveraging local libraries
(no API keys needed):
• **Images** via Pillow (JPEG, PNG, GIF, BMP, TIFF, ICO, WebP)
• **Text & markup** via pypandoc (MD, HTML, LaTeX, DOCX, PDF, etc.)
• **Office docs** via unoconv + LibreOffice headless (PDF, DOCX, PPTX, XLSX)
• **Audio/video** via ffmpeg-python (MP3, WAV, MP4, AVI, MKV, MOV, etc.)
• **MIME detection** via python-magic
Disallowed uploads: `.exe`, `.bin`
All outputs are streamed into a ZIP for download.
Created 2025-05-22 • v3
"""
from __future__ import annotations
# Set up a writable Streamlit home BEFORE importing streamlit
import os, pathlib
os.environ.setdefault("STREAMLIT_HOME", "/tmp/.streamlit")
os.environ.setdefault("HOME", "/tmp")
pathlib.Path(os.environ["STREAMLIT_HOME"]).mkdir(parents=True, exist_ok=True)
import io
import zipfile
import tempfile
import subprocess
from datetime import datetime
from pathlib import Path
import streamlit as st
from PIL import Image
import pypandoc
import ffmpeg
import magic # python-magic for mime detection
# -----------------------------------------------------------------------------
# Supported extensions
# -----------------------------------------------------------------------------
# Extensions grouped by the converter family that handles them in convert_file().
IMAGE_EXTS = {
    ".jpg", ".jpeg", ".png", ".gif",
    ".bmp", ".tiff", ".ico", ".webp",
}
TEXT_EXTS = {
    ".txt", ".md", ".csv", ".json",
    ".xml", ".html", ".css", ".js",
}
MEDIA_EXTS = {
    ".mp3", ".wav",
    ".mp4", ".avi", ".mkv", ".mov",
}
DOC_EXTS = {
    ".pdf", ".doc", ".docx",
    ".ppt", ".pptx",
    ".xls", ".xlsx",
    ".odt", ".ods",
}

# Every extension offered as a conversion target, as an alphabetically sorted list.
ALLOWED_TARGET_EXTS = sorted(set().union(IMAGE_EXTS, TEXT_EXTS, MEDIA_EXTS, DOC_EXTS))

# Source extensions that are refused outright.
DISALLOWED_SOURCE_EXTS = {".exe", ".bin"}
# -----------------------------------------------------------------------------
# UI elements
# -----------------------------------------------------------------------------
def sidebar_target_extension() -> str:
    """Render the sidebar settings and return the selected target extension.

    A free-text filter narrows the offered extensions by substring match
    (the filter text is lower-cased first). When the filter matches nothing,
    an error is shown and the full list is offered instead. ``.pdf`` is
    preselected whenever it is among the offered choices; otherwise the
    first choice is.
    """
    sidebar = st.sidebar
    sidebar.header("Settings")
    needle = sidebar.text_input("Filter extensions… (optional)").lower()

    matching = [ext for ext in ALLOWED_TARGET_EXTS if needle in ext]
    if not matching:
        sidebar.error("No extension matches that filter.")
        matching = ALLOWED_TARGET_EXTS

    if ".pdf" in matching:
        default_index = matching.index(".pdf")
    else:
        default_index = 0

    return sidebar.selectbox(
        "Target extension for **all** files", matching, index=default_index
    )
def uploader():
    """Show the upload widget and return whatever ``st.file_uploader`` yields.

    The widget accepts multiple files and applies no extension filter
    (``type=None``).
    """
    widget_options = {"type": None, "accept_multiple_files": True}
    return st.file_uploader("Upload files to convert", **widget_options)
# -----------------------------------------------------------------------------
# Conversion functions
# -----------------------------------------------------------------------------
# Pillow format names keyed by the target extensions this app can write.
_PIL_FORMATS = {
    ".jpg": "JPEG",
    ".jpeg": "JPEG",
    ".png": "PNG",
    ".gif": "GIF",
    ".bmp": "BMP",
    ".tiff": "TIFF",
    ".ico": "ICO",
    ".webp": "WEBP",
}

# Image modes kept as-is when writing JPEG; anything else (RGBA, LA, P, ...)
# is converted to RGB first because the JPEG encoder rejects those modes.
_JPEG_SAFE_MODES = ("L", "RGB", "CMYK")


def convert_image(data: bytes, target_ext: str) -> bytes:
    """Re-encode image bytes into the format implied by ``target_ext``.

    Args:
        data: Encoded source image, in any format Pillow can open.
        target_ext: Target extension including the dot (e.g. ``".png"``);
            matched case-insensitively.

    Returns:
        The image encoded in the target format.

    Raises:
        KeyError: If ``target_ext`` is not one of the supported image
            extensions. Raised before any decoding is attempted.
        Exception: Whatever Pillow raises for undecodable or unwritable data.
    """
    # Resolve the format first so an unsupported target fails fast.
    fmt = _PIL_FORMATS[target_ext.lower()]

    with Image.open(io.BytesIO(data)) as source:
        image = source
        if fmt == "JPEG" and source.mode not in _JPEG_SAFE_MODES:
            # JPEG has no alpha channel or palette; the alpha channel is dropped.
            image = source.convert("RGB")
        encoded = io.BytesIO()
        image.save(encoded, format=fmt)
    return encoded.getvalue()
# Extensions whose pandoc format name differs from the bare extension.
# Plain text is read as markdown (pandoc has no "txt" reader) and written
# with the "plain" writer.
_PANDOC_INPUT_FORMATS = {"md": "markdown", "txt": "markdown", "htm": "html", "tex": "latex"}
_PANDOC_OUTPUT_FORMATS = {"md": "markdown", "txt": "plain", "htm": "html", "tex": "latex"}

# Binary output formats: these cannot be returned as decoded text, so they
# are produced through an output file instead.
_PANDOC_FILE_ONLY_FORMATS = {"pdf", "docx", "odt", "epub", "pptx"}


def convert_text_markup(data: bytes, orig_ext: str, target_ext: str) -> bytes:
    """Convert text/markup bytes between formats using pypandoc.

    Args:
        data: Source document bytes; decoded as UTF-8 with undecodable
            bytes dropped.
        orig_ext: Source extension including the dot (e.g. ``".md"``).
        target_ext: Target extension including the dot (e.g. ``".html"``).

    Returns:
        The converted document. Text formats are returned UTF-8 encoded;
        binary formats (PDF, DOCX, ...) are returned as the bytes pandoc
        wrote to a temporary output file.

    Raises:
        Exception: Whatever pypandoc raises, e.g. for a format pandoc does
            not know. NOTE(review): PDF output presumably also needs a PDF
            engine installed alongside pandoc; confirm for the deployment.
    """
    text = data.decode("utf-8", errors="ignore")

    source_key = orig_ext.lstrip(".").lower()
    target_key = target_ext.lstrip(".").lower()
    source_format = _PANDOC_INPUT_FORMATS.get(source_key, source_key)
    target_format = _PANDOC_OUTPUT_FORMATS.get(target_key, target_key)

    if target_format not in _PANDOC_FILE_ONLY_FORMATS:
        converted = pypandoc.convert_text(text, to=target_format, format=source_format)
        return converted.encode("utf-8")

    # Binary target: let pandoc write a file, then read the bytes back.
    with tempfile.TemporaryDirectory() as tmp:
        out_path = Path(tmp) / f"output.{target_key}"
        pypandoc.convert_text(
            text, to=target_format, format=source_format, outputfile=str(out_path)
        )
        return out_path.read_bytes()
def convert_office(
    temp_dir: str,
    data: bytes,
    orig_ext: str,
    target_ext: str,
    *,
    timeout: float = 300.0,
) -> bytes:
    """Convert an office document by shelling out to ``unoconv``.

    The input is written to ``input<orig_ext>`` inside ``temp_dir``,
    ``unoconv`` is asked to write ``output<target_ext>`` next to it, and the
    resulting file is read back.

    Args:
        temp_dir: Existing directory used for the intermediate files. The
            caller owns it and is responsible for cleaning it up.
        data: Source document bytes.
        orig_ext: Source extension including the dot (e.g. ``".docx"``).
        target_ext: Target extension including the dot (e.g. ``".pdf"``).
        timeout: Seconds to wait for ``unoconv`` before giving up, so a
            stuck converter cannot block the caller indefinitely.

    Returns:
        The bytes of the converted document.

    Raises:
        subprocess.CalledProcessError: If ``unoconv`` exits with a non-zero
            status.
        subprocess.TimeoutExpired: If ``unoconv`` exceeds ``timeout``.
        OSError: If ``unoconv`` cannot be started or the output file is
            missing afterwards.
    """
    work_dir = Path(temp_dir)
    in_path = work_dir / f"input{orig_ext}"
    out_path = work_dir / f"output{target_ext}"
    in_path.write_bytes(data)

    command = [
        "unoconv",
        "-f", target_ext.lstrip('.'),
        "-o", str(out_path),
        str(in_path),
    ]
    subprocess.run(command, check=True, timeout=timeout)
    return out_path.read_bytes()
def convert_media(data: bytes, target_ext: str) -> bytes:
    """Transcode audio/video bytes into the container implied by ``target_ext``.

    The conversion goes through temporary files rather than pipes: ffmpeg
    then picks the muxer from the output file name (so ``.mkv`` works even
    though ``mkv`` is not a muxer name), and containers that need seekable
    input or output can be handled.

    Args:
        data: Source media bytes; ffmpeg probes the content to detect the
            input format.
        target_ext: Target extension including the dot (e.g. ``".mp3"``).

    Returns:
        The bytes of the transcoded file.

    Raises:
        RuntimeError: If ffmpeg exits with an error; the message carries the
            tail of ffmpeg's stderr. Previously a failed run returned empty
            or partial output without any error.
    """
    with tempfile.TemporaryDirectory() as tmp:
        src_path = Path(tmp) / "input"
        dst_path = Path(tmp) / f"output{target_ext}"
        src_path.write_bytes(data)

        try:
            (
                ffmpeg.input(str(src_path))
                .output(str(dst_path))
                .run(capture_stdout=True, capture_stderr=True, overwrite_output=True)
            )
        except ffmpeg.Error as exc:
            detail = (exc.stderr or b"").decode("utf-8", errors="replace").strip()
            # Keep only the tail: ffmpeg prints its banner first, the cause last.
            raise RuntimeError(f"ffmpeg failed: {detail[-500:]}") from exc

        return dst_path.read_bytes()
def convert_file(file: st.runtime.uploaded_file_manager.UploadedFile, target_ext: str) -> tuple[bytes, str]:
    """Convert one uploaded file to ``target_ext``, falling back to a rename.

    The converter is picked from the source extension and the sniffed MIME
    type, in this order: image, text/markup, office document, audio/video.
    Apart from the image path (which always re-encodes), nothing is converted
    when the source and target extensions are identical.

    Args:
        file: Uploaded file; its ``name`` supplies the source extension and
            ``read()`` supplies the payload.
        target_ext: Target extension including the dot (e.g. ``".pdf"``).

    Returns:
        A ``(data, note)`` tuple. ``note`` names the converter that ran, or
        is ``"renamed only"`` when no converter applied or the conversion
        failed, in which case ``data`` is the unmodified payload.

    Raises:
        ValueError: If the source extension is disallowed.
    """
    orig_ext = Path(file.name).suffix.lower()
    # Check the extension first; no need to read a payload that is rejected.
    if orig_ext in DISALLOWED_SOURCE_EXTS:
        raise ValueError(f"Disallowed: {orig_ext}")

    raw = file.read()
    mime = magic.from_buffer(raw, mime=True) or ''

    try:
        if orig_ext in IMAGE_EXTS and target_ext in IMAGE_EXTS:
            return convert_image(raw, target_ext), "image converted"
        # Every remaining converter is skipped when the extension would not
        # change, so e.g. pdf -> pdf no longer round-trips through unoconv.
        if orig_ext != target_ext:
            if mime.startswith('text/') or orig_ext in TEXT_EXTS:
                return convert_text_markup(raw, orig_ext, target_ext), "text/markup converted"
            if orig_ext in DOC_EXTS or target_ext in DOC_EXTS:
                with tempfile.TemporaryDirectory() as tmp:
                    return convert_office(tmp, raw, orig_ext, target_ext), "office/doc converted"
            if mime.startswith(('audio/', 'video/')) or orig_ext in MEDIA_EXTS:
                return convert_media(raw, target_ext), "media converted"
    except Exception as e:
        # Deliberate best-effort: any converter failure is reported and the
        # original bytes are returned under the new name.
        st.warning(f"⚠️ Conversion failed for {file.name}: {e}. Falling back to rename.")

    # Fallback: no conversion, just rename
    return raw, "renamed only"
# -----------------------------------------------------------------------------
# ZIP packaging
# -----------------------------------------------------------------------------
def _unique_arcname(candidate: str, taken: set[str]) -> str:
    """Return ``candidate``, or a numbered variant of it that is not in ``taken``."""
    if candidate not in taken:
        return candidate
    path = Path(candidate)
    counter = 1
    while True:
        attempt = f"{path.stem}_{counter}{path.suffix}"
        if attempt not in taken:
            return attempt
        counter += 1


def package_zip(files: list[st.runtime.uploaded_file_manager.UploadedFile], target_ext: str) -> io.BytesIO:
    """Convert every upload and bundle the results into an in-memory ZIP.

    Files with a disallowed extension are skipped with a warning. Each
    remaining file is converted via ``convert_file`` and stored under its
    original base name with ``target_ext``. When several uploads map to the
    same output name (e.g. ``a.png`` and ``a.jpg`` both becoming ``a.pdf``),
    later ones get a ``_1``, ``_2``, ... suffix so no archive member shadows
    another on extraction.

    Args:
        files: Uploaded files to convert; may be empty.
        target_ext: Target extension including the dot (e.g. ``".pdf"``).

    Returns:
        A ``BytesIO`` positioned at offset 0 that holds the finished ZIP
        archive (an empty archive when nothing was added).
    """
    buf = io.BytesIO()
    taken: set[str] = set()
    with zipfile.ZipFile(buf, 'w', zipfile.ZIP_DEFLATED) as zf:
        for file in files:
            name = Path(file.name)
            if name.suffix.lower() in DISALLOWED_SOURCE_EXTS:
                st.warning(f"Skipping disallowed file: {name.name}")
                continue
            data, note = convert_file(file, target_ext)
            out_name = _unique_arcname(name.with_suffix(target_ext).name, taken)
            taken.add(out_name)
            zf.writestr(out_name, data)
            st.success(f"{note}: {name.name} → {out_name}")
    # Rewind so the caller can hand the buffer straight to a reader.
    buf.seek(0)
    return buf
# -----------------------------------------------------------------------------
# Main
# -----------------------------------------------------------------------------
def main():
    """Build the Streamlit page: settings, upload, convert and download.

    The target extension comes from the sidebar and the files from the
    upload widget. Once files are present and the convert button is pressed,
    the uploads are converted and packaged, and a download button is offered
    for the ZIP, named with the current UTC time.
    """
    # Local import: only the timestamp below needs it.
    from datetime import timezone

    st.set_page_config("Universal Converter", page_icon="🔄", layout="centered")
    st.title("🔄 Universal File-Format Converter")
    st.write("Upload files of any format; choose a new extension; download a ZIP of converted files.")

    target_ext = sidebar_target_extension()
    files = uploader()

    if files and st.button("Convert & Download 🚀"):
        zip_buf = package_zip(files, target_ext)
        # datetime.utcnow() is deprecated; an aware UTC "now" formats identically.
        ts = datetime.now(timezone.utc).strftime('%Y%m%dT%H%M%SZ')
        st.download_button(
            "⬇️ Download ZIP",
            zip_buf,
            file_name=f"converted_{ts}.zip",
            mime='application/zip',
        )

    st.caption("© 2025 Universal Converter • Streamlit • Hugging Face Spaces")
# Entry point: build the page only when this module runs as the main script.
if __name__ == '__main__':
    main()
|