""" Streamlit Universal File-Format Converter ---------------------------------------- A Streamlit app for Hugging Face Spaces that **actually converts** file contents across a wide array of formats, leveraging local libraries (no API keys needed): • **Images** via Pillow (JPEG, PNG, GIF, BMP, TIFF, ICO, WebP) • **Text & markup** via pypandoc (MD, HTML, LaTeX, DOCX, PDF, etc.) • **Office docs** via unoconv + LibreOffice headless (PDF, DOCX, PPTX, XLSX) • **Audio/video** via ffmpeg-python (MP3, WAV, MP4, AVI, MKV, MOV, etc.) • **MIME detection** via python-magic Disallowed uploads: `.exe`, `.bin` All outputs are streamed into a ZIP for download. Created 2025-05-22 • v3 """ from __future__ import annotations # Set up a writable Streamlit home BEFORE importing streamlit import os, pathlib os.environ.setdefault("STREAMLIT_HOME", "/tmp/.streamlit") os.environ.setdefault("HOME", "/tmp") pathlib.Path(os.environ["STREAMLIT_HOME"]).mkdir(parents=True, exist_ok=True) import io import zipfile import tempfile import subprocess from datetime import datetime from pathlib import Path import streamlit as st from PIL import Image import pypandoc import ffmpeg import magic # python-magic for mime detection # ----------------------------------------------------------------------------- # Supported extensions # ----------------------------------------------------------------------------- IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".gif", ".bmp", ".tiff", ".ico", ".webp"} TEXT_EXTS = {".txt", ".md", ".csv", ".json", ".xml", ".html", ".css", ".js"} MEDIA_EXTS = {".mp3", ".wav", ".mp4", ".avi", ".mkv", ".mov"} DOC_EXTS = {".pdf", ".doc", ".docx", ".ppt", ".pptx", ".xls", ".xlsx", ".odt", ".ods"} ALLOWED_TARGET_EXTS = sorted(IMAGE_EXTS | TEXT_EXTS | MEDIA_EXTS | DOC_EXTS) DISALLOWED_SOURCE_EXTS = {".exe", ".bin"} # ----------------------------------------------------------------------------- # UI elements # ----------------------------------------------------------------------------- def sidebar_target_extension() -> str: st.sidebar.header("Settings") query = st.sidebar.text_input("Filter extensions… (optional)") choices = [e for e in ALLOWED_TARGET_EXTS if query.lower() in e] if not choices: st.sidebar.error("No extension matches that filter.") choices = ALLOWED_TARGET_EXTS return st.sidebar.selectbox( "Target extension for **all** files", choices, index=choices.index(".pdf") if ".pdf" in choices else 0 ) def uploader(): return st.file_uploader( "Upload files to convert", type=None, accept_multiple_files=True ) # ----------------------------------------------------------------------------- # Conversion functions # ----------------------------------------------------------------------------- def convert_image(data: bytes, target_ext: str) -> bytes: img = Image.open(io.BytesIO(data)) buf = io.BytesIO() fmt = {".jpg":"JPEG", ".jpeg":"JPEG", ".png":"PNG", ".gif":"GIF", ".bmp":"BMP", ".tiff":"TIFF", ".ico":"ICO", ".webp":"WEBP"}[target_ext] img.save(buf, format=fmt) return buf.getvalue() def convert_text_markup(data: bytes, orig_ext: str, target_ext: str) -> bytes: text = data.decode("utf-8", errors="ignore") return pypandoc.convert_text(text, to=target_ext.lstrip('.'), format=orig_ext.lstrip('.')).encode('utf-8') def convert_office(temp_dir: str, data: bytes, orig_ext: str, target_ext: str) -> bytes: # Use unoconv to convert office files suffix_in = orig_ext suffix_out = target_ext in_path = Path(temp_dir) / f"input{suffix_in}" out_path = Path(temp_dir) / f"output{suffix_out}" in_path.write_bytes(data) subprocess.run(["unoconv", "-f", suffix_out.lstrip('.'), "-o", str(out_path), str(in_path)], check=True) return out_path.read_bytes() def convert_media(data: bytes, target_ext: str) -> bytes: # ffmpeg-python streaming process = ( ffmpeg.input('pipe:0') .output('pipe:1', format=target_ext.lstrip('.')) .run_async(pipe_stdin=True, pipe_stdout=True, pipe_stderr=True) ) out, err = process.communicate(data) return out def convert_file(file: st.runtime.uploaded_file_manager.UploadedFile, target_ext: str) -> tuple[bytes, str]: name = Path(file.name) orig_ext = name.suffix.lower() raw = file.read() if orig_ext in DISALLOWED_SOURCE_EXTS: raise ValueError(f"Disallowed: {orig_ext}") mime = magic.from_buffer(raw, mime=True) or '' try: if orig_ext in IMAGE_EXTS and target_ext in IMAGE_EXTS: return convert_image(raw, target_ext), "image converted" if mime.startswith('text/') or orig_ext in TEXT_EXTS: if orig_ext != target_ext: return convert_text_markup(raw, orig_ext, target_ext), "text/markup converted" if orig_ext in DOC_EXTS or target_ext in DOC_EXTS: with tempfile.TemporaryDirectory() as tmp: return convert_office(tmp, raw, orig_ext, target_ext), "office/doc converted" if mime.startswith(('audio/','video/')) or orig_ext in MEDIA_EXTS: if orig_ext != target_ext: return convert_media(raw, target_ext), "media converted" except Exception as e: st.warning(f"⚠️ Conversion failed for {file.name}: {e}. Falling back to rename.") # Fallback: no conversion, just rename return raw, "renamed only" # ----------------------------------------------------------------------------- # ZIP packaging # ----------------------------------------------------------------------------- def package_zip(files: list[st.runtime.uploaded_file_manager.UploadedFile], target_ext: str) -> io.BytesIO: buf = io.BytesIO() with zipfile.ZipFile(buf, 'w', zipfile.ZIP_DEFLATED) as zf: for file in files: name = Path(file.name) if name.suffix.lower() in DISALLOWED_SOURCE_EXTS: st.warning(f"Skipping disallowed file: {name.name}") continue data, note = convert_file(file, target_ext) out_name = name.with_suffix(target_ext).name zf.writestr(out_name, data) st.success(f"{note}: {name.name} → {out_name}") buf.seek(0) return buf # ----------------------------------------------------------------------------- # Main # ----------------------------------------------------------------------------- def main(): st.set_page_config("Universal Converter", page_icon="🔄", layout="centered") st.title("🔄 Universal File-Format Converter") st.write("Upload files of any format; choose a new extension; download a ZIP of converted files.") target_ext = sidebar_target_extension() files = uploader() if files and st.button("Convert & Download 🚀"): zip_buf = package_zip(files, target_ext) ts = datetime.utcnow().strftime('%Y%m%dT%H%M%SZ') st.download_button("⬇️ Download ZIP", zip_buf, file_name=f"converted_{ts}.zip", mime='application/zip') st.caption("© 2025 Universal Converter • Streamlit • Hugging Face Spaces") if __name__ == '__main__': main()