# Hugging Face Spaces — status: Running
"""
Streamlit Universal File-Format Converter
-----------------------------------------
A Streamlit app for Hugging Face Spaces that **actually converts** file
contents across a wide array of formats, leveraging local libraries
(no API keys needed):

• **Images** via Pillow (JPEG, PNG, GIF, BMP, TIFF, ICO, WebP)
• **Text & markup** via pypandoc (MD, HTML, LaTeX, DOCX, PDF, etc.)
• **Office docs** via unoconv + LibreOffice headless (PDF, DOCX, PPTX, XLSX)
• **Audio/video** via ffmpeg-python (MP3, WAV, MP4, AVI, MKV, MOV, etc.)
• **MIME detection** via python-magic

Disallowed uploads: `.exe`, `.bin`
All outputs are collected into a ZIP archive for download.

Created 2025-05-22 • v3
"""
from __future__ import annotations

import io
import os
import pathlib
import subprocess
import tempfile
import zipfile
from datetime import datetime, timezone
from pathlib import Path

# Set up a writable Streamlit home BEFORE importing streamlit
os.environ.setdefault("STREAMLIT_HOME", "/tmp/.streamlit")
os.environ.setdefault("HOME", "/tmp")
pathlib.Path(os.environ["STREAMLIT_HOME"]).mkdir(parents=True, exist_ok=True)

import streamlit as st  # noqa: E402  (must follow the environment setup above)
from PIL import Image  # noqa: E402
import pypandoc  # noqa: E402
import ffmpeg  # noqa: E402
import magic  # noqa: E402  (python-magic for mime detection)
# ----------------------------------------------------------------------------- | |
# Supported extensions | |
# ----------------------------------------------------------------------------- | |
# Extensions grouped by the converter that handles them. Every entry is
# lower-case and includes the leading dot, matching ``Path.suffix.lower()``.
IMAGE_EXTS = {
    ".jpg", ".jpeg", ".png", ".gif",
    ".bmp", ".tiff", ".ico", ".webp",
}
TEXT_EXTS = {
    ".txt", ".md", ".csv", ".json",
    ".xml", ".html", ".css", ".js",
}
MEDIA_EXTS = {
    ".mp3", ".wav",
    ".mp4", ".avi", ".mkv", ".mov",
}
DOC_EXTS = {
    ".pdf", ".doc", ".docx",
    ".ppt", ".pptx",
    ".xls", ".xlsx",
    ".odt", ".ods",
}

# Alphabetically sorted list of every extension offered as a conversion target.
ALLOWED_TARGET_EXTS = sorted(set().union(IMAGE_EXTS, TEXT_EXTS, MEDIA_EXTS, DOC_EXTS))

# Uploads carrying these extensions are rejected outright.
DISALLOWED_SOURCE_EXTS = {".exe", ".bin"}
# ----------------------------------------------------------------------------- | |
# UI elements | |
# ----------------------------------------------------------------------------- | |
def sidebar_target_extension() -> str:
    """Render the sidebar settings and return the selected target extension.

    A free-text filter narrows the offered extensions by substring match
    against the lower-cased query. If nothing matches, an error is shown
    and the complete list is offered instead. ``.pdf`` is pre-selected
    whenever it is among the offered choices; otherwise the first entry is.
    """
    st.sidebar.header("Settings")
    needle = st.sidebar.text_input("Filter extensions… (optional)").lower()

    offered = [ext for ext in ALLOWED_TARGET_EXTS if needle in ext]
    if not offered:
        st.sidebar.error("No extension matches that filter.")
        offered = ALLOWED_TARGET_EXTS

    default_index = offered.index(".pdf") if ".pdf" in offered else 0
    return st.sidebar.selectbox(
        "Target extension for **all** files", offered, index=default_index
    )
def uploader():
    """Show the upload widget and return whatever ``st.file_uploader`` yields.

    Multiple files may be selected and no extension filter is applied at
    the widget level (``type=None``).
    """
    widget_options = {"type": None, "accept_multiple_files": True}
    return st.file_uploader("Upload files to convert", **widget_options)
# ----------------------------------------------------------------------------- | |
# Conversion functions | |
# ----------------------------------------------------------------------------- | |
def convert_image(data: bytes, target_ext: str) -> bytes:
    """Re-encode image bytes into the format implied by ``target_ext``.

    Args:
        data: Raw bytes of the source image, in any format Pillow can open.
        target_ext: Lower-case target extension including the leading dot
            (for example ``".png"``).

    Returns:
        The image encoded in the target format.

    Raises:
        KeyError: If ``target_ext`` is not one of the supported image
            extensions.
    """
    fmt = {".jpg": "JPEG", ".jpeg": "JPEG", ".png": "PNG", ".gif": "GIF",
           ".bmp": "BMP", ".tiff": "TIFF", ".ico": "ICO", ".webp": "WEBP"}[target_ext]
    buf = io.BytesIO()
    with Image.open(io.BytesIO(data)) as img:
        # Pillow's JPEG writer rejects modes carrying alpha or a palette
        # (RGBA, LA, P, ...) with OSError, so such images are flattened to
        # RGB first; transparency is discarded in the process.
        if fmt == "JPEG" and img.mode not in ("L", "RGB", "CMYK"):
            img = img.convert("RGB")
        img.save(buf, format=fmt)
    return buf.getvalue()
# Output formats pypandoc refuses to return as a string; they must be
# written to a file via ``outputfile``.
_PANDOC_BINARY_TARGETS = frozenset({"pdf", "docx", "odt", "epub", "epub3"})


def convert_text_markup(data: bytes, orig_ext: str, target_ext: str) -> bytes:
    """Convert text/markup between formats with pandoc (through pypandoc).

    Args:
        data: Source document bytes; decoded as UTF-8, silently dropping
            undecodable bytes.
        orig_ext: Source extension including the leading dot.
        target_ext: Target extension including the leading dot.

    Returns:
        The converted document. Textual targets are returned UTF-8 encoded;
        binary targets (PDF, DOCX, ODT, EPUB) are returned as the bytes
        pandoc wrote to disk.

    Raises:
        RuntimeError: Raised by pypandoc when a format is unsupported.
        OSError: Raised by pypandoc when pandoc itself cannot be run.
    """
    text = data.decode("utf-8", errors="ignore")
    source_format = orig_ext.lstrip(".")
    target_format = target_ext.lstrip(".")

    # pandoc has no format called "txt": plain text is read with the markdown
    # reader and written with the "plain" writer.
    if source_format == "txt":
        source_format = "markdown"
    if target_format == "txt":
        target_format = "plain"

    if target_format not in _PANDOC_BINARY_TARGETS:
        converted = pypandoc.convert_text(text, to=target_format, format=source_format)
        return converted.encode("utf-8")

    # Binary targets: let pandoc write into a scratch directory and read the
    # result back. For PDF the output file name must end in ".pdf".
    with tempfile.TemporaryDirectory() as scratch:
        out_path = Path(scratch) / f"output.{target_format}"
        pypandoc.convert_text(
            text, to=target_format, format=source_format, outputfile=str(out_path)
        )
        return out_path.read_bytes()
def convert_office(temp_dir: str, data: bytes, orig_ext: str, target_ext: str) -> bytes:
    """Convert an office document by shelling out to ``unoconv``.

    The input bytes are written to ``input<orig_ext>`` inside ``temp_dir``,
    ``unoconv`` is asked to write ``output<target_ext>`` next to it, and the
    bytes of that output file are returned.

    Args:
        temp_dir: Existing directory used for the intermediate files.
        data: Source document bytes.
        orig_ext: Source extension including the leading dot.
        target_ext: Target extension including the leading dot.

    Raises:
        subprocess.CalledProcessError: If ``unoconv`` exits with a non-zero
            status.
    """
    workdir = Path(temp_dir)
    source = workdir / f"input{orig_ext}"
    destination = workdir / f"output{target_ext}"
    source.write_bytes(data)

    command = [
        "unoconv",
        "-f", target_ext.lstrip('.'),
        "-o", str(destination),
        str(source),
    ]
    subprocess.run(command, check=True)
    return destination.read_bytes()
# File extensions whose ffmpeg muxer name differs from the extension itself.
_FFMPEG_MUXER_BY_EXT = {".mkv": "matroska"}


def convert_media(data: bytes, target_ext: str) -> bytes:
    """Transcode audio/video bytes with ffmpeg, piping data in and out.

    Args:
        data: Source media bytes, fed to ffmpeg on stdin.
        target_ext: Target extension including the leading dot.

    Returns:
        The bytes ffmpeg wrote to stdout.

    Raises:
        RuntimeError: If ffmpeg exits with a non-zero status; the message
            carries the last line of ffmpeg's stderr.
    """
    muxer = _FFMPEG_MUXER_BY_EXT.get(target_ext, target_ext.lstrip('.'))
    output_options = {"format": muxer}
    if muxer in ("mp4", "mov"):
        # These muxers normally seek back to finalise the file, which a pipe
        # cannot do; fragmented output avoids the seek.
        output_options["movflags"] = "frag_keyframe+empty_moov"

    # NOTE(review): input is also a pipe, so sources that need seeking (e.g.
    # MP4 with the index at the end) may still fail — confirm whether a
    # temp-file input is wanted.
    process = (
        ffmpeg.input('pipe:0')
        .output('pipe:1', **output_options)
        .run_async(pipe_stdin=True, pipe_stdout=True, pipe_stderr=True)
    )
    out, err = process.communicate(data)

    # Without this check a failed run would hand back empty or truncated
    # bytes that the caller reports as a successful conversion.
    if process.returncode != 0:
        stderr_lines = (err or b"").decode("utf-8", errors="replace").strip().splitlines()
        reason = stderr_lines[-1] if stderr_lines else "no diagnostic output"
        raise RuntimeError(f"ffmpeg exited with status {process.returncode}: {reason}")
    return out
def convert_file(file: st.runtime.uploaded_file_manager.UploadedFile, target_ext: str) -> tuple[bytes, str]:
    """Convert one uploaded file to ``target_ext`` and describe what happened.

    Routing, in order of precedence:
      1. image extension -> image extension: ``convert_image``
      2. text MIME type or text extension: ``convert_text_markup``
      3. source or target is an office/document extension: ``convert_office``
      4. audio/video MIME type or media extension: ``convert_media``

    If the source already has the target extension, if no route applies, or
    if the chosen converter raises, the original bytes are returned
    unchanged.

    Args:
        file: Uploaded file object; its ``name`` and ``read()`` are used.
        target_ext: Lower-case target extension including the leading dot.

    Returns:
        ``(data, note)`` where ``note`` is a short human-readable status such
        as ``"image converted"`` or ``"renamed only"``.

    Raises:
        ValueError: If the source extension is disallowed.
    """
    name = Path(file.name)
    orig_ext = name.suffix.lower()
    # Reject before reading so a disallowed upload is never pulled into memory.
    if orig_ext in DISALLOWED_SOURCE_EXTS:
        raise ValueError(f"Disallowed: {orig_ext}")
    raw = file.read()

    # Nothing to convert. Applying this to every route keeps image and office
    # files from being needlessly (and lossily) re-encoded, matching what the
    # text and media routes already did.
    if orig_ext == target_ext:
        return raw, "renamed only"

    mime = magic.from_buffer(raw, mime=True) or ''
    try:
        if orig_ext in IMAGE_EXTS and target_ext in IMAGE_EXTS:
            return convert_image(raw, target_ext), "image converted"
        if mime.startswith('text/') or orig_ext in TEXT_EXTS:
            return convert_text_markup(raw, orig_ext, target_ext), "text/markup converted"
        if orig_ext in DOC_EXTS or target_ext in DOC_EXTS:
            with tempfile.TemporaryDirectory() as tmp:
                return convert_office(tmp, raw, orig_ext, target_ext), "office/doc converted"
        if mime.startswith(('audio/', 'video/')) or orig_ext in MEDIA_EXTS:
            return convert_media(raw, target_ext), "media converted"
    except Exception as e:
        # Deliberately broad: any converter failure degrades to a rename, and
        # the user is told why.
        st.warning(f"⚠️ Conversion failed for {file.name}: {e}. Falling back to rename.")
    # Fallback: no conversion, just rename
    return raw, "renamed only"
# ----------------------------------------------------------------------------- | |
# ZIP packaging | |
# ----------------------------------------------------------------------------- | |
def _unique_archive_name(candidate: str, taken: set[str]) -> str:
    """Return ``candidate``, suffixed with ``_N`` if needed, and record it in ``taken``."""
    path = Path(candidate)
    unique = candidate
    counter = 1
    while unique in taken:
        unique = f"{path.stem}_{counter}{path.suffix}"
        counter += 1
    taken.add(unique)
    return unique


def package_zip(files: list[st.runtime.uploaded_file_manager.UploadedFile], target_ext: str) -> io.BytesIO:
    """Convert every upload and bundle the results into an in-memory ZIP.

    Files with a disallowed extension are skipped with a warning. Each
    remaining file is passed to ``convert_file`` and stored under its
    original stem plus ``target_ext``; a success message is shown per file.

    Args:
        files: Uploaded files to convert.
        target_ext: Lower-case target extension including the leading dot.

    Returns:
        A ``BytesIO`` holding the finished archive, rewound to position 0.
    """
    buf = io.BytesIO()
    # Different uploads can map to the same output name (a.png and a.jpg both
    # become a.pdf); duplicate ZIP entries would overwrite each other on
    # extraction, so later ones get a numeric suffix.
    used_names: set[str] = set()
    with zipfile.ZipFile(buf, 'w', zipfile.ZIP_DEFLATED) as zf:
        for file in files:
            name = Path(file.name)
            if name.suffix.lower() in DISALLOWED_SOURCE_EXTS:
                st.warning(f"Skipping disallowed file: {name.name}")
                continue
            data, note = convert_file(file, target_ext)
            out_name = _unique_archive_name(name.with_suffix(target_ext).name, used_names)
            zf.writestr(out_name, data)
            st.success(f"{note}: {name.name} → {out_name}")
    buf.seek(0)
    return buf
# ----------------------------------------------------------------------------- | |
# Main | |
# ----------------------------------------------------------------------------- | |
def main():
    """Build the page: settings sidebar, uploader, convert button, download.

    When files are uploaded and the convert button is pressed, all files are
    converted to the chosen extension, zipped, and offered for download under
    a name carrying the current UTC timestamp.
    """
    st.set_page_config("Universal Converter", page_icon="🔄", layout="centered")
    st.title("🔄 Universal File-Format Converter")
    st.write("Upload files of any format; choose a new extension; download a ZIP of converted files.")
    target_ext = sidebar_target_extension()
    files = uploader()
    if files and st.button("Convert & Download 🚀"):
        zip_buf = package_zip(files, target_ext)
        # datetime.utcnow() is deprecated since Python 3.12; an aware UTC
        # datetime formats to the identical stamp.
        ts = datetime.now(timezone.utc).strftime('%Y%m%dT%H%M%SZ')
        st.download_button("⬇️ Download ZIP", zip_buf,
                           file_name=f"converted_{ts}.zip",
                           mime='application/zip')
    st.caption("© 2025 Universal Converter • Streamlit • Hugging Face Spaces")


if __name__ == '__main__':
    main()