euler314 committed on
Commit
0c835d8
·
verified ·
1 Parent(s): 65be7bc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +106 -97
app.py CHANGED
@@ -1,20 +1,24 @@
1
  """
2
- Streamlit Universal FileFormat Changer
3
- --------------------------------------
4
- A Streamlit app ready for Hugging Face Spaces that **actually converts file
5
- contents when possible**, instead of merely renaming extensions.
6
-
7
- * Image ↔ image conversions via **Pillow** (JPEG, PNG, GIF, BMP, TIFF, ICO, WEBP)
8
- * Plain‑text files kept intact but re‑encoded (UTF‑8) when changing among
9
- text‑like extensions (txt, md, csv, json, xml, html, css, js)
10
- * Disallowed uploads: `.exe`, `.bin`
11
- * Everything is bundled into one ZIP download.
12
-
13
- Created 2025‑05‑22 • v2
 
 
 
 
14
  """
15
  from __future__ import annotations
16
 
17
- # NOTE: Set env vars *before* importing Streamlit ------------------------------
18
  import os, pathlib
19
  os.environ.setdefault("STREAMLIT_HOME", "/tmp/.streamlit")
20
  os.environ.setdefault("HOME", "/tmp")
@@ -22,147 +26,152 @@ pathlib.Path(os.environ["STREAMLIT_HOME"]).mkdir(parents=True, exist_ok=True)
22
 
23
  import io
24
  import zipfile
 
 
25
  from datetime import datetime
26
  from pathlib import Path
27
 
28
  import streamlit as st
29
- from PIL import Image # Pillow for real image conversion
 
 
 
30
 
31
  # -----------------------------------------------------------------------------
32
- # Supported extensions ---------------------------------------------------------
33
  # -----------------------------------------------------------------------------
34
- IMAGE_EXTS = {
35
- ".jpg", ".jpeg", ".png", ".gif", ".bmp", ".tiff", ".ico", ".webp",
36
- }
37
- TEXT_EXTS = {
38
- ".txt", ".md", ".csv", ".json", ".xml", ".html", ".css", ".js",
39
- }
40
- ARCHIVE_EXTS = {".zip", ".tar", ".gz", ".7z"}
41
  MEDIA_EXTS = {".mp3", ".wav", ".mp4", ".avi", ".mkv", ".mov"}
42
- DOC_EXTS = {".pdf", ".doc", ".docx"}
43
-
44
- ALLOWED_TARGET_EXTS: list[str] = sorted(
45
- IMAGE_EXTS | TEXT_EXTS | ARCHIVE_EXTS | MEDIA_EXTS | DOC_EXTS
46
- )
47
 
 
48
  DISALLOWED_SOURCE_EXTS = {".exe", ".bin"}
49
 
50
  # -----------------------------------------------------------------------------
51
- # Helpers ----------------------------------------------------------------------
52
  # -----------------------------------------------------------------------------
53
-
54
  def sidebar_target_extension() -> str:
55
  st.sidebar.header("Settings")
56
  query = st.sidebar.text_input("Filter extensions… (optional)")
57
- filtered = [e for e in ALLOWED_TARGET_EXTS if query.lower() in e]
58
- if not filtered:
59
  st.sidebar.error("No extension matches that filter.")
60
- target_ext = st.sidebar.selectbox(
61
- "Choose target extension (applied to **all** files)",
62
- filtered or ALLOWED_TARGET_EXTS,
63
- index=(filtered or ALLOWED_TARGET_EXTS).index(".png")
64
- if ".png" in (filtered or ALLOWED_TARGET_EXTS) else 0,
65
- )
66
- st.sidebar.markdown(
67
- "*Images are truly converted. Text files are re‑saved as UTF‑8. "
68
- "Other combinations fall back to a safe rename.*"
69
  )
70
- return target_ext
71
-
72
 
73
  def uploader():
74
  return st.file_uploader(
75
- "Upload any files (multiple allowed)",
76
- accept_multiple_files=True,
77
- type=None, # accept *all* extensions
78
- help="Drag‑and‑drop or click to browse.",
79
  )
80
 
81
-
82
  # -----------------------------------------------------------------------------
83
- # Conversion logic -------------------------------------------------------------
84
  # -----------------------------------------------------------------------------
85
-
86
  def convert_image(data: bytes, target_ext: str) -> bytes:
87
- """Return `bytes` of the image converted to `target_ext`. Raises if Pillow
88
- cannot save in that format."""
89
  img = Image.open(io.BytesIO(data))
90
  buf = io.BytesIO()
91
- # Map certain extensions to Pillow format names
92
- pil_fmt = {
93
- ".jpg": "JPEG", ".jpeg": "JPEG", ".png": "PNG", ".gif": "GIF",
94
- ".bmp": "BMP", ".tiff": "TIFF", ".ico": "ICO", ".webp": "WEBP",
95
- }[target_ext]
96
- img.save(buf, format=pil_fmt)
97
- buf.seek(0)
98
- return buf.read()
99
-
100
-
101
- def convert_text(data: bytes, _target_ext: str) -> bytes:
102
- """Return data re‑encoded as UTF‑8 (no format change)."""
103
- text = data.decode("utf‑8", errors="ignore")
104
- return text.encode("utf‑8")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
105
 
106
 
107
  def convert_file(file: st.runtime.uploaded_file_manager.UploadedFile, target_ext: str) -> tuple[bytes, str]:
108
- """Try to convert and return (bytes, conversion_note). On failure, return
109
- original data with a note that only rename happened."""
110
- orig_ext = Path(file.name).suffix.lower()
111
  raw = file.read()
112
 
 
 
 
 
 
113
  try:
114
  if orig_ext in IMAGE_EXTS and target_ext in IMAGE_EXTS:
115
  return convert_image(raw, target_ext), "image converted"
116
- if orig_ext in TEXT_EXTS and target_ext in TEXT_EXTS:
117
- return convert_text(raw, target_ext), "text re‑encoded"
118
- except Exception as err:
119
- st.warning(f"⚠️ Could not convert **{file.name}**: {err}. Falling back to rename.")
120
-
121
- # Fallback: no conversion
 
 
 
 
 
 
 
122
  return raw, "renamed only"
123
 
124
-
125
  # -----------------------------------------------------------------------------
126
- # Zip packaging ---------------------------------------------------------------
127
  # -----------------------------------------------------------------------------
128
-
129
  def package_zip(files: list[st.runtime.uploaded_file_manager.UploadedFile], target_ext: str) -> io.BytesIO:
130
  buf = io.BytesIO()
131
- with zipfile.ZipFile(buf, "w", zipfile.ZIP_DEFLATED) as zf:
132
  for file in files:
133
- orig_path = Path(file.name)
134
- if orig_path.suffix.lower() in DISALLOWED_SOURCE_EXTS:
135
- st.warning(f"⏭️ Skipping disallowed file: **{orig_path.name}**")
136
  continue
137
  data, note = convert_file(file, target_ext)
138
- new_name = orig_path.with_suffix(target_ext).name
139
- zf.writestr(new_name, data)
140
- st.success(f"{note} • **{orig_path.name}****{new_name}**")
141
  buf.seek(0)
142
  return buf
143
 
144
-
145
  # -----------------------------------------------------------------------------
146
- # Main ------------------------------------------------------------------------
147
  # -----------------------------------------------------------------------------
148
 
149
  def main():
150
- st.set_page_config("Universal Format Changer", page_icon="🔄", layout="centered")
151
- st.title("🔄 Universal FileFormat Changer")
152
- st.write("Upload files, pick a target extension, and download a ZIP with the converted files.")
153
 
154
  target_ext = sidebar_target_extension()
155
  files = uploader()
156
 
157
- if files and st.button("🚀 Convert & Download"):
158
  zip_buf = package_zip(files, target_ext)
159
- ts = datetime.utcnow().strftime("%Y%m%dT%H%M%SZ")
160
- st.download_button(
161
- "⬇️ Get ZIP", zip_buf, file_name=f"converted_{ts}.zip", mime="application/zip"
162
- )
163
-
164
- st.caption("© 2025 Universal Changer • Streamlit • Hugging Face Spaces")
165
 
 
166
 
167
- if __name__ == "__main__":
168
  main()
 
1
  """
2
+ Streamlit Universal File-Format Converter
3
+ ----------------------------------------
4
+ A Streamlit app for Hugging Face Spaces that **actually converts** file
5
+ contents across a wide array of formats, leveraging local libraries
6
+ (no API keys needed):
7
+
8
+ • **Images** via Pillow (JPEG, PNG, GIF, BMP, TIFF, ICO, WebP)
9
+ • **Text & markup** via pypandoc (MD, HTML, LaTeX, DOCX, PDF, etc.)
10
+ • **Office docs** via unoconv + LibreOffice headless (PDF, DOCX, PPTX, XLSX)
11
+ • **Audio/video** via ffmpeg-python (MP3, WAV, MP4, AVI, MKV, MOV, etc.)
12
+ • **MIME detection** via python-magic
13
+
14
+ Disallowed uploads: `.exe`, `.bin`
15
+ All outputs are streamed into a ZIP for download.
16
+
17
+ Created 2025-05-22 • v3
18
  """
19
  from __future__ import annotations
20
 
21
+ # Set up a writable Streamlit home BEFORE importing streamlit
22
  import os, pathlib
23
  os.environ.setdefault("STREAMLIT_HOME", "/tmp/.streamlit")
24
  os.environ.setdefault("HOME", "/tmp")
 
26
 
27
  import io
28
  import zipfile
29
+ import tempfile
30
+ import subprocess
31
  from datetime import datetime
32
  from pathlib import Path
33
 
34
  import streamlit as st
35
+ from PIL import Image
36
+ import pypandoc
37
+ import ffmpeg
38
+ import magic # python-magic for mime detection
39
 
40
  # -----------------------------------------------------------------------------
41
+ # Supported extensions
42
  # -----------------------------------------------------------------------------
43
# Extension groups (lower-case, leading dot) used to route an upload to a
# converter. Membership is checked against ``Path.suffix.lower()``.

# Raster image formats handled by the Pillow-based converter.
IMAGE_EXTS = {
    ".jpg", ".jpeg", ".png", ".gif",
    ".bmp", ".tiff", ".ico", ".webp",
}

# Plain-text and markup formats.
TEXT_EXTS = {
    ".txt", ".md", ".csv", ".json",
    ".xml", ".html", ".css", ".js",
}

# Audio and video containers.
MEDIA_EXTS = {
    ".mp3", ".wav",
    ".mp4", ".avi", ".mkv", ".mov",
}

# Office-style documents.
DOC_EXTS = {
    ".pdf",
    ".doc", ".docx",
    ".ppt", ".pptx",
    ".xls", ".xlsx",
    ".odt", ".ods",
}

# Every extension offered in the sidebar, in alphabetical order.
ALLOWED_TARGET_EXTS = sorted({*IMAGE_EXTS, *TEXT_EXTS, *MEDIA_EXTS, *DOC_EXTS})

# Uploads with these extensions are skipped and never converted.
DISALLOWED_SOURCE_EXTS = {".exe", ".bin"}
50
 
51
  # -----------------------------------------------------------------------------
52
+ # UI elements
53
  # -----------------------------------------------------------------------------
 
54
  def sidebar_target_extension() -> str:
55
  st.sidebar.header("Settings")
56
  query = st.sidebar.text_input("Filter extensions… (optional)")
57
+ choices = [e for e in ALLOWED_TARGET_EXTS if query.lower() in e]
58
+ if not choices:
59
  st.sidebar.error("No extension matches that filter.")
60
+ choices = ALLOWED_TARGET_EXTS
61
+ return st.sidebar.selectbox(
62
+ "Target extension for **all** files", choices, index=choices.index(".pdf") if ".pdf" in choices else 0
 
 
 
 
 
 
63
  )
 
 
64
 
65
  def uploader():
66
  return st.file_uploader(
67
+ "Upload files to convert", type=None, accept_multiple_files=True
 
 
 
68
  )
69
 
 
70
  # -----------------------------------------------------------------------------
71
+ # Conversion functions
72
  # -----------------------------------------------------------------------------
 
73
  def convert_image(data: bytes, target_ext: str) -> bytes:
 
 
74
  img = Image.open(io.BytesIO(data))
75
  buf = io.BytesIO()
76
+ fmt = {".jpg":"JPEG", ".jpeg":"JPEG", ".png":"PNG", ".gif":"GIF",
77
+ ".bmp":"BMP", ".tiff":"TIFF", ".ico":"ICO", ".webp":"WEBP"}[target_ext]
78
+ img.save(buf, format=fmt)
79
+ return buf.getvalue()
80
+
81
+
82
def convert_text_markup(data: bytes, orig_ext: str, target_ext: str) -> bytes:
    """Convert text/markup bytes between formats with pandoc (via pypandoc).

    Args:
        data: Source document bytes; decoded as UTF-8, undecodable bytes are
            dropped.
        orig_ext: Source extension such as ``".md"`` (may be empty when the
            upload was recognised as text only by its MIME type).
        target_ext: Target extension such as ``".html"`` or ``".docx"``.

    Returns:
        The converted document as bytes.

    Raises:
        ValueError: ``target_ext`` is empty.
        RuntimeError / OSError: raised by pypandoc when pandoc is missing or
            does not support the requested format pair.
    """
    # Pandoc identifies formats by name, not by file extension, so translate
    # the extensions that differ. Anything not listed is passed through as-is.
    # Pandoc has no plain-text reader; NOTE(review): ".txt" is read as
    # Markdown here, which is the closest reader — confirm this is acceptable.
    reader_aliases = {"txt": "markdown", "md": "markdown", "htm": "html", "tex": "latex"}
    writer_aliases = {"txt": "plain", "md": "markdown", "htm": "html", "tex": "latex"}
    # These writers produce binary files, which pypandoc only emits to disk.
    binary_writers = {"pdf", "docx", "odt", "pptx", "epub"}

    source_key = orig_ext.lower().lstrip(".")
    target_key = target_ext.lower().lstrip(".")
    if not target_key:
        raise ValueError("target_ext must name an output format, e.g. '.html'")

    # An empty source extension falls back to the Markdown reader.
    reader = reader_aliases.get(source_key, source_key) or "markdown"
    writer = writer_aliases.get(target_key, target_key)

    text = data.decode("utf-8", errors="ignore")
    if writer not in binary_writers:
        return pypandoc.convert_text(text, to=writer, format=reader).encode("utf-8")

    with tempfile.TemporaryDirectory() as tmp:
        # The file name must end in the real extension (pypandoc checks this
        # for PDF output).
        out_path = Path(tmp) / f"output.{target_key}"
        pypandoc.convert_text(text, to=writer, format=reader, outputfile=str(out_path))
        return out_path.read_bytes()
85
+
86
+
87
def convert_office(temp_dir: str, data: bytes, orig_ext: str, target_ext: str,
                   timeout: float = 120.0) -> bytes:
    """Convert an office-style document by shelling out to ``unoconv``.

    The input is written to ``temp_dir`` as ``input<orig_ext>`` and the result
    is read back from ``output<target_ext>`` in the same directory. The caller
    owns ``temp_dir`` and is responsible for removing it.

    Args:
        temp_dir: Existing, writable scratch directory.
        data: Source document bytes.
        orig_ext: Source extension with leading dot (e.g. ``".docx"``).
        target_ext: Target extension with leading dot (e.g. ``".pdf"``).
        timeout: Seconds to wait for ``unoconv`` before giving up.

    Returns:
        The converted document as bytes.

    Raises:
        RuntimeError: ``unoconv`` exited with a non-zero status.
        subprocess.TimeoutExpired: the conversion exceeded ``timeout``.
        FileNotFoundError: ``unoconv`` is not installed, or it reported
            success without producing the output file.
    """
    work_dir = Path(temp_dir)
    in_path = work_dir / f"input{orig_ext}"
    out_path = work_dir / f"output{target_ext}"
    in_path.write_bytes(data)

    # Argument list (no shell), so file names are never shell-interpreted.
    cmd = ["unoconv", "-f", target_ext.lstrip("."), "-o", str(out_path), str(in_path)]
    try:
        # Capture output so a failure can report what unoconv said, and bound
        # the run time so a stuck process cannot block the app indefinitely.
        subprocess.run(cmd, check=True, capture_output=True, timeout=timeout)
    except subprocess.CalledProcessError as err:
        detail = (err.stderr or b"").decode("utf-8", errors="replace").strip()
        raise RuntimeError(
            f"unoconv exited with status {err.returncode}: {detail or 'no error output'}"
        ) from err

    if not out_path.is_file():
        raise FileNotFoundError(f"unoconv reported success but produced no {target_ext} file")
    return out_path.read_bytes()
96
+
97
+
98
def convert_media(data: bytes, target_ext: str) -> bytes:
    """Transcode audio/video bytes to the container implied by ``target_ext``.

    The data is staged in temporary files rather than piped: some muxers
    (e.g. MP4/MOV) need a seekable output, and letting ffmpeg pick the muxer
    from the output file name avoids mapping extensions such as ``.mkv`` to
    ffmpeg's own format names.

    Args:
        data: Source media bytes.
        target_ext: Target extension with leading dot (e.g. ``".mp3"``).

    Returns:
        The transcoded media as bytes.

    Raises:
        RuntimeError: ffmpeg exited with an error (the last line of its
            stderr is included in the message).
    """
    with tempfile.TemporaryDirectory() as tmp:
        src = Path(tmp) / "input"  # ffmpeg probes the content, no extension needed
        dst = Path(tmp) / f"output{target_ext}"
        src.write_bytes(data)
        try:
            (
                ffmpeg.input(str(src))
                .output(str(dst))
                .run(capture_stdout=True, capture_stderr=True, overwrite_output=True)
            )
        except ffmpeg.Error as err:
            # Surface the failure instead of returning empty or partial output.
            detail = (err.stderr or b"").decode("utf-8", errors="replace").strip()
            last_line = detail.splitlines()[-1] if detail else "no error output"
            raise RuntimeError(f"ffmpeg failed: {last_line}") from err
        return dst.read_bytes()
107
 
108
 
109
  def convert_file(file: st.runtime.uploaded_file_manager.UploadedFile, target_ext: str) -> tuple[bytes, str]:
110
+ name = Path(file.name)
111
+ orig_ext = name.suffix.lower()
 
112
  raw = file.read()
113
 
114
+ if orig_ext in DISALLOWED_SOURCE_EXTS:
115
+ raise ValueError(f"Disallowed: {orig_ext}")
116
+
117
+ mime = magic.from_buffer(raw, mime=True) or ''
118
+
119
  try:
120
  if orig_ext in IMAGE_EXTS and target_ext in IMAGE_EXTS:
121
  return convert_image(raw, target_ext), "image converted"
122
+ if mime.startswith('text/') or orig_ext in TEXT_EXTS:
123
+ if orig_ext != target_ext:
124
+ return convert_text_markup(raw, orig_ext, target_ext), "text/markup converted"
125
+ if orig_ext in DOC_EXTS or target_ext in DOC_EXTS:
126
+ with tempfile.TemporaryDirectory() as tmp:
127
+ return convert_office(tmp, raw, orig_ext, target_ext), "office/doc converted"
128
+ if mime.startswith(('audio/','video/')) or orig_ext in MEDIA_EXTS:
129
+ if orig_ext != target_ext:
130
+ return convert_media(raw, target_ext), "media converted"
131
+ except Exception as e:
132
+ st.warning(f"⚠️ Conversion failed for {file.name}: {e}. Falling back to rename.")
133
+
134
+ # Fallback: no conversion, just rename
135
  return raw, "renamed only"
136
 
 
137
  # -----------------------------------------------------------------------------
138
+ # ZIP packaging
139
  # -----------------------------------------------------------------------------
 
140
  def package_zip(files: list[st.runtime.uploaded_file_manager.UploadedFile], target_ext: str) -> io.BytesIO:
141
  buf = io.BytesIO()
142
+ with zipfile.ZipFile(buf, 'w', zipfile.ZIP_DEFLATED) as zf:
143
  for file in files:
144
+ name = Path(file.name)
145
+ if name.suffix.lower() in DISALLOWED_SOURCE_EXTS:
146
+ st.warning(f"Skipping disallowed file: {name.name}")
147
  continue
148
  data, note = convert_file(file, target_ext)
149
+ out_name = name.with_suffix(target_ext).name
150
+ zf.writestr(out_name, data)
151
+ st.success(f"{note}: {name.name} → {out_name}")
152
  buf.seek(0)
153
  return buf
154
 
 
155
  # -----------------------------------------------------------------------------
156
+ # Main
157
  # -----------------------------------------------------------------------------
158
 
159
  def main():
160
+ st.set_page_config("Universal Converter", page_icon="🔄", layout="centered")
161
+ st.title("🔄 Universal File-Format Converter")
162
+ st.write("Upload files of any format; choose a new extension; download a ZIP of converted files.")
163
 
164
  target_ext = sidebar_target_extension()
165
  files = uploader()
166
 
167
+ if files and st.button("Convert & Download 🚀"):
168
  zip_buf = package_zip(files, target_ext)
169
+ ts = datetime.utcnow().strftime('%Y%m%dT%H%M%SZ')
170
+ st.download_button("⬇️ Download ZIP", zip_buf,
171
+ file_name=f"converted_{ts}.zip",
172
+ mime='application/zip')
 
 
173
 
174
+ st.caption("© 2025 Universal Converter • Streamlit • Hugging Face Spaces")
175
 
176
+ if __name__ == '__main__':
177
  main()