awacke1 committed on
Commit
6893dd3
·
verified ·
1 Parent(s): 876d01f

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +499 -0
app.py ADDED
@@ -0,0 +1,499 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import aiofiles
2
+ import asyncio
3
+ import base64
4
+ import cv2
5
+ import fitz
6
+ import glob
7
+ import io
8
+ import json
9
+ import logging
10
+ import os
11
+ import pandas as pd
12
+ import pytz
13
+ import random
14
+ import re
15
+ import requests
16
+ import shutil
17
+ import streamlit as st
18
+ import sys
19
+ import time
20
+ import torch
21
+ import zipfile
22
+
23
+ from audio_recorder_streamlit import audio_recorder
24
+ from contextlib import redirect_stdout
25
+ from dataclasses import dataclass
26
+ from datetime import datetime
27
+ from diffusers import StableDiffusionPipeline
28
+ from io import BytesIO
29
+ from moviepy.editor import VideoFileClip
30
+ from openai import OpenAI
31
+ from PIL import Image
32
+ from PyPDF2 import PdfReader
33
+ from transformers import AutoModelForCausalLM, AutoTokenizer, AutoModel
34
+ from typing import Optional
35
+
# Initialize OpenAI client.
# Credentials come from the environment so they never live in the source.
# NOTE(review): presumably the SDK rejects a missing OPENAI_API_KEY right here
# at import time — confirm against the installed openai version.
client = OpenAI(
    api_key=os.getenv('OPENAI_API_KEY'),
    organization=os.getenv('OPENAI_ORG_ID'),
)
38
+
# Logging setup
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
logger = logging.getLogger(__name__)
log_records = []


class LogCaptureHandler(logging.Handler):
    """Logging handler that keeps every record it receives in ``log_records``."""

    def emit(self, record):
        """Append *record* to the module-level ``log_records`` list."""
        log_records.append(record)


# The logger returned by getLogger() is process-wide, while this script (and
# therefore this class object) may be executed more than once in the same
# process. Capture handlers from an earlier execution are matched by class
# name and removed so they do not accumulate.
for _stale_handler in [h for h in logger.handlers if type(h).__name__ == "LogCaptureHandler"]:
    logger.removeHandler(_stale_handler)
logger.addHandler(LogCaptureHandler())
47
+
# Streamlit configuration: page title, icon, layout and the entries shown in
# the app menu.
st.set_page_config(
    page_title="AI Multimodal Titan 🚀",
    page_icon="🤖",
    layout="wide",
    initial_sidebar_state="expanded",
    menu_items={
        'Get Help': 'https://huggingface.co/awacke1',
        'Report a Bug': 'https://huggingface.co/spaces/awacke1',
        'About': "AI Multimodal Titan: PDFs, OCR, Image Gen, Audio/Video Processing, Code Execution, and More! 🌌"
    }
)
60
+
# Session state initialization.
# Each key maps to a factory so that mutable defaults are created fresh
# instead of being shared between keys.
_SESSION_DEFAULT_FACTORIES = {
    'history': list,
    'builder': lambda: None,
    'model_loaded': lambda: False,
    'processing': dict,
    'asset_checkboxes': dict,
    'downloaded_pdfs': dict,
    'unique_counter': lambda: 0,
    'messages': list,
}
for key, make_default in _SESSION_DEFAULT_FACTORIES.items():
    if key not in st.session_state:
        st.session_state[key] = make_default()
st.session_state.setdefault('selected_model_type', "Causal LM")
st.session_state.setdefault('selected_model', "None")
st.session_state.setdefault('gallery_size', 2)
# The sidebar placeholder is created anew on every execution of the script:
# with setdefault() the object stored by the first run would be kept forever,
# and later runs would draw into a placeholder that is no longer on the page.
st.session_state['asset_gallery_container'] = st.sidebar.empty()
68
+
@dataclass
class ModelConfig:
    """Description of a language model managed by the app."""

    # Local name of the model; also the last component of ``model_path``.
    name: str
    # Identifier of the checkpoint the model is derived from.
    base_model: str
    # Free-form size label (the build tab passes "small").
    size: str
    # Optional domain tag; unused by the code visible in this file.
    domain: Optional[str] = None
    model_type: str = "causal_lm"

    @property
    def model_path(self):
        """Return the relative directory the model is stored in."""
        return f"models/{self.name}"
79
+
@dataclass
class DiffusionConfig:
    """Description of a diffusion model managed by the app."""

    # Local name of the model; also the last component of ``model_path``.
    name: str
    # Identifier of the checkpoint the pipeline is derived from.
    base_model: str
    # Free-form size label (the build tab passes "small").
    size: str
    # Optional domain tag; unused by the code visible in this file.
    domain: Optional[str] = None

    @property
    def model_path(self):
        """Return the relative directory the pipeline is stored in."""
        return f"diffusion_models/{self.name}"
89
+
class ModelBuilder:
    """Load a causal language model with its tokenizer and save both to disk."""

    def __init__(self):
        self.config = None
        self.model = None
        self.tokenizer = None

    def load_model(self, model_path: str, config: Optional[ModelConfig] = None):
        """Load model and tokenizer from *model_path* and return ``self``.

        The tokenizer's pad token falls back to its EOS token when unset, and
        the model is moved to CUDA when available, otherwise to the CPU.
        *config*, when given, is stored on the builder.
        """
        with st.spinner(f"Loading {model_path}... ⏳"):
            self.model = AutoModelForCausalLM.from_pretrained(model_path)
            self.tokenizer = AutoTokenizer.from_pretrained(model_path)
            if self.tokenizer.pad_token is None:
                self.tokenizer.pad_token = self.tokenizer.eos_token
            if config:
                self.config = config
            self.model.to("cuda" if torch.cuda.is_available() else "cpu")
        st.success(f"Model loaded! 🎉")
        return self

    def save_model(self, path: str):
        """Save the loaded model and tokenizer under *path*."""
        with st.spinner("Saving model... 💾"):
            # dirname() is empty for a bare directory name, and makedirs("")
            # raises, so the parent is only created when there is one.
            parent_dir = os.path.dirname(path)
            if parent_dir:
                os.makedirs(parent_dir, exist_ok=True)
            self.model.save_pretrained(path)
            self.tokenizer.save_pretrained(path)
        st.success(f"Model saved at {path}! ✅")
112
+
class DiffusionBuilder:
    """Load, save and run a Stable Diffusion pipeline on the CPU."""

    def __init__(self):
        self.config = None
        self.pipeline = None

    def load_model(self, model_path: str, config: Optional[DiffusionConfig] = None):
        """Load the pipeline from *model_path* in float32 on CPU; return ``self``.

        *config*, when given, is stored on the builder.
        """
        with st.spinner(f"Loading diffusion model {model_path}... ⏳"):
            self.pipeline = StableDiffusionPipeline.from_pretrained(model_path, torch_dtype=torch.float32).to("cpu")
            if config:
                self.config = config
        st.success("Diffusion model loaded! 🎨")
        return self

    def save_model(self, path: str):
        """Save the loaded pipeline under *path*."""
        with st.spinner("Saving diffusion model... 💾"):
            # dirname() is empty for a bare directory name, and makedirs("")
            # raises, so the parent is only created when there is one.
            parent_dir = os.path.dirname(path)
            if parent_dir:
                os.makedirs(parent_dir, exist_ok=True)
            self.pipeline.save_pretrained(path)
        st.success(f"Diffusion model saved at {path}! ✅")

    def generate(self, prompt: str):
        """Run the pipeline for 20 inference steps and return the first image."""
        return self.pipeline(prompt, num_inference_steps=20).images[0]
131
+
def generate_filename(prompt, ext="png"):
    """Build a file name of the form ``MMDD_HHMM_<prompt>.<ext>``.

    The timestamp is the current US/Central time. Characters that are
    problematic in file names (``<>:"/\\|?*``) and ASCII control characters
    (for example a stray carriage return from pasted text) are replaced by
    ``_``; the sanitised prompt is cut to 240 characters.

    Two calls with the same prompt within the same minute return the same
    name.
    """
    central = pytz.timezone('US/Central')
    safe_date_time = datetime.now(central).strftime("%m%d_%H%M")
    safe_prompt = re.sub(r'[<>:"/\\|?*\x00-\x1f]', '_', prompt)[:240]
    return f"{safe_date_time}_{safe_prompt}.{ext}"
137
+
def get_download_link(file_path, mime_type="application/pdf", label="Download"):
    """Return an HTML ``<a>`` tag that downloads *file_path* via a data URI.

    The whole file is read and embedded base64-encoded in the link. The
    suggested download name is the file's base name, HTML-escaped because it
    is placed inside an attribute value. *label* is inserted as given so
    callers can pass markup.
    """
    import html

    with open(file_path, "rb") as f:
        data = base64.b64encode(f.read()).decode()
    download_name = html.escape(os.path.basename(file_path), quote=True)
    return f'<a href="data:{mime_type};base64,{data}" download="{download_name}">{label}</a>'
142
+
def zip_directory(directory_path, zip_path):
    """Write every file below *directory_path* into a deflated zip archive.

    Member names are relative to the parent of *directory_path*, so they all
    start with the directory's own name. A trailing path separator on
    *directory_path* does not change the member names, and the archive never
    adds itself when *zip_path* lies inside the directory.
    """
    directory_path = os.path.normpath(directory_path)
    archive_root = os.path.dirname(directory_path)
    archive_abs = os.path.abspath(zip_path)
    with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
        for root, _, files in os.walk(directory_path):
            for file_name in files:
                full_path = os.path.join(root, file_name)
                if os.path.abspath(full_path) == archive_abs:
                    continue
                zipf.write(full_path, os.path.relpath(full_path, archive_root))
148
+
def get_gallery_files(file_types=("png", "pdf", "md", "wav", "mp4")):
    """Return the sorted, de-duplicated files in the current directory.

    *file_types* is any iterable of extensions without the leading dot; only
    the working directory itself is searched.
    """
    return sorted({f for ext in file_types for f in glob.glob(f"*.{ext}")})
151
+
def download_pdf(url, output_path):
    """Download *url* to *output_path*; return ``True`` on success.

    The body is streamed in 8 KiB chunks. A non-200 status or a ``requests``
    error is logged and yields ``False``.
    """
    try:
        # The context manager releases the streamed connection even when the
        # body is never read or writing the file fails.
        with requests.get(url, stream=True, timeout=10) as response:
            if response.status_code != 200:
                logger.error(f"Failed to download {url}: HTTP {response.status_code}")
                return False
            with open(output_path, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
        return True
    except requests.RequestException as e:
        logger.error(f"Failed to download {url}: {e}")
    return False
163
+
async def process_pdf_snapshot(pdf_path, mode="single"):
    """Render pages of *pdf_path* to PNG files and return their names.

    Modes:
      * ``"single"``   - first page only.
      * ``"double"``   - first two pages side by side in one image; nothing
        is produced when the document has fewer than two pages.
      * ``"allpages"`` - one image per page.

    Pages are rendered at 2x zoom. Progress and errors are shown through a
    Streamlit placeholder; on any error an empty list is returned. The
    function contains no ``await`` and therefore runs synchronously.
    """
    start_time = time.time()
    status = st.empty()
    status.text(f"Processing PDF Snapshot ({mode})... (0s)")
    doc = None
    try:
        doc = fitz.open(pdf_path)
        output_files = []
        if mode == "single":
            pix = doc[0].get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
            output_file = generate_filename("single", "png")
            pix.save(output_file)
            output_files.append(output_file)
        elif mode == "double":
            if len(doc) >= 2:
                pix1 = doc[0].get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
                pix2 = doc[1].get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
                img1 = Image.frombytes("RGB", [pix1.width, pix1.height], pix1.samples)
                img2 = Image.frombytes("RGB", [pix2.width, pix2.height], pix2.samples)
                combined_img = Image.new("RGB", (pix1.width + pix2.width, max(pix1.height, pix2.height)))
                combined_img.paste(img1, (0, 0))
                combined_img.paste(img2, (pix1.width, 0))
                output_file = generate_filename("double", "png")
                combined_img.save(output_file)
                output_files.append(output_file)
        elif mode == "allpages":
            for i in range(len(doc)):
                pix = doc[i].get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
                output_file = generate_filename(f"page_{i}", "png")
                pix.save(output_file)
                output_files.append(output_file)
        elapsed = int(time.time() - start_time)
        status.text(f"PDF Snapshot ({mode}) completed in {elapsed}s!")
        return output_files
    except Exception as e:
        status.error(f"Failed to process PDF: {str(e)}")
        return []
    finally:
        # Close the document on the error path as well.
        if doc is not None:
            doc.close()
203
+
async def process_ocr(image, output_file):
    """Run GOT-OCR2_0 on *image*, write the text to *output_file*, return it.

    The tokenizer and model are loaded on every call (CPU, float32). The
    image is handed to the model through a temporary PNG file, which is
    removed afterwards even when OCR fails.
    """
    start_time = time.time()
    status = st.empty()
    status.text("Processing GOT-OCR2_0... (0s)")
    tokenizer = AutoTokenizer.from_pretrained("ucaslcl/GOT-OCR2_0", trust_remote_code=True)
    model = AutoModel.from_pretrained("ucaslcl/GOT-OCR2_0", trust_remote_code=True, torch_dtype=torch.float32).to("cpu").eval()
    temp_file = generate_filename("temp", "png")
    image.save(temp_file)
    try:
        result = model.chat(tokenizer, temp_file, ocr_type='ocr')
    finally:
        if os.path.exists(temp_file):
            os.remove(temp_file)
    elapsed = int(time.time() - start_time)
    status.text(f"GOT-OCR2_0 completed in {elapsed}s!")
    # Explicit encoding: recognised text may contain non-ASCII characters and
    # the platform default is not guaranteed to handle them.
    async with aiofiles.open(output_file, "w", encoding="utf-8") as f:
        await f.write(result)
    return result
219
+
async def process_image_gen(prompt, output_file):
    """Generate an image for *prompt*, save it to *output_file*, return it.

    The pipeline of the ``DiffusionBuilder`` stored in the session is used
    when there is one; otherwise ``OFA-Sys/small-stable-diffusion-v0`` is
    loaded on the CPU for this call. The function contains no ``await`` and
    therefore runs synchronously.
    """
    start_time = time.time()
    status = st.empty()
    status.text("Processing Image Gen... (0s)")
    builder = st.session_state.get('builder')
    if builder and isinstance(builder, DiffusionBuilder):
        pipeline = builder.pipeline
    else:
        pipeline = StableDiffusionPipeline.from_pretrained("OFA-Sys/small-stable-diffusion-v0", torch_dtype=torch.float32).to("cpu")
    gen_image = pipeline(prompt, num_inference_steps=20).images[0]
    elapsed = int(time.time() - start_time)
    status.text(f"Image Gen completed in {elapsed}s!")
    gen_image.save(output_file)
    return gen_image
230
+
def process_image_with_prompt(image, prompt, model="gpt-4o-mini", detail="auto"):
    """Send *image* together with *prompt* to a chat model; return the reply.

    The image is PNG-encoded and embedded as a base64 data URL. The reply is
    limited to 300 tokens. On any error a message starting with
    ``"Error processing image with GPT:"`` is returned instead of raising.
    """
    buffered = BytesIO()
    image.save(buffered, format="PNG")
    img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
    text_part = {"type": "text", "text": prompt}
    image_part = {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{img_str}", "detail": detail}}
    messages = [{"role": "user", "content": [text_part, image_part]}]
    try:
        response = client.chat.completions.create(model=model, messages=messages, max_tokens=300)
        return response.choices[0].message.content
    except Exception as e:
        return f"Error processing image with GPT: {str(e)}"
241
+
def process_text_with_prompt(text, prompt, model="gpt-4o-mini"):
    """Send *prompt* followed by *text* to a chat model; return the reply.

    The reply is limited to 300 tokens. On any error a message starting with
    ``"Error processing text with GPT:"`` is returned instead of raising.
    """
    messages = [{"role": "user", "content": f"{prompt}\n\n{text}"}]
    try:
        response = client.chat.completions.create(model=model, messages=messages, max_tokens=300)
        return response.choices[0].message.content
    except Exception as e:
        return f"Error processing text with GPT: {str(e)}"
249
+
def process_audio(audio_input, prompt):
    """Transcribe the audio file *audio_input* and answer *prompt* about it.

    Returns ``(transcript, reply)``: the ``whisper-1`` transcription text and
    the ``gpt-4o-mini`` response to *prompt* followed by that text. Errors
    from the API calls are not caught here.
    """
    with open(audio_input, "rb") as file:
        transcription = client.audio.transcriptions.create(model="whisper-1", file=file)
    user_message = {"role": "user", "content": f"{prompt}\n\n{transcription.text}"}
    response = client.chat.completions.create(model="gpt-4o-mini", messages=[user_message])
    return transcription.text, response.choices[0].message.content
255
+
def process_video(video_path, prompt):
    """Summarise a video from sampled frames plus its audio transcription.

    Frames come from ``process_video_frames``; the transcription is made from
    the video file itself, so the extracted audio path is not needed here.
    Returns the ``gpt-4o-mini`` reply. Errors from the API calls are not
    caught here.
    """
    base64Frames, _ = process_video_frames(video_path)
    with open(video_path, "rb") as file:
        transcription = client.audio.transcriptions.create(model="whisper-1", file=file)
    frame_parts = [
        {"type": "image_url", "image_url": {"url": f'data:image/jpg;base64,{frame}', "detail": "low"}}
        for frame in base64Frames
    ]
    content = [
        "These are the frames from the video.",
        *frame_parts,
        {"type": "text", "text": f"The audio transcription is: {transcription.text}\n\n{prompt}"},
    ]
    messages = [{"role": "user", "content": content}]
    response = client.chat.completions.create(model="gpt-4o-mini", messages=messages)
    return response.choices[0].message.content
263
+
def process_video_frames(video_path, seconds_per_frame=2):
    """Sample frames from a video and extract its audio track.

    One frame is taken every *seconds_per_frame* seconds and returned as a
    base64-encoded JPEG string. The audio is written next to the video with
    an ``.mp3`` extension at 32k bitrate.

    Returns ``(base64Frames, audio_path)``. ``audio_path`` is returned even
    when no audio could be written; that case is only logged.
    """
    base64Frames = []
    base_video_path, _ = os.path.splitext(video_path)
    video = cv2.VideoCapture(video_path)
    try:
        total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
        fps = video.get(cv2.CAP_PROP_FPS)
        # A reported FPS of 0 would give a step of 0 and the loop below would
        # never advance, so the step is at least one frame.
        frames_to_skip = max(1, int(fps * seconds_per_frame))
        curr_frame = 0
        while curr_frame < total_frames - 1:
            video.set(cv2.CAP_PROP_POS_FRAMES, curr_frame)
            success, frame = video.read()
            if not success:
                break
            _, buffer = cv2.imencode(".jpg", frame)
            base64Frames.append(base64.b64encode(buffer).decode("utf-8"))
            curr_frame += frames_to_skip
    finally:
        video.release()
    audio_path = f"{base_video_path}.mp3"
    clip = None
    try:
        clip = VideoFileClip(video_path)
        clip.audio.write_audiofile(audio_path, bitrate="32k")
        clip.audio.close()
    except Exception:
        # Best effort, as before: a clip without audio (clip.audio is None)
        # or any extraction error only produces a log entry.
        logger.info("No audio track found in video.")
    finally:
        if clip is not None:
            clip.close()
    return base64Frames, audio_path
290
+
def execute_code(code):
    """Execute *code* and capture what it prints.

    Returns ``(output, None)`` on success and ``(None, message)`` when the
    code raises an ``Exception`` (including syntax errors).

    The snippet runs in one fresh namespace used as both globals and locals.
    With separate dictionaries, names defined at the snippet's top level are
    not visible inside its own functions.

    SECURITY: this is plain ``exec`` with full interpreter privileges and no
    sandbox. The app feeds it user-typed and model-generated code.
    """
    namespace = {}
    with io.StringIO() as buffer:
        try:
            with redirect_stdout(buffer):
                exec(code, namespace)
            return buffer.getvalue(), None
        except Exception as e:
            return None, str(e)
301
+
# Sidebar: number of assets shown in the sidebar gallery.
st.sidebar.subheader("Gallery Settings")
st.session_state['gallery_size'] = st.sidebar.slider("Gallery Size", 1, 10, st.session_state['gallery_size'], key="gallery_size_slider")

# Tabs: one per feature, unpacked in the order the labels are listed.
tabs = st.tabs(["Camera 📷", "Download 📥", "OCR 🔍", "Build 🌱", "Image Gen 🎨", "PDF 📄", "Image 🖼️", "Audio 🎵", "Video 🎥", "Code 🧑‍💻", "Gallery 📚"])
(
    tab_camera,
    tab_download,
    tab_ocr,
    tab_build,
    tab_imggen,
    tab_pdf,
    tab_image,
    tab_audio,
    tab_video,
    tab_code,
    tab_gallery,
) = tabs
309
+
# Camera tab: two camera inputs side by side; each snapshot is written to a
# PNG file, remembered in the session and shown below its input.
with tab_camera:
    st.header("Camera Snap 📷")
    cols = st.columns(2)
    for i, cam_key in enumerate(["cam0", "cam1"]):
        with cols[i]:
            cam_img = st.camera_input(f"Take a picture - Cam {i}", key=cam_key)
            if cam_img:
                filename = generate_filename(f"cam{i}")
                with open(filename, "wb") as f:
                    f.write(cam_img.getvalue())
                st.session_state[f'cam{i}_file'] = filename
                st.session_state['history'].append(f"Snapshot from Cam {i}: {filename}")
                st.image(Image.open(filename), caption=f"Camera {i}", use_container_width=True)
323
+
# Download tab: fetch every URL entered (one per line) as a PDF file.
with tab_download:
    st.header("Download PDFs 📥")
    url_input = st.text_area("Enter PDF URLs (one per line)", height=200)
    if st.button("Download 🤖"):
        # Strip each line and drop blanks so that stray whitespace never ends
        # up in the request or the file name, and blank lines do not count
        # towards the progress total.
        urls = [line.strip() for line in url_input.splitlines() if line.strip()]
        progress_bar = st.progress(0)
        for idx, url in enumerate(urls):
            output_path = generate_filename(url, "pdf")
            if download_pdf(url, output_path):
                st.session_state['downloaded_pdfs'][url] = output_path
                st.session_state['history'].append(f"Downloaded PDF: {output_path}")
                st.session_state['asset_checkboxes'][output_path] = True
            else:
                st.warning(f"Failed to download {url}")
            progress_bar.progress((idx + 1) / len(urls))
338
+
# OCR tab: run OCR on a PNG, or on the first page of a PDF.
with tab_ocr:
    st.header("Test OCR 🔍")
    # Only the two formats handled below are offered; any other gallery file
    # would be passed to the PDF reader.
    all_files = get_gallery_files(["png", "pdf"])
    if all_files:
        selected_file = st.selectbox("Select File", all_files, key="ocr_select")
        if selected_file and st.button("Run OCR 🚀"):
            if selected_file.endswith('.png'):
                image = Image.open(selected_file)
            else:
                doc = fitz.open(selected_file)
                try:
                    pix = doc[0].get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
                    image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
                finally:
                    doc.close()
            output_file = generate_filename("ocr_output", "txt")
            result = asyncio.run(process_ocr(image, output_file))
            st.text_area("OCR Result", result, height=200)
            st.session_state['history'].append(f"OCR Test: {selected_file} -> {output_file}")
356
+
# Build tab: download a base model, save it locally and keep the builder in
# the session.
with tab_build:
    st.header("Build Titan 🌱")
    model_type = st.selectbox("Model Type", ["Causal LM", "Diffusion"], key="build_type")
    is_causal_lm = model_type == "Causal LM"
    if is_causal_lm:
        base_model_choices = ["HuggingFaceTB/SmolLM-135M", "Qwen/Qwen1.5-0.5B-Chat"]
    else:
        base_model_choices = ["OFA-Sys/small-stable-diffusion-v0", "stabilityai/stable-diffusion-2-base"]
    base_model = st.selectbox("Select Model", base_model_choices)
    # The time-based default is computed once per session. Recomputing it on
    # every execution changes the widget's default value each time, which
    # discards whatever the user typed.
    st.session_state.setdefault('build_default_model_name', f"tiny-titan-{int(time.time())}")
    model_name = st.text_input("Model Name", st.session_state['build_default_model_name'], key="build_model_name")
    if st.button("Download Model ⬇️"):
        config_cls = ModelConfig if is_causal_lm else DiffusionConfig
        config = config_cls(name=model_name, base_model=base_model, size="small")
        builder = ModelBuilder() if is_causal_lm else DiffusionBuilder()
        builder.load_model(base_model, config)
        builder.save_model(config.model_path)
        st.session_state['builder'] = builder
        st.session_state['model_loaded'] = True
369
+
# Image generation tab: render one image for the prompt and display it.
with tab_imggen:
    st.header("Test Image Gen 🎨")
    prompt = st.text_area("Prompt", "Generate a futuristic cityscape")
    if st.button("Run Image Gen 🚀"):
        output_file = generate_filename("gen_output", "png")
        result = asyncio.run(process_image_gen(prompt, output_file))
        st.image(result, caption="Generated Image", use_container_width=True)
        st.session_state['history'].append(f"Image Gen Test: {prompt} -> {output_file}")
378
+
# PDF tab: snapshot uploaded PDFs, extract their text with GPT, ask GPT for
# Python code based on that text and let the user run it.
with tab_pdf:
    st.header("PDF Process 📄")
    uploaded_pdfs = st.file_uploader("Upload PDFs", type=["pdf"], accept_multiple_files=True)
    view_mode = st.selectbox("View Mode", ["Single Page", "Double Page"], key="pdf_view_mode")
    if st.button("Process PDFs"):
        pdf_results = []
        for pdf_file in uploaded_pdfs or []:
            pdf_path = generate_filename(pdf_file.name, "pdf")
            with open(pdf_path, "wb") as f:
                f.write(pdf_file.read())
            snapshots = asyncio.run(process_pdf_snapshot(pdf_path, "double" if view_mode == "Double Page" else "single"))
            for snapshot in snapshots:
                text = process_image_with_prompt(Image.open(snapshot), "Extract the electronic text from image")
                code_prompt = f"Generate Python code based on this text:\n\n{text}"
                code = process_text_with_prompt(text, code_prompt)
                pdf_results.append({"snapshot": snapshot, "text": text, "code": code})
        # Results are kept in the session: a button is only True during the
        # run triggered by its click, so an "Execute" button rendered inside
        # the branch above would disappear before its own click is handled.
        st.session_state['pdf_results'] = pdf_results
    for idx, item in enumerate(st.session_state.get('pdf_results', [])):
        snapshot = item["snapshot"]
        if os.path.exists(snapshot):
            st.image(Image.open(snapshot), caption=snapshot)
        # Explicit keys: snapshot names can repeat, and identical labels
        # alone would give identical widget IDs.
        st.text_area(f"Extracted Text from {snapshot}", item["text"], key=f"pdf_text_{idx}")
        st.code(item["code"], language="python")
        if st.button(f"Execute Code from {snapshot}", key=f"pdf_exec_{idx}"):
            # SECURITY: this executes model-generated code without a sandbox.
            output, error = execute_code(item["code"])
            if error:
                st.error(f"Error: {error}")
            else:
                st.success(f"Output: {output or 'No output'}")
402
+
# Image tab: send every uploaded image together with the prompt to GPT.
with tab_image:
    st.header("Image Process 🖼️")
    uploaded_images = st.file_uploader("Upload Images", type=["png", "jpg"], accept_multiple_files=True)
    prompt = st.text_input("Prompt", "Extract the electronic text from image")
    if st.button("Process Images"):
        for idx, img_file in enumerate(uploaded_images or []):
            img = Image.open(img_file)
            st.image(img, caption=img_file.name)
            result = process_image_with_prompt(img, prompt)
            # Keyed by position so two uploads with the same file name do not
            # produce duplicate widget IDs.
            st.text_area(f"Result for {img_file.name}", result, key=f"image_result_{idx}")
413
+
# Audio tab: record audio, save it as WAV, transcribe and summarise it.
with tab_audio:
    st.header("Audio Process 🎵")
    audio_bytes = audio_recorder()
    if audio_bytes:
        filename = generate_filename("recording", "wav")
        with open(filename, "wb") as f:
            f.write(audio_bytes)
        st.audio(filename)
        # process_audio() lets API errors propagate; catching them here keeps
        # a failed request from aborting the rest of the page.
        try:
            transcript, summary = process_audio(filename, "Summarize this audio in markdown")
        except Exception as e:
            st.error(f"Audio processing failed: {e}")
        else:
            st.text_area("Transcript", transcript)
            st.markdown(summary)
425
+
# Video tab: save the uploaded MP4, play it and summarise it with GPT.
with tab_video:
    st.header("Video Process 🎥")
    video_input = st.file_uploader("Upload Video", type=["mp4"])
    if video_input:
        video_path = generate_filename(video_input.name, "mp4")
        with open(video_path, "wb") as f:
            f.write(video_input.read())
        st.video(video_path)
        # process_video() lets API errors propagate; catching them here keeps
        # a failed request from aborting the rest of the page.
        try:
            result = process_video(video_path, "Summarize this video in markdown")
        except Exception as e:
            st.error(f"Video processing failed: {e}")
        else:
            st.markdown(result)
436
+
# Code tab: run the Python typed by the user and show its output or error.
with tab_code:
    st.header("Code Executor 🧑‍💻")
    code_input = st.text_area("Python Code", height=400)
    if st.button("Run Code"):
        output, error = execute_code(code_input)
        if error:
            st.error(f"Error: {error}")
        else:
            st.success(f"Output: {output or 'No output'}")
446
+
# Gallery tab: preview every asset found in the working directory.
with tab_gallery:
    st.header("Gallery 📚")
    all_files = get_gallery_files()
    for file in all_files:
        if file.endswith('.png'):
            st.image(Image.open(file), caption=file)
        elif file.endswith('.pdf'):
            # First page at half scale as a thumbnail.
            doc = fitz.open(file)
            try:
                pix = doc[0].get_pixmap(matrix=fitz.Matrix(0.5, 0.5))
                st.image(Image.frombytes("RGB", [pix.width, pix.height], pix.samples), caption=file)
            finally:
                doc.close()
        elif file.endswith('.md'):
            # NOTE(review): UTF-8 is assumed for markdown files — confirm;
            # previously the platform default encoding was used.
            with open(file, "r", encoding="utf-8") as f:
                st.markdown(f.read())
        elif file.endswith('.wav'):
            st.audio(file)
        elif file.endswith('.mp4'):
            st.video(file)
465
+
# Update gallery in sidebar
def update_gallery():
    """Render the sidebar asset gallery.

    Shows at most ``gallery_size`` assets in two columns: a preview for PNG
    and PDF files, then a selection checkbox, a download link and a delete
    button for every file. Deleting a file removes it from disk and restarts
    the script.
    """
    container = st.session_state['asset_gallery_container']
    container.empty()
    all_files = get_gallery_files()
    if not all_files:
        return
    # Prefer st.rerun and fall back to the older experimental name when the
    # installed Streamlit does not provide it.
    rerun = getattr(st, "rerun", None) or getattr(st, "experimental_rerun")
    # The stored object comes from st.sidebar.empty(), which shows one
    # element at a time; the heading and the columns are therefore grouped
    # inside a single container element.
    with container.container():
        st.markdown("### Asset Gallery 📸📖")
        cols = st.columns(2)
        for idx, file in enumerate(all_files[:st.session_state['gallery_size']]):
            with cols[idx % 2]:
                if file.endswith('.png'):
                    st.image(Image.open(file), caption=os.path.basename(file))
                elif file.endswith('.pdf'):
                    doc = fitz.open(file)
                    try:
                        pix = doc[0].get_pixmap(matrix=fitz.Matrix(0.5, 0.5))
                        st.image(Image.frombytes("RGB", [pix.width, pix.height], pix.samples), caption=os.path.basename(file))
                    finally:
                        doc.close()
                # Store the checkbox result so the selection map reflects
                # what the user ticked.
                st.session_state['asset_checkboxes'][file] = st.checkbox(
                    "Select",
                    key=f"asset_{file}",
                    value=st.session_state['asset_checkboxes'].get(file, False),
                )
                st.markdown(get_download_link(file, "application/octet-stream", "Download"), unsafe_allow_html=True)
                if st.button("Delete", key=f"delete_{file}"):
                    os.remove(file)
                    st.session_state['asset_checkboxes'].pop(file, None)
                    rerun()
489
+
update_gallery()

# Sidebar logs and history
st.sidebar.subheader("Action Logs 📜")
for record in log_records:
    # asctime/message only exist on a record after some formatter has
    # processed it, so both are derived from fields every record has.
    timestamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(record.created))
    st.sidebar.write(f"{timestamp},{int(record.msecs):03d} - {record.levelname} - {record.getMessage()}")
st.sidebar.subheader("History 📜")
for entry in st.session_state.get("history", []):
    if entry:
        st.sidebar.write(entry)