awacke1 committed on
Commit
a57b24e
·
verified ·
1 Parent(s): 5af73ab

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +667 -0
app.py ADDED
@@ -0,0 +1,667 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import aiofiles
2
+ import asyncio
3
+ import base64
4
+ import fitz
5
+ import glob
6
+ import logging
7
+ import os
8
+ import pandas as pd
9
+ import pytz
10
+ import random
11
+ import re
12
+ import requests
13
+ import shutil
14
+ import streamlit as st
15
+ import time
16
+ import torch
17
+ import zipfile
18
+
19
+ from dataclasses import dataclass
20
+ from datetime import datetime
21
+ from diffusers import StableDiffusionPipeline
22
+ from io import BytesIO
23
+ from openai import OpenAI
24
+ from PIL import Image
25
+ from transformers import AutoModelForCausalLM, AutoTokenizer, AutoModel
26
+ from typing import Optional
27
+
28
+ # 🤖 OpenAI wizardry: Summon your API magic!
29
+ client = OpenAI(
30
+ api_key=os.getenv('OPENAI_API_KEY'),
31
+ organization=os.getenv('OPENAI_ORG_ID')
32
+ )
33
+
34
+ # 📜 Logging activated: Capturing chaos and calm!
35
+ logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
36
logger = logging.getLogger(__name__)
log_records = []


class LogCaptureHandler(logging.Handler):
    """Logging handler that appends every record to the module-level ``log_records``.

    The sidebar later reads ``record.asctime`` and ``record.message``. Those
    attributes are normally filled in by a ``Formatter``, so they are set here
    explicitly to keep captured records renderable on their own.
    """

    def emit(self, record):
        """Store ``record`` after making sure its display attributes exist."""
        try:
            record.message = record.getMessage()
            if not hasattr(record, "asctime"):
                record.asctime = logging.Formatter().formatTime(record)
            log_records.append(record)
        except Exception:
            # Follow the logging convention: a failing handler must not break the caller.
            self.handleError(record)


# NOTE(review): Streamlit re-executes the script on each interaction while the
# logging module keeps its state, so handlers from earlier runs would pile up on
# the same logger. The class is re-created on every run, hence the match by name.
for _stale_handler in [h for h in logger.handlers if type(h).__name__ == "LogCaptureHandler"]:
    logger.removeHandler(_stale_handler)
logger.addHandler(LogCaptureHandler())
42
+
43
+ # 🎨 Streamlit styling: Designing a cosmic interface!
44
+ st.set_page_config(
45
+ page_title="AI Vision & SFT Titans 🚀",
46
+ page_icon="🤖",
47
+ layout="wide",
48
+ initial_sidebar_state="expanded",
49
+ menu_items={
50
+ 'Get Help': 'https://huggingface.co/awacke1',
51
+ 'Report a Bug': 'https://huggingface.co/spaces/awacke1',
52
+ 'About': "AI Vision & SFT Titans: PDFs, OCR, Image Gen, Line Drawings, Custom Diffusion, and SFT on CPU! 🌌"
53
+ }
54
+ )
55
+
56
+ st.session_state.setdefault('history', []) # 🌱 History: starting fresh if empty!
57
+ st.session_state.setdefault('builder', None) # 🛠️ Builder: set up the builder if it's missing!
58
+ st.session_state.setdefault('model_loaded', False) # 🚦 Model Loaded: mark as not loaded by default!
59
+ st.session_state.setdefault('processing', {}) # ⏳ Processing: initialize processing state as an empty dict!
60
+ st.session_state.setdefault('asset_checkboxes', {}) # ✅ Asset Checkboxes: default to an empty dictionary!
61
+ st.session_state.setdefault('downloaded_pdfs', {}) # 📄 Downloaded PDFs: start with no PDFs downloaded!
62
+ st.session_state.setdefault('unique_counter', 0) # 🔢 Unique Counter: initialize the counter to zero!
63
+ st.session_state.setdefault('selected_model_type', "Causal LM") # 🧠 Selected Model Type: default to "Causal LM"!
64
+ st.session_state.setdefault('selected_model', "None") # 🤖 Selected Model: set to "None" if not already set!
65
+ st.session_state.setdefault('cam0_file', None) # 📸 Cam0 File: no file loaded by default!
66
+ st.session_state.setdefault('cam1_file', None) # 📸 Cam1 File: no file loaded by default!
67
+
68
+ # Create a single placeholder for the asset gallery in the sidebar.
69
+ if 'asset_gallery_container' not in st.session_state:
70
+ st.session_state['asset_gallery_container'] = st.sidebar.empty()
71
+
72
@dataclass
class ModelConfig:
    """Configuration record for a causal-LM model.

    ``domain`` is optional and ``model_type`` defaults to ``"causal_lm"``.
    """

    name: str
    base_model: str
    size: str
    domain: Optional[str] = None
    model_type: str = "causal_lm"

    @property
    def model_path(self):
        """Return the storage location ``models/<name>``."""
        return "models/{}".format(self.name)
81
+
82
@dataclass
class DiffusionConfig:
    """Configuration record for a diffusion model; ``domain`` is optional."""

    name: str
    base_model: str
    size: str
    domain: Optional[str] = None

    @property
    def model_path(self):
        """Return the storage location ``diffusion_models/<name>``."""
        return "diffusion_models/{}".format(self.name)
90
+
91
class ModelBuilder:
    """Load and save a causal language model together with its tokenizer."""

    def __init__(self):
        """Start with nothing loaded; ``jokes`` feeds the success message of ``load_model``."""
        self.config = None
        self.model = None
        self.tokenizer = None
        self.jokes = [
            "Why did the AI go to therapy? Too many layers to unpack! 😂",
            "Training complete! Time for a binary coffee break. ☕",
            "I told my neural network a joke; it couldn't stop dropping bits! 🤖",
            "I asked the AI for a pun, and it said, 'I'm punning on parallel processing!' 😄",
            "Debugging my code is like a stand-up routine—always a series of exceptions! 😆"
        ]

    def load_model(self, model_path: str, config: "Optional[ModelConfig]" = None):
        """Load model and tokenizer from ``model_path`` and move the model to CUDA or CPU.

        Args:
            model_path: Identifier or directory handed to ``from_pretrained``.
            config: Optional configuration to remember on the builder.

        Returns:
            The builder itself, so calls can be chained.
        """
        with st.spinner(f"Loading {model_path}... ⏳"):
            self.model = AutoModelForCausalLM.from_pretrained(model_path)
            self.tokenizer = AutoTokenizer.from_pretrained(model_path)
            if self.tokenizer.pad_token is None:
                # Fall back to the EOS token when the tokenizer defines no pad token.
                self.tokenizer.pad_token = self.tokenizer.eos_token
            if config:
                self.config = config
            self.model.to("cuda" if torch.cuda.is_available() else "cpu")
        st.success(f"Model loaded! 🎉 {random.choice(self.jokes)}")
        return self

    def save_model(self, path: str):
        """Save the loaded model and tokenizer under ``path``.

        The parent directory is created first. A ``path`` without a directory
        component is accepted; ``os.makedirs("")`` would raise for it.
        """
        with st.spinner("Saving model... 💾"):
            parent_dir = os.path.dirname(path)
            if parent_dir:
                os.makedirs(parent_dir, exist_ok=True)
            self.model.save_pretrained(path)
            self.tokenizer.save_pretrained(path)
        st.success(f"Model saved at {path}! ✅")
120
+
121
class DiffusionBuilder:
    """Load, save and run a Stable Diffusion pipeline on the CPU."""

    def __init__(self):
        """Start with no configuration and no pipeline loaded."""
        self.config = None
        self.pipeline = None

    def load_model(self, model_path: str, config: "Optional[DiffusionConfig]" = None):
        """Load a float32 pipeline from ``model_path`` onto the CPU.

        Args:
            model_path: Identifier or directory handed to ``from_pretrained``.
            config: Optional configuration to remember on the builder.

        Returns:
            The builder itself, so calls can be chained.
        """
        with st.spinner(f"Loading diffusion model {model_path}... ⏳"):
            self.pipeline = StableDiffusionPipeline.from_pretrained(model_path, torch_dtype=torch.float32).to("cpu")
            if config:
                self.config = config
        st.success("Diffusion model loaded! 🎨")
        return self

    def save_model(self, path: str):
        """Save the loaded pipeline under ``path``.

        The parent directory is created first. A ``path`` without a directory
        component is accepted; ``os.makedirs("")`` would raise for it.
        """
        with st.spinner("Saving diffusion model... 💾"):
            parent_dir = os.path.dirname(path)
            if parent_dir:
                os.makedirs(parent_dir, exist_ok=True)
            self.pipeline.save_pretrained(path)
        st.success(f"Diffusion model saved at {path}! ✅")

    def generate(self, prompt: str):
        """Run the pipeline for ``prompt`` with 20 inference steps and return the first image."""
        return self.pipeline(prompt, num_inference_steps=20).images[0]
139
+
140
def generate_filename(sequence, ext="png"):
    """Return ``<sequence>_<DDMMYYYYHHMMSS>.<ext>`` stamped with the current local time."""
    stamp = time.strftime("%d%m%Y%H%M%S")
    return "{}_{}.{}".format(sequence, stamp, ext)
142
+
143
def pdf_url_to_filename(url):
    """Turn ``url`` into a file name: each of ``<>:"/\\|?*`` becomes ``_`` and ``.pdf`` is appended."""
    replacements = str.maketrans({char: "_" for char in '<>:"/\\|?*'})
    return url.translate(replacements) + ".pdf"
145
+
146
def get_download_link(file_path, mime_type="application/pdf", label="Download"):
    """Return an HTML ``<a>`` tag that embeds ``file_path`` as a base64 ``data:`` URI.

    Args:
        file_path: File whose bytes are embedded in the link.
        mime_type: MIME type written into the data URI.
        label: Link text, inserted as given.

    Returns:
        The anchor markup as a string; the ``download`` attribute carries the
        base name of ``file_path``.
    """
    import html  # local import so the block does not depend on the file header

    # Read through a context manager so the handle is closed deterministically.
    with open(file_path, "rb") as source:
        payload = base64.b64encode(source.read()).decode()
    # File names can come from URLs or uploads; escape them before they land in an attribute.
    file_name = html.escape(os.path.basename(file_path), quote=True)
    return f'<a href="data:{mime_type};base64,{payload}" download="{file_name}">{label}</a>'
148
+
149
def zip_directory(directory_path, zip_path):
    """Write every file below ``directory_path`` into a deflated zip at ``zip_path``.

    Archive names are relative to the parent of ``directory_path``, so the
    directory's own name is the first path component inside the archive.
    The archive file itself is skipped when it lies inside ``directory_path``.
    """
    base_dir = os.path.dirname(directory_path)
    archive_abs = os.path.abspath(zip_path)
    with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
        for root, _, files in os.walk(directory_path):
            for file in files:
                full_path = os.path.join(root, file)
                if os.path.abspath(full_path) == archive_abs:
                    # Never add the half-written archive to itself.
                    continue
                zipf.write(full_path, os.path.relpath(full_path, base_dir))
153
+
154
def get_model_files(model_type="causal_lm"):
    """List the sub-directories of ``models/`` (for ``"causal_lm"``) or ``diffusion_models/``.

    Returns ``["None"]`` when no directory is found.
    """
    if model_type == "causal_lm":
        pattern = "models/*"
    else:
        pattern = "diffusion_models/*"
    directories = []
    for candidate in glob.glob(pattern):
        if os.path.isdir(candidate):
            directories.append(candidate)
    if not directories:
        return ["None"]
    return directories
156
+
157
def get_gallery_files(file_types=("png", "pdf")):
    """Return the sorted, de-duplicated files in the working directory matching ``*.<ext>``.

    Args:
        file_types: Iterable of extensions without the dot. The default is a
            tuple so no mutable object is shared between calls.
    """
    matches = set()
    for ext in file_types:
        matches.update(glob.glob(f"*.{ext}"))
    return sorted(matches)
159
+
160
def get_pdf_files():
    """Return the ``*.pdf`` files of the working directory in sorted order."""
    pdf_names = glob.glob("*.pdf")
    pdf_names.sort()
    return pdf_names
162
+
163
+ # 📥 Download PDF: Delivering docs faster than a caffeinated courier!
164
def download_pdf(url, output_path):
    """Stream ``url`` to ``output_path`` and report whether it succeeded.

    The body is written to ``<output_path>.part`` and renamed only after the
    whole response has been read, so an interrupted transfer never leaves a
    truncated file under the final name.

    Args:
        url: Address to fetch (10 second timeout).
        output_path: Destination file.

    Returns:
        True when the server answered 200 and the file was written, else False.
    """
    partial_path = f"{output_path}.part"
    try:
        # The context manager releases the streamed connection on every path.
        with requests.get(url, stream=True, timeout=10) as response:
            if response.status_code != 200:
                logger.error(f"Failed to download {url}: HTTP {response.status_code}")
                return False
            with open(partial_path, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)
        os.replace(partial_path, output_path)
        return True
    except (requests.RequestException, OSError) as e:
        logger.error(f"Failed to download {url}: {e}")
        if os.path.exists(partial_path):
            os.remove(partial_path)
        return False
178
+
179
+ # 📚 Async PDF Snapshot: Snap your PDF pages without blocking—juggle pages like a ninja! 🥷
180
async def process_pdf_snapshot(pdf_path, mode="single"):
    """Render pages of ``pdf_path`` to PNG files and return their names.

    Args:
        pdf_path: PDF to open.
        mode: ``"single"`` renders page 0, ``"twopage"`` the first two pages
            (or fewer), ``"allpages"`` every page. Any other value renders nothing.

    Returns:
        List of written PNG file names; an empty list if anything failed, in
        which case the error is shown in the status placeholder.
    """
    start_time = time.time()
    status = st.empty()
    status.text(f"Processing PDF Snapshot ({mode})... (0s)")
    try:
        doc = fitz.open(pdf_path)
        try:
            # Each target is (page index, sequence prefix for the output file name).
            if mode == "single":
                targets = [(0, "single")]
            elif mode == "twopage":
                targets = [(i, f"twopage_{i}") for i in range(min(2, len(doc)))]
            elif mode == "allpages":
                targets = [(i, f"page_{i}") for i in range(len(doc))]
            else:
                targets = []
            output_files = []
            for page_index, sequence in targets:
                pix = doc[page_index].get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
                output_file = generate_filename(sequence, "png")
                pix.save(output_file)
                output_files.append(output_file)
        finally:
            # Close the document even when rendering or saving a page fails.
            doc.close()
        elapsed = int(time.time() - start_time)
        status.text(f"PDF Snapshot ({mode}) completed in {elapsed}s!")
        update_gallery()
        return output_files
    except Exception as e:
        status.error(f"Failed to process PDF: {str(e)}")
        return []
215
+
216
+ # 😎 Async OCR: Convert images to text while your app keeps on groovin'—no blocking, just rocking! 🎸
217
@st.cache_resource(show_spinner=False)
def _load_got_ocr():
    """Load the GOT-OCR2_0 tokenizer and model once and reuse them across calls."""
    # NOTE(review): trust_remote_code executes code shipped with the model repository.
    tokenizer = AutoTokenizer.from_pretrained("ucaslcl/GOT-OCR2_0", trust_remote_code=True)
    model = AutoModel.from_pretrained("ucaslcl/GOT-OCR2_0", trust_remote_code=True, torch_dtype=torch.float32).to("cpu").eval()
    return tokenizer, model


async def process_ocr(image, output_file):
    """Run GOT-OCR2_0 on ``image``, write the text to ``output_file`` and return it.

    Args:
        image: Object with a ``save(path)`` method (a PIL image at the call sites).
        output_file: Text file that receives the OCR result.

    Returns:
        The value returned by ``model.chat``.
    """
    import tempfile  # local import so the block does not depend on the file header

    start_time = time.time()
    status = st.empty()
    status.text("Processing GOT-OCR2_0... (0s)")
    tokenizer, model = _load_got_ocr()
    # Use the system temp directory so the scratch PNG never matches the gallery's *.png glob.
    scratch = tempfile.NamedTemporaryFile(suffix=".png", delete=False)
    temp_file = scratch.name
    scratch.close()
    try:
        image.save(temp_file)
        result = model.chat(tokenizer, temp_file, ocr_type='ocr')
    finally:
        # Remove the scratch file even when saving or inference fails.
        if os.path.exists(temp_file):
            os.remove(temp_file)
    elapsed = int(time.time() - start_time)
    status.text(f"GOT-OCR2_0 completed in {elapsed}s!")
    # Explicit encoding: the result may contain characters the platform default cannot encode.
    async with aiofiles.open(output_file, "w", encoding="utf-8") as f:
        await f.write(result)
    update_gallery()
    return result
233
+
234
+ # 🧞 Async Image Gen: Your image genie—wishing up pictures while the event loop keeps the party going! 🎉
235
async def process_image_gen(prompt, output_file):
    """Generate an image for ``prompt``, save it to ``output_file`` and return it.

    The pipeline of the ``DiffusionBuilder`` stored in the session is used when
    one is loaded; otherwise ``OFA-Sys/small-stable-diffusion-v0`` is loaded on the CPU.
    """
    began = time.time()
    status = st.empty()
    status.text("Processing Image Gen... (0s)")
    builder = st.session_state.get('builder')
    if builder and isinstance(builder, DiffusionBuilder) and builder.pipeline:
        pipeline = builder.pipeline
    else:
        pipeline = StableDiffusionPipeline.from_pretrained(
            "OFA-Sys/small-stable-diffusion-v0", torch_dtype=torch.float32
        ).to("cpu")
    generated = pipeline(prompt, num_inference_steps=20)
    gen_image = generated.images[0]
    elapsed = int(time.time() - began)
    status.text(f"Image Gen completed in {elapsed}s!")
    gen_image.save(output_file)
    update_gallery()
    return gen_image
246
+
247
+ # 🖼️ GPT-Image Interpreter: Turning pixels into prose!
248
def process_image_with_prompt(image, prompt, model="gpt-4o-mini", detail="auto", max_tokens=300):
    """Send ``image`` and ``prompt`` to the chat-completions API and return the reply text.

    Args:
        image: PIL image; it is encoded in memory as a base64 PNG data URI.
        prompt: Text part of the user message.
        model: Model name passed to the API.
        detail: ``detail`` value of the image part.
        max_tokens: Completion limit. The default keeps the earlier fixed
            value of 300; raise it when a full page of text is expected.

    Returns:
        The content of the first choice, or a string starting with
        ``"Error processing image with GPT:"`` if the request raised.
    """
    buffered = BytesIO()
    try:
        image.save(buffered, format="PNG")
    except (OSError, ValueError):
        # Some modes (e.g. CMYK from JPEG uploads) cannot be stored as PNG; retry as RGB.
        buffered = BytesIO()
        image.convert("RGB").save(buffered, format="PNG")
    img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
    messages = [{"role": "user", "content": [
        {"type": "text", "text": prompt},
        {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{img_str}", "detail": detail}}
    ]}]
    try:
        response = client.chat.completions.create(model=model, messages=messages, max_tokens=max_tokens)
        return response.choices[0].message.content
    except Exception as e:
        return f"Error processing image with GPT: {str(e)}"
261
+
262
+ # 📝 GPT-Text Alchemist: Merging your prompt and text into digital gold!
263
def process_text_with_prompt(text, prompt, model="gpt-4o-mini", max_tokens=300):
    """Send ``prompt`` followed by ``text`` to the chat-completions API and return the reply.

    Args:
        text: Body appended after the prompt, separated by a blank line.
        prompt: Instruction placed first in the user message.
        model: Model name passed to the API.
        max_tokens: Completion limit. The default keeps the earlier fixed value of 300.

    Returns:
        The content of the first choice, or a string starting with
        ``"Error processing text with GPT:"`` if the request raised.
    """
    messages = [{"role": "user", "content": f"{prompt}\n\n{text}"}]
    try:
        response = client.chat.completions.create(model=model, messages=messages, max_tokens=max_tokens)
        return response.choices[0].message.content
    except Exception as e:
        return f"Error processing text with GPT: {str(e)}"
270
+
271
+ st.sidebar.subheader("Gallery Settings") # 🎨 Sidebar Gallery: Customize your creative space!
272
+ st.session_state.setdefault('gallery_size', 2) # 🔧 Setting default gallery size to 2 if it's missing!
273
+ st.session_state['gallery_size'] = st.sidebar.slider("Gallery Size", 1, 10, st.session_state['gallery_size'], key="gallery_size_slider") # 🎚️ Slide to adjust your gallery size and bring balance to your art!
274
+
275
+ # 📸 Gallery Updater: Making your assets dazzle and disappear faster than a magician's rabbit! 🐇✨
276
def update_gallery():
    """Redraw the sidebar asset gallery for the first ``gallery_size`` gallery files.

    Each asset gets a preview (PDFs show their first page), a
    "Use for SFT/Input" checkbox stored in ``asset_checkboxes``, a download
    link and a delete button that removes the file and reruns the script.
    """
    container = st.session_state['asset_gallery_container']
    container.empty()  # Clear previous gallery content
    all_files = get_gallery_files()
    if not all_files:
        return
    with container:
        # NOTE(review): the widgets below are created through st.sidebar and the
        # columns rather than through `container` - confirm that container.empty()
        # really removes output from an earlier call.
        st.sidebar.subheader("Asset Gallery 📸📖")
        cols = st.sidebar.columns(2)
        for idx, file in enumerate(all_files[:st.session_state['gallery_size']]):
            with cols[idx % 2]:
                # NOTE(review): keys embed an ever-increasing counter, so they differ
                # on every call - confirm widget state survives a rerun as intended.
                st.session_state['unique_counter'] += 1
                unique_id = st.session_state['unique_counter']
                if file.endswith('.png'):
                    with Image.open(file) as preview:
                        st.image(preview, caption=os.path.basename(file), use_container_width=True)
                else:
                    doc = fitz.open(file)
                    try:
                        pix = doc[0].get_pixmap(matrix=fitz.Matrix(0.5, 0.5))
                        img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
                        st.image(img, caption=os.path.basename(file), use_container_width=True)
                    finally:
                        # Close the PDF even when rendering the thumbnail fails.
                        doc.close()
                checkbox_key = f"asset_{file}_{unique_id}"
                st.session_state['asset_checkboxes'][file] = st.checkbox("Use for SFT/Input", value=st.session_state['asset_checkboxes'].get(file, False), key=checkbox_key)
                mime_type = "image/png" if file.endswith('.png') else "application/pdf"
                st.markdown(get_download_link(file, mime_type, "Snag It! 📥"), unsafe_allow_html=True)
                if st.button("Zap It! 🗑️", key=f"delete_{file}_{unique_id}"):
                    os.remove(file)
                    st.session_state['asset_checkboxes'].pop(file, None)
                    st.sidebar.success(f"Asset {os.path.basename(file)} vaporized! 💨")
                    st.rerun()
305
+
306
st.sidebar.subheader("Action Logs 📜")
with st.sidebar:
    # Plain loops instead of bare list-comprehension expressions: the comprehension
    # only built a throwaway list of None values (NOTE(review): which Streamlit's
    # "magic" display of bare expressions may also echo into the sidebar).
    for record in log_records:
        # asctime/message are only present once a Formatter has handled the record.
        timestamp = getattr(record, "asctime", None) or logging.Formatter().formatTime(record)
        message = getattr(record, "message", None) or record.getMessage()
        st.write(f"{timestamp} - {record.levelname} - {message}")

st.sidebar.subheader("History 📜")
with st.sidebar:
    for entry in st.session_state['history']:
        st.write(entry)
313
+
314
+ tabs = st.tabs(["Camera Snap 📷", "Download PDFs 📥", "Test OCR 🔍", "Build Titan 🌱", "Test Image Gen 🎨", "PDF Process 📄", "Image Process 🖼️", "MD Gallery 📚"])
315
+ (tab_camera, tab_download, tab_ocr, tab_build, tab_imggen, tab_pdf_process, tab_image_process, tab_md_gallery) = tabs
316
+
317
+ with tab_camera:
318
+ st.header("Camera Snap 📷") # 🎥 Header: Let’s capture those Kodak moments!
319
+ st.subheader("Single Capture") # 📸 Subheader: One snap at a time, no double exposure!
320
+ cols = st.columns(2)
321
+
322
+ with cols[0]:
323
+ cam0_img = st.camera_input("Take a picture - Cam 0", key="cam0")
324
+ if cam0_img:
325
+ filename = generate_filename("cam0")
326
+ if st.session_state['cam0_file'] and os.path.exists(st.session_state['cam0_file']):
327
+ os.remove(st.session_state['cam0_file'])
328
+ with open(filename, "wb") as f:
329
+ f.write(cam0_img.getvalue())
330
+ st.session_state['cam0_file'] = filename
331
+ entry = f"Snapshot from Cam 0: {filename}"
332
+ if entry not in st.session_state['history']:
333
+ st.session_state['history'] = [e for e in st.session_state['history'] if not e.startswith("Snapshot from Cam 0:")] + [entry]
334
+ st.image(Image.open(filename), caption="Camera 0", use_container_width=True)
335
+ logger.info(f"Saved snapshot from Camera 0: {filename}")
336
+ update_gallery()
337
+
338
+ with cols[1]:
339
+ cam1_img = st.camera_input("Take a picture - Cam 1", key="cam1")
340
+ if cam1_img:
341
+ filename = generate_filename("cam1")
342
+ if st.session_state['cam1_file'] and os.path.exists(st.session_state['cam1_file']):
343
+ os.remove(st.session_state['cam1_file'])
344
+ with open(filename, "wb") as f:
345
+ f.write(cam1_img.getvalue())
346
+ st.session_state['cam1_file'] = filename
347
+ entry = f"Snapshot from Cam 1: {filename}"
348
+ if entry not in st.session_state['history']:
349
+ st.session_state['history'] = [e for e in st.session_state['history'] if not e.startswith("Snapshot from Cam 1:")] + [entry]
350
+ st.image(Image.open(filename), caption="Camera 1", use_container_width=True)
351
+ logger.info(f"Saved snapshot from Camera 1: {filename}")
352
+ update_gallery()
353
+
354
+ with tab_download:
355
+ st.header("Download PDFs 📥")
356
+ if st.button("Examples 📚"):
357
+ example_urls = [
358
+ "https://arxiv.org/pdf/2308.03892",
359
+ "https://arxiv.org/pdf/1912.01703",
360
+ "https://arxiv.org/pdf/2408.11039",
361
+ "https://arxiv.org/pdf/2109.10282",
362
+ "https://arxiv.org/pdf/2112.10752",
363
+ "https://arxiv.org/pdf/2308.11236",
364
+ "https://arxiv.org/pdf/1706.03762",
365
+ "https://arxiv.org/pdf/2006.11239",
366
+ "https://arxiv.org/pdf/2305.11207",
367
+ "https://arxiv.org/pdf/2106.09685",
368
+ "https://arxiv.org/pdf/2005.11401",
369
+ "https://arxiv.org/pdf/2106.10504"
370
+ ]
371
+ st.session_state['pdf_urls'] = "\n".join(example_urls)
372
+
373
+ url_input = st.text_area("Enter PDF URLs (one per line)", value=st.session_state.get('pdf_urls', ""), height=200)
374
+ if st.button("Robo-Download 🤖"):
375
+ urls = url_input.strip().split("\n")
376
+ progress_bar = st.progress(0)
377
+ status_text = st.empty()
378
+ total_urls = len(urls)
379
+ existing_pdfs = get_pdf_files()
380
+ for idx, url in enumerate(urls):
381
+ if url:
382
+ output_path = pdf_url_to_filename(url)
383
+ status_text.text(f"Fetching {idx + 1}/{total_urls}: {os.path.basename(output_path)}...")
384
+ if output_path not in existing_pdfs:
385
+ if download_pdf(url, output_path):
386
+ st.session_state['downloaded_pdfs'][url] = output_path
387
+ logger.info(f"Downloaded PDF from {url} to {output_path}")
388
+ entry = f"Downloaded PDF: {output_path}"
389
+ if entry not in st.session_state['history']:
390
+ st.session_state['history'].append(entry)
391
+ st.session_state['asset_checkboxes'][output_path] = True
392
+ else:
393
+ st.error(f"Failed to nab {url} 😿")
394
+ else:
395
+ st.info(f"Already got {os.path.basename(output_path)}! Skipping... 🐾")
396
+ st.session_state['downloaded_pdfs'][url] = output_path
397
+ progress_bar.progress((idx + 1) / total_urls)
398
+ status_text.text("Robo-Download complete! 🚀")
399
+ update_gallery()
400
+
401
+ mode = st.selectbox("Snapshot Mode", ["Single Page (High-Res)", "Two Pages (High-Res)", "All Pages (High-Res)"], key="download_mode")
402
+ if st.button("Snapshot Selected 📸"):
403
+ selected_pdfs = [path for path in get_gallery_files() if path.endswith('.pdf') and st.session_state['asset_checkboxes'].get(path, False)]
404
+ if selected_pdfs:
405
+ for pdf_path in selected_pdfs:
406
+ if not os.path.exists(pdf_path):
407
+ st.warning(f"File not found: {pdf_path}. Skipping.")
408
+ continue
409
+ mode_key = {"Single Page (High-Res)": "single", "Two Pages (High-Res)": "twopage", "All Pages (High-Res)": "allpages"}[mode]
410
+ snapshots = asyncio.run(process_pdf_snapshot(pdf_path, mode_key))
411
+ for snapshot in snapshots:
412
+ st.image(Image.open(snapshot), caption=snapshot, use_container_width=True)
413
+ st.session_state['asset_checkboxes'][snapshot] = True
414
+ update_gallery()
415
+ else:
416
+ st.warning("No PDFs selected for snapshotting! Check some boxes in the sidebar.")
417
+
418
+ with tab_ocr:
419
+ st.header("Test OCR 🔍")
420
+ all_files = get_gallery_files()
421
+ if all_files:
422
+ if st.button("OCR All Assets 🚀"):
423
+ full_text = "# OCR Results\n\n"
424
+ for file in all_files:
425
+ if file.endswith('.png'):
426
+ image = Image.open(file)
427
+ else:
428
+ doc = fitz.open(file)
429
+ pix = doc[0].get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
430
+ image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
431
+ doc.close()
432
+ output_file = generate_filename(f"ocr_{os.path.basename(file)}", "txt")
433
+ result = asyncio.run(process_ocr(image, output_file))
434
+ full_text += f"## {os.path.basename(file)}\n\n{result}\n\n"
435
+ entry = f"OCR Test: {file} -> {output_file}"
436
+ if entry not in st.session_state['history']:
437
+ st.session_state['history'].append(entry)
438
+ md_output_file = f"full_ocr_{int(time.time())}.md"
439
+ with open(md_output_file, "w") as f:
440
+ f.write(full_text)
441
+ st.success(f"Full OCR saved to {md_output_file}")
442
+ st.markdown(get_download_link(md_output_file, "text/markdown", "Download Full OCR Markdown"), unsafe_allow_html=True)
443
+ selected_file = st.selectbox("Select Image or PDF", all_files, key="ocr_select")
444
+ if selected_file:
445
+ if selected_file.endswith('.png'):
446
+ image = Image.open(selected_file)
447
+ else:
448
+ doc = fitz.open(selected_file)
449
+ pix = doc[0].get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
450
+ image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
451
+ doc.close()
452
+ st.image(image, caption="Input Image", use_container_width=True)
453
+ if st.button("Run OCR 🚀", key="ocr_run"):
454
+ output_file = generate_filename("ocr_output", "txt")
455
+ st.session_state['processing']['ocr'] = True
456
+ result = asyncio.run(process_ocr(image, output_file))
457
+ entry = f"OCR Test: {selected_file} -> {output_file}"
458
+ if entry not in st.session_state['history']:
459
+ st.session_state['history'].append(entry)
460
+ st.text_area("OCR Result", result, height=200, key="ocr_result")
461
+ st.success(f"OCR output saved to {output_file}")
462
+ st.session_state['processing']['ocr'] = False
463
+ if selected_file.endswith('.pdf') and st.button("OCR All Pages 🚀", key="ocr_all_pages"):
464
+ doc = fitz.open(selected_file)
465
+ full_text = f"# OCR Results for {os.path.basename(selected_file)}\n\n"
466
+ for i in range(len(doc)):
467
+ pix = doc[i].get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
468
+ image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
469
+ output_file = generate_filename(f"ocr_page_{i}", "txt")
470
+ result = asyncio.run(process_ocr(image, output_file))
471
+ full_text += f"## Page {i + 1}\n\n{result}\n\n"
472
+ entry = f"OCR Test: {selected_file} Page {i + 1} -> {output_file}"
473
+ if entry not in st.session_state['history']:
474
+ st.session_state['history'].append(entry)
475
+ md_output_file = f"full_ocr_{os.path.basename(selected_file)}_{int(time.time())}.md"
476
+ with open(md_output_file, "w") as f:
477
+ f.write(full_text)
478
+ st.success(f"Full OCR saved to {md_output_file}")
479
+ st.markdown(get_download_link(md_output_file, "text/markdown", "Download Full OCR Markdown"), unsafe_allow_html=True)
480
+ else:
481
+ st.warning("No assets in gallery yet. Use Camera Snap or Download PDFs!")
482
+
483
+ with tab_build:
484
+ st.header("Build Titan 🌱")
485
+ model_type = st.selectbox("Model Type", ["Causal LM", "Diffusion"], key="build_type")
486
+ base_model = st.selectbox(
487
+ "Select Tiny Model",
488
+ ["HuggingFaceTB/SmolLM-135M", "Qwen/Qwen1.5-0.5B-Chat"] if model_type == "Causal LM"
489
+ else ["OFA-Sys/small-stable-diffusion-v0", "stabilityai/stable-diffusion-2-base"]
490
+ )
491
+ model_name = st.text_input("Model Name", f"tiny-titan-{int(time.time())}")
492
+ domain = st.text_input("Target Domain", "general")
493
+ if st.button("Download Model ⬇️"):
494
+ config = (ModelConfig if model_type == "Causal LM" else DiffusionConfig)(
495
+ name=model_name, base_model=base_model, size="small", domain=domain
496
+ )
497
+ builder = ModelBuilder() if model_type == "Causal LM" else DiffusionBuilder()
498
+ builder.load_model(base_model, config); builder.save_model(config.model_path)
499
+ st.session_state['builder'] = builder; st.session_state['model_loaded'] = True
500
+ st.session_state['selected_model_type'] = model_type; st.session_state['selected_model'] = config.model_path
501
+ entry = f"Built {model_type} model: {model_name}"
502
+ if entry not in st.session_state['history']:
503
+ st.session_state['history'].append(entry)
504
+ st.success(f"Model downloaded and saved to {config.model_path}! 🎉"); st.rerun()
505
+
506
+ with tab_imggen:
507
+ st.header("Test Image Gen 🎨")
508
+ all_files = get_gallery_files()
509
+ if all_files:
510
+ selected_file = st.selectbox("Select Image or PDF", all_files, key="gen_select")
511
+ if selected_file:
512
+ if selected_file.endswith('.png'):
513
+ image = Image.open(selected_file)
514
+ else:
515
+ doc = fitz.open(selected_file)
516
+ pix = doc[0].get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
517
+ image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
518
+ doc.close()
519
+ st.image(image, caption="Reference Image", use_container_width=True)
520
+ prompt = st.text_area("Prompt", "Generate a neon superhero version of this image", key="gen_prompt")
521
+ if st.button("Run Image Gen 🚀", key="gen_run"):
522
+ output_file = generate_filename("gen_output", "png")
523
+ st.session_state['processing']['gen'] = True
524
+ result = asyncio.run(process_image_gen(prompt, output_file))
525
+ entry = f"Image Gen Test: {prompt} -> {output_file}"
526
+ if entry not in st.session_state['history']:
527
+ st.session_state['history'].append(entry)
528
+ st.image(result, caption="Generated Image", use_container_width=True)
529
+ st.success(f"Image saved to {output_file}")
530
+ st.session_state['processing']['gen'] = False
531
+ else:
532
+ st.warning("No images or PDFs in gallery yet. Use Camera Snap or Download PDFs!")
533
+ update_gallery()
534
+
535
+ with tab_pdf_process:
536
+ st.header("PDF Process")
537
+ st.subheader("Upload PDFs for GPT-based text extraction")
538
+ gpt_models = ["gpt-4o", "gpt-4o-mini"]
539
+ selected_gpt_model = st.selectbox("Select GPT Model", gpt_models, key="pdf_gpt_model")
540
+ detail_level = st.selectbox("Detail Level", ["auto", "low", "high"], key="pdf_detail_level")
541
+ uploaded_pdfs = st.file_uploader("Upload PDF files", type=["pdf"], accept_multiple_files=True, key="pdf_process_uploader")
542
+ view_mode = st.selectbox("View Mode", ["Single Page", "Double Page"], key="pdf_view_mode")
543
+ if st.button("Process Uploaded PDFs", key="process_pdfs"):
544
+ combined_text = ""
545
+ for pdf_file in uploaded_pdfs:
546
+ pdf_bytes = pdf_file.read()
547
+ temp_pdf_path = f"temp_{pdf_file.name}"
548
+ with open(temp_pdf_path, "wb") as f:
549
+ f.write(pdf_bytes)
550
+ try:
551
+ doc = fitz.open(temp_pdf_path)
552
+ st.write(f"Processing {pdf_file.name} with {len(doc)} pages")
553
+ if view_mode == "Single Page":
554
+ for i, page in enumerate(doc):
555
+ pix = page.get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
556
+ img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
557
+ st.image(img, caption=f"{pdf_file.name} Page {i+1}")
558
+ gpt_text = process_image_with_prompt(img, "Extract the electronic text from image", model=selected_gpt_model, detail=detail_level)
559
+ combined_text += f"\n## {pdf_file.name} - Page {i+1}\n\n{gpt_text}\n"
560
+ else:
561
+ pages = list(doc)
562
+ for i in range(0, len(pages), 2):
563
+ if i+1 < len(pages):
564
+ pix1 = pages[i].get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
565
+ img1 = Image.frombytes("RGB", [pix1.width, pix1.height], pix1.samples)
566
+ pix2 = pages[i+1].get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
567
+ img2 = Image.frombytes("RGB", [pix2.width, pix2.height], pix2.samples)
568
+ total_width = img1.width + img2.width
569
+ max_height = max(img1.height, img2.height)
570
+ combined_img = Image.new("RGB", (total_width, max_height))
571
+ combined_img.paste(img1, (0, 0))
572
+ combined_img.paste(img2, (img1.width, 0))
573
+ st.image(combined_img, caption=f"{pdf_file.name} Pages {i+1}-{i+2}")
574
+ gpt_text = process_image_with_prompt(combined_img, "Extract the electronic text from image", model=selected_gpt_model, detail=detail_level)
575
+ combined_text += f"\n## {pdf_file.name} - Pages {i+1}-{i+2}\n\n{gpt_text}\n"
576
+ else:
577
+ pix = pages[i].get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
578
+ img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
579
+ st.image(img, caption=f"{pdf_file.name} Page {i+1}")
580
+ gpt_text = process_image_with_prompt(img, "Extract the electronic text from image", model=selected_gpt_model, detail=detail_level)
581
+ combined_text += f"\n## {pdf_file.name} - Page {i+1}\n\n{gpt_text}\n"
582
+ doc.close()
583
+ except Exception as e:
584
+ st.error(f"Error processing {pdf_file.name}: {str(e)}")
585
+ finally:
586
+ os.remove(temp_pdf_path)
587
+ output_filename = generate_filename("processed_pdf", "md")
588
+ with open(output_filename, "w", encoding="utf-8") as f:
589
+ f.write(combined_text)
590
+ st.success(f"PDF processing complete. MD file saved as {output_filename}")
591
+ st.markdown(get_download_link(output_filename, "text/markdown", "Download Processed PDF MD"), unsafe_allow_html=True)
592
+
593
+ with tab_image_process:
594
+ st.header("Image Process")
595
+ st.subheader("Upload Images for GPT-based OCR")
596
+ gpt_models = ["gpt-4o", "gpt-4o-mini"]
597
+ selected_gpt_model = st.selectbox("Select GPT Model", gpt_models, key="img_gpt_model")
598
+ detail_level = st.selectbox("Detail Level", ["auto", "low", "high"], key="img_detail_level")
599
+ prompt_img = st.text_input("Enter prompt for image processing", "Extract the electronic text from image", key="img_process_prompt")
600
+ uploaded_images = st.file_uploader("Upload image files", type=["png", "jpg", "jpeg"], accept_multiple_files=True, key="image_process_uploader")
601
+ if st.button("Process Uploaded Images", key="process_images"):
602
+ combined_text = ""
603
+ for img_file in uploaded_images:
604
+ try:
605
+ img = Image.open(img_file)
606
+ st.image(img, caption=img_file.name)
607
+ gpt_text = process_image_with_prompt(img, prompt_img, model=selected_gpt_model, detail=detail_level)
608
+ combined_text += f"\n## {img_file.name}\n\n{gpt_text}\n"
609
+ except Exception as e:
610
+ st.error(f"Error processing image {img_file.name}: {str(e)}")
611
+ output_filename = generate_filename("processed_image", "md")
612
+ with open(output_filename, "w", encoding="utf-8") as f:
613
+ f.write(combined_text)
614
+ st.success(f"Image processing complete. MD file saved as {output_filename}")
615
+ st.markdown(get_download_link(output_filename, "text/markdown", "Download Processed Image MD"), unsafe_allow_html=True)
616
+
617
+ with tab_md_gallery:
618
+ st.header("MD Gallery and GPT Processing")
619
+ gpt_models = ["gpt-4o", "gpt-4o-mini"]
620
+ selected_gpt_model = st.selectbox("Select GPT Model", gpt_models, key="md_gpt_model")
621
+ md_files = sorted(glob.glob("*.md"))
622
+ if md_files:
623
+ st.subheader("Individual File Processing")
624
+ cols = st.columns(2)
625
+ for idx, md_file in enumerate(md_files):
626
+ with cols[idx % 2]:
627
+ st.write(md_file)
628
+ if st.button(f"Process {md_file}", key=f"process_md_{md_file}"):
629
+ try:
630
+ with open(md_file, "r", encoding="utf-8") as f:
631
+ content = f.read()
632
+ prompt_md = "Summarize this into markdown outline with emojis and number the topics 1..12"
633
+ result_text = process_text_with_prompt(content, prompt_md, model=selected_gpt_model)
634
+ st.markdown(result_text)
635
+ output_filename = generate_filename(f"processed_{os.path.splitext(md_file)[0]}", "md")
636
+ with open(output_filename, "w", encoding="utf-8") as f:
637
+ f.write(result_text)
638
+ st.markdown(get_download_link(output_filename, "text/markdown", f"Download {output_filename}"), unsafe_allow_html=True)
639
+ except Exception as e:
640
+ st.error(f"Error processing {md_file}: {str(e)}")
641
+ st.subheader("Batch Processing")
642
+ st.write("Select MD files to combine and process:")
643
+ selected_md = {}
644
+ for md_file in md_files:
645
+ selected_md[md_file] = st.checkbox(md_file, key=f"checkbox_md_{md_file}")
646
+ batch_prompt = st.text_input("Enter batch processing prompt", "Summarize this into markdown outline with emojis and number the topics 1..12", key="batch_prompt")
647
+ if st.button("Process Selected MD Files", key="process_batch_md"):
648
+ combined_content = ""
649
+ for md_file, selected in selected_md.items():
650
+ if selected:
651
+ try:
652
+ with open(md_file, "r", encoding="utf-8") as f:
653
+ combined_content += f"\n## {md_file}\n" + f.read() + "\n"
654
+ except Exception as e:
655
+ st.error(f"Error reading {md_file}: {str(e)}")
656
+ if combined_content:
657
+ result_text = process_text_with_prompt(combined_content, batch_prompt, model=selected_gpt_model)
658
+ st.markdown(result_text)
659
+ output_filename = generate_filename("batch_processed_md", "md")
660
+ with open(output_filename, "w", encoding="utf-8") as f:
661
+ f.write(result_text)
662
+ st.success(f"Batch processing complete. MD file saved as {output_filename}")
663
+ st.markdown(get_download_link(output_filename, "text/markdown", "Download Batch Processed MD"), unsafe_allow_html=True)
664
+ else:
665
+ st.warning("No MD files selected.")
666
+ else:
667
+ st.warning("No MD files found.")