awacke1 committed on
Commit
75c09a2
·
verified ·
1 Parent(s): 7d67450

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +349 -167
app.py CHANGED
@@ -1,189 +1,371 @@
1
- # 😂 Shebangin’ it like it’s 1999—Python 3, let’s roll!
2
-
3
- # 🧳 Importing the whole circus—get ready for a wild ride!
4
  import os
 
 
5
  import time
6
  import pandas as pd
7
- import gradio as gr
8
- from gradio import DeepLinkButton # 🔥 Deep links from 5.23.0!
9
- import pkg_resources # 🕵️‍♂️ Sneaky version checker!
10
- import logging
11
- import glob
12
- from PIL import Image
 
13
  import fitz
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
- # 📜 Logging setup—because even AIs need a diary!
16
  logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
17
  logger = logging.getLogger(__name__)
18
- log_records = [] # 🗒️ Dear diary, today I logged a thing...
19
 
20
- # 🤓 LogCaptureHandler class—catching logs like a pro fisherman!
21
  class LogCaptureHandler(logging.Handler):
22
- # 🎣 Hooking those logs right outta the stream!
23
  def emit(self, record):
24
  log_records.append(record)
25
 
26
- logger.addHandler(LogCaptureHandler()) # 🐟 Adding the hook to the logger—catch ‘em all!
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
 
28
- # 😂 Time to stamp files like a boss—unique names incoming!
29
- def generate_filename(sequence, ext):
30
- timestamp = time.strftime("%d%m%Y%H%M%S") # ⏰ Clock says “name me now!”
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  return f"{sequence}_{timestamp}.{ext}"
32
 
33
- # 🕵️‍♂️ Sherlocking the filesystem for your precious files!
34
- def get_gallery_files(file_types):
35
- return sorted(list(set([f for ext in file_types for f in glob.glob(f"*.{ext}")]))) # 🗃️ Deduped treasure hunt!
36
 
37
- # 🖼️ Snap those pics like a paparazzi—upload images with flair!
38
- def upload_images(files, history, selected_files):
39
- if not files:
40
- return "No files uploaded", history, selected_files # 😢 No pics, no party!
41
- uploaded = []
42
- for file in files:
43
- ext = file.name.split('.')[-1].lower() # 🕵️ Sniffing out the file type!
44
- if ext in ["jpg", "png"]:
45
- output_path = f"img_{int(time.time())}_{os.path.basename(file.name)}" # 🏷️ Tagging it fresh!
46
- with open(output_path, "wb") as f:
47
- f.write(file.read()) # 📸 Snap saved!
48
- uploaded.append(output_path)
49
- history.append(f"Uploaded Image: {output_path}") # 📜 Logging the fame!
50
- selected_files[output_path] = False # ✅ Unchecked for now!
51
- return f"Uploaded {len(uploaded)} images", history, selected_files
52
-
53
- # 📜 Scribble some docs—PDFs and more, oh what a bore!
54
- def upload_documents(files, history, selected_files):
55
- if not files:
56
- return "No files uploaded", history, selected_files # 📝 No docs, no drama!
57
- uploaded = []
58
- for file in files:
59
- ext = file.name.split('.')[-1].lower() # 🕵️ Peeking at the paper type!
60
- if ext in ["pdf"]: # Limiting to PDF for demo simplicity
61
- output_path = f"doc_{int(time.time())}_{os.path.basename(file.name)}" # 🏷️ Stamping the scroll!
62
  with open(output_path, "wb") as f:
63
- f.write(file.read()) # 📜 Scroll secured!
64
- uploaded.append(output_path)
65
- history.append(f"Uploaded Document: {output_path}") # 📜 Noted in history!
66
- selected_files[output_path] = False # ✅ Still on the bench!
67
- return f"Uploaded {len(uploaded)} documents", history, selected_files
68
-
69
- # 📊 Data nerd alert—CSV uploads for the win!
70
- def upload_datasets(files, history, selected_files):
71
- if not files:
72
- return "No files uploaded", history, selected_files # 📈 No data, no geek-out!
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  uploaded = []
74
- for file in files:
75
- ext = file.name.split('.')[-1].lower() # 🕵️ Cracking the data code!
76
- if ext == "csv":
77
- output_path = f"data_{int(time.time())}_{os.path.basename(file.name)}" # 🏷️ Labeling the stats!
78
  with open(output_path, "wb") as f:
79
- f.write(file.read()) # 📊 Stats stashed!
80
  uploaded.append(output_path)
81
- history.append(f"Uploaded Dataset: {output_path}") # 📜 Data’s in the books!
82
- selected_files[output_path] = False # ✅ Not picked yet!
83
- return f"Uploaded {len(uploaded)} datasets", history, selected_files
84
-
85
- # 🖼️ Gallery glow-up—show off all your files in style!
86
- def update_galleries(history, selected_files):
87
- galleries = {
88
- "images": get_gallery_files(["jpg", "png"]), # 🖼️ Picture parade!
89
- "documents": get_gallery_files(["pdf"]), # 📜 Doc depot!
90
- "datasets": get_gallery_files(["csv"]), # 📊 Data den!
91
- }
92
- gallery_outputs = {
93
- "images": [(Image.open(f), os.path.basename(f)) for f in galleries["images"]], # 🖼️ Picture perfect!
94
- "documents": [(Image.frombytes("RGB", fitz.open(f)[0].get_pixmap(matrix=fitz.Matrix(0.5, 0.5)).size, fitz.open(f)[0].get_pixmap(matrix=fitz.Matrix(0.5, 0.5)).samples), os.path.basename(f)) for f in galleries["documents"]], # 📜 Doc dazzle!
95
- "datasets": [(f, os.path.basename(f)) for f in galleries["datasets"]], # 📊 Data delight!
96
- }
97
- history.append(f"Updated galleries: {sum(len(g) for g in galleries.values())} files") # 📜 Gallery grand total!
98
- return gallery_outputs, history, selected_files
99
-
100
- # 📂 Sidebar swagger—download links that scream “take me home!”
101
- def update_sidebar(history, selected_files):
102
- all_files = get_gallery_files(["jpg", "png", "pdf", "csv"])
103
- file_list = [gr.File(label=os.path.basename(f), value=f) for f in all_files] # 📥 Download goodies!
104
- return file_list, history
105
-
106
- # Check it or wreck it—toggle those selections like a pro!
107
- def toggle_selection(file_list, selected_files):
108
- for file in file_list:
109
- selected_files[file] = not selected_files.get(file, False) # ✅ Flip the switch, baby!
110
- return selected_files
111
-
112
- # 📊 Dataframe demo—showing off Gradio 5.21.0+ dataframe mastery!
113
- def get_dataframe():
114
- df = pd.DataFrame({
115
- "Name": ["Alice", "Bob", "Charlie"],
116
- "Age": [25, 30, 35],
117
- "Score": [95.5, 87.0, 92.3]
118
- })
119
- return df
120
-
121
- # 📜 Mermaid.js demo—flowchart fun from 5.23.0!
122
- def get_mermaid_chart():
123
- return """```mermaid
124
- graph TD
125
- A[Upload Files] --> B[View Gallery]
126
- B --> C[Select Files]
127
- C --> D[Generate Output]
128
- D --> E[Deep Link to Result]
129
- ```"""
130
-
131
- # 🎨 Code editor demo—Jedi completion from 5.23.0!
132
- def get_code_snippet():
133
- return "def hello(name):\n return f'Hello, {name}!'"
134
-
135
- # 🎪 Gradio UI—step right up to the AI circus!
136
- with gr.Blocks(title="Gradio 5.23.0 Mastery Demo 🚀") as demo:
137
- gr.Markdown(f"# Gradio 5.23.0 Mastery Demo 🚀\nRunning Gradio version: {pkg_resources.get_distribution('gradio').version}") # 🎉 Welcome to the big top with version check!
138
- history = gr.State(value=[]) # 📜 The ringmaster’s logbook!
139
- selected_files = gr.State(value={}) # ✅ The chosen ones, ready to perform!
140
-
141
- with gr.Row():
142
- with gr.Column(scale=1):
143
- gr.Markdown("## 📁 Files") # 🗃️ The file circus tent!
144
- sidebar_files = gr.Files(label="Downloads", height=300) # 📥 Grab your souvenirs here!
145
-
146
- with gr.Column(scale=3):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
  with gr.Row():
148
- gr.Markdown("## 🛠️ Toolbar") # 🔧 The circus control panel!
149
- select_btn = gr.Button(" Select") # ✅ Pick your performers!
150
-
151
- with gr.Tabs():
152
- with gr.TabItem("📤 Upload"): # 📤 The upload trapeze!
153
- with gr.Row():
154
- img_upload = gr.File(label="🖼️ Images (jpg/png)", file_count="multiple") # 🖼️ Picture trapeze!
155
- doc_upload = gr.File(label="📜 Docs (pdf)", file_count="multiple") # 📜 Doc drop!
156
- with gr.Row():
157
- data_upload = gr.File(label="📊 Data (csv)", file_count="multiple") # 📊 Data dive!
158
- upload_status = gr.Textbox(label="Status") # 📢 Ringmaster’s update!
159
- gr.Button("📤 Upload Images").click(upload_images, inputs=[img_upload, history, selected_files], outputs=[upload_status, history, selected_files]).then(update_galleries, inputs=[history, selected_files], outputs=[gr.Gallery(), gr.Gallery(), gr.Gallery(), history, selected_files]).then(update_sidebar, inputs=[history, selected_files], outputs=[sidebar_files, history])
160
- gr.Button("📤 Upload Docs").click(upload_documents, inputs=[doc_upload, history, selected_files], outputs=[upload_status, history, selected_files]).then(update_galleries, inputs=[history, selected_files], outputs=[gr.Gallery(), gr.Gallery(), gr.Gallery(), history, selected_files]).then(update_sidebar, inputs=[history, selected_files], outputs=[sidebar_files, history])
161
- gr.Button("📤 Upload Data").click(upload_datasets, inputs=[data_upload, history, selected_files], outputs=[upload_status, history, selected_files]).then(update_galleries, inputs=[history, selected_files], outputs=[gr.Gallery(), gr.Gallery(), gr.Gallery(), history, selected_files]).then(update_sidebar, inputs=[history, selected_files], outputs=[sidebar_files, history])
162
-
163
- with gr.TabItem("🖼️ Gallery"): # 🖼️ The big top showcase!
164
- img_gallery = gr.Gallery(label="🖼️ Images (jpg/png)", columns=4, height="auto") # 🖼️ Picture parade!
165
- doc_gallery = gr.Gallery(label="📜 Docs (pdf)", columns=4, height="auto") # 📜 Doc depot!
166
- data_gallery = gr.Gallery(label="📊 Data (csv)", columns=4, height="auto") # 📊 Data den!
167
- gr.Button("🔄 Refresh").click(update_galleries, inputs=[history, selected_files], outputs=[img_gallery, doc_gallery, data_gallery, history, selected_files]).then(update_sidebar, inputs=[history, selected_files], outputs=[sidebar_files, history])
168
-
169
- with gr.TabItem("🔍 Features"): # 🔍 The magic trick tent!
170
- gr.Markdown("### 📊 Dataframe Mastery (5.21.0)") # 📊 Flexing new dataframe tricks!
171
- df_output = gr.Dataframe(value=get_dataframe, interactive=True, static_columns=["Name"], wrap=True) # 🔥 Static columns, drag selection from 5.21.0!
172
-
173
- gr.Markdown("### 📜 Mermaid.js Flowchart (5.23.0)") # 📜 Mermaid.js from 5.23.0!
174
- mermaid_output = gr.Markdown(value=get_mermaid_chart) # 🌐 Flowchart fun!
175
-
176
- gr.Markdown("### 🎨 Code Editor with Jedi Completion (5.23.0)") # 🎨 Jedi power from 5.23.0!
177
- code_output = gr.Code(value=get_code_snippet, language="python", interactive=True) # ✍️ Code with autocompletion!
178
-
179
- gr.Markdown("### 💥 Deep Link Button (5.23.0)") # 💥 Deep links from 5.23.0!
180
- DeepLinkButton(label="Link to Latest Output", variant="secondary", deep_link="/gallery/images") # 🔥 Secondary variant from 5.23.0!
181
-
182
- with gr.TabItem("📜 History"): # 📜 The logbook showcase!
183
- history_output = gr.Textbox(label="History", lines=5, value="\n".join(history.value), interactive=False) # 📜 What’s been cooking?
184
-
185
- # 🎉 Auto-update history on load—Gradio 5.20.1 event listener vibes!
186
- demo.load(lambda h: "\n".join(h[-5:]), inputs=[history], outputs=[history_output])
187
-
188
- # 🎉 Launch the circus—step right up, folks!
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
189
  demo.launch()
 
1
+ #!/usr/bin/env python3
 
 
2
  import os
3
+ import glob
4
+ import base64
5
  import time
6
  import pandas as pd
7
+ import torch
8
+ import torch.nn as nn
9
+ import torch.nn.functional as F
10
+ from transformers import AutoModelForCausalLM, AutoTokenizer, AutoModel
11
+ from diffusers import StableDiffusionPipeline
12
+ from torch.utils.data import Dataset, DataLoader
13
+ import csv
14
  import fitz
15
+ import requests
16
+ from PIL import Image
17
+ import numpy as np
18
+ import logging
19
+ import asyncio
20
+ import aiofiles
21
+ from io import BytesIO
22
+ from dataclasses import dataclass
23
+ from typing import Optional, Tuple
24
+ import zipfile
25
+ import math
26
+ import random
27
+ import re
28
+ import gradio as gr
29
 
 
30
# Root logging configuration plus an in-memory capture of every record that
# goes through this module's logger, so the UI can show recent activity.
_LOG_FORMAT = "%(asctime)s - %(levelname)s - %(message)s"
logging.basicConfig(level=logging.INFO, format=_LOG_FORMAT)
logger = logging.getLogger(__name__)
log_records = []


class LogCaptureHandler(logging.Handler):
    """Logging handler that appends each record to the module-level ``log_records``."""

    def __init__(self, level=logging.NOTSET):
        super().__init__(level)
        # Same layout as the root configuration; it references %(asctime)s, so
        # formatting a record fills in ``record.asctime`` as well as ``record.message``.
        self.setFormatter(logging.Formatter(_LOG_FORMAT))

    def emit(self, record):
        """Store ``record`` once its ``message`` and ``asctime`` attributes are set.

        Consumers of ``log_records`` read ``asctime`` and ``message``; those
        attributes only exist after a formatter has processed the record, so
        format here instead of relying on some other handler having done it.
        """
        self.format(record)
        log_records.append(record)


logger.addHandler(LogCaptureHandler())
39
+
40
@dataclass
class ModelConfig:
    """Settings describing one language model handled by the app."""

    # Identifier of the model; also the last component of ``model_path``.
    name: str
    # Identifier of the checkpoint the model is loaded from.
    base_model: str
    size: str
    domain: Optional[str] = None
    model_type: str = "causal_lm"

    @property
    def model_path(self) -> str:
        """Relative directory for this model: ``models/<name>``."""
        return f"models/{self.name}"
50
+
51
@dataclass
class DiffusionConfig:
    """Settings describing one diffusion pipeline handled by the app."""

    # Identifier of the pipeline; also the last component of ``model_path``.
    name: str
    # Identifier of the checkpoint the pipeline is loaded from.
    base_model: str
    size: str
    domain: Optional[str] = None

    @property
    def model_path(self) -> str:
        """Relative directory for this pipeline: ``diffusion_models/<name>``."""
        return f"diffusion_models/{self.name}"
60
 
61
class ModelBuilder:
    """Loads, holds and saves a causal language model together with its tokenizer."""

    def __init__(self):
        self.config = None
        self.model = None
        self.tokenizer = None
        self.jokes = ["Why did the AI go to therapy? Too many layers to unpack! 😂", "Training complete! Time for a binary coffee break. ☕"]

    def load_model(self, model_path: str, config: Optional["ModelConfig"] = None):
        """Load model and tokenizer from ``model_path`` and move the model to a device.

        Args:
            model_path: Identifier or directory passed to ``from_pretrained``.
            config: Optional configuration to remember on the builder.

        Returns:
            This builder, so calls can be chained.
        """
        self.model = AutoModelForCausalLM.from_pretrained(model_path)
        self.tokenizer = AutoTokenizer.from_pretrained(model_path)
        # Fall back to the EOS token when the tokenizer defines no pad token.
        if self.tokenizer.pad_token is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token
        if config:
            self.config = config
        self.model.to("cuda" if torch.cuda.is_available() else "cpu")
        return self

    def save_model(self, path: str):
        """Save the loaded model and tokenizer into the directory ``path``.

        Raises:
            RuntimeError: If nothing has been loaded yet.
        """
        if self.model is None or self.tokenizer is None:
            raise RuntimeError("No model loaded; call load_model() before save_model()")
        # Create the target directory itself: os.path.dirname() of a bare
        # directory name is "", and os.makedirs("") raises FileNotFoundError.
        if path:
            os.makedirs(path, exist_ok=True)
        self.model.save_pretrained(path)
        self.tokenizer.save_pretrained(path)
80
+
81
class DiffusionBuilder:
    """Loads, holds, saves and runs a Stable Diffusion pipeline on the CPU."""

    def __init__(self):
        self.config = None
        self.pipeline = None

    def load_model(self, model_path: str, config: Optional["DiffusionConfig"] = None):
        """Load a float32 pipeline from ``model_path`` onto the CPU.

        Args:
            model_path: Identifier or directory passed to ``from_pretrained``.
            config: Optional configuration to remember on the builder.

        Returns:
            This builder, so calls can be chained.
        """
        self.pipeline = StableDiffusionPipeline.from_pretrained(model_path, torch_dtype=torch.float32).to("cpu")
        if config:
            self.config = config
        return self

    def save_model(self, path: str):
        """Save the loaded pipeline into the directory ``path``.

        Raises:
            RuntimeError: If no pipeline has been loaded yet.
        """
        if self.pipeline is None:
            raise RuntimeError("No pipeline loaded; call load_model() before save_model()")
        # Create the target directory itself: os.path.dirname() of a bare
        # directory name is "", and os.makedirs("") raises FileNotFoundError.
        if path:
            os.makedirs(path, exist_ok=True)
        self.pipeline.save_pretrained(path)

    def generate(self, prompt: str):
        """Run the pipeline for 20 inference steps and return the first image.

        Raises:
            RuntimeError: If no pipeline has been loaded yet.
        """
        if self.pipeline is None:
            raise RuntimeError("No pipeline loaded; call load_model() before generate()")
        return self.pipeline(prompt, num_inference_steps=20).images[0]
95
+
96
def generate_filename(sequence, ext="png"):
    """Return ``<sequence>_<DDMMYYYYHHMMSS>.<ext>`` using the current local time."""
    return f"{sequence}_{time.strftime('%d%m%Y%H%M%S')}.{ext}"
99
 
100
def pdf_url_to_filename(url):
    """Turn a URL into a flat file name ending in ``.pdf``.

    Each of the characters ``< > : " / \\ | ? *`` is replaced by an underscore;
    ``.pdf`` is always appended, even if the URL already ends with it.
    """
    to_underscore = str.maketrans({ch: "_" for ch in '<>:"/\\|?*'})
    return url.translate(to_underscore) + ".pdf"
103
 
104
def get_gallery_files(file_types=("png", "pdf")):
    """Return the sorted, de-duplicated files in the working directory with the given extensions.

    Args:
        file_types: Iterable of extensions without the leading dot. The default
            is a tuple so it cannot be mutated between calls.

    Returns:
        A sorted list of matching file names (may be empty).
    """
    # The set comprehension removes duplicates when extensions repeat.
    return sorted({path for ext in file_types for path in glob.glob(f"*.{ext}")})
106
+
107
def get_model_files(model_type="causal_lm"):
    """List saved model directories for ``model_type``.

    Looks under ``models/`` for ``"causal_lm"`` and under ``diffusion_models/``
    for anything else. Returns ``["None"]`` when no directory is found.
    """
    if model_type == "causal_lm":
        pattern = "models/*"
    else:
        pattern = "diffusion_models/*"
    found = list(filter(os.path.isdir, glob.glob(pattern)))
    return found or ["None"]
111
+
112
def download_pdf(url, output_path):
    """Stream the resource at ``url`` into ``output_path``.

    Args:
        url: Address to fetch (10 second timeout).
        output_path: File that receives the response body.

    Returns:
        True when the server answered HTTP 200 and the body was written,
        False otherwise; failures are logged rather than raised.
    """
    try:
        # With stream=True the connection stays checked out until the response
        # is closed; the context manager releases it on every exit path.
        with requests.get(url, stream=True, timeout=10) as response:
            if response.status_code != 200:
                logger.error(f"Failed to download {url}: HTTP {response.status_code}")
                return False
            with open(output_path, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
        return True
    except (requests.RequestException, OSError) as e:
        # OSError also covers failures opening or writing the output file.
        logger.error(f"Failed to download {url}: {e}")
        return False
123
+
124
async def process_pdf_snapshot(pdf_path, mode="single"):
    """Render pages of a PDF to PNG files at 2x zoom and return the file names.

    Args:
        pdf_path: Path of the PDF to open.
        mode: ``"single"`` renders page 0, ``"twopage"`` the first two pages
            (or fewer), ``"allpages"`` every page. Any other value renders nothing.

    Returns:
        List of PNG file names written, in page order.
    """
    doc = fitz.open(pdf_path)
    output_files = []
    try:
        # Each entry is (page index, file-name prefix handed to generate_filename).
        if mode == "single":
            plan = [(0, "single")]
        elif mode == "twopage":
            plan = [(i, f"twopage_{i}") for i in range(min(2, len(doc)))]
        elif mode == "allpages":
            plan = [(i, f"page_{i}") for i in range(len(doc))]
        else:
            plan = []
        for page_index, prefix in plan:
            pix = doc[page_index].get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
            output_file = generate_filename(prefix, "png")
            pix.save(output_file)
            output_files.append(output_file)
    finally:
        # Close the document even when rendering or saving raises.
        doc.close()
    return output_files
149
+
150
_OCR_MODEL_ID = "ucaslcl/GOT-OCR2_0"
_ocr_components = {}


def _load_ocr_components():
    """Return the cached ``(tokenizer, model)`` pair, loading both on first use."""
    if "model" not in _ocr_components:
        # NOTE(review): trust_remote_code=True executes code shipped with the
        # checkpoint; keep the model id pinned to a source you trust.
        tokenizer = AutoTokenizer.from_pretrained(_OCR_MODEL_ID, trust_remote_code=True)
        model = AutoModel.from_pretrained(_OCR_MODEL_ID, trust_remote_code=True, torch_dtype=torch.float32).to("cpu").eval()
        # Publish both only after both loaded, so a failed load is retried cleanly.
        _ocr_components["tokenizer"] = tokenizer
        _ocr_components["model"] = model
    return _ocr_components["tokenizer"], _ocr_components["model"]


async def process_ocr(image, output_file):
    """Run OCR on ``image`` and write the recognised text to ``output_file``.

    Args:
        image: Object with a ``save(path)`` method (a PIL image in this app).
        output_file: Path of the UTF-8 text file to write.

    Returns:
        The text returned by the model's ``chat`` call.
    """
    import tempfile

    # Loading the checkpoint is expensive; reuse it across calls instead of
    # reloading once per file.
    tokenizer, model = _load_ocr_components()
    # A unique file in the system temp directory avoids name collisions and
    # keeps the scratch PNG out of the working directory.
    fd, temp_file = tempfile.mkstemp(prefix="temp_", suffix=".png")
    os.close(fd)
    try:
        image.save(temp_file)
        result = model.chat(tokenizer, temp_file, ocr_type='ocr')
    finally:
        if os.path.exists(temp_file):
            os.remove(temp_file)
    # OCR output is often non-ASCII; do not depend on the locale encoding.
    async with aiofiles.open(output_file, "w", encoding="utf-8") as f:
        await f.write(result)
    return result
160
+
161
async def process_image_gen(prompt, output_file, builder):
    """Generate an image for ``prompt``, save it to ``output_file`` and return it.

    Uses ``builder.pipeline`` when ``builder`` is a ``DiffusionBuilder`` with a
    loaded pipeline; otherwise loads the default small Stable Diffusion
    checkpoint on the CPU. Runs 20 inference steps.
    """
    has_own_pipeline = builder and isinstance(builder, DiffusionBuilder) and builder.pipeline
    if not has_own_pipeline:
        pipeline = StableDiffusionPipeline.from_pretrained("OFA-Sys/small-stable-diffusion-v0", torch_dtype=torch.float32).to("cpu")
    else:
        pipeline = builder.pipeline
    outcome = pipeline(prompt, num_inference_steps=20)
    gen_image = outcome.images[0]
    gen_image.save(output_file)
    return gen_image
169
+
170
+ # Gradio Interface Functions
171
def update_gallery(history, asset_checkboxes):
    """Build preview images for the first five gallery files and log the update.

    PNG files are opened directly; any other file is opened with ``fitz`` and
    its first page rendered at half scale.

    Returns:
        ``(gallery_images, history, asset_checkboxes)``.
    """
    all_files = get_gallery_files()
    gallery_images = []
    # Only the first five files are previewed.
    for path in all_files[:5]:
        if not path.endswith('.png'):
            doc = fitz.open(path)
            pix = doc[0].get_pixmap(matrix=fitz.Matrix(0.5, 0.5))
            preview = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
            gallery_images.append(preview)
            doc.close()
            continue
        gallery_images.append(Image.open(path))
    history.append(f"Gallery updated: {len(all_files)} files")
    return gallery_images, history, asset_checkboxes
185
+
186
def camera_snap(image, cam_id, history, asset_checkboxes, cam_files):
    """Save a camera frame, or fall back to the previous capture for that camera.

    Returns:
        ``(status, preview_image_or_None, history, asset_checkboxes, cam_files)``.
    """
    if image is None:
        previous = cam_files.get(cam_id)
        if previous and os.path.exists(previous):
            return f"Showing previous capture: {previous}", Image.open(previous), history, asset_checkboxes, cam_files
        return "No image captured", None, history, asset_checkboxes, cam_files

    # A new frame arrived: persist it and remember it as this camera's latest file.
    filename = generate_filename(f"cam{cam_id}")
    image.save(filename)
    history.append(f"Snapshot from Cam {cam_id}: {filename}")
    asset_checkboxes[filename] = True
    cam_files[cam_id] = filename
    return f"Image saved as {filename}", Image.open(filename), history, asset_checkboxes, cam_files
197
+
198
def download_pdfs(urls, history, asset_checkboxes):
    """Download every URL listed (one per line) and register the saved PDFs.

    Args:
        urls: Text with one URL per line; ``None`` is treated as empty.
        history: List that receives one entry per successful download.
        asset_checkboxes: Dict in which each downloaded path is marked ``True``.

    Returns:
        ``(status_message, history, asset_checkboxes)``.
    """
    downloaded = []
    # splitlines() copes with \r\n pastes; stripping each line keeps stray
    # whitespace out of the request and the file name.
    cleaned = (line.strip() for line in (urls or "").splitlines())
    for url in filter(None, cleaned):
        output_path = pdf_url_to_filename(url)
        if download_pdf(url, output_path):
            downloaded.append(output_path)
            history.append(f"Downloaded PDF: {output_path}")
            asset_checkboxes[output_path] = True
    return f"Downloaded {len(downloaded)} PDFs", history, asset_checkboxes
209
+
210
def upload_pdfs(pdf_files, history, asset_checkboxes):
    """Copy uploaded PDFs into the working directory and register them.

    Accepts the shapes an upload widget may deliver: raw ``bytes``, a path, or
    a file-like object with ``read()`` and optionally ``name``. ``None`` or an
    empty list means nothing was uploaded.

    Returns:
        ``(status_message, history, asset_checkboxes)``.
    """
    uploaded = []
    for index, pdf_file in enumerate(pdf_files or []):
        if not pdf_file:
            continue
        fallback_name = f"upload_{index}.pdf"
        if isinstance(pdf_file, (bytes, bytearray)):
            # Raw bytes carry no file name.
            data, source_name = bytes(pdf_file), fallback_name
        elif isinstance(pdf_file, (str, os.PathLike)):
            source_name = os.fspath(pdf_file)
            with open(source_name, "rb") as src:
                data = src.read()
        else:
            source_name = getattr(pdf_file, "name", None) or fallback_name
            data = pdf_file.read()
        # Keep only the base name: the source may be a full temp-file path,
        # which must not be embedded in the output file name.
        output_path = f"uploaded_{int(time.time())}_{os.path.basename(str(source_name))}"
        with open(output_path, "wb") as f:
            f.write(data)
        uploaded.append(output_path)
        history.append(f"Uploaded PDF: {output_path}")
        asset_checkboxes[output_path] = True
    return f"Uploaded {len(uploaded)} PDFs", history, asset_checkboxes
221
+
222
def snapshot_pdfs(mode, history, asset_checkboxes):
    """Render PNG snapshots of every selected PDF in the working directory.

    Args:
        mode: Display label of the snapshot mode chosen in the UI.
        history: List that receives one entry per snapshot file.
        asset_checkboxes: Dict of path -> selected flag; new snapshots are added as ``True``.

    Returns:
        ``(status_message, snapshot_images, history, asset_checkboxes)``.
    """
    selected_pdfs = [path for path in get_gallery_files() if path.endswith('.pdf') and asset_checkboxes.get(path, False)]
    if not selected_pdfs:
        return "No PDFs selected", [], history, asset_checkboxes
    mode_key = {"Single Page (High-Res)": "single", "Two Pages (High-Res)": "twopage", "All Pages (High-Res)": "allpages"}.get(mode)
    if mode_key is None:
        # The mode selector may still be empty; report it instead of raising KeyError.
        return "No snapshot mode selected", [], history, asset_checkboxes
    snapshots = []
    for pdf_path in selected_pdfs:
        snap_files = asyncio.run(process_pdf_snapshot(pdf_path, mode_key))
        for snap in snap_files:
            snapshots.append(Image.open(snap))
            asset_checkboxes[snap] = True
            history.append(f"Snapshot {mode_key}: {snap}")
    return f"Generated {len(snapshots)} snapshots", snapshots, history, asset_checkboxes
235
+
236
def process_ocr_all(history, asset_checkboxes):
    """OCR every gallery file and write a combined Markdown report.

    PNG files are used as-is; for any other file the first page is rendered at
    2x zoom. Each file also gets its own text output via ``process_ocr``.

    Returns:
        ``(status_message, history, asset_checkboxes)``.
    """
    all_files = get_gallery_files()
    if not all_files:
        return "No assets to OCR", history, asset_checkboxes
    sections = ["# OCR Results\n\n"]
    for file in all_files:
        if file.endswith('.png'):
            image = Image.open(file)
        else:
            doc = fitz.open(file)
            try:
                pix = doc[0].get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
                image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
            finally:
                # Release the document even if rendering fails.
                doc.close()
        output_file = generate_filename(f"ocr_{os.path.basename(file)}", "txt")
        result = asyncio.run(process_ocr(image, output_file))
        sections.append(f"## {os.path.basename(file)}\n\n{result}\n\n")
        history.append(f"OCR Test: {file} -> {output_file}")
    md_output_file = f"full_ocr_{int(time.time())}.md"
    # OCR text is frequently non-ASCII; write UTF-8 regardless of the locale.
    with open(md_output_file, "w", encoding="utf-8") as f:
        f.write("".join(sections))
    return f"Full OCR saved to {md_output_file}", history, asset_checkboxes
257
+
258
def process_ocr_single(file_path, history, asset_checkboxes):
    """OCR one selected file and return its status, input image and text.

    Returns:
        ``(status_message, image_or_None, ocr_text, history, asset_checkboxes)``.
    """
    if not file_path:
        return "No file selected", None, "", history, asset_checkboxes

    if not file_path.endswith('.png'):
        # Anything that is not a PNG is opened with fitz; page 0 is rendered at 2x.
        doc = fitz.open(file_path)
        pix = doc[0].get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
        image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
        doc.close()
    else:
        image = Image.open(file_path)

    output_file = generate_filename("ocr_output", "txt")
    result = asyncio.run(process_ocr(image, output_file))
    history.append(f"OCR Test: {file_path} -> {output_file}")
    status = f"OCR output saved to {output_file}"
    return status, image, result, history, asset_checkboxes
272
+
273
def build_model(model_type, base_model, model_name, domain, history):
    """Load ``base_model``, save it under the configured path and log the build.

    ``"Causal LM"`` selects ``ModelConfig``/``ModelBuilder``; any other value
    selects ``DiffusionConfig``/``DiffusionBuilder``.

    Returns:
        ``(builder, status_message, history)``.
    """
    if model_type == "Causal LM":
        config_cls = ModelConfig
    else:
        config_cls = DiffusionConfig
    config = config_cls(name=model_name, base_model=base_model, size="small", domain=domain)
    if model_type == "Causal LM":
        builder = ModelBuilder()
    else:
        builder = DiffusionBuilder()
    builder.load_model(base_model, config)
    target_dir = config.model_path
    builder.save_model(target_dir)
    history.append(f"Built {model_type} model: {model_name}")
    return builder, f"Model saved to {target_dir}", history
280
+
281
def image_gen(prompt, file_path, builder, history, asset_checkboxes):
    """Generate an image from ``prompt`` and register the saved PNG.

    Args:
        prompt: Text prompt passed to the diffusion pipeline.
        file_path: Selected reference file; must be non-empty, but its content
            is not used because generation is driven by the prompt alone.
        builder: Optional builder whose pipeline should be used.
        history: List that receives one entry for the generation.
        asset_checkboxes: Dict in which the output file is marked ``True``.

    Returns:
        ``(status_message, generated_image_or_None, history, asset_checkboxes)``.
    """
    if not file_path:
        return "No file selected", None, history, asset_checkboxes
    # The reference file used to be opened and rendered into a local that was
    # never read; that dead work (and its unclosed handle) is gone.
    output_file = generate_filename("gen_output", "png")
    gen_image = asyncio.run(process_image_gen(prompt, output_file, builder))
    history.append(f"Image Gen Test: {prompt} -> {output_file}")
    asset_checkboxes[output_file] = True
    return f"Image saved to {output_file}", gen_image, history, asset_checkboxes
296
+
297
+ # Gradio UI
298
# Gradio layout: a left column with gallery, history and logs, and a right
# column of tabs (camera, PDFs, OCR, model building, image generation).
with gr.Blocks(title="AI Vision & SFT Titans 🚀") as demo:
    gr.Markdown("# AI Vision & SFT Titans 🚀")
    # Per-session state shared by the handlers.
    history = gr.State(value=[])
    builder = gr.State(value=None)
    asset_checkboxes = gr.State(value={})
    cam_files = gr.State(value={})

    def render_history(h):
        """Return the five most recent history entries, one per line."""
        return "\n".join(h[-5:])

    def wire(button, fn, inputs, outputs):
        """Attach ``fn`` to ``button`` and refresh the history box afterwards.

        Without the chained step the history textbox would only be filled on
        page load and never reflect later actions.
        """
        button.click(fn, inputs=inputs, outputs=outputs).then(render_history, inputs=[history], outputs=[history_output])

    def format_log_record(r):
        """Render one captured log record as ``time - level - message``."""
        # asctime is only present once a formatter has processed the record.
        stamp = getattr(r, "asctime", None) or logging.Formatter().formatTime(r)
        return f"{stamp} - {r.levelname} - {r.getMessage()}"

    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("## Captured Files 📜")
            gallery_output = gr.Gallery(label="Asset Gallery", columns=2, height="auto")
            update_gallery_btn = gr.Button("Update Gallery")
            gr.Markdown("## History 📜")
            history_output = gr.Textbox(label="History", lines=5, interactive=False)
            gr.Markdown("## Action Logs 📜")
            log_output = gr.Textbox(label="Logs", value="\n".join([format_log_record(r) for r in log_records]), lines=5, interactive=False)
            # Wired here because the handler chain needs history_output to exist.
            wire(update_gallery_btn, update_gallery, [history, asset_checkboxes], [gallery_output, history, asset_checkboxes])

        with gr.Column(scale=3):
            with gr.Tabs():
                with gr.TabItem("Camera Snap 📷"):
                    with gr.Row():
                        cam0_input = gr.Image(type="pil", label="Camera 0")
                        cam1_input = gr.Image(type="pil", label="Camera 1")
                    with gr.Row():
                        cam0_output = gr.Textbox(label="Cam 0 Status")
                        cam1_output = gr.Textbox(label="Cam 1 Status")
                    with gr.Row():
                        cam0_image = gr.Image(label="Cam 0 Preview")
                        cam1_image = gr.Image(label="Cam 1 Preview")
                    wire(gr.Button("Capture Cam 0"), camera_snap, [cam0_input, gr.State(value=0), history, asset_checkboxes, cam_files], [cam0_output, cam0_image, history, asset_checkboxes, cam_files])
                    wire(gr.Button("Capture Cam 1"), camera_snap, [cam1_input, gr.State(value=1), history, asset_checkboxes, cam_files], [cam1_output, cam1_image, history, asset_checkboxes, cam_files])

                with gr.TabItem("Download PDFs 📥"):
                    url_input = gr.Textbox(label="Enter PDF URLs (one per line)", lines=5)
                    pdf_upload = gr.File(label="Upload PDFs", file_count="multiple", type="binary")
                    pdf_output = gr.Textbox(label="Status")
                    snapshot_mode = gr.Dropdown(["Single Page (High-Res)", "Two Pages (High-Res)", "All Pages (High-Res)"], label="Snapshot Mode")
                    snapshot_output = gr.Textbox(label="Snapshot Status")
                    snapshot_images = gr.Gallery(label="Snapshots", columns=2, height="auto")
                    wire(gr.Button("Download URLs"), download_pdfs, [url_input, history, asset_checkboxes], [pdf_output, history, asset_checkboxes])
                    wire(gr.Button("Upload PDFs"), upload_pdfs, [pdf_upload, history, asset_checkboxes], [pdf_output, history, asset_checkboxes])
                    wire(gr.Button("Snapshot Selected"), snapshot_pdfs, [snapshot_mode, history, asset_checkboxes], [snapshot_output, snapshot_images, history, asset_checkboxes])

                with gr.TabItem("Test OCR 🔍"):
                    # Choices are read once, when the UI is built.
                    all_files = gr.Dropdown(choices=get_gallery_files(), label="Select File")
                    ocr_output = gr.Textbox(label="Status")
                    ocr_image = gr.Image(label="Input Image")
                    ocr_result = gr.Textbox(label="OCR Result", lines=5)
                    wire(gr.Button("OCR All Assets"), process_ocr_all, [history, asset_checkboxes], [ocr_output, history, asset_checkboxes])
                    wire(gr.Button("OCR Selected"), process_ocr_single, [all_files, history, asset_checkboxes], [ocr_output, ocr_image, ocr_result, history, asset_checkboxes])

                with gr.TabItem("Build Titan 🌱"):
                    model_type = gr.Dropdown(["Causal LM", "Diffusion"], label="Model Type")
                    base_model = gr.Dropdown(
                        choices=["HuggingFaceTB/SmolLM-135M", "Qwen/Qwen1.5-0.5B-Chat"],
                        label="Base Model",
                        value="HuggingFaceTB/SmolLM-135M"
                    )
                    model_name = gr.Textbox(label="Model Name", value=f"tiny-titan-{int(time.time())}")
                    domain = gr.Textbox(label="Target Domain", value="general")
                    build_output = gr.Textbox(label="Status")
                    wire(gr.Button("Build"), build_model, [model_type, base_model, model_name, domain, history], [builder, build_output, history])

                with gr.TabItem("Test Image Gen 🎨"):
                    # Choices are read once, when the UI is built.
                    gen_file = gr.Dropdown(choices=get_gallery_files(), label="Select Reference File")
                    gen_prompt = gr.Textbox(label="Prompt", value="Generate a neon superhero version of this image")
                    gen_output = gr.Textbox(label="Status")
                    gen_image = gr.Image(label="Generated Image")
                    wire(gr.Button("Generate"), image_gen, [gen_prompt, gen_file, builder, history, asset_checkboxes], [gen_output, gen_image, history, asset_checkboxes])

    # Fill the history box when the page loads; later refreshes come from wire().
    demo.load(render_history, inputs=[history], outputs=[history_output])

demo.launch()