awacke1 commited on
Commit
0daf532
·
verified ·
1 Parent(s): 67a1ae5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +88 -402
app.py CHANGED
@@ -1,25 +1,16 @@
1
  #!/usr/bin/env python3
2
  import os
3
- import glob
4
- import base64
5
  import streamlit as st
6
- import pandas as pd
7
- import torch
8
- from transformers import AutoModelForCausalLM, AutoTokenizer
9
- from torch.utils.data import Dataset, DataLoader
10
- import csv
11
- import time
12
- from dataclasses import dataclass
13
- from typing import Optional, Tuple
14
- import zipfile
15
- import math
16
  from PIL import Image
17
- import random
18
- import logging
 
 
19
  import numpy as np
20
- from diffusers import StableDiffusionPipeline, DDPMPipeline, EulerAncestralDiscreteScheduler
 
21
 
22
- # Logging setup with a custom buffer
23
  logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
24
  logger = logging.getLogger(__name__)
25
  log_records = []
@@ -32,196 +23,16 @@ logger.addHandler(LogCaptureHandler())
32
 
33
  # Page Configuration
34
  st.set_page_config(
35
- page_title="SFT Tiny Titans 🚀",
36
  page_icon="🤖",
37
  layout="wide",
38
  initial_sidebar_state="expanded",
39
- menu_items={
40
- 'Get Help': 'https://huggingface.co/awacke1',
41
- 'Report a Bug': 'https://huggingface.co/spaces/awacke1',
42
- 'About': "Tiny Titans: Small models, big dreams, and a sprinkle of chaos! 🌌"
43
- }
44
  )
45
 
46
  # Initialize st.session_state
47
  if 'captured_images' not in st.session_state:
48
  st.session_state['captured_images'] = []
49
- if 'builder' not in st.session_state:
50
- st.session_state['builder'] = None
51
- if 'model_loaded' not in st.session_state:
52
- st.session_state['model_loaded'] = False
53
-
54
- # Model Configuration Classes
55
- @dataclass
56
- class ModelConfig:
57
- name: str
58
- base_model: str
59
- size: str
60
- domain: Optional[str] = None
61
- model_type: str = "causal_lm"
62
- @property
63
- def model_path(self):
64
- return f"models/{self.name}"
65
-
66
- @dataclass
67
- class DiffusionConfig:
68
- name: str
69
- base_model: str
70
- size: str
71
- @property
72
- def model_path(self):
73
- return f"diffusion_models/{self.name}"
74
-
75
- # Datasets
76
- class SFTDataset(Dataset):
77
- def __init__(self, data, tokenizer, max_length=128):
78
- self.data = data
79
- self.tokenizer = tokenizer
80
- self.max_length = max_length
81
- def __len__(self):
82
- return len(self.data)
83
- def __getitem__(self, idx):
84
- prompt = self.data[idx]["prompt"]
85
- response = self.data[idx]["response"]
86
- full_text = f"{prompt} {response}"
87
- full_encoding = self.tokenizer(full_text, max_length=self.max_length, padding="max_length", truncation=True, return_tensors="pt")
88
- prompt_encoding = self.tokenizer(prompt, max_length=self.max_length, padding=False, truncation=True, return_tensors="pt")
89
- input_ids = full_encoding["input_ids"].squeeze()
90
- attention_mask = full_encoding["attention_mask"].squeeze()
91
- labels = input_ids.clone()
92
- prompt_len = prompt_encoding["input_ids"].shape[1]
93
- if prompt_len < self.max_length:
94
- labels[:prompt_len] = -100
95
- return {"input_ids": input_ids, "attention_mask": attention_mask, "labels": labels}
96
-
97
- class DiffusionDataset(Dataset):
98
- def __init__(self, images, texts):
99
- self.images = images
100
- self.texts = texts
101
- def __len__(self):
102
- return len(self.images)
103
- def __getitem__(self, idx):
104
- return {"image": self.images[idx], "text": self.texts[idx]}
105
-
106
- # Model Builders
107
- class ModelBuilder:
108
- def __init__(self):
109
- self.config = None
110
- self.model = None
111
- self.tokenizer = None
112
- self.sft_data = None
113
- self.jokes = ["Why did the AI go to therapy? Too many layers to unpack! 😂", "Training complete! Time for a binary coffee break. ☕"]
114
- def load_model(self, model_path: str, config: Optional[ModelConfig] = None):
115
- with st.spinner(f"Loading {model_path}... ⏳"):
116
- self.model = AutoModelForCausalLM.from_pretrained(model_path)
117
- self.tokenizer = AutoTokenizer.from_pretrained(model_path)
118
- if self.tokenizer.pad_token is None:
119
- self.tokenizer.pad_token = self.tokenizer.eos_token
120
- if config:
121
- self.config = config
122
- self.model.to("cuda" if torch.cuda.is_available() else "cpu")
123
- st.success(f"Model loaded! 🎉 {random.choice(self.jokes)}")
124
- return self
125
- def fine_tune_sft(self, csv_path: str, epochs: int = 3, batch_size: int = 4):
126
- self.sft_data = []
127
- with open(csv_path, "r") as f:
128
- reader = csv.DictReader(f)
129
- for row in reader:
130
- self.sft_data.append({"prompt": row["prompt"], "response": row["response"]})
131
- dataset = SFTDataset(self.sft_data, self.tokenizer)
132
- dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
133
- optimizer = torch.optim.AdamW(self.model.parameters(), lr=2e-5)
134
- self.model.train()
135
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
136
- self.model.to(device)
137
- for epoch in range(epochs):
138
- with st.spinner(f"Training epoch {epoch + 1}/{epochs}... ⚙️"):
139
- total_loss = 0
140
- for batch in dataloader:
141
- optimizer.zero_grad()
142
- input_ids = batch["input_ids"].to(device)
143
- attention_mask = batch["attention_mask"].to(device)
144
- labels = batch["labels"].to(device)
145
- outputs = self.model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
146
- loss = outputs.loss
147
- loss.backward()
148
- optimizer.step()
149
- total_loss += loss.item()
150
- st.write(f"Epoch {epoch + 1} completed. Average loss: {total_loss / len(dataloader):.4f}")
151
- st.success(f"SFT Fine-tuning completed! 🎉 {random.choice(self.jokes)}")
152
- return self
153
- def save_model(self, path: str):
154
- with st.spinner("Saving model... 💾"):
155
- os.makedirs(os.path.dirname(path), exist_ok=True)
156
- self.model.save_pretrained(path)
157
- self.tokenizer.save_pretrained(path)
158
- st.success(f"Model saved at {path}! ✅")
159
- def evaluate(self, prompt: str, status_container=None):
160
- self.model.eval()
161
- if status_container:
162
- status_container.write("Preparing to evaluate... 🧠")
163
- try:
164
- with torch.no_grad():
165
- inputs = self.tokenizer(prompt, return_tensors="pt", max_length=128, truncation=True).to(self.model.device)
166
- outputs = self.model.generate(**inputs, max_new_tokens=50, do_sample=True, top_p=0.95, temperature=0.7)
167
- return self.tokenizer.decode(outputs[0], skip_special_tokens=True)
168
- except Exception as e:
169
- if status_container:
170
- status_container.error(f"Oops! Something broke: {str(e)} 💥")
171
- return f"Error: {str(e)}"
172
-
173
- class DiffusionBuilder:
174
- def __init__(self):
175
- self.config = None
176
- self.pipeline = None
177
- self.model_type = None
178
- def load_model(self, model_path: str, config: Optional[DiffusionConfig] = None, model_type: str = "StableDiffusion", download: bool = True):
179
- with st.spinner(f"{'Downloading' if download else 'Loading'} {model_path}... ⏳"):
180
- if model_type == "StableDiffusion":
181
- self.pipeline = StableDiffusionPipeline.from_pretrained(model_path, torch_dtype=torch.float32, local_files_only=not download).to("cpu")
182
- elif model_type == "DDPM":
183
- self.pipeline = DDPMPipeline.from_pretrained(model_path, torch_dtype=torch.float32, local_files_only=not download).to("cpu")
184
- self.pipeline.scheduler = EulerAncestralDiscreteScheduler.from_config(self.pipeline.scheduler.config)
185
- if config:
186
- self.config = config
187
- self.model_type = model_type
188
- st.success(f"Diffusion model {'downloaded' if download else 'loaded'}! 🎨")
189
- return self
190
- def fine_tune_sft(self, images, texts, epochs=3):
191
- dataset = DiffusionDataset(images, texts)
192
- dataloader = DataLoader(dataset, batch_size=1, shuffle=True)
193
- optimizer = torch.optim.AdamW(self.pipeline.unet.parameters(), lr=1e-5)
194
- self.pipeline.unet.train()
195
- for epoch in range(epochs):
196
- with st.spinner(f"Training diffusion epoch {epoch + 1}/{epochs}... ⚙️"):
197
- total_loss = 0
198
- for batch in dataloader:
199
- optimizer.zero_grad()
200
- image = batch["image"][0].to(self.pipeline.device)
201
- text = batch["text"][0]
202
- latents = self.pipeline.vae.encode(torch.tensor(np.array(image)).permute(2, 0, 1).unsqueeze(0).float().to(self.pipeline.device)).latent_dist.sample()
203
- noise = torch.randn_like(latents)
204
- timesteps = torch.randint(0, self.pipeline.scheduler.num_train_timesteps, (latents.shape[0],), device=latents.device)
205
- noisy_latents = self.pipeline.scheduler.add_noise(latents, noise, timesteps)
206
- text_embeddings = self.pipeline.text_encoder(self.pipeline.tokenizer(text, return_tensors="pt").input_ids.to(self.pipeline.device))[0]
207
- pred_noise = self.pipeline.unet(noisy_latents, timesteps, encoder_hidden_states=text_embeddings).sample
208
- loss = torch.nn.functional.mse_loss(pred_noise, noise)
209
- loss.backward()
210
- optimizer.step()
211
- total_loss += loss.item()
212
- st.write(f"Epoch {epoch + 1} completed. Average loss: {total_loss / len(dataloader):.4f}")
213
- st.success("Diffusion SFT Fine-tuning completed! 🎨")
214
- return self
215
- def save_model(self, path: str):
216
- with st.spinner("Saving diffusion model... 💾"):
217
- os.makedirs(os.path.dirname(path), exist_ok=True)
218
- self.pipeline.save_pretrained(path)
219
- st.success(f"Diffusion model saved at {path}! ✅")
220
- def generate(self, prompt: str, image=None):
221
- if self.model_type == "StableDiffusion":
222
- return self.pipeline(prompt, num_inference_steps=50).images[0]
223
- elif self.model_type == "DDPM":
224
- return self.pipeline(num_inference_steps=50).images[0]
225
 
226
  # Utility Functions
227
  def generate_filename(sequence, ext="png"):
@@ -231,22 +42,6 @@ def generate_filename(sequence, ext="png"):
231
  timestamp = datetime.now(central).strftime("%d%m%Y%H%M%S%p")
232
  return f"{sequence}{timestamp}.{ext}"
233
 
234
- def get_download_link(file_path, mime_type="text/plain", label="Download"):
235
- with open(file_path, 'rb') as f:
236
- data = f.read()
237
- b64 = base64.b64encode(data).decode()
238
- return f'<a href="data:{mime_type};base64,{b64}" download="{os.path.basename(file_path)}">{label} 📥</a>'
239
-
240
- def zip_directory(directory_path, zip_path):
241
- with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
242
- for root, _, files in os.walk(directory_path):
243
- for file in files:
244
- zipf.write(os.path.join(root, file), os.path.relpath(os.path.join(root, file), os.path.dirname(directory_path)))
245
-
246
- def get_model_files(model_type="causal_lm"):
247
- path = "models/*" if model_type == "causal_lm" else "diffusion_models/*"
248
- return [d for d in glob.glob(path) if os.path.isdir(d)]
249
-
250
  def get_gallery_files(file_types):
251
  return sorted([f for ext in file_types for f in glob.glob(f"*.{ext}")])
252
 
@@ -257,119 +52,55 @@ def update_gallery():
257
  for idx, file in enumerate(media_files[:gallery_size * 2]):
258
  with cols[idx % 2]:
259
  st.image(Image.open(file), caption=file, use_container_width=True)
260
- st.markdown(get_download_link(file, "image/png", "Download Image"), unsafe_allow_html=True)
261
-
262
- # Mock Search Tool for RAG
263
- def mock_search(query: str) -> str:
264
- if "superhero" in query.lower():
265
- return "Latest trends: Gold-plated Batman statues, VR superhero battles."
266
- return "No relevant results found."
267
 
268
- class PartyPlannerAgent:
269
- def __init__(self, model, tokenizer):
270
- self.model = model
271
- self.tokenizer = tokenizer
272
- self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
273
- self.model.to(self.device)
274
- def generate(self, prompt: str) -> str:
275
- self.model.eval()
276
- with torch.no_grad():
277
- inputs = self.tokenizer(prompt, return_tensors="pt", max_length=128, truncation=True).to(self.device)
278
- outputs = self.model.generate(**inputs, max_new_tokens=100, do_sample=True, top_p=0.95, temperature=0.7)
279
- return self.tokenizer.decode(outputs[0], skip_special_tokens=True)
280
- def plan_party(self, task: str) -> pd.DataFrame:
281
- search_result = mock_search("superhero party trends")
282
- prompt = f"Given this context: '{search_result}'\n{task}"
283
- plan_text = self.generate(prompt)
284
- locations = {"Wayne Manor": (42.3601, -71.0589), "New York": (40.7128, -74.0060)}
285
- wayne_coords = locations["Wayne Manor"]
286
- travel_times = {loc: calculate_cargo_travel_time(coords, wayne_coords) for loc, coords in locations.items() if loc != "Wayne Manor"}
287
- data = [
288
- {"Location": "New York", "Travel Time (hrs)": travel_times["New York"], "Luxury Idea": "Gold-plated Batman statues"},
289
- {"Location": "Wayne Manor", "Travel Time (hrs)": 0.0, "Luxury Idea": "VR superhero battles"}
290
- ]
291
- return pd.DataFrame(data)
292
-
293
- class CVPartyPlannerAgent:
294
- def __init__(self, pipeline):
295
- self.pipeline = pipeline
296
- def generate(self, prompt: str) -> Image.Image:
297
- return self.pipeline(prompt, num_inference_steps=50).images[0]
298
- def plan_party(self, task: str) -> pd.DataFrame:
299
- search_result = mock_search("superhero party trends")
300
- prompt = f"Given this context: '{search_result}'\n{task}"
301
- data = [
302
- {"Theme": "Batman", "Image Idea": "Gold-plated Batman statue"},
303
- {"Theme": "Avengers", "Image Idea": "VR superhero battle scene"}
304
- ]
305
- return pd.DataFrame(data)
306
-
307
- def calculate_cargo_travel_time(origin_coords: Tuple[float, float], destination_coords: Tuple[float, float], cruising_speed_kmh: float = 750.0) -> float:
308
- def to_radians(degrees: float) -> float:
309
- return degrees * (math.pi / 180)
310
- lat1, lon1 = map(to_radians, origin_coords)
311
- lat2, lon2 = map(to_radians, destination_coords)
312
- EARTH_RADIUS_KM = 6371.0
313
- dlon = lon2 - lon1
314
- dlat = lat2 - lat1
315
- a = (math.sin(dlat / 2) ** 2 + math.cos(lat1) * math.cos(lat2) * math.sin(dlon / 2) ** 2)
316
- c = 2 * math.asin(math.sqrt(a))
317
- distance = EARTH_RADIUS_KM * c
318
- actual_distance = distance * 1.1
319
- flight_time = (actual_distance / cruising_speed_kmh) + 1.0
320
- return round(flight_time, 2)
321
 
322
  # Main App
323
- st.title("SFT Tiny Titans 🚀 (Small but Mighty!)")
324
 
325
- # Sidebar Galleries
326
- st.sidebar.header("Media Gallery 🎨")
327
  gallery_size = st.sidebar.slider("Gallery Size", 1, 10, 4)
328
  update_gallery()
329
 
330
- st.sidebar.subheader("Model Management 🗂️")
331
- model_type = st.sidebar.selectbox("Model Type", ["Causal LM", "Diffusion"])
332
- model_dirs = get_model_files("causal_lm" if model_type == "Causal LM" else "diffusion")
333
- selected_model = st.sidebar.selectbox("Select Saved Model", ["None"] + model_dirs)
334
- if selected_model != "None" and st.sidebar.button("Load Model 📂"):
335
- builder = ModelBuilder() if model_type == "Causal LM" else DiffusionBuilder()
336
- config = (ModelConfig if model_type == "Causal LM" else DiffusionConfig)(name=os.path.basename(selected_model), base_model="unknown", size="small")
337
- builder.load_model(selected_model, config)
338
- st.session_state['builder'] = builder
339
- st.session_state['model_loaded'] = True
340
- st.rerun()
341
 
342
  # Tabs
343
- tab1, tab2, tab3, tab4 = st.tabs(["Build Titan & Camera Snap 🌱📷", "Fine-Tune Titan 🔧", "Test Titan 🧪", "Agentic RAG Party 🌐"])
344
 
345
  with tab1:
346
- st.header("Build Titan & Camera Snap 🌱📷")
347
- st.subheader("Build Titan 🌱")
348
- model_type = st.selectbox("Model Type", ["Causal LM", "Diffusion"], key="build_type")
349
- base_model_options = {
350
- "Causal LM": ["HuggingFaceTB/SmolLM-135M", "Qwen/Qwen1.5-0.5B-Chat"],
351
- "Diffusion": [
352
- "OFA-Sys/small-stable-diffusion-v0 (LDM/Conditional, ~300 MB)",
353
- "google/ddpm-ema-celebahq-256 (DDPM/SDE/Autoregressive Proxy, ~280 MB)"
354
- ]
355
- }
356
- base_model = st.selectbox("Select Tiny Model", base_model_options[model_type])
357
- action = st.radio("Action", ["Use Model", "Download Model"], index=0 if "Causal LM" in model_type else 1)
358
- model_name = st.text_input("Model Name (for Download)", f"tiny-titan-{int(time.time())}") if action == "Download Model" else None
359
- if st.button(f"{action} ⬇️"):
360
- config = (ModelConfig if model_type == "Causal LM" else DiffusionConfig)(name=model_name or base_model.split(" ")[0], base_model=base_model.split(" ")[0], size="small")
361
- builder = ModelBuilder() if model_type == "Causal LM" else DiffusionBuilder()
362
- model_type_for_diffusion = "StableDiffusion" if "small-stable-diffusion" in base_model else "DDPM"
363
- builder.load_model(base_model.split(" ")[0], config, model_type_for_diffusion, download=action == "Download Model")
364
- if action == "Download Model":
365
- builder.save_model(config.model_path)
366
- st.session_state['builder'] = builder
367
- st.session_state['model_loaded'] = True
368
- st.rerun()
369
-
370
- st.subheader("Camera Snap 📷")
371
  slice_count = st.number_input("Image Slice Count", min_value=1, max_value=20, value=10)
372
- video_length = st.number_input("Video Length (seconds)", min_value=1, max_value=30, value=10)
373
  cols = st.columns(2)
374
  with cols[0]:
375
  st.subheader("Camera 0")
@@ -429,97 +160,52 @@ with tab1:
429
  st.image(Image.open(frame), caption=frame, use_container_width=True)
430
 
431
  with tab2:
432
- st.header("Fine-Tune Titan 🔧")
433
- if 'builder' not in st.session_state or not st.session_state.get('model_loaded', False):
434
- st.warning("Please build or load a Titan first! ⚠️")
435
- else:
436
- if isinstance(st.session_state['builder'], ModelBuilder):
437
- uploaded_csv = st.file_uploader("Upload CSV for SFT", type="csv")
438
- if uploaded_csv and st.button("Fine-Tune with Uploaded CSV 🔄"):
439
- csv_path = f"uploaded_sft_data_{int(time.time())}.csv"
440
- with open(csv_path, "wb") as f:
441
- f.write(uploaded_csv.read())
442
- new_model_name = f"{st.session_state['builder'].config.name}-sft-{int(time.time())}"
443
- new_config = ModelConfig(name=new_model_name, base_model=st.session_state['builder'].config.base_model, size="small")
444
- st.session_state['builder'].config = new_config
445
- st.session_state['builder'].fine_tune_sft(csv_path)
446
- st.session_state['builder'].save_model(new_config.model_path)
447
- zip_path = f"{new_config.model_path}.zip"
448
- zip_directory(new_config.model_path, zip_path)
449
- st.markdown(get_download_link(zip_path, "application/zip", "Download Fine-Tuned Titan"), unsafe_allow_html=True)
450
- elif isinstance(st.session_state['builder'], DiffusionBuilder):
451
- captured_images = get_gallery_files(["png"])
452
- if len(captured_images) >= 2:
453
- demo_data = [{"image": img, "text": f"Superhero {os.path.basename(img).split('.')[0]}"} for img in captured_images[:min(len(captured_images), slice_count)]]
454
- edited_data = st.data_editor(pd.DataFrame(demo_data), num_rows="dynamic")
455
- if st.button("Fine-Tune with Dataset 🔄"):
456
- images = [Image.open(row["image"]) for _, row in edited_data.iterrows()]
457
- texts = [row["text"] for _, row in edited_data.iterrows()]
458
- new_model_name = f"{st.session_state['builder'].config.name}-sft-{int(time.time())}"
459
- new_config = DiffusionConfig(name=new_model_name, base_model=st.session_state['builder'].config.base_model, size="small")
460
- st.session_state['builder'].config = new_config
461
- st.session_state['builder'].fine_tune_sft(images, texts)
462
- st.session_state['builder'].save_model(new_config.model_path)
463
- zip_path = f"{new_config.model_path}.zip"
464
- zip_directory(new_config.model_path, zip_path)
465
- st.markdown(get_download_link(zip_path, "application/zip", "Download Fine-Tuned Diffusion Model"), unsafe_allow_html=True)
466
- csv_path = f"sft_dataset_{int(time.time())}.csv"
467
- with open(csv_path, "w", newline="") as f:
468
- writer = csv.writer(f)
469
- writer.writerow(["image", "text"])
470
- for _, row in edited_data.iterrows():
471
- writer.writerow([row["image"], row["text"]])
472
- st.markdown(get_download_link(csv_path, "text/csv", "Download SFT Dataset CSV"), unsafe_allow_html=True)
473
 
474
  with tab3:
475
- st.header("Test Titan 🧪")
476
- if 'builder' not in st.session_state or not st.session_state.get('model_loaded', False):
477
- st.warning("Please build or load a Titan first! ⚠️")
478
- else:
479
- captured_images = get_gallery_files(["png"])
480
- if captured_images:
481
- selected_image = st.selectbox("Select Image", captured_images)
482
- prompt = st.text_area("Enter Text Prompt", f"Superhero {os.path.basename(selected_image).split('.')[0]}")
483
- pipeline_options = ["Stable Diffusion (LDM/Conditional)", "DDPM (DDPM/SDE/Autoregressive Proxy)"] if isinstance(st.session_state['builder'], DiffusionBuilder) else ["Causal LM"]
484
- selected_pipeline = st.selectbox("Select Pipeline", pipeline_options)
485
- if st.button("Run Test 🚀"):
486
- if isinstance(st.session_state['builder'], ModelBuilder):
487
- result = st.session_state['builder'].evaluate(prompt)
488
- st.write(f"**Generated Response**: {result}")
489
- elif isinstance(st.session_state['builder'], DiffusionBuilder):
490
- if selected_pipeline == "Stable Diffusion (LDM/Conditional)":
491
- image = st.session_state['builder'].generate(prompt)
492
- else: # DDPM
493
- image = st.session_state['builder'].generate(prompt)
494
- st.image(image, caption=f"Generated from {selected_pipeline}")
495
 
496
  with tab4:
497
- st.header("Agentic RAG Party 🌐")
498
- if 'builder' not in st.session_state or not st.session_state.get('model_loaded', False):
499
- st.warning("Please build or load a Titan first! ⚠️")
500
- else:
501
- if isinstance(st.session_state['builder'], ModelBuilder):
502
- if st.button("Run NLP RAG Demo 🎉"):
503
- agent = PartyPlannerAgent(st.session_state['builder'].model, st.session_state['builder'].tokenizer)
504
- task = "Plan a luxury superhero-themed party at Wayne Manor."
505
- plan_df = agent.plan_party(task)
506
- st.dataframe(plan_df)
507
- elif isinstance(st.session_state['builder'], DiffusionBuilder):
508
- if st.button("Run CV RAG Demo 🎉"):
509
- agent = CVPartyPlannerAgent(st.session_state['builder'].pipeline)
510
- task = "Generate images for a luxury superhero-themed party."
511
- plan_df = agent.plan_party(task)
512
- st.dataframe(plan_df)
513
- for _, row in plan_df.iterrows():
514
- image = agent.generate(row["Image Idea"])
515
- st.image(image, caption=f"{row['Theme']} - {row['Image Idea']}")
516
-
517
- # Display Logs
518
- st.sidebar.subheader("Action Logs 📜")
519
- log_container = st.sidebar.empty()
520
- with log_container:
521
- for record in log_records:
522
- st.write(f"{record.asctime} - {record.levelname} - {record.message}")
523
 
524
  # Initial Gallery Update
525
  update_gallery()
 
1
  #!/usr/bin/env python3
2
  import os
 
 
3
  import streamlit as st
 
 
 
 
 
 
 
 
 
 
4
  from PIL import Image
5
+ import torch
6
+ from transformers import AutoProcessor, Qwen2VLForConditionalGeneration, AutoTokenizer, AutoModel
7
+ from diffusers import StableDiffusionPipeline
8
+ import cv2
9
  import numpy as np
10
+ import logging
11
+ from io import BytesIO
12
 
13
+ # Logging setup
14
  logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
15
  logger = logging.getLogger(__name__)
16
  log_records = []
 
23
 
24
  # Page Configuration
25
  st.set_page_config(
26
+ page_title="AI Vision Titans 🚀",
27
  page_icon="🤖",
28
  layout="wide",
29
  initial_sidebar_state="expanded",
30
+ menu_items={'About': "AI Vision Titans: OCR, Image Gen, Line Drawings on CPU! 🌌"}
 
 
 
 
31
  )
32
 
33
  # Initialize st.session_state
34
  if 'captured_images' not in st.session_state:
35
  st.session_state['captured_images'] = []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
 
37
  # Utility Functions
38
  def generate_filename(sequence, ext="png"):
 
42
  timestamp = datetime.now(central).strftime("%d%m%Y%H%M%S%p")
43
  return f"{sequence}{timestamp}.{ext}"
44
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  def get_gallery_files(file_types):
46
  return sorted([f for ext in file_types for f in glob.glob(f"*.{ext}")])
47
 
 
52
  for idx, file in enumerate(media_files[:gallery_size * 2]):
53
  with cols[idx % 2]:
54
  st.image(Image.open(file), caption=file, use_container_width=True)
 
 
 
 
 
 
 
55
 
56
+ # Model Loaders (Simplified, CPU-focused)
57
+ def load_ocr_qwen2vl():
58
+ model_id = "prithivMLmods/Qwen2-VL-OCR-2B-Instruct"
59
+ processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
60
+ model = Qwen2VLForConditionalGeneration.from_pretrained(model_id, trust_remote_code=True, torch_dtype=torch.float32).to("cpu").eval()
61
+ return processor, model
62
+
63
+ def load_ocr_got():
64
+ model_id = "ucaslcl/GOT-OCR2_0"
65
+ tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
66
+ model = AutoModel.from_pretrained(model_id, trust_remote_code=True, torch_dtype=torch.float32).to("cpu").eval()
67
+ return tokenizer, model
68
+
69
+ def load_image_gen():
70
+ model_id = "OFA-Sys/small-stable-diffusion-v0" # Small, CPU-friendly
71
+ pipeline = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float32).to("cpu")
72
+ return pipeline
73
+
74
+ def load_line_drawer():
75
+ # Simplified from your Torch Space (assuming a UNet-like model for edge detection)
76
+ # Placeholder: Using OpenCV edge detection as a minimal CPU example
77
+ def edge_detection(image):
78
+ img_np = np.array(image.convert("RGB"))
79
+ gray = cv2.cvtColor(img_np, cv2.COLOR_RGB2GRAY)
80
+ edges = cv2.Canny(gray, 100, 200)
81
+ return Image.fromarray(edges)
82
+ return edge_detection
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
 
84
  # Main App
85
+ st.title("AI Vision Titans 🚀 (OCR, Gen, Drawings!)")
86
 
87
+ # Sidebar Gallery
88
+ st.sidebar.header("Captured Images 🎨")
89
  gallery_size = st.sidebar.slider("Gallery Size", 1, 10, 4)
90
  update_gallery()
91
 
92
+ st.sidebar.subheader("Action Logs 📜")
93
+ log_container = st.sidebar.empty()
94
+ with log_container:
95
+ for record in log_records:
96
+ st.write(f"{record.asctime} - {record.levelname} - {record.message}")
 
 
 
 
 
 
97
 
98
  # Tabs
99
+ tab1, tab2, tab3, tab4 = st.tabs(["Camera Snap 📷", "Test OCR 🔍", "Test Image Gen 🎨", "Test Line Drawings ✏️"])
100
 
101
  with tab1:
102
+ st.header("Camera Snap 📷")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
  slice_count = st.number_input("Image Slice Count", min_value=1, max_value=20, value=10)
 
104
  cols = st.columns(2)
105
  with cols[0]:
106
  st.subheader("Camera 0")
 
160
  st.image(Image.open(frame), caption=frame, use_container_width=True)
161
 
162
  with tab2:
163
+ st.header("Test OCR 🔍")
164
+ captured_images = get_gallery_files(["png"])
165
+ if captured_images:
166
+ selected_image = st.selectbox("Select Image", captured_images)
167
+ image = Image.open(selected_image)
168
+ st.image(image, caption="Input Image", use_container_width=True)
169
+ ocr_model = st.selectbox("Select OCR Model", ["Qwen2-VL-OCR-2B", "GOT-OCR2_0"])
170
+ prompt = st.text_area("Prompt", "Extract text from the image")
171
+ if st.button("Run OCR 🚀"):
172
+ if ocr_model == "Qwen2-VL-OCR-2B":
173
+ processor, model = load_ocr_qwen2vl()
174
+ inputs = processor(text=[prompt], images=[image], return_tensors="pt").to("cpu")
175
+ outputs = model.generate(**inputs, max_new_tokens=1024)
176
+ text = processor.decode(outputs[0], skip_special_tokens=True)
177
+ else: # GOT-OCR2_0
178
+ tokenizer, model = load_ocr_got()
179
+ with open(selected_image, "rb") as f:
180
+ img_bytes = f.read()
181
+ img = Image.open(BytesIO(img_bytes))
182
+ text = model.chat(tokenizer, img, ocr_type='ocr')
183
+ st.text_area("OCR Result", text, height=200)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
184
 
185
  with tab3:
186
+ st.header("Test Image Gen 🎨")
187
+ captured_images = get_gallery_files(["png"])
188
+ if captured_images:
189
+ selected_image = st.selectbox("Select Image", captured_images)
190
+ image = Image.open(selected_image)
191
+ st.image(image, caption="Reference Image", use_container_width=True)
192
+ prompt = st.text_area("Prompt", "Generate a similar superhero image")
193
+ if st.button("Run Image Gen 🚀"):
194
+ pipeline = load_image_gen()
195
+ gen_image = pipeline(prompt, num_inference_steps=50).images[0]
196
+ st.image(gen_image, caption="Generated Image", use_container_width=True)
 
 
 
 
 
 
 
 
 
197
 
198
  with tab4:
199
+ st.header("Test Line Drawings ✏️")
200
+ captured_images = get_gallery_files(["png"])
201
+ if captured_images:
202
+ selected_image = st.selectbox("Select Image", captured_images)
203
+ image = Image.open(selected_image)
204
+ st.image(image, caption="Input Image", use_container_width=True)
205
+ if st.button("Run Line Drawing 🚀"):
206
+ edge_fn = load_line_drawer()
207
+ line_drawing = edge_fn(image)
208
+ st.image(line_drawing, caption="Line Drawing", use_container_width=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
209
 
210
  # Initial Gallery Update
211
  update_gallery()