awacke1 committed on
Commit fc736fc · verified · 1 Parent(s): a9c30d8

Update app.py

Files changed (1)
  1. app.py +170 -13
app.py CHANGED
@@ -7,11 +7,13 @@ import shutil
 import streamlit as st
 import pandas as pd
 import torch
+import torch.nn as nn
+import torch.nn.functional as F
 from transformers import AutoModelForCausalLM, AutoTokenizer, AutoModel
 from diffusers import StableDiffusionPipeline
 from torch.utils.data import Dataset, DataLoader
 import csv
-import fitz  # PyMuPDF
+from pdf2image import convert_from_path  # Replaced fitz with pdf2image
 import requests
 from PIL import Image
 import cv2
@@ -46,7 +48,7 @@ st.set_page_config(
     menu_items={
         'Get Help': 'https://huggingface.co/awacke1',
         'Report a Bug': 'https://huggingface.co/spaces/awacke1',
-        'About': "AI Vision & SFT Titans: PDFs, OCR, Image Gen, Line Drawings, and SFT on CPU! 🌌"
+        'About': "AI Vision & SFT Titans: PDFs, OCR, Image Gen, Line Drawings, Custom Diffusion, and SFT on CPU! 🌌"
     }
 )
 
@@ -114,6 +116,87 @@ class DiffusionDataset(Dataset):
     def __getitem__(self, idx):
         return {"image": self.images[idx], "text": self.texts[idx]}
 
+class TinyDiffusionDataset(Dataset):
+    def __init__(self, images):
+        self.images = [torch.tensor(np.array(img.convert("RGB")).transpose(2, 0, 1), dtype=torch.float32) / 255.0 for img in images]
+    def __len__(self):
+        return len(self.images)
+    def __getitem__(self, idx):
+        return self.images[idx]
+
+# Custom Tiny Diffusion Model
+class TinyUNet(nn.Module):
+    def __init__(self, in_channels=3, out_channels=3):
+        super(TinyUNet, self).__init__()
+        self.down1 = nn.Conv2d(in_channels, 32, 3, padding=1)
+        self.down2 = nn.Conv2d(32, 64, 3, padding=1, stride=2)
+        self.mid = nn.Conv2d(64, 128, 3, padding=1)
+        self.up1 = nn.ConvTranspose2d(128, 64, 3, stride=2, padding=1, output_padding=1)
+        self.up2 = nn.Conv2d(64 + 32, 32, 3, padding=1)
+        self.out = nn.Conv2d(32, out_channels, 3, padding=1)
+        self.time_embed = nn.Linear(1, 128)  # 128, not 64: must match self.mid's output channels or the add in forward() fails to broadcast
+
+    def forward(self, x, t):
+        t_embed = F.relu(self.time_embed(t.unsqueeze(-1)))
+        t_embed = t_embed.view(t_embed.size(0), t_embed.size(1), 1, 1)
+
+        x1 = F.relu(self.down1(x))
+        x2 = F.relu(self.down2(x1))
+        x_mid = F.relu(self.mid(x2)) + t_embed
+        x_up1 = F.relu(self.up1(x_mid))
+        x_up2 = F.relu(self.up2(torch.cat([x_up1, x1], dim=1)))
+        return self.out(x_up2)
+
+class TinyDiffusion:
+    def __init__(self, model, timesteps=100):
+        self.model = model
+        self.timesteps = timesteps
+        self.beta = torch.linspace(0.0001, 0.02, timesteps)
+        self.alpha = 1 - self.beta
+        self.alpha_cumprod = torch.cumprod(self.alpha, dim=0)
+
+    def train(self, images, epochs=50):
+        dataset = TinyDiffusionDataset(images)
+        dataloader = DataLoader(dataset, batch_size=1, shuffle=True)
+        optimizer = torch.optim.Adam(self.model.parameters(), lr=1e-4)
+        device = torch.device("cpu")
+        self.model.to(device)
+        for epoch in range(epochs):
+            total_loss = 0
+            for x in dataloader:
+                x = x.to(device)
+                t = torch.randint(0, self.timesteps, (x.size(0),), device=device).float()
+                noise = torch.randn_like(x)
+                alpha_t = self.alpha_cumprod[t.long()].view(-1, 1, 1, 1)
+                x_noisy = torch.sqrt(alpha_t) * x + torch.sqrt(1 - alpha_t) * noise
+                pred_noise = self.model(x_noisy, t)
+                loss = F.mse_loss(pred_noise, noise)
+                optimizer.zero_grad()
+                loss.backward()
+                optimizer.step()
+                total_loss += loss.item()
+            logger.info(f"Epoch {epoch + 1}/{epochs}, Loss: {total_loss / len(dataloader):.4f}")
+        return self
+
+    def generate(self, size=(64, 64), steps=100):
+        device = torch.device("cpu")
+        x = torch.randn(1, 3, size[0], size[1], device=device)
+        for t in reversed(range(steps)):
+            t_tensor = torch.full((1,), t, device=device, dtype=torch.float32)
+            alpha_t = self.alpha_cumprod[t].view(-1, 1, 1, 1)
+            pred_noise = self.model(x, t_tensor)
+            x = (x - (1 - self.alpha[t]) / torch.sqrt(1 - alpha_t) * pred_noise) / torch.sqrt(self.alpha[t])
+            if t > 0:
+                x += torch.sqrt(self.beta[t]) * torch.randn_like(x)
+        x = torch.clamp(x * 255, 0, 255).byte()
+        return Image.fromarray(x.squeeze(0).permute(1, 2, 0).cpu().numpy())
+
+    def upscale(self, image, scale_factor=2):
+        img_tensor = torch.tensor(np.array(image.convert("RGB")).transpose(2, 0, 1), dtype=torch.float32).unsqueeze(0) / 255.0
+        upscaled = F.interpolate(img_tensor, scale_factor=scale_factor, mode='bilinear', align_corners=False)
+        upscaled = torch.clamp(upscaled * 255, 0, 255).byte()
+        return Image.fromarray(upscaled.squeeze(0).permute(1, 2, 0).cpu().numpy())
+
 # Model Builders
 class ModelBuilder:
     def __init__(self):
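For orientation, TinyDiffusion.train and TinyDiffusion.generate above implement the two standard DDPM identities: closed-form forward noising of a clean image, and a reverse step driven by the predicted noise. A minimal self-contained sketch of both (torch only, same linear β schedule; the 8×8 tensor is a stand-in for an image):

import torch

beta = torch.linspace(0.0001, 0.02, 100)   # same schedule as TinyDiffusion
alpha = 1 - beta
alpha_bar = torch.cumprod(alpha, dim=0)

# Forward (used in train): x_t = sqrt(abar_t)*x0 + sqrt(1-abar_t)*eps
x0 = torch.rand(1, 3, 8, 8)                # stand-in [0,1]-scaled image
t = 42
eps = torch.randn_like(x0)
x_t = torch.sqrt(alpha_bar[t]) * x0 + torch.sqrt(1 - alpha_bar[t]) * eps

# Reverse mean (used in generate), with the true eps standing in for the model's prediction:
x_prev = (x_t - (1 - alpha[t]) / torch.sqrt(1 - alpha_bar[t]) * eps) / torch.sqrt(alpha[t])
print(x_prev.shape)                        # torch.Size([1, 3, 8, 8])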
@@ -343,22 +426,18 @@ async def process_pdf_snapshot(pdf_path, mode="thumbnail"):
     start_time = time.time()
     status = st.empty()
     status.text(f"Processing PDF Snapshot ({mode})... (0s)")
-    doc = fitz.open(pdf_path)
+    images = convert_from_path(pdf_path, dpi=200)  # Convert PDF to images
     output_files = []
     if mode == "thumbnail":
-        page = doc[0]
-        pix = page.get_pixmap(matrix=fitz.Matrix(0.5, 0.5))
+        img = images[0].resize((int(images[0].width * 0.5), int(images[0].height * 0.5)), Image.Resampling.LANCZOS)
         output_file = generate_filename("thumbnail", "png")
-        pix.save(output_file)
+        img.save(output_file)
         output_files.append(output_file)
     elif mode == "twopage":
-        for i in range(min(2, len(doc))):
-            page = doc[i]
-            pix = page.get_pixmap(matrix=fitz.Matrix(1.0, 1.0))
+        for i in range(min(2, len(images))):
             output_file = generate_filename(f"twopage_{i}", "png")
-            pix.save(output_file)
+            images[i].save(output_file)
             output_files.append(output_file)
-    doc.close()
     elapsed = int(time.time() - start_time)
     status.text(f"PDF Snapshot ({mode}) completed in {elapsed}s!")
     for file in output_files:
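Two practical notes on the fitz → pdf2image swap above: pdf2image shells out to Poppler, so the Space needs the poppler-utils system package installed (on Hugging Face Spaces, typically listed in packages.txt), a dependency PyMuPDF did not have; and convert_from_path as called here rasterizes every page of the PDF even though both modes use at most two. A sketch of a narrower call using pdf2image's first_page/last_page parameters (first_pages is a hypothetical helper, not part of the commit):

from pdf2image import convert_from_path

def first_pages(pdf_path, n=2, dpi=200):
    # Rasterize only pages 1..n (pdf2image pages are 1-indexed)
    return convert_from_path(pdf_path, dpi=dpi, first_page=1, last_page=n)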
@@ -383,12 +462,55 @@ async def process_ocr(image, output_file):
     update_gallery()
     return result
 
+async def process_image_gen(prompt, output_file):
+    start_time = time.time()
+    status = st.empty()
+    status.text("Processing Image Gen... (0s)")
+    pipeline = StableDiffusionPipeline.from_pretrained("OFA-Sys/small-stable-diffusion-v0", torch_dtype=torch.float32).to("cpu")
+    gen_image = pipeline(prompt, num_inference_steps=20).images[0]
+    elapsed = int(time.time() - start_time)
+    status.text(f"Image Gen completed in {elapsed}s!")
+    gen_image.save(output_file)
+    if output_file not in st.session_state['captured_files']:
+        st.session_state['captured_files'].append(output_file)
+    update_gallery()
+    return gen_image
+
+async def process_custom_diffusion(images, output_file, model_name):
+    start_time = time.time()
+    status = st.empty()
+    status.text(f"Training {model_name}... (0s)")
+    unet = TinyUNet()
+    diffusion = TinyDiffusion(unet)
+    diffusion.train(images)
+    gen_image = diffusion.generate()
+    upscaled_image = diffusion.upscale(gen_image, scale_factor=2)
+    elapsed = int(time.time() - start_time)
+    status.text(f"{model_name} completed in {elapsed}s!")
+    upscaled_image.save(output_file)
+    if output_file not in st.session_state['captured_files']:
+        st.session_state['captured_files'].append(output_file)
+    update_gallery()
+    return upscaled_image
+
 # Main App
 st.title("AI Vision & SFT Titans 🚀")
 
 # Sidebar
 st.sidebar.header("Captured Files 📜")
 gallery_size = st.sidebar.slider("Gallery Size", 1, 10, 4)
+def update_gallery():
+    media_files = get_gallery_files(["png", "txt"])
+    if media_files:
+        cols = st.sidebar.columns(2)
+        for idx, file in enumerate(media_files[:gallery_size * 2]):
+            with cols[idx % 2]:
+                if file.endswith(".png"):
+                    st.image(Image.open(file), caption=file, use_container_width=True)
+                elif file.endswith(".txt"):
+                    with open(file, "r") as f:
+                        content = f.read()
+                    st.text(content[:50] + "..." if len(content) > 50 else content, help=file)
 update_gallery()
 
 st.sidebar.subheader("Model Management 🗂️")
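One design note on process_image_gen above: it reloads the Stable Diffusion pipeline from disk on every call. A possible refinement, assuming a Streamlit version that provides st.cache_resource, would load the pipeline once per process and reuse it across reruns (load_sd_pipeline is a hypothetical helper):

import streamlit as st
import torch
from diffusers import StableDiffusionPipeline

@st.cache_resource  # cached once per process, shared across Streamlit reruns
def load_sd_pipeline(model_id="OFA-Sys/small-stable-diffusion-v0"):
    return StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float32).to("cpu")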
@@ -416,9 +538,9 @@ with history_container:
         st.write(entry)
 
 # Tabs
-tab1, tab2, tab3, tab4, tab5, tab6, tab7, tab8 = st.tabs([
+tab1, tab2, tab3, tab4, tab5, tab6, tab7, tab8, tab9 = st.tabs([
     "Camera Snap 📷", "Download PDFs 📥", "Build Titan 🌱", "Fine-Tune Titan 🔧",
-    "Test Titan 🧪", "Agentic RAG Party 🌐", "Test OCR 🔍", "Test Image Gen 🎨"
+    "Test Titan 🧪", "Agentic RAG Party 🌐", "Test OCR 🔍", "Test Image Gen 🎨", "Custom Diffusion 🎨🤓"
 ])
 
 with tab1:
@@ -669,5 +791,40 @@ with tab8:
     else:
         st.warning("No images captured yet. Use Camera Snap or Download PDFs first!")
 
+with tab9:
+    st.header("Custom Diffusion 🎨🤓")
+    st.write("Unleash your inner artist with our tiny diffusion models!")
+    captured_files = get_gallery_files(["png"])
+    if captured_files:
+        st.subheader("Select Images to Train")
+        selected_files = st.multiselect("Pick Images", captured_files, key="diffusion_select")
+        images = [Image.open(file) for file in selected_files]
+
+        model_options = [
+            ("PixelTickler 🎨✨", "OFA-Sys/small-stable-diffusion-v0"),
+            ("DreamWeaver 🌙🖌️", "stabilityai/stable-diffusion-2-base"),
+            ("TinyArtBot 🤖🖼️", "custom")
+        ]
+        model_choice = st.selectbox("Choose Your Diffusion Dynamo", [opt[0] for opt in model_options], key="diffusion_model")
+        model_name = next(opt[1] for opt in model_options if opt[0] == model_choice)
+
+        if st.button("Train & Generate 🚀", key="diffusion_run"):
+            output_file = generate_filename("custom_diffusion", "png")
+            st.session_state['processing']['diffusion'] = True
+            if model_name == "custom":
+                result = asyncio.run(process_custom_diffusion(images, output_file, model_choice))
+            else:
+                builder = DiffusionBuilder()
+                builder.load_model(model_name)
+                result = builder.generate("A superhero scene inspired by captured images")
+                result.save(output_file)
+            st.session_state['captured_files'].append(output_file)
+            st.session_state['history'].append(f"Custom Diffusion: {model_choice} -> {output_file}")
+            st.image(result, caption=f"{model_choice} Masterpiece", use_container_width=True)
+            st.success(f"Image saved to {output_file}")
+            st.session_state['processing']['diffusion'] = False
+    else:
+        st.warning("No images captured yet. Use Camera Snap or Download PDFs first!")
+
 # Initial Gallery Update
 update_gallery()
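One edge case the new tab leaves open: clicking "Train & Generate 🚀" with no images selected sends an empty list into TinyDiffusion.train, where total_loss / len(dataloader) divides by zero. A small guard, sketched with a hypothetical require_images helper that would run right after the button check:

import streamlit as st

def require_images(images):
    # Halt this Streamlit rerun before training on an empty selection
    if not images:
        st.warning("Select at least one image before training the custom model.")
        st.stop()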
 