Spaces:

awacke1
/

TorchTransformers-CV-SFT

Running

App Files Files Community

awacke1 committed on Mar 24

Commit

353aa7f

verified ·

1 Parent(s): d6f4b6a

Update app.py

Browse files

Files changed (1) hide show

app.py +66 -163

app.py CHANGED Viewed

@@ -9,11 +9,11 @@ import pandas as pd
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
-from transformers import AutoModelForCausalLM, AutoTokenizer, AutoModel, AutoProcessor, Qwen2VLForConditionalGeneration, TrOCRProcessor, VisionEncoderDecoderModel
 from diffusers import StableDiffusionPipeline
 from torch.utils.data import Dataset, DataLoader
 import csv
-import fitz  # PyMuPDF
 import requests
 from PIL import Image
 import cv2
@@ -28,10 +28,7 @@ import zipfile
 import math
 import random
 import re
-from datetime import datetime
-import pytz
-# Logging setup with custom buffer
 logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
 logger = logging.getLogger(__name__)
 log_records = []
@@ -42,7 +39,6 @@ class LogCaptureHandler(logging.Handler):
 logger.addHandler(LogCaptureHandler())
-# Page Configuration
 st.set_page_config(
     page_title="AI Vision & SFT Titans 🚀",
     page_icon="🤖",
@@ -55,9 +51,8 @@ st.set_page_config(
     }
 )
-# Initialize st.session_state
 if 'history' not in st.session_state:
-    st.session_state['history'] = []  # Flat list for history
 if 'builder' not in st.session_state:
     st.session_state['builder'] = None
 if 'model_loaded' not in st.session_state:
@@ -68,10 +63,7 @@ if 'pdf_checkboxes' not in st.session_state:
     st.session_state['pdf_checkboxes'] = {}
 if 'downloaded_pdfs' not in st.session_state:
     st.session_state['downloaded_pdfs'] = {}
-if 'captured_images' not in st.session_state:
-    st.session_state['captured_images'] = []
-# Model Configuration Classes
 @dataclass
 class ModelConfig:
     name: str
@@ -88,12 +80,11 @@ class DiffusionConfig:
     name: str
     base_model: str
     size: str
-    domain: Optional[str] = None  # Fixed to include domain
     @property
     def model_path(self):
         return f"diffusion_models/{self.name}"
-# Datasets
 class SFTDataset(Dataset):
     def __init__(self, data, tokenizer, max_length=128):
         self.data = data
@@ -132,7 +123,6 @@ class TinyDiffusionDataset(Dataset):
     def __getitem__(self, idx):
         return self.images[idx]
-# Custom Tiny Diffusion Model
 class TinyUNet(nn.Module):
     def __init__(self, in_channels=3, out_channels=3):
         super(TinyUNet, self).__init__()
@@ -205,7 +195,6 @@ class TinyDiffusion:
         upscaled = torch.clamp(upscaled * 255, 0, 255).byte()
         return Image.fromarray(upscaled.squeeze(0).permute(1, 2, 0).cpu().numpy())
-# Model Builders
 class ModelBuilder:
     def __init__(self):
         self.config = None
@@ -316,10 +305,8 @@ class DiffusionBuilder:
     def generate(self, prompt: str):
         return self.pipeline(prompt, num_inference_steps=20).images[0]
-# Utility Functions
 def generate_filename(sequence, ext="png"):
-    central = pytz.timezone('US/Central')
-    timestamp = datetime.now(central).strftime("%d%m%Y%H%M%S%p")
     return f"{sequence}_{timestamp}.{ext}"
 def pdf_url_to_filename(url):
@@ -342,7 +329,7 @@ def get_model_files(model_type="causal_lm"):
     path = "models/*" if model_type == "causal_lm" else "diffusion_models/*"
     return [d for d in glob.glob(path) if os.path.isdir(d)]
-def get_gallery_files(file_types=["png", "txt"]):
     return sorted([f for ext in file_types for f in glob.glob(f"*.{ext}")])
 def get_pdf_files():
@@ -360,33 +347,6 @@ def download_pdf(url, output_path):
         logger.error(f"Failed to download {url}: {e}")
     return False
-# Model Loaders for New App Features
-def load_ocr_qwen2vl():
-    model_id = "prithivMLmods/Qwen2-VL-OCR-2B-Instruct"
-    processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
-    model = Qwen2VLForConditionalGeneration.from_pretrained(model_id, trust_remote_code=True, torch_dtype=torch.float32).to("cpu").eval()
-    return processor, model
-def load_ocr_trocr():
-    model_id = "microsoft/trocr-small-handwritten"
-    processor = TrOCRProcessor.from_pretrained(model_id)
-    model = VisionEncoderDecoderModel.from_pretrained(model_id, torch_dtype=torch.float32).to("cpu").eval()
-    return processor, model
-def load_image_gen():
-    model_id = "OFA-Sys/small-stable-diffusion-v0"
-    pipeline = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float32).to("cpu")
-    return pipeline
-def load_line_drawer():
-    def edge_detection(image):
-        img_np = np.array(image.convert("RGB"))
-        gray = cv2.cvtColor(img_np, cv2.COLOR_RGB2GRAY)
-        edges = cv2.Canny(gray, 100, 200)
-        return Image.fromarray(edges)
-    return edge_detection
-# Async Processing Functions
 async def process_pdf_snapshot(pdf_path, mode="single"):
     start_time = time.time()
     status = st.empty()
@@ -423,31 +383,17 @@ async def process_pdf_snapshot(pdf_path, mode="single"):
         status.error(f"Failed to process PDF: {str(e)}")
         return []
-async def process_ocr(image, prompt, model_name, output_file):
     start_time = time.time()
     status = st.empty()
-    status.text(f"Processing {model_name} OCR... (0s)")
-    if model_name == "Qwen2-VL-OCR-2B":
-        processor, model = load_ocr_qwen2vl()
-        messages = [{"role": "user", "content": [{"type": "image", "image": image}, {"type": "text", "text": prompt}]}]
-        text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
-        inputs = processor(text=[text], images=[image], return_tensors="pt", padding=True).to("cpu")
-        outputs = model.generate(**inputs, max_new_tokens=1024)
-        result = processor.batch_decode(outputs, skip_special_tokens=True)[0]
-    elif model_name == "TrOCR-Small":
-        processor, model = load_ocr_trocr()
-        pixel_values = processor(images=image, return_tensors="pt").pixel_values.to("cpu")
-        outputs = model.generate(pixel_values)
-        result = processor.batch_decode(outputs, skip_special_tokens=True)[0]
-    else:  # GOT-OCR2_0 (original from Backup 6)
-        tokenizer = AutoTokenizer.from_pretrained("ucaslcl/GOT-OCR2_0", trust_remote_code=True)
-        model = AutoModel.from_pretrained("ucaslcl/GOT-OCR2_0", trust_remote_code=True, torch_dtype=torch.float32).to("cpu").eval()
-        result = model.chat(tokenizer, image, ocr_type='ocr')
     elapsed = int(time.time() - start_time)
-    status.text(f"{model_name} OCR completed in {elapsed}s!")
     async with aiofiles.open(output_file, "w") as f:
         await f.write(result)
-    st.session_state['captured_images'].append(output_file)
     update_gallery()
     return result
@@ -455,29 +401,29 @@ async def process_image_gen(prompt, output_file):
     start_time = time.time()
     status = st.empty()
     status.text("Processing Image Gen... (0s)")
-    pipeline = load_image_gen()
     gen_image = pipeline(prompt, num_inference_steps=20).images[0]
     elapsed = int(time.time() - start_time)
     status.text(f"Image Gen completed in {elapsed}s!")
     gen_image.save(output_file)
-    st.session_state['captured_images'].append(output_file)
     update_gallery()
     return gen_image
-async def process_line_drawing(image, output_file):
     start_time = time.time()
     status = st.empty()
-    status.text("Processing Line Drawing... (0s)")
-    edge_fn = load_line_drawer()
-    line_drawing = edge_fn(image)
     elapsed = int(time.time() - start_time)
-    status.text(f"Line Drawing completed in {elapsed}s!")
-    line_drawing.save(output_file)
-    st.session_state['captured_images'].append(output_file)
     update_gallery()
-    return line_drawing
-# Mock Search Tool for RAG
 def mock_search(query: str) -> str:
     if "superhero" in query.lower():
         return "Latest trends: Gold-plated Batman statues, VR superhero battles."
@@ -493,7 +439,6 @@ def mock_duckduckgo_search(query: str) -> str:
         """
     return "No relevant results found."
-# Agent Classes
 class PartyPlannerAgent:
     def __init__(self, model, tokenizer):
         self.model = model
@@ -558,26 +503,19 @@ def calculate_cargo_travel_time(origin_coords: Tuple[float, float], destination_
     flight_time = (actual_distance / cruising_speed_kmh) + 1.0
     return round(flight_time, 2)
-# Main App
 st.title("AI Vision & SFT Titans 🚀")
-# Sidebar
 st.sidebar.header("Captured Files 📜")
 gallery_size = st.sidebar.slider("Gallery Size", 1, 10, 2)
 def update_gallery():
-    media_files = get_gallery_files(["png", "txt"])
     pdf_files = get_pdf_files()
     if media_files or pdf_files:
-        st.sidebar.subheader("Images & Text 📸")
         cols = st.sidebar.columns(2)
         for idx, file in enumerate(media_files[:gallery_size * 2]):
             with cols[idx % 2]:
-                if file.endswith(".png"):
-                    st.image(Image.open(file), caption=os.path.basename(file), use_container_width=True)
-                elif file.endswith(".txt"):
-                    with open(file, "r") as f:
-                        content = f.read()
-                        st.text(content[:50] + "..." if len(content) > 50 else content, help=file)
         st.sidebar.subheader("PDF Downloads 📖")
         for pdf_file in pdf_files[:gallery_size * 2]:
             st.markdown(get_download_link(pdf_file, "application/pdf", f"📥 Grab {os.path.basename(pdf_file)}"), unsafe_allow_html=True)
@@ -607,11 +545,9 @@ with history_container:
     for entry in st.session_state['history'][-gallery_size * 2:]:
         st.write(entry)
-# Tabs
-tab1, tab2, tab3, tab4, tab5, tab6, tab7, tab8, tab9, tab10 = st.tabs([
     "Camera Snap 📷", "Download PDFs 📥", "Build Titan 🌱", "Fine-Tune Titan 🔧",
-    "Test Titan 🧪", "Agentic RAG Party 🌐", "Test OCR 🔍", "Test Image Gen 🎨",
-    "Test Line Drawings ✏️", "Custom Diffusion 🎨🤓"
 ])
 with tab1:
@@ -622,55 +558,43 @@ with tab1:
         cam0_img = st.camera_input("Take a picture - Cam 0", key="cam0")
         if cam0_img:
             filename = generate_filename("cam0")
-            if filename not in st.session_state['captured_images']:
-                with open(filename, "wb") as f:
-                    f.write(cam0_img.getvalue())
-                st.image(Image.open(filename), caption="Camera 0", use_container_width=True)
-                logger.info(f"Saved snapshot from Camera 0: {filename}")
-                st.session_state['captured_images'].append(filename)
-                update_gallery()
     with cols[1]:
         cam1_img = st.camera_input("Take a picture - Cam 1", key="cam1")
         if cam1_img:
             filename = generate_filename("cam1")
-            if filename not in st.session_state['captured_images']:
-                with open(filename, "wb") as f:
-                    f.write(cam1_img.getvalue())
-                st.image(Image.open(filename), caption="Camera 1", use_container_width=True)
-                logger.info(f"Saved snapshot from Camera 1: {filename}")
-                st.session_state['captured_images'].append(filename)
-                update_gallery()
-    st.subheader("Burst Capture")
-    slice_count = st.number_input("Number of Frames", min_value=1, max_value=20, value=10, key="burst_count")
-    if st.button("Start Burst Capture 📸"):
-        st.session_state['burst_frames'] = []
-        placeholder = st.empty()
-        for i in range(slice_count):
-            with placeholder.container():
-                st.write(f"Capturing frame {i+1}/{slice_count}...")
-                img = st.camera_input(f"Frame {i}", key=f"burst_{i}_{time.time()}")
-                if img:
-                    filename = generate_filename(f"burst_{i}")
-                    if filename not in st.session_state['captured_images']:
-                        with open(filename, "wb") as f:
-                            f.write(img.getvalue())
-                        st.session_state['burst_frames'].append(filename)
-                        logger.info(f"Saved burst frame {i}: {filename}")
-                        st.image(Image.open(filename), caption=filename, use_container_width=True)
-                    time.sleep(0.5)
-        st.session_state['captured_images'].extend([f for f in st.session_state['burst_frames'] if f not in st.session_state['captured_images']])
-        update_gallery()
-        placeholder.success(f"Captured {len(st.session_state['burst_frames'])} frames!")
 with tab2:
     st.header("Download PDFs 📥")
     if st.button("Examples 📚"):
         example_urls = [
-            "https://arxiv.org/pdf/2308.03892", "https://arxiv.org/pdf/1912.01703", "https://arxiv.org/pdf/2408.11039",
-            "https://arxiv.org/pdf/2109.10282", "https://arxiv.org/pdf/2112.10752", "https://arxiv.org/pdf/2308.11236",
-            "https://arxiv.org/pdf/1706.03762", "https://arxiv.org/pdf/2006.11239", "https://arxiv.org/pdf/2305.11207",
-            "https://arxiv.org/pdf/2106.09685", "https://arxiv.org/pdf/2005.11401", "https://arxiv.org/pdf/2106.10504"
         ]
         st.session_state['pdf_urls'] = "\n".join(example_urls)
@@ -716,7 +640,9 @@ with tab2:
                     st.image(img, caption=os.path.basename(pdf_path), use_container_width=True)
                     checkbox_key = f"pdf_{pdf_path}"
                     st.session_state['pdf_checkboxes'][checkbox_key] = st.checkbox(
-                        "Use for SFT/Input", value=st.session_state['pdf_checkboxes'].get(checkbox_key, False), key=checkbox_key
                     )
                     st.markdown(get_download_link(pdf_path, "application/pdf", "Snag It! 📥"), unsafe_allow_html=True)
                     if st.button("Zap It! 🗑️", key=f"delete_{pdf_path}"):
@@ -916,12 +842,13 @@ with tab7:
                 image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
                 doc.close()
             st.image(image, caption="Input Image", use_container_width=True)
-            ocr_model = st.selectbox("Select OCR Model", ["Qwen2-VL-OCR-2B", "TrOCR-Small", "GOT-OCR2_0"], key="ocr_model_select")
-            prompt = st.text_area("Prompt", "Extract text from the image", key="ocr_prompt")
             if st.button("Run OCR 🚀", key="ocr_run"):
                 output_file = generate_filename("ocr_output", "txt")
                 st.session_state['processing']['ocr'] = True
-                result = asyncio.run(process_ocr(image, prompt, ocr_model, output_file))
                 st.text_area("OCR Result", result, height=200, key="ocr_result")
                 st.success(f"OCR output saved to {output_file}")
                 st.session_state['processing']['ocr'] = False
@@ -949,6 +876,9 @@ with tab8:
                 output_file = generate_filename("gen_output", "png")
                 st.session_state['processing']['gen'] = True
                 result = asyncio.run(process_image_gen(prompt, output_file))
                 st.image(result, caption="Generated Image", use_container_width=True)
                 st.success(f"Image saved to {output_file}")
                 st.session_state['processing']['gen'] = False
@@ -956,32 +886,6 @@ with tab8:
         st.warning("No images or PDFs captured yet. Use Camera Snap or Download PDFs first!")
 with tab9:
-    st.header("Test Line Drawings ✏️")
-    captured_files = get_gallery_files(["png"])
-    selected_pdfs = [path for key, path in st.session_state['downloaded_pdfs'].items() if st.session_state['pdf_checkboxes'].get(f"pdf_{path}", False)]
-    all_files = captured_files + selected_pdfs
-    if all_files:
-        selected_file = st.selectbox("Select Image or PDF", all_files, key="line_select")
-        if selected_file:
-            if selected_file.endswith('.png'):
-                image = Image.open(selected_file)
-            else:
-                doc = fitz.open(selected_file)
-                pix = doc[0].get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
-                image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
-                doc.close()
-            st.image(image, caption="Input Image", use_container_width=True)
-            if st.button("Run Line Drawing 🚀", key="line_run"):
-                output_file = generate_filename("line_output", "png")
-                st.session_state['processing']['line'] = True
-                result = asyncio.run(process_line_drawing(image, output_file))
-                st.image(result, caption="Line Drawing", use_container_width=True)
-                st.success(f"Line drawing saved to {output_file}")
-                st.session_state['processing']['line'] = False
-    else:
-        st.warning("No images or PDFs captured yet. Use Camera Snap or Download PDFs first!")
-with tab10:
     st.header("Custom Diffusion 🎨🤓")
     st.write("Unleash your inner artist with our tiny diffusion models!")
     captured_files = get_gallery_files(["png"])
@@ -1027,5 +931,4 @@ with tab10:
     else:
         st.warning("No images or PDFs captured yet. Use Camera Snap or Download PDFs first!")
-# Initial Gallery Update
 update_gallery()

 import torch
 import torch.nn as nn
 import torch.nn.functional as F
+from transformers import AutoModelForCausalLM, AutoTokenizer, AutoModel
 from diffusers import StableDiffusionPipeline
 from torch.utils.data import Dataset, DataLoader
 import csv
+import fitz
 import requests
 from PIL import Image
 import cv2
 import math
 import random
 import re
 logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
 logger = logging.getLogger(__name__)
 log_records = []
 logger.addHandler(LogCaptureHandler())
 st.set_page_config(
     page_title="AI Vision & SFT Titans 🚀",
     page_icon="🤖",
     }
 )
 if 'history' not in st.session_state:
+    st.session_state['history'] = []
 if 'builder' not in st.session_state:
     st.session_state['builder'] = None
 if 'model_loaded' not in st.session_state:
     st.session_state['pdf_checkboxes'] = {}
 if 'downloaded_pdfs' not in st.session_state:
     st.session_state['downloaded_pdfs'] = {}
 @dataclass
 class ModelConfig:
     name: str
     name: str
     base_model: str
     size: str
+    domain: Optional[str] = None
     @property
     def model_path(self):
         return f"diffusion_models/{self.name}"
 class SFTDataset(Dataset):
     def __init__(self, data, tokenizer, max_length=128):
         self.data = data
     def __getitem__(self, idx):
         return self.images[idx]
 class TinyUNet(nn.Module):
     def __init__(self, in_channels=3, out_channels=3):
         super(TinyUNet, self).__init__()
         upscaled = torch.clamp(upscaled * 255, 0, 255).byte()
         return Image.fromarray(upscaled.squeeze(0).permute(1, 2, 0).cpu().numpy())
 class ModelBuilder:
     def __init__(self):
         self.config = None
     def generate(self, prompt: str):
         return self.pipeline(prompt, num_inference_steps=20).images[0]
 def generate_filename(sequence, ext="png"):
+    timestamp = time.strftime("%d%m%Y%H%M%S")
     return f"{sequence}_{timestamp}.{ext}"
 def pdf_url_to_filename(url):
     path = "models/*" if model_type == "causal_lm" else "diffusion_models/*"
     return [d for d in glob.glob(path) if os.path.isdir(d)]
+def get_gallery_files(file_types=["png"]):
     return sorted([f for ext in file_types for f in glob.glob(f"*.{ext}")])
 def get_pdf_files():
         logger.error(f"Failed to download {url}: {e}")
     return False
 async def process_pdf_snapshot(pdf_path, mode="single"):
     start_time = time.time()
     status = st.empty()
         status.error(f"Failed to process PDF: {str(e)}")
         return []
+async def process_ocr(image, output_file):
     start_time = time.time()
     status = st.empty()
+    status.text("Processing GOT-OCR2_0... (0s)")
+    tokenizer = AutoTokenizer.from_pretrained("ucaslcl/GOT-OCR2_0", trust_remote_code=True)
+    model = AutoModel.from_pretrained("ucaslcl/GOT-OCR2_0", trust_remote_code=True, torch_dtype=torch.float32).to("cpu").eval()
+    result = model.chat(tokenizer, image, ocr_type='ocr')
     elapsed = int(time.time() - start_time)
+    status.text(f"GOT-OCR2_0 completed in {elapsed}s!")
     async with aiofiles.open(output_file, "w") as f:
         await f.write(result)
     update_gallery()
     return result
     start_time = time.time()
     status = st.empty()
     status.text("Processing Image Gen... (0s)")
+    pipeline = StableDiffusionPipeline.from_pretrained("OFA-Sys/small-stable-diffusion-v0", torch_dtype=torch.float32).to("cpu")
     gen_image = pipeline(prompt, num_inference_steps=20).images[0]
     elapsed = int(time.time() - start_time)
     status.text(f"Image Gen completed in {elapsed}s!")
     gen_image.save(output_file)
     update_gallery()
     return gen_image
+async def process_custom_diffusion(images, output_file, model_name):
     start_time = time.time()
     status = st.empty()
+    status.text(f"Training {model_name}... (0s)")
+    unet = TinyUNet()
+    diffusion = TinyDiffusion(unet)
+    diffusion.train(images)
+    gen_image = diffusion.generate()
+    upscaled_image = diffusion.upscale(gen_image, scale_factor=2)
     elapsed = int(time.time() - start_time)
+    status.text(f"{model_name} completed in {elapsed}s!")
+    upscaled_image.save(output_file)
     update_gallery()
+    return upscaled_image
 def mock_search(query: str) -> str:
     if "superhero" in query.lower():
         return "Latest trends: Gold-plated Batman statues, VR superhero battles."
         """
     return "No relevant results found."
 class PartyPlannerAgent:
     def __init__(self, model, tokenizer):
         self.model = model
     flight_time = (actual_distance / cruising_speed_kmh) + 1.0
     return round(flight_time, 2)
 st.title("AI Vision & SFT Titans 🚀")
 st.sidebar.header("Captured Files 📜")
 gallery_size = st.sidebar.slider("Gallery Size", 1, 10, 2)
 def update_gallery():
+    media_files = get_gallery_files(["png"])
     pdf_files = get_pdf_files()
     if media_files or pdf_files:
+        st.sidebar.subheader("Images 📸")
         cols = st.sidebar.columns(2)
         for idx, file in enumerate(media_files[:gallery_size * 2]):
             with cols[idx % 2]:
+                st.image(Image.open(file), caption=os.path.basename(file), use_container_width=True)
         st.sidebar.subheader("PDF Downloads 📖")
         for pdf_file in pdf_files[:gallery_size * 2]:
             st.markdown(get_download_link(pdf_file, "application/pdf", f"📥 Grab {os.path.basename(pdf_file)}"), unsafe_allow_html=True)
     for entry in st.session_state['history'][-gallery_size * 2:]:
         st.write(entry)
+tab1, tab2, tab3, tab4, tab5, tab6, tab7, tab8, tab9 = st.tabs([
     "Camera Snap 📷", "Download PDFs 📥", "Build Titan 🌱", "Fine-Tune Titan 🔧",
+    "Test Titan 🧪", "Agentic RAG Party 🌐", "Test OCR 🔍", "Test Image Gen 🎨", "Custom Diffusion 🎨🤓"
 ])
 with tab1:
         cam0_img = st.camera_input("Take a picture - Cam 0", key="cam0")
         if cam0_img:
             filename = generate_filename("cam0")
+            with open(filename, "wb") as f:
+                f.write(cam0_img.getvalue())
+            entry = f"Snapshot from Cam 0: {filename}"
+            if entry not in st.session_state['history']:
+                st.session_state['history'] = [e for e in st.session_state['history'] if not e.startswith("Snapshot from Cam 0:")] + [entry]
+            st.image(Image.open(filename), caption="Camera 0", use_container_width=True)
+            logger.info(f"Saved snapshot from Camera 0: {filename}")
+            update_gallery()
     with cols[1]:
         cam1_img = st.camera_input("Take a picture - Cam 1", key="cam1")
         if cam1_img:
             filename = generate_filename("cam1")
+            with open(filename, "wb") as f:
+                f.write(cam1_img.getvalue())
+            entry = f"Snapshot from Cam 1: {filename}"
+            if entry not in st.session_state['history']:
+                st.session_state['history'] = [e for e in st.session_state['history'] if not e.startswith("Snapshot from Cam 1:")] + [entry]
+            st.image(Image.open(filename), caption="Camera 1", use_container_width=True)
+            logger.info(f"Saved snapshot from Camera 1: {filename}")
+            update_gallery()
 with tab2:
     st.header("Download PDFs 📥")
     if st.button("Examples 📚"):
         example_urls = [
+            "https://arxiv.org/pdf/2308.03892",
+            "https://arxiv.org/pdf/1912.01703",
+            "https://arxiv.org/pdf/2408.11039",
+            "https://arxiv.org/pdf/2109.10282",
+            "https://arxiv.org/pdf/2112.10752",
+            "https://arxiv.org/pdf/2308.11236",
+            "https://arxiv.org/pdf/1706.03762",
+            "https://arxiv.org/pdf/2006.11239",
+            "https://arxiv.org/pdf/2305.11207",
+            "https://arxiv.org/pdf/2106.09685",
+            "https://arxiv.org/pdf/2005.11401",
+            "https://arxiv.org/pdf/2106.10504"
         ]
         st.session_state['pdf_urls'] = "\n".join(example_urls)
                     st.image(img, caption=os.path.basename(pdf_path), use_container_width=True)
                     checkbox_key = f"pdf_{pdf_path}"
                     st.session_state['pdf_checkboxes'][checkbox_key] = st.checkbox(
+                        "Use for SFT/Input",
+                        value=st.session_state['pdf_checkboxes'].get(checkbox_key, False),
+                        key=checkbox_key
                     )
                     st.markdown(get_download_link(pdf_path, "application/pdf", "Snag It! 📥"), unsafe_allow_html=True)
                     if st.button("Zap It! 🗑️", key=f"delete_{pdf_path}"):
                 image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
                 doc.close()
             st.image(image, caption="Input Image", use_container_width=True)
             if st.button("Run OCR 🚀", key="ocr_run"):
                 output_file = generate_filename("ocr_output", "txt")
                 st.session_state['processing']['ocr'] = True
+                result = asyncio.run(process_ocr(image, output_file))
+                entry = f"OCR Test: {selected_file} -> {output_file}"
+                if entry not in st.session_state['history']:
+                    st.session_state['history'].append(entry)
                 st.text_area("OCR Result", result, height=200, key="ocr_result")
                 st.success(f"OCR output saved to {output_file}")
                 st.session_state['processing']['ocr'] = False
                 output_file = generate_filename("gen_output", "png")
                 st.session_state['processing']['gen'] = True
                 result = asyncio.run(process_image_gen(prompt, output_file))
+                entry = f"Image Gen Test: {prompt} -> {output_file}"
+                if entry not in st.session_state['history']:
+                    st.session_state['history'].append(entry)
                 st.image(result, caption="Generated Image", use_container_width=True)
                 st.success(f"Image saved to {output_file}")
                 st.session_state['processing']['gen'] = False
         st.warning("No images or PDFs captured yet. Use Camera Snap or Download PDFs first!")
 with tab9:
     st.header("Custom Diffusion 🎨🤓")
     st.write("Unleash your inner artist with our tiny diffusion models!")
     captured_files = get_gallery_files(["png"])
     else:
         st.warning("No images or PDFs captured yet. Use Camera Snap or Download PDFs first!")
 update_gallery()