Spaces:

awacke1
/

TorchTransformers-CV-SFT

Running

App Files Files Community

awacke1 committed on Mar 31

Commit

5c99a8d

verified ·

1 Parent(s): 99b2de2

Update backup.03302025.app.py

Browse files

Files changed (1) hide show

backup.03302025.app.py +46 -61

backup.03302025.app.py CHANGED Viewed

@@ -25,13 +25,10 @@ from PIL import Image
 from transformers import AutoModelForCausalLM, AutoTokenizer, AutoModel
 from typing import Optional
-# 🤖 OpenAI wizardry: Summon your API magic!
-client = OpenAI(
-    api_key=os.getenv('OPENAI_API_KEY'),
-    organization=os.getenv('OPENAI_ORG_ID')
-)
-# 📜 Logging activated: Capturing chaos and calm!
 logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
 logger = logging.getLogger(__name__)
 log_records = []
@@ -40,7 +37,7 @@ class LogCaptureHandler(logging.Handler):
         log_records.append(record)
 logger.addHandler(LogCaptureHandler())
-# 🎨 Streamlit styling: Designing a cosmic interface!
 st.set_page_config(
     page_title="AI Vision & SFT Titans 🚀",
     page_icon="🤖",
@@ -53,24 +50,22 @@ st.set_page_config(
     }
 )
-# Set up default session state values.
-st.session_state.setdefault('history', [])             # History: starting fresh if empty!
-st.session_state.setdefault('builder', None)             # Builder: set up if missing.
-st.session_state.setdefault('model_loaded', False)       # Model Loaded: not loaded by default.
-st.session_state.setdefault('processing', {})            # Processing: initialize as an empty dict.
-st.session_state.setdefault('asset_checkboxes', {})      # Asset Checkboxes: default to an empty dict.
-st.session_state.setdefault('downloaded_pdfs', {})       # Downloaded PDFs: start with none.
-st.session_state.setdefault('unique_counter', 0)         # Unique Counter: initialize to zero.
 st.session_state.setdefault('selected_model_type', "Causal LM")
 st.session_state.setdefault('selected_model', "None")
 st.session_state.setdefault('cam0_file', None)
 st.session_state.setdefault('cam1_file', None)
-# Create a single container for the asset gallery in the sidebar.
 if 'asset_gallery_container' not in st.session_state:
     st.session_state['asset_gallery_container'] = st.sidebar.empty()
-@dataclass  # ModelConfig: A blueprint for model configurations.
 class ModelConfig:
     name: str
     base_model: str
@@ -81,7 +76,7 @@ class ModelConfig:
     def model_path(self):
         return f"models/{self.name}"
-@dataclass  # DiffusionConfig: Where diffusion magic takes shape.
 class DiffusionConfig:
     name: str
     base_model: str
@@ -178,7 +173,6 @@ def download_pdf(url, output_path):
         ret = False
     return ret
-# Async PDF Snapshot: Snap your PDF pages without blocking.
 async def process_pdf_snapshot(pdf_path, mode="single"):
     start_time = time.time()
     status = st.empty()
@@ -214,24 +208,32 @@ async def process_pdf_snapshot(pdf_path, mode="single"):
         status.error(f"Failed to process PDF: {str(e)}")
         return []
-# Async OCR: Convert images to text.
-async def process_ocr(image, output_file):
     start_time = time.time()
     status = st.empty()
-    status.text("Processing GOT-OCR2_0... (0s)")
-    tokenizer = AutoTokenizer.from_pretrained("ucaslcl/GOT-OCR2_0", trust_remote_code=True)
-    model = AutoModel.from_pretrained("ucaslcl/GOT-OCR2_0", trust_remote_code=True, torch_dtype=torch.float32).to("cpu").eval()
-    temp_file = f"temp_{int(time.time())}.png"
-    image.save(temp_file)
-    result = model.chat(tokenizer, temp_file, ocr_type='ocr')
-    os.remove(temp_file)
-    elapsed = int(time.time() - start_time)
-    status.text(f"GOT-OCR2_0 completed in {elapsed}s!")
-    async with aiofiles.open(output_file, "w") as f:
-        await f.write(result)
-    return result
-# Async Image Gen: Your image genie.
 async def process_image_gen(prompt, output_file):
     start_time = time.time()
     status = st.empty()
@@ -246,7 +248,6 @@ async def process_image_gen(prompt, output_file):
     gen_image.save(output_file)
     return gen_image
-# GPT-Image Interpreter: Turning pixels into prose!
 def process_image_with_prompt(image, prompt, model="gpt-4o-mini", detail="auto"):
     buffered = BytesIO()
     image.save(buffered, format="PNG")
@@ -264,7 +265,6 @@ def process_image_with_prompt(image, prompt, model="gpt-4o-mini", detail="auto")
     except Exception as e:
         return f"Error processing image with GPT: {str(e)}"
-# GPT-Text Alchemist: Merging prompt and text.
 def process_text_with_prompt(text, prompt, model="gpt-4o-mini"):
     messages = [{"role": "user", "content": f"{prompt}\n\n{text}"}]
     try:
@@ -273,21 +273,18 @@ def process_text_with_prompt(text, prompt, model="gpt-4o-mini"):
     except Exception as e:
         return f"Error processing text with GPT: {str(e)}"
-# ----------------- SIDEBAR UPDATES -----------------
 # Sidebar: Gallery Settings
 st.sidebar.subheader("Gallery Settings")
 st.session_state.setdefault('gallery_size', 2)
 st.session_state['gallery_size'] = st.sidebar.slider("Gallery Size", 1, 10, st.session_state['gallery_size'], key="gallery_size_slider")
-# ----------------- TAB SETUP -----------------
 tabs = st.tabs([
     "Camera Snap 📷", "Download PDFs 📥", "Test OCR 🔍", "Build Titan 🌱",
     "Test Image Gen 🎨", "PDF Process 📄", "Image Process 🖼️", "MD Gallery 📚"
 ])
 (tab_camera, tab_download, tab_ocr, tab_build, tab_imggen, tab_pdf_process, tab_image_process, tab_md_gallery) = tabs
-# ----------------- TAB: Camera Snap -----------------
 with tab_camera:
     st.header("Camera Snap 📷")
     st.subheader("Single Capture")
@@ -319,7 +316,6 @@ with tab_camera:
             st.image(Image.open(filename), caption="Camera 1", use_container_width=True)
             logger.info(f"Saved snapshot from Camera 1: {filename}")
-# ----------------- TAB: Download PDFs -----------------
 with tab_download:
     st.header("Download PDFs 📥")
     if st.button("Examples 📚"):
@@ -378,17 +374,15 @@ with tab_download:
                 for snapshot in snapshots:
                     st.image(Image.open(snapshot), caption=snapshot, use_container_width=True)
                     st.session_state['asset_checkboxes'][snapshot] = True
-            # No update_gallery() call here; will update once later.
         else:
             st.warning("No PDFs selected for snapshotting! Check some boxes in the sidebar.")
-# ----------------- TAB: Test OCR -----------------
 with tab_ocr:
     st.header("Test OCR 🔍")
     all_files = get_gallery_files()
     if all_files:
         if st.button("OCR All Assets 🚀"):
-            full_text = "# OCR Results\n\n"
             for file in all_files:
                 if file.endswith('.png'):
                     image = Image.open(file)
@@ -398,7 +392,7 @@ with tab_ocr:
                     image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
                     doc.close()
                 output_file = generate_filename(f"ocr_{os.path.basename(file)}", "txt")
-                result = asyncio.run(process_ocr(image, output_file))
                 full_text += f"## {os.path.basename(file)}\n\n{result}\n\n"
                 entry = f"OCR Test: {file} -> {output_file}"
                 st.session_state['history'].append(entry)
@@ -420,7 +414,7 @@ with tab_ocr:
             if st.button("Run OCR 🚀", key="ocr_run"):
                 output_file = generate_filename("ocr_output", "txt")
                 st.session_state['processing']['ocr'] = True
-                result = asyncio.run(process_ocr(image, output_file))
                 entry = f"OCR Test: {selected_file} -> {output_file}"
                 st.session_state['history'].append(entry)
                 st.text_area("OCR Result", result, height=200, key="ocr_result")
@@ -433,7 +427,7 @@ with tab_ocr:
                     pix = doc[i].get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
                     image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
                     output_file = generate_filename(f"ocr_page_{i}", "txt")
-                    result = asyncio.run(process_ocr(image, output_file))
                     full_text += f"## Page {i + 1}\n\n{result}\n\n"
                     entry = f"OCR Test: {selected_file} Page {i + 1} -> {output_file}"
                     st.session_state['history'].append(entry)
@@ -445,7 +439,6 @@ with tab_ocr:
     else:
         st.warning("No assets in gallery yet. Use Camera Snap or Download PDFs!")
-# ----------------- TAB: Build Titan -----------------
 with tab_build:
     st.header("Build Titan 🌱")
     model_type = st.selectbox("Model Type", ["Causal LM", "Diffusion"], key="build_type")
@@ -470,9 +463,8 @@ with tab_build:
         entry = f"Built {model_type} model: {model_name}"
         st.session_state['history'].append(entry)
         st.success(f"Model downloaded and saved to {config.model_path}! 🎉")
-        st.experimental_rerun()
-# ----------------- TAB: Test Image Gen -----------------
 with tab_imggen:
     st.header("Test Image Gen 🎨")
     all_files = get_gallery_files()
@@ -500,7 +492,6 @@ with tab_imggen:
     else:
         st.warning("No images or PDFs in gallery yet. Use Camera Snap or Download PDFs!")
-# ----------------- TAB: PDF Process -----------------
 with tab_pdf_process:
     st.header("PDF Process")
     st.subheader("Upload PDFs for GPT-based text extraction")
@@ -559,7 +550,6 @@ with tab_pdf_process:
         st.success(f"PDF processing complete. MD file saved as {output_filename}")
         st.markdown(get_download_link(output_filename, "text/markdown", "Download Processed PDF MD"), unsafe_allow_html=True)
-# ----------------- TAB: Image Process -----------------
 with tab_image_process:
     st.header("Image Process")
     st.subheader("Upload Images for GPT-based OCR")
@@ -584,7 +574,6 @@ with tab_image_process:
         st.success(f"Image processing complete. MD file saved as {output_filename}")
         st.markdown(get_download_link(output_filename, "text/markdown", "Download Processed Image MD"), unsafe_allow_html=True)
-# ----------------- TAB: MD Gallery -----------------
 with tab_md_gallery:
     st.header("MD Gallery and GPT Processing")
     gpt_models = ["gpt-4o", "gpt-4o-mini"]
@@ -637,11 +626,9 @@ with tab_md_gallery:
     else:
         st.warning("No MD files found.")
-# ----------------- FINAL SIDEBAR UPDATE -----------------
-# Update the asset gallery once (using its container).
 def update_gallery():
     container = st.session_state['asset_gallery_container']
-    container.empty()  # Clear previous gallery content.
     all_files = get_gallery_files()
     if all_files:
         container.markdown("### Asset Gallery 📸📖")
@@ -666,12 +653,10 @@ def update_gallery():
                     os.remove(file)
                     st.session_state['asset_checkboxes'].pop(file, None)
                     st.success(f"Asset {os.path.basename(file)} vaporized! 💨")
-                    st.experimental_rerun()
-# Call the gallery update once after all tabs have been processed.
 update_gallery()
-# Finally, update the Action Logs and History in the sidebar.
 st.sidebar.subheader("Action Logs 📜")
 for record in log_records:
     st.sidebar.write(f"{record.asctime} - {record.levelname} - {record.message}")
@@ -679,4 +664,4 @@ for record in log_records:
 st.sidebar.subheader("History 📜")
 for entry in st.session_state.get("history", []):
     if entry is not None:
-        st.sidebar.write(entry)

 from transformers import AutoModelForCausalLM, AutoTokenizer, AutoModel
 from typing import Optional
+# OpenAI client initialization
+client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'), organization=os.getenv('OPENAI_ORG_ID'))
+# Logging setup
 logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
 logger = logging.getLogger(__name__)
 log_records = []
         log_records.append(record)
 logger.addHandler(LogCaptureHandler())
+# Streamlit configuration
 st.set_page_config(
     page_title="AI Vision & SFT Titans 🚀",
     page_icon="🤖",
     }
 )
+# Session state initialization
+st.session_state.setdefault('history', [])
+st.session_state.setdefault('builder', None)
+st.session_state.setdefault('model_loaded', False)
+st.session_state.setdefault('processing', {})
+st.session_state.setdefault('asset_checkboxes', {})
+st.session_state.setdefault('downloaded_pdfs', {})
+st.session_state.setdefault('unique_counter', 0)
 st.session_state.setdefault('selected_model_type', "Causal LM")
 st.session_state.setdefault('selected_model', "None")
 st.session_state.setdefault('cam0_file', None)
 st.session_state.setdefault('cam1_file', None)
 if 'asset_gallery_container' not in st.session_state:
     st.session_state['asset_gallery_container'] = st.sidebar.empty()
+@dataclass
 class ModelConfig:
     name: str
     base_model: str
     def model_path(self):
         return f"models/{self.name}"
+@dataclass
 class DiffusionConfig:
     name: str
     base_model: str
         ret = False
     return ret
 async def process_pdf_snapshot(pdf_path, mode="single"):
     start_time = time.time()
     status = st.empty()
         status.error(f"Failed to process PDF: {str(e)}")
         return []
+async def process_gpt4o_ocr(image, output_file, max_tokens=4096):
+    """Extract the text in ``image`` with GPT-4o and write it to ``output_file``.
+
+    Args:
+        image: Image object exposing ``save(fp, format=...)``; callers in this
+            file pass PIL images.
+        output_file: Path of the text file that receives the extracted text.
+        max_tokens: Upper bound on the completion length. The earlier
+            hard-coded limit of 300 cut off the text of any dense page.
+
+    Returns:
+        The extracted text, or ``""`` when the request or the file write
+        fails (the error is shown in the Streamlit status placeholder).
+    """
     start_time = time.time()
     status = st.empty()
+    status.text("Processing GPT-4o OCR... (0s)")
+    buffered = BytesIO()
+    image.save(buffered, format="PNG")
+    img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
+    messages = [{
+        "role": "user",
+        "content": [
+            {"type": "text", "text": "Extract the electronic text from this image."},
+            {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{img_str}", "detail": "auto"}}
+        ]
+    }]
+    try:
+        response = client.chat.completions.create(model="gpt-4o", messages=messages, max_tokens=max_tokens)
+        # The message content can be None (e.g. a refusal); normalize it so the
+        # file write below never receives a non-string.
+        result = response.choices[0].message.content or ""
+        elapsed = int(time.time() - start_time)
+        status.text(f"GPT-4o OCR completed in {elapsed}s!")
+        # Explicit UTF-8: OCR output routinely contains non-ASCII characters and
+        # the platform default encoding may not be able to represent them.
+        async with aiofiles.open(output_file, "w", encoding="utf-8") as f:
+            await f.write(result)
+        return result
+    except Exception as e:
+        status.error(f"Failed to process image with GPT-4o: {str(e)}")
+        return ""
 async def process_image_gen(prompt, output_file):
     start_time = time.time()
     status = st.empty()
     gen_image.save(output_file)
     return gen_image
 def process_image_with_prompt(image, prompt, model="gpt-4o-mini", detail="auto"):
     buffered = BytesIO()
     image.save(buffered, format="PNG")
     except Exception as e:
         return f"Error processing image with GPT: {str(e)}"
 def process_text_with_prompt(text, prompt, model="gpt-4o-mini"):
     messages = [{"role": "user", "content": f"{prompt}\n\n{text}"}]
     try:
     except Exception as e:
         return f"Error processing text with GPT: {str(e)}"
 # Sidebar: Gallery Settings
 st.sidebar.subheader("Gallery Settings")
 st.session_state.setdefault('gallery_size', 2)
 st.session_state['gallery_size'] = st.sidebar.slider("Gallery Size", 1, 10, st.session_state['gallery_size'], key="gallery_size_slider")
+# Tabs setup
 tabs = st.tabs([
     "Camera Snap 📷", "Download PDFs 📥", "Test OCR 🔍", "Build Titan 🌱",
     "Test Image Gen 🎨", "PDF Process 📄", "Image Process 🖼️", "MD Gallery 📚"
 ])
 (tab_camera, tab_download, tab_ocr, tab_build, tab_imggen, tab_pdf_process, tab_image_process, tab_md_gallery) = tabs
 with tab_camera:
     st.header("Camera Snap 📷")
     st.subheader("Single Capture")
             st.image(Image.open(filename), caption="Camera 1", use_container_width=True)
             logger.info(f"Saved snapshot from Camera 1: {filename}")
 with tab_download:
     st.header("Download PDFs 📥")
     if st.button("Examples 📚"):
                 for snapshot in snapshots:
                     st.image(Image.open(snapshot), caption=snapshot, use_container_width=True)
                     st.session_state['asset_checkboxes'][snapshot] = True
         else:
             st.warning("No PDFs selected for snapshotting! Check some boxes in the sidebar.")
 with tab_ocr:
     st.header("Test OCR 🔍")
     all_files = get_gallery_files()
     if all_files:
         if st.button("OCR All Assets 🚀"):
+            full_text = "# OCR Results (GPT-4o)\n\n"
             for file in all_files:
                 if file.endswith('.png'):
                     image = Image.open(file)
                     image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
                     doc.close()
                 output_file = generate_filename(f"ocr_{os.path.basename(file)}", "txt")
+                result = asyncio.run(process_gpt4o_ocr(image, output_file))
                 full_text += f"## {os.path.basename(file)}\n\n{result}\n\n"
                 entry = f"OCR Test: {file} -> {output_file}"
                 st.session_state['history'].append(entry)
             if st.button("Run OCR 🚀", key="ocr_run"):
                 output_file = generate_filename("ocr_output", "txt")
                 st.session_state['processing']['ocr'] = True
+                result = asyncio.run(process_gpt4o_ocr(image, output_file))
                 entry = f"OCR Test: {selected_file} -> {output_file}"
                 st.session_state['history'].append(entry)
                 st.text_area("OCR Result", result, height=200, key="ocr_result")
                     pix = doc[i].get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
                     image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
                     output_file = generate_filename(f"ocr_page_{i}", "txt")
+                    result = asyncio.run(process_gpt4o_ocr(image, output_file))
                     full_text += f"## Page {i + 1}\n\n{result}\n\n"
                     entry = f"OCR Test: {selected_file} Page {i + 1} -> {output_file}"
                     st.session_state['history'].append(entry)
     else:
         st.warning("No assets in gallery yet. Use Camera Snap or Download PDFs!")
 with tab_build:
     st.header("Build Titan 🌱")
     model_type = st.selectbox("Model Type", ["Causal LM", "Diffusion"], key="build_type")
         entry = f"Built {model_type} model: {model_name}"
         st.session_state['history'].append(entry)
         st.success(f"Model downloaded and saved to {config.model_path}! 🎉")
+        st.rerun()
 with tab_imggen:
     st.header("Test Image Gen 🎨")
     all_files = get_gallery_files()
     else:
         st.warning("No images or PDFs in gallery yet. Use Camera Snap or Download PDFs!")
 with tab_pdf_process:
     st.header("PDF Process")
     st.subheader("Upload PDFs for GPT-based text extraction")
         st.success(f"PDF processing complete. MD file saved as {output_filename}")
         st.markdown(get_download_link(output_filename, "text/markdown", "Download Processed PDF MD"), unsafe_allow_html=True)
 with tab_image_process:
     st.header("Image Process")
     st.subheader("Upload Images for GPT-based OCR")
         st.success(f"Image processing complete. MD file saved as {output_filename}")
         st.markdown(get_download_link(output_filename, "text/markdown", "Download Processed Image MD"), unsafe_allow_html=True)
 with tab_md_gallery:
     st.header("MD Gallery and GPT Processing")
     gpt_models = ["gpt-4o", "gpt-4o-mini"]
     else:
         st.warning("No MD files found.")
 def update_gallery():
     container = st.session_state['asset_gallery_container']
+    container.empty()
     all_files = get_gallery_files()
     if all_files:
         container.markdown("### Asset Gallery 📸📖")
                     os.remove(file)
                     st.session_state['asset_checkboxes'].pop(file, None)
                     st.success(f"Asset {os.path.basename(file)} vaporized! 💨")
+                    st.rerun()
 update_gallery()
 st.sidebar.subheader("Action Logs 📜")
 for record in log_records:
     st.sidebar.write(f"{record.asctime} - {record.levelname} - {record.message}")
 st.sidebar.subheader("History 📜")
 for entry in st.session_state.get("history", []):
     if entry is not None:
+        st.sidebar.write(entry)