awacke1 committed on
Commit
5c99a8d
·
verified ·
1 Parent(s): 99b2de2

Update backup.03302025.app.py

Browse files
Files changed (1) hide show
  1. backup.03302025.app.py +46 -61
backup.03302025.app.py CHANGED
@@ -25,13 +25,10 @@ from PIL import Image
25
  from transformers import AutoModelForCausalLM, AutoTokenizer, AutoModel
26
  from typing import Optional
27
 
28
- # 🤖 OpenAI wizardry: Summon your API magic!
29
- client = OpenAI(
30
- api_key=os.getenv('OPENAI_API_KEY'),
31
- organization=os.getenv('OPENAI_ORG_ID')
32
- )
33
 
34
- # 📜 Logging activated: Capturing chaos and calm!
35
  logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
36
  logger = logging.getLogger(__name__)
37
  log_records = []
@@ -40,7 +37,7 @@ class LogCaptureHandler(logging.Handler):
40
  log_records.append(record)
41
  logger.addHandler(LogCaptureHandler())
42
 
43
- # 🎨 Streamlit styling: Designing a cosmic interface!
44
  st.set_page_config(
45
  page_title="AI Vision & SFT Titans 🚀",
46
  page_icon="🤖",
@@ -53,24 +50,22 @@ st.set_page_config(
53
  }
54
  )
55
 
56
- # Set up default session state values.
57
- st.session_state.setdefault('history', []) # History: starting fresh if empty!
58
- st.session_state.setdefault('builder', None) # Builder: set up if missing.
59
- st.session_state.setdefault('model_loaded', False) # Model Loaded: not loaded by default.
60
- st.session_state.setdefault('processing', {}) # Processing: initialize as an empty dict.
61
- st.session_state.setdefault('asset_checkboxes', {}) # Asset Checkboxes: default to an empty dict.
62
- st.session_state.setdefault('downloaded_pdfs', {}) # Downloaded PDFs: start with none.
63
- st.session_state.setdefault('unique_counter', 0) # Unique Counter: initialize to zero.
64
  st.session_state.setdefault('selected_model_type', "Causal LM")
65
  st.session_state.setdefault('selected_model', "None")
66
  st.session_state.setdefault('cam0_file', None)
67
  st.session_state.setdefault('cam1_file', None)
68
-
69
- # Create a single container for the asset gallery in the sidebar.
70
  if 'asset_gallery_container' not in st.session_state:
71
  st.session_state['asset_gallery_container'] = st.sidebar.empty()
72
 
73
- @dataclass # ModelConfig: A blueprint for model configurations.
74
  class ModelConfig:
75
  name: str
76
  base_model: str
@@ -81,7 +76,7 @@ class ModelConfig:
81
  def model_path(self):
82
  return f"models/{self.name}"
83
 
84
- @dataclass # DiffusionConfig: Where diffusion magic takes shape.
85
  class DiffusionConfig:
86
  name: str
87
  base_model: str
@@ -178,7 +173,6 @@ def download_pdf(url, output_path):
178
  ret = False
179
  return ret
180
 
181
- # Async PDF Snapshot: Snap your PDF pages without blocking.
182
  async def process_pdf_snapshot(pdf_path, mode="single"):
183
  start_time = time.time()
184
  status = st.empty()
@@ -214,24 +208,32 @@ async def process_pdf_snapshot(pdf_path, mode="single"):
214
  status.error(f"Failed to process PDF: {str(e)}")
215
  return []
216
 
217
- # Async OCR: Convert images to text.
218
- async def process_ocr(image, output_file):
219
  start_time = time.time()
220
  status = st.empty()
221
- status.text("Processing GOT-OCR2_0... (0s)")
222
- tokenizer = AutoTokenizer.from_pretrained("ucaslcl/GOT-OCR2_0", trust_remote_code=True)
223
- model = AutoModel.from_pretrained("ucaslcl/GOT-OCR2_0", trust_remote_code=True, torch_dtype=torch.float32).to("cpu").eval()
224
- temp_file = f"temp_{int(time.time())}.png"
225
- image.save(temp_file)
226
- result = model.chat(tokenizer, temp_file, ocr_type='ocr')
227
- os.remove(temp_file)
228
- elapsed = int(time.time() - start_time)
229
- status.text(f"GOT-OCR2_0 completed in {elapsed}s!")
230
- async with aiofiles.open(output_file, "w") as f:
231
- await f.write(result)
232
- return result
 
 
 
 
 
 
 
 
 
 
233
 
234
- # Async Image Gen: Your image genie.
235
  async def process_image_gen(prompt, output_file):
236
  start_time = time.time()
237
  status = st.empty()
@@ -246,7 +248,6 @@ async def process_image_gen(prompt, output_file):
246
  gen_image.save(output_file)
247
  return gen_image
248
 
249
- # GPT-Image Interpreter: Turning pixels into prose!
250
  def process_image_with_prompt(image, prompt, model="gpt-4o-mini", detail="auto"):
251
  buffered = BytesIO()
252
  image.save(buffered, format="PNG")
@@ -264,7 +265,6 @@ def process_image_with_prompt(image, prompt, model="gpt-4o-mini", detail="auto")
264
  except Exception as e:
265
  return f"Error processing image with GPT: {str(e)}"
266
 
267
- # GPT-Text Alchemist: Merging prompt and text.
268
  def process_text_with_prompt(text, prompt, model="gpt-4o-mini"):
269
  messages = [{"role": "user", "content": f"{prompt}\n\n{text}"}]
270
  try:
@@ -273,21 +273,18 @@ def process_text_with_prompt(text, prompt, model="gpt-4o-mini"):
273
  except Exception as e:
274
  return f"Error processing text with GPT: {str(e)}"
275
 
276
- # ----------------- SIDEBAR UPDATES -----------------
277
-
278
  # Sidebar: Gallery Settings
279
  st.sidebar.subheader("Gallery Settings")
280
  st.session_state.setdefault('gallery_size', 2)
281
  st.session_state['gallery_size'] = st.sidebar.slider("Gallery Size", 1, 10, st.session_state['gallery_size'], key="gallery_size_slider")
282
 
283
- # ----------------- TAB SETUP -----------------
284
  tabs = st.tabs([
285
  "Camera Snap 📷", "Download PDFs 📥", "Test OCR 🔍", "Build Titan 🌱",
286
  "Test Image Gen 🎨", "PDF Process 📄", "Image Process 🖼️", "MD Gallery 📚"
287
  ])
288
  (tab_camera, tab_download, tab_ocr, tab_build, tab_imggen, tab_pdf_process, tab_image_process, tab_md_gallery) = tabs
289
 
290
- # ----------------- TAB: Camera Snap -----------------
291
  with tab_camera:
292
  st.header("Camera Snap 📷")
293
  st.subheader("Single Capture")
@@ -319,7 +316,6 @@ with tab_camera:
319
  st.image(Image.open(filename), caption="Camera 1", use_container_width=True)
320
  logger.info(f"Saved snapshot from Camera 1: {filename}")
321
 
322
- # ----------------- TAB: Download PDFs -----------------
323
  with tab_download:
324
  st.header("Download PDFs 📥")
325
  if st.button("Examples 📚"):
@@ -378,17 +374,15 @@ with tab_download:
378
  for snapshot in snapshots:
379
  st.image(Image.open(snapshot), caption=snapshot, use_container_width=True)
380
  st.session_state['asset_checkboxes'][snapshot] = True
381
- # No update_gallery() call here; will update once later.
382
  else:
383
  st.warning("No PDFs selected for snapshotting! Check some boxes in the sidebar.")
384
 
385
- # ----------------- TAB: Test OCR -----------------
386
  with tab_ocr:
387
  st.header("Test OCR 🔍")
388
  all_files = get_gallery_files()
389
  if all_files:
390
  if st.button("OCR All Assets 🚀"):
391
- full_text = "# OCR Results\n\n"
392
  for file in all_files:
393
  if file.endswith('.png'):
394
  image = Image.open(file)
@@ -398,7 +392,7 @@ with tab_ocr:
398
  image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
399
  doc.close()
400
  output_file = generate_filename(f"ocr_{os.path.basename(file)}", "txt")
401
- result = asyncio.run(process_ocr(image, output_file))
402
  full_text += f"## {os.path.basename(file)}\n\n{result}\n\n"
403
  entry = f"OCR Test: {file} -> {output_file}"
404
  st.session_state['history'].append(entry)
@@ -420,7 +414,7 @@ with tab_ocr:
420
  if st.button("Run OCR 🚀", key="ocr_run"):
421
  output_file = generate_filename("ocr_output", "txt")
422
  st.session_state['processing']['ocr'] = True
423
- result = asyncio.run(process_ocr(image, output_file))
424
  entry = f"OCR Test: {selected_file} -> {output_file}"
425
  st.session_state['history'].append(entry)
426
  st.text_area("OCR Result", result, height=200, key="ocr_result")
@@ -433,7 +427,7 @@ with tab_ocr:
433
  pix = doc[i].get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
434
  image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
435
  output_file = generate_filename(f"ocr_page_{i}", "txt")
436
- result = asyncio.run(process_ocr(image, output_file))
437
  full_text += f"## Page {i + 1}\n\n{result}\n\n"
438
  entry = f"OCR Test: {selected_file} Page {i + 1} -> {output_file}"
439
  st.session_state['history'].append(entry)
@@ -445,7 +439,6 @@ with tab_ocr:
445
  else:
446
  st.warning("No assets in gallery yet. Use Camera Snap or Download PDFs!")
447
 
448
- # ----------------- TAB: Build Titan -----------------
449
  with tab_build:
450
  st.header("Build Titan 🌱")
451
  model_type = st.selectbox("Model Type", ["Causal LM", "Diffusion"], key="build_type")
@@ -470,9 +463,8 @@ with tab_build:
470
  entry = f"Built {model_type} model: {model_name}"
471
  st.session_state['history'].append(entry)
472
  st.success(f"Model downloaded and saved to {config.model_path}! 🎉")
473
- st.experimental_rerun()
474
 
475
- # ----------------- TAB: Test Image Gen -----------------
476
  with tab_imggen:
477
  st.header("Test Image Gen 🎨")
478
  all_files = get_gallery_files()
@@ -500,7 +492,6 @@ with tab_imggen:
500
  else:
501
  st.warning("No images or PDFs in gallery yet. Use Camera Snap or Download PDFs!")
502
 
503
- # ----------------- TAB: PDF Process -----------------
504
  with tab_pdf_process:
505
  st.header("PDF Process")
506
  st.subheader("Upload PDFs for GPT-based text extraction")
@@ -559,7 +550,6 @@ with tab_pdf_process:
559
  st.success(f"PDF processing complete. MD file saved as {output_filename}")
560
  st.markdown(get_download_link(output_filename, "text/markdown", "Download Processed PDF MD"), unsafe_allow_html=True)
561
 
562
- # ----------------- TAB: Image Process -----------------
563
  with tab_image_process:
564
  st.header("Image Process")
565
  st.subheader("Upload Images for GPT-based OCR")
@@ -584,7 +574,6 @@ with tab_image_process:
584
  st.success(f"Image processing complete. MD file saved as {output_filename}")
585
  st.markdown(get_download_link(output_filename, "text/markdown", "Download Processed Image MD"), unsafe_allow_html=True)
586
 
587
- # ----------------- TAB: MD Gallery -----------------
588
  with tab_md_gallery:
589
  st.header("MD Gallery and GPT Processing")
590
  gpt_models = ["gpt-4o", "gpt-4o-mini"]
@@ -637,11 +626,9 @@ with tab_md_gallery:
637
  else:
638
  st.warning("No MD files found.")
639
 
640
- # ----------------- FINAL SIDEBAR UPDATE -----------------
641
- # Update the asset gallery once (using its container).
642
  def update_gallery():
643
  container = st.session_state['asset_gallery_container']
644
- container.empty() # Clear previous gallery content.
645
  all_files = get_gallery_files()
646
  if all_files:
647
  container.markdown("### Asset Gallery 📸📖")
@@ -666,12 +653,10 @@ def update_gallery():
666
  os.remove(file)
667
  st.session_state['asset_checkboxes'].pop(file, None)
668
  st.success(f"Asset {os.path.basename(file)} vaporized! 💨")
669
- st.experimental_rerun()
670
 
671
- # Call the gallery update once after all tabs have been processed.
672
  update_gallery()
673
 
674
- # Finally, update the Action Logs and History in the sidebar.
675
  st.sidebar.subheader("Action Logs 📜")
676
  for record in log_records:
677
  st.sidebar.write(f"{record.asctime} - {record.levelname} - {record.message}")
@@ -679,4 +664,4 @@ for record in log_records:
679
  st.sidebar.subheader("History 📜")
680
  for entry in st.session_state.get("history", []):
681
  if entry is not None:
682
- st.sidebar.write(entry)
 
25
  from transformers import AutoModelForCausalLM, AutoTokenizer, AutoModel
26
  from typing import Optional
27
 
28
+ # OpenAI client initialization
29
+ client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'), organization=os.getenv('OPENAI_ORG_ID'))
 
 
 
30
 
31
+ # Logging setup
32
  logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
33
  logger = logging.getLogger(__name__)
34
  log_records = []
 
37
  log_records.append(record)
38
  logger.addHandler(LogCaptureHandler())
39
 
40
+ # Streamlit configuration
41
  st.set_page_config(
42
  page_title="AI Vision & SFT Titans 🚀",
43
  page_icon="🤖",
 
50
  }
51
  )
52
 
53
+ # Session state initialization
54
+ st.session_state.setdefault('history', [])
55
+ st.session_state.setdefault('builder', None)
56
+ st.session_state.setdefault('model_loaded', False)
57
+ st.session_state.setdefault('processing', {})
58
+ st.session_state.setdefault('asset_checkboxes', {})
59
+ st.session_state.setdefault('downloaded_pdfs', {})
60
+ st.session_state.setdefault('unique_counter', 0)
61
  st.session_state.setdefault('selected_model_type', "Causal LM")
62
  st.session_state.setdefault('selected_model', "None")
63
  st.session_state.setdefault('cam0_file', None)
64
  st.session_state.setdefault('cam1_file', None)
 
 
65
  if 'asset_gallery_container' not in st.session_state:
66
  st.session_state['asset_gallery_container'] = st.sidebar.empty()
67
 
68
+ @dataclass
69
  class ModelConfig:
70
  name: str
71
  base_model: str
 
76
  def model_path(self):
77
  return f"models/{self.name}"
78
 
79
+ @dataclass
80
  class DiffusionConfig:
81
  name: str
82
  base_model: str
 
173
  ret = False
174
  return ret
175
 
 
176
  async def process_pdf_snapshot(pdf_path, mode="single"):
177
  start_time = time.time()
178
  status = st.empty()
 
208
  status.error(f"Failed to process PDF: {str(e)}")
209
  return []
210
 
211
async def process_gpt4o_ocr(image, output_file, *, max_tokens=300):
    """Extract the text of ``image`` with GPT-4o and write it to ``output_file``.

    The image is serialized to PNG, base64-encoded, and sent to the
    module-level OpenAI ``client`` as a data URL. Progress and failures are
    reported through a Streamlit placeholder created with ``st.empty()``.

    Args:
        image: Object exposing ``save(fp, format="PNG")`` (presumably a
            ``PIL.Image.Image`` -- confirm against callers).
        output_file: Path of the text file that receives the extracted text.
        max_tokens: Upper bound on completion tokens. The default of 300
            matches the previous hard-coded value; note that it truncates
            the output for text-dense pages, so callers may raise it.

    Returns:
        The extracted text, or ``""`` when the model returned no content or
        any step failed (the failure is shown via ``status.error``).
    """
    start_time = time.time()
    status = st.empty()
    status.text("Processing GPT-4o OCR... (0s)")
    try:
        # Encoding lives inside the try block so an unsupported image mode
        # is reported like every other failure instead of escaping.
        buffered = BytesIO()
        image.save(buffered, format="PNG")
        img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
        messages = [{
            "role": "user",
            "content": [
                {"type": "text", "text": "Extract the electronic text from this image."},
                {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{img_str}", "detail": "auto"}},
            ],
        }]
        response = client.chat.completions.create(model="gpt-4o", messages=messages, max_tokens=max_tokens)
        # ``message.content`` may be None; normalize it so the write below
        # cannot fail on a non-string and callers always get a str.
        result = response.choices[0].message.content or ""
        elapsed = int(time.time() - start_time)
        status.text(f"GPT-4o OCR completed in {elapsed}s!")
        # Explicit UTF-8: OCR output routinely contains non-ASCII characters
        # that the platform default encoding may be unable to represent.
        async with aiofiles.open(output_file, "w", encoding="utf-8") as f:
            await f.write(result)
        return result
    except Exception as e:
        status.error(f"Failed to process image with GPT-4o: {str(e)}")
        return ""
236
 
 
237
  async def process_image_gen(prompt, output_file):
238
  start_time = time.time()
239
  status = st.empty()
 
248
  gen_image.save(output_file)
249
  return gen_image
250
 
 
251
  def process_image_with_prompt(image, prompt, model="gpt-4o-mini", detail="auto"):
252
  buffered = BytesIO()
253
  image.save(buffered, format="PNG")
 
265
  except Exception as e:
266
  return f"Error processing image with GPT: {str(e)}"
267
 
 
268
  def process_text_with_prompt(text, prompt, model="gpt-4o-mini"):
269
  messages = [{"role": "user", "content": f"{prompt}\n\n{text}"}]
270
  try:
 
273
  except Exception as e:
274
  return f"Error processing text with GPT: {str(e)}"
275
 
 
 
276
  # Sidebar: Gallery Settings
277
  st.sidebar.subheader("Gallery Settings")
278
  st.session_state.setdefault('gallery_size', 2)
279
  st.session_state['gallery_size'] = st.sidebar.slider("Gallery Size", 1, 10, st.session_state['gallery_size'], key="gallery_size_slider")
280
 
281
+ # Tabs setup
282
  tabs = st.tabs([
283
  "Camera Snap 📷", "Download PDFs 📥", "Test OCR 🔍", "Build Titan 🌱",
284
  "Test Image Gen 🎨", "PDF Process 📄", "Image Process 🖼️", "MD Gallery 📚"
285
  ])
286
  (tab_camera, tab_download, tab_ocr, tab_build, tab_imggen, tab_pdf_process, tab_image_process, tab_md_gallery) = tabs
287
 
 
288
  with tab_camera:
289
  st.header("Camera Snap 📷")
290
  st.subheader("Single Capture")
 
316
  st.image(Image.open(filename), caption="Camera 1", use_container_width=True)
317
  logger.info(f"Saved snapshot from Camera 1: {filename}")
318
 
 
319
  with tab_download:
320
  st.header("Download PDFs 📥")
321
  if st.button("Examples 📚"):
 
374
  for snapshot in snapshots:
375
  st.image(Image.open(snapshot), caption=snapshot, use_container_width=True)
376
  st.session_state['asset_checkboxes'][snapshot] = True
 
377
  else:
378
  st.warning("No PDFs selected for snapshotting! Check some boxes in the sidebar.")
379
 
 
380
  with tab_ocr:
381
  st.header("Test OCR 🔍")
382
  all_files = get_gallery_files()
383
  if all_files:
384
  if st.button("OCR All Assets 🚀"):
385
+ full_text = "# OCR Results (GPT-4o)\n\n"
386
  for file in all_files:
387
  if file.endswith('.png'):
388
  image = Image.open(file)
 
392
  image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
393
  doc.close()
394
  output_file = generate_filename(f"ocr_{os.path.basename(file)}", "txt")
395
+ result = asyncio.run(process_gpt4o_ocr(image, output_file))
396
  full_text += f"## {os.path.basename(file)}\n\n{result}\n\n"
397
  entry = f"OCR Test: {file} -> {output_file}"
398
  st.session_state['history'].append(entry)
 
414
  if st.button("Run OCR 🚀", key="ocr_run"):
415
  output_file = generate_filename("ocr_output", "txt")
416
  st.session_state['processing']['ocr'] = True
417
+ result = asyncio.run(process_gpt4o_ocr(image, output_file))
418
  entry = f"OCR Test: {selected_file} -> {output_file}"
419
  st.session_state['history'].append(entry)
420
  st.text_area("OCR Result", result, height=200, key="ocr_result")
 
427
  pix = doc[i].get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
428
  image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
429
  output_file = generate_filename(f"ocr_page_{i}", "txt")
430
+ result = asyncio.run(process_gpt4o_ocr(image, output_file))
431
  full_text += f"## Page {i + 1}\n\n{result}\n\n"
432
  entry = f"OCR Test: {selected_file} Page {i + 1} -> {output_file}"
433
  st.session_state['history'].append(entry)
 
439
  else:
440
  st.warning("No assets in gallery yet. Use Camera Snap or Download PDFs!")
441
 
 
442
  with tab_build:
443
  st.header("Build Titan 🌱")
444
  model_type = st.selectbox("Model Type", ["Causal LM", "Diffusion"], key="build_type")
 
463
  entry = f"Built {model_type} model: {model_name}"
464
  st.session_state['history'].append(entry)
465
  st.success(f"Model downloaded and saved to {config.model_path}! 🎉")
466
+ st.rerun()
467
 
 
468
  with tab_imggen:
469
  st.header("Test Image Gen 🎨")
470
  all_files = get_gallery_files()
 
492
  else:
493
  st.warning("No images or PDFs in gallery yet. Use Camera Snap or Download PDFs!")
494
 
 
495
  with tab_pdf_process:
496
  st.header("PDF Process")
497
  st.subheader("Upload PDFs for GPT-based text extraction")
 
550
  st.success(f"PDF processing complete. MD file saved as {output_filename}")
551
  st.markdown(get_download_link(output_filename, "text/markdown", "Download Processed PDF MD"), unsafe_allow_html=True)
552
 
 
553
  with tab_image_process:
554
  st.header("Image Process")
555
  st.subheader("Upload Images for GPT-based OCR")
 
574
  st.success(f"Image processing complete. MD file saved as {output_filename}")
575
  st.markdown(get_download_link(output_filename, "text/markdown", "Download Processed Image MD"), unsafe_allow_html=True)
576
 
 
577
  with tab_md_gallery:
578
  st.header("MD Gallery and GPT Processing")
579
  gpt_models = ["gpt-4o", "gpt-4o-mini"]
 
626
  else:
627
  st.warning("No MD files found.")
628
 
 
 
629
  def update_gallery():
630
  container = st.session_state['asset_gallery_container']
631
+ container.empty()
632
  all_files = get_gallery_files()
633
  if all_files:
634
  container.markdown("### Asset Gallery 📸📖")
 
653
  os.remove(file)
654
  st.session_state['asset_checkboxes'].pop(file, None)
655
  st.success(f"Asset {os.path.basename(file)} vaporized! 💨")
656
+ st.rerun()
657
 
 
658
  update_gallery()
659
 
 
660
  st.sidebar.subheader("Action Logs 📜")
661
  for record in log_records:
662
  st.sidebar.write(f"{record.asctime} - {record.levelname} - {record.message}")
 
664
  st.sidebar.subheader("History 📜")
665
  for entry in st.session_state.get("history", []):
666
  if entry is not None:
667
+ st.sidebar.write(entry)