awacke1 committed on
Commit
c58195d
·
verified ·
1 Parent(s): f269b29

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +88 -106
app.py CHANGED
@@ -54,7 +54,7 @@ st.set_page_config(
54
 
55
  # Initialize st.session_state
56
  if 'captured_files' not in st.session_state:
57
- st.session_state['captured_files'] = []
58
  if 'builder' not in st.session_state:
59
  st.session_state['builder'] = None
60
  if 'model_loaded' not in st.session_state:
@@ -430,25 +430,29 @@ async def process_pdf_snapshot(pdf_path, mode="thumbnail"):
430
  start_time = time.time()
431
  status = st.empty()
432
  status.text(f"Processing PDF Snapshot ({mode})... (0s)")
433
- images = convert_from_path(pdf_path, dpi=200)
434
- output_files = []
435
- if mode == "thumbnail":
436
- img = images[0].resize((int(images[0].width * 0.5), int(images[0].height * 0.5)), Image.Resampling.LANCZOS)
437
- output_file = generate_filename("thumbnail", "png")
438
- img.save(output_file)
439
- output_files.append(output_file)
440
- elif mode == "twopage":
441
- for i in range(min(2, len(images))):
442
- output_file = generate_filename(f"twopage_{i}", "png")
443
- images[i].save(output_file)
444
  output_files.append(output_file)
445
- elapsed = int(time.time() - start_time)
446
- status.text(f"PDF Snapshot ({mode}) completed in {elapsed}s!")
447
- for file in output_files:
448
- if file not in st.session_state['captured_files']:
449
- st.session_state['captured_files'].append(file)
450
- update_gallery()
451
- return output_files
 
 
 
 
 
 
 
 
452
 
453
  async def process_ocr(image, output_file):
454
  start_time = time.time()
@@ -461,8 +465,8 @@ async def process_ocr(image, output_file):
461
  status.text(f"GOT-OCR2_0 completed in {elapsed}s!")
462
  async with aiofiles.open(output_file, "w") as f:
463
  await f.write(result)
464
- if output_file not in st.session_state['captured_files']:
465
- st.session_state['captured_files'].append(output_file)
466
  update_gallery()
467
  return result
468
 
@@ -475,8 +479,8 @@ async def process_image_gen(prompt, output_file):
475
  elapsed = int(time.time() - start_time)
476
  status.text(f"Image Gen completed in {elapsed}s!")
477
  gen_image.save(output_file)
478
- if output_file not in st.session_state['captured_files']:
479
- st.session_state['captured_files'].append(output_file)
480
  update_gallery()
481
  return gen_image
482
 
@@ -492,8 +496,8 @@ async def process_custom_diffusion(images, output_file, model_name):
492
  elapsed = int(time.time() - start_time)
493
  status.text(f"{model_name} completed in {elapsed}s!")
494
  upscaled_image.save(output_file)
495
- if output_file not in st.session_state['captured_files']:
496
- st.session_state['captured_files'].append(output_file)
497
  update_gallery()
498
  return upscaled_image
499
 
@@ -504,17 +508,18 @@ st.title("AI Vision & SFT Titans 🚀")
504
  st.sidebar.header("Captured Files 📜")
505
  gallery_size = st.sidebar.slider("Gallery Size", 1, 10, 4)
506
  def update_gallery():
507
- media_files = get_gallery_files(["png", "txt"])
508
  if media_files:
509
  cols = st.sidebar.columns(2)
510
  for idx, file in enumerate(media_files[:gallery_size * 2]):
511
- with cols[idx % 2]:
512
- if file.endswith(".png"):
513
- st.image(Image.open(file), caption=file, use_container_width=True)
514
- elif file.endswith(".txt"):
515
- with open(file, "r") as f:
516
- content = f.read()
517
- st.text(content[:50] + "..." if len(content) > 50 else content, help=file)
 
518
  update_gallery()
519
 
520
  st.sidebar.subheader("Model Management 🗂️")
@@ -554,50 +559,25 @@ with tab1:
554
  with cols[0]:
555
  cam0_img = st.camera_input("Take a picture - Cam 0", key="cam0")
556
  if cam0_img:
557
- filename = generate_filename(0)
558
- if filename not in st.session_state['captured_files']:
559
- with open(filename, "wb") as f:
560
- f.write(cam0_img.getvalue())
561
- st.image(Image.open(filename), caption=filename, use_container_width=True)
562
- logger.info(f"Saved snapshot from Camera 0: {filename}")
563
- st.session_state['captured_files'].append(filename)
564
- st.session_state['history'].append(f"Snapshot from Cam 0: {filename}")
565
- update_gallery()
566
  with cols[1]:
567
  cam1_img = st.camera_input("Take a picture - Cam 1", key="cam1")
568
  if cam1_img:
569
- filename = generate_filename(1)
570
- if filename not in st.session_state['captured_files']:
571
- with open(filename, "wb") as f:
572
- f.write(cam1_img.getvalue())
573
- st.image(Image.open(filename), caption=filename, use_container_width=True)
574
- logger.info(f"Saved snapshot from Camera 1: {filename}")
575
- st.session_state['captured_files'].append(filename)
576
- st.session_state['history'].append(f"Snapshot from Cam 1: {filename}")
577
- update_gallery()
578
-
579
- st.subheader("Burst Capture")
580
- slice_count = st.number_input("Number of Frames", min_value=1, max_value=20, value=10, key="burst_count")
581
- if st.button("Start Burst Capture 📸"):
582
- st.session_state['burst_frames'] = []
583
- placeholder = st.empty()
584
- for i in range(slice_count):
585
- with placeholder.container():
586
- st.write(f"Capturing frame {i+1}/{slice_count}...")
587
- img = st.camera_input(f"Frame {i}", key=f"burst_{i}_{time.time()}")
588
- if img:
589
- filename = generate_filename(f"burst_{i}")
590
- if filename not in st.session_state['captured_files']:
591
- with open(filename, "wb") as f:
592
- f.write(img.getvalue())
593
- st.session_state['burst_frames'].append(filename)
594
- logger.info(f"Saved burst frame {i}: {filename}")
595
- st.session_state['history'].append(f"Burst frame {i}: {filename}")
596
- st.image(Image.open(filename), caption=filename, use_container_width=True)
597
- time.sleep(0.5)
598
- st.session_state['captured_files'].extend([f for f in st.session_state['burst_frames'] if f not in st.session_state['captured_files']])
599
- update_gallery()
600
- placeholder.success(f"Captured {len(st.session_state['burst_frames'])} frames!")
601
 
602
  with tab2:
603
  st.header("Download PDFs 📥")
@@ -671,9 +651,9 @@ with tab4:
671
  st.markdown(get_download_link(zip_path, "application/zip", "Download Fine-Tuned Titan"), unsafe_allow_html=True)
672
  st.rerun()
673
  elif isinstance(st.session_state['builder'], DiffusionBuilder):
674
- captured_files = get_gallery_files(["png"])
675
  if len(captured_files) >= 2:
676
- demo_data = [{"image": img, "text": f"Superhero {os.path.basename(img).split('.')[0]}"} for img in captured_files[:min(len(captured_files), slice_count)]]
677
  edited_data = st.data_editor(pd.DataFrame(demo_data), num_rows="dynamic")
678
  if st.button("Fine-Tune with Dataset 🔄"):
679
  images = [Image.open(row["image"]) for _, row in edited_data.iterrows()]
@@ -726,7 +706,7 @@ with tab5:
726
  image = st.session_state['builder'].generate(test_prompt)
727
  output_file = generate_filename("diffusion_test", "png")
728
  image.save(output_file)
729
- st.session_state['captured_files'].append(output_file)
730
  st.session_state['history'].append(f"Diffusion Test: {test_prompt} -> {output_file}")
731
  st.image(image, caption="Generated Image")
732
  update_gallery()
@@ -754,54 +734,56 @@ with tab6:
754
  image = agent.generate(row["Image Idea"])
755
  output_file = generate_filename(f"cv_rag_{row['Theme'].lower()}", "png")
756
  image.save(output_file)
757
- st.session_state['captured_files'].append(output_file)
758
  st.image(image, caption=f"{row['Theme']} - {row['Image Idea']}")
759
  update_gallery()
760
 
761
  with tab7:
762
  st.header("Test OCR 🔍")
763
- captured_files = get_gallery_files(["png"])
764
  if captured_files:
765
- selected_file = st.selectbox("Select Image", captured_files, key="ocr_select")
766
- image = Image.open(selected_file)
767
- st.image(image, caption="Input Image", use_container_width=True)
768
- if st.button("Run OCR 🚀", key="ocr_run"):
769
- output_file = generate_filename("ocr_output", "txt")
770
- st.session_state['processing']['ocr'] = True
771
- result = asyncio.run(process_ocr(image, output_file))
772
- st.session_state['history'].append(f"OCR Test: {selected_file} -> {output_file}")
773
- st.text_area("OCR Result", result, height=200, key="ocr_result")
774
- st.success(f"OCR output saved to {output_file}")
775
- st.session_state['processing']['ocr'] = False
 
776
  else:
777
  st.warning("No images captured yet. Use Camera Snap or Download PDFs first!")
778
 
779
  with tab8:
780
  st.header("Test Image Gen 🎨")
781
- captured_files = get_gallery_files(["png"])
782
  if captured_files:
783
- selected_file = st.selectbox("Select Image", captured_files, key="gen_select")
784
- image = Image.open(selected_file)
785
- st.image(image, caption="Reference Image", use_container_width=True)
786
- prompt = st.text_area("Prompt", "Generate a similar superhero image", key="gen_prompt")
787
- if st.button("Run Image Gen 🚀", key="gen_run"):
788
- output_file = generate_filename("gen_output", "png")
789
- st.session_state['processing']['gen'] = True
790
- result = asyncio.run(process_image_gen(prompt, output_file))
791
- st.session_state['history'].append(f"Image Gen Test: {prompt} -> {output_file}")
792
- st.image(result, caption="Generated Image", use_container_width=True)
793
- st.success(f"Image saved to {output_file}")
794
- st.session_state['processing']['gen'] = False
 
795
  else:
796
  st.warning("No images captured yet. Use Camera Snap or Download PDFs first!")
797
 
798
  with tab9:
799
  st.header("Custom Diffusion 🎨🤓")
800
  st.write("Unleash your inner artist with our tiny diffusion models!")
801
- captured_files = get_gallery_files(["png"])
802
  if captured_files:
803
  st.subheader("Select Images to Train")
804
- selected_files = st.multiselect("Pick Images", captured_files, key="diffusion_select")
805
  images = [Image.open(file) for file in selected_files]
806
 
807
  model_options = [
@@ -822,7 +804,7 @@ with tab9:
822
  builder.load_model(model_name)
823
  result = builder.generate("A superhero scene inspired by captured images")
824
  result.save(output_file)
825
- st.session_state['captured_files'].append(output_file)
826
  st.session_state['history'].append(f"Custom Diffusion: {model_choice} -> {output_file}")
827
  st.image(result, caption=f"{model_choice} Masterpiece", use_container_width=True)
828
  st.success(f"Image saved to {output_file}")
 
54
 
55
  # Initialize st.session_state
56
  if 'captured_files' not in st.session_state:
57
+ st.session_state['captured_files'] = {'cam0': None, 'cam1': None} # Dictionary to store one file per camera
58
  if 'builder' not in st.session_state:
59
  st.session_state['builder'] = None
60
  if 'model_loaded' not in st.session_state:
 
430
  start_time = time.time()
431
  status = st.empty()
432
  status.text(f"Processing PDF Snapshot ({mode})... (0s)")
433
+ try:
434
+ images = convert_from_path(pdf_path, dpi=200)
435
+ output_files = []
436
+ if mode == "thumbnail":
437
+ img = images[0].resize((int(images[0].width * 0.5), int(images[0].height * 0.5)), Image.Resampling.LANCZOS)
438
+ output_file = generate_filename("thumbnail", "png")
439
+ img.save(output_file)
 
 
 
 
440
  output_files.append(output_file)
441
+ elif mode == "twopage":
442
+ for i in range(min(2, len(images))):
443
+ output_file = generate_filename(f"twopage_{i}", "png")
444
+ images[i].save(output_file)
445
+ output_files.append(output_file)
446
+ elapsed = int(time.time() - start_time)
447
+ status.text(f"PDF Snapshot ({mode}) completed in {elapsed}s!")
448
+ for file in output_files:
449
+ if file not in st.session_state['captured_files'].values():
450
+ st.session_state['captured_files'][f"pdf_{len(output_files)}"] = file
451
+ update_gallery()
452
+ return output_files
453
+ except Exception as e:
454
+ status.error(f"Failed to process PDF: {str(e)}. Install poppler-utils (e.g., 'sudo apt-get install poppler-utils' on Ubuntu) and ensure it's in PATH.")
455
+ return []
456
 
457
  async def process_ocr(image, output_file):
458
  start_time = time.time()
 
465
  status.text(f"GOT-OCR2_0 completed in {elapsed}s!")
466
  async with aiofiles.open(output_file, "w") as f:
467
  await f.write(result)
468
+ if output_file not in st.session_state['captured_files'].values():
469
+ st.session_state['captured_files']['ocr'] = output_file
470
  update_gallery()
471
  return result
472
 
 
479
  elapsed = int(time.time() - start_time)
480
  status.text(f"Image Gen completed in {elapsed}s!")
481
  gen_image.save(output_file)
482
+ if output_file not in st.session_state['captured_files'].values():
483
+ st.session_state['captured_files']['gen'] = output_file
484
  update_gallery()
485
  return gen_image
486
 
 
496
  elapsed = int(time.time() - start_time)
497
  status.text(f"{model_name} completed in {elapsed}s!")
498
  upscaled_image.save(output_file)
499
+ if output_file not in st.session_state['captured_files'].values():
500
+ st.session_state['captured_files']['diffusion'] = output_file
501
  update_gallery()
502
  return upscaled_image
503
 
 
508
  st.sidebar.header("Captured Files 📜")
509
  gallery_size = st.sidebar.slider("Gallery Size", 1, 10, 4)
def update_gallery():
    """Render previews of the captured files in the sidebar gallery.

    Reads the file paths stored as values of
    ``st.session_state['captured_files']``, keeps only the entries that are
    set and still exist on disk, and shows up to ``gallery_size * 2`` of them
    across two sidebar columns: ``.png`` files as images and ``.txt`` files as
    a text preview of at most 50 characters. Other file types are ignored.
    """
    # Filter before slicing: unset (None) slots and files that no longer exist
    # must not use up gallery positions or skew the column alternation.
    visible = [
        path
        for path in st.session_state['captured_files'].values()
        if path and os.path.exists(path)
    ][:gallery_size * 2]
    if not visible:
        return

    cols = st.sidebar.columns(2)
    for idx, path in enumerate(visible):
        with cols[idx % 2]:
            if path.endswith(".png"):
                # Release the underlying file handle after the image is shown.
                with Image.open(path) as img:
                    st.image(img, caption=os.path.basename(path), use_container_width=True)
            elif path.endswith(".txt"):
                # Only 50 characters are displayed, so read one extra character
                # to detect truncation instead of loading the whole file.
                # errors="replace" keeps a stray byte from breaking the sidebar.
                with open(path, "r", errors="replace") as f:
                    head = f.read(51)
                st.text(head[:50] + "..." if len(head) > 50 else head)
523
  update_gallery()
524
 
525
  st.sidebar.subheader("Model Management 🗂️")
 
559
  with cols[0]:
560
  cam0_img = st.camera_input("Take a picture - Cam 0", key="cam0")
561
  if cam0_img:
562
+ filename = generate_filename("cam0")
563
+ with open(filename, "wb") as f:
564
+ f.write(cam0_img.getvalue())
565
+ st.session_state['captured_files']['cam0'] = filename
566
+ st.image(Image.open(filename), caption="Camera 0", use_container_width=True)
567
+ logger.info(f"Saved snapshot from Camera 0: {filename}")
568
+ st.session_state['history'].append(f"Snapshot from Cam 0: {filename}")
569
+ update_gallery()
 
570
  with cols[1]:
571
  cam1_img = st.camera_input("Take a picture - Cam 1", key="cam1")
572
  if cam1_img:
573
+ filename = generate_filename("cam1")
574
+ with open(filename, "wb") as f:
575
+ f.write(cam1_img.getvalue())
576
+ st.session_state['captured_files']['cam1'] = filename
577
+ st.image(Image.open(filename), caption="Camera 1", use_container_width=True)
578
+ logger.info(f"Saved snapshot from Camera 1: {filename}")
579
+ st.session_state['history'].append(f"Snapshot from Cam 1: {filename}")
580
+ update_gallery()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
581
 
582
  with tab2:
583
  st.header("Download PDFs 📥")
 
651
  st.markdown(get_download_link(zip_path, "application/zip", "Download Fine-Tuned Titan"), unsafe_allow_html=True)
652
  st.rerun()
653
  elif isinstance(st.session_state['builder'], DiffusionBuilder):
654
+ captured_files = list(st.session_state['captured_files'].values())
655
  if len(captured_files) >= 2:
656
+ demo_data = [{"image": img, "text": f"Superhero {os.path.basename(img).split('.')[0]}"} for img in captured_files if img]
657
  edited_data = st.data_editor(pd.DataFrame(demo_data), num_rows="dynamic")
658
  if st.button("Fine-Tune with Dataset 🔄"):
659
  images = [Image.open(row["image"]) for _, row in edited_data.iterrows()]
 
706
  image = st.session_state['builder'].generate(test_prompt)
707
  output_file = generate_filename("diffusion_test", "png")
708
  image.save(output_file)
709
+ st.session_state['captured_files']['diffusion_test'] = output_file
710
  st.session_state['history'].append(f"Diffusion Test: {test_prompt} -> {output_file}")
711
  st.image(image, caption="Generated Image")
712
  update_gallery()
 
734
  image = agent.generate(row["Image Idea"])
735
  output_file = generate_filename(f"cv_rag_{row['Theme'].lower()}", "png")
736
  image.save(output_file)
737
+ st.session_state['captured_files'][f"cv_rag_{row['Theme'].lower()}"] = output_file
738
  st.image(image, caption=f"{row['Theme']} - {row['Image Idea']}")
739
  update_gallery()
740
 
741
  with tab7:
742
  st.header("Test OCR 🔍")
743
+ captured_files = list(st.session_state['captured_files'].values())
744
  if captured_files:
745
+ selected_file = st.selectbox("Select Image", [f for f in captured_files if f and f.endswith(".png")], key="ocr_select")
746
+ if selected_file:
747
+ image = Image.open(selected_file)
748
+ st.image(image, caption="Input Image", use_container_width=True)
749
+ if st.button("Run OCR 🚀", key="ocr_run"):
750
+ output_file = generate_filename("ocr_output", "txt")
751
+ st.session_state['processing']['ocr'] = True
752
+ result = asyncio.run(process_ocr(image, output_file))
753
+ st.session_state['history'].append(f"OCR Test: {selected_file} -> {output_file}")
754
+ st.text_area("OCR Result", result, height=200, key="ocr_result")
755
+ st.success(f"OCR output saved to {output_file}")
756
+ st.session_state['processing']['ocr'] = False
757
  else:
758
  st.warning("No images captured yet. Use Camera Snap or Download PDFs first!")
759
 
760
  with tab8:
761
  st.header("Test Image Gen 🎨")
762
+ captured_files = list(st.session_state['captured_files'].values())
763
  if captured_files:
764
+ selected_file = st.selectbox("Select Image", [f for f in captured_files if f and f.endswith(".png")], key="gen_select")
765
+ if selected_file:
766
+ image = Image.open(selected_file)
767
+ st.image(image, caption="Reference Image", use_container_width=True)
768
+ prompt = st.text_area("Prompt", "Generate a similar superhero image", key="gen_prompt")
769
+ if st.button("Run Image Gen 🚀", key="gen_run"):
770
+ output_file = generate_filename("gen_output", "png")
771
+ st.session_state['processing']['gen'] = True
772
+ result = asyncio.run(process_image_gen(prompt, output_file))
773
+ st.session_state['history'].append(f"Image Gen Test: {prompt} -> {output_file}")
774
+ st.image(result, caption="Generated Image", use_container_width=True)
775
+ st.success(f"Image saved to {output_file}")
776
+ st.session_state['processing']['gen'] = False
777
  else:
778
  st.warning("No images captured yet. Use Camera Snap or Download PDFs first!")
779
 
780
  with tab9:
781
  st.header("Custom Diffusion 🎨🤓")
782
  st.write("Unleash your inner artist with our tiny diffusion models!")
783
+ captured_files = list(st.session_state['captured_files'].values())
784
  if captured_files:
785
  st.subheader("Select Images to Train")
786
+ selected_files = st.multiselect("Pick Images", [f for f in captured_files if f and f.endswith(".png")], key="diffusion_select")
787
  images = [Image.open(file) for file in selected_files]
788
 
789
  model_options = [
 
804
  builder.load_model(model_name)
805
  result = builder.generate("A superhero scene inspired by captured images")
806
  result.save(output_file)
807
+ st.session_state['captured_files']['diffusion'] = output_file
808
  st.session_state['history'].append(f"Custom Diffusion: {model_choice} -> {output_file}")
809
  st.image(result, caption=f"{model_choice} Masterpiece", use_container_width=True)
810
  st.success(f"Image saved to {output_file}")