Update app.py
Browse files
app.py
CHANGED
@@ -54,7 +54,7 @@ st.set_page_config(
|
|
54 |
|
55 |
# Initialize st.session_state
|
56 |
if 'captured_files' not in st.session_state:
|
57 |
-
st.session_state['captured_files'] =
|
58 |
if 'builder' not in st.session_state:
|
59 |
st.session_state['builder'] = None
|
60 |
if 'model_loaded' not in st.session_state:
|
@@ -430,25 +430,29 @@ async def process_pdf_snapshot(pdf_path, mode="thumbnail"):
|
|
430 |
start_time = time.time()
|
431 |
status = st.empty()
|
432 |
status.text(f"Processing PDF Snapshot ({mode})... (0s)")
|
433 |
-
|
434 |
-
|
435 |
-
|
436 |
-
|
437 |
-
|
438 |
-
|
439 |
-
|
440 |
-
elif mode == "twopage":
|
441 |
-
for i in range(min(2, len(images))):
|
442 |
-
output_file = generate_filename(f"twopage_{i}", "png")
|
443 |
-
images[i].save(output_file)
|
444 |
output_files.append(output_file)
|
445 |
-
|
446 |
-
|
447 |
-
|
448 |
-
|
449 |
-
|
450 |
-
|
451 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
452 |
|
453 |
async def process_ocr(image, output_file):
|
454 |
start_time = time.time()
|
@@ -461,8 +465,8 @@ async def process_ocr(image, output_file):
|
|
461 |
status.text(f"GOT-OCR2_0 completed in {elapsed}s!")
|
462 |
async with aiofiles.open(output_file, "w") as f:
|
463 |
await f.write(result)
|
464 |
-
if output_file not in st.session_state['captured_files']:
|
465 |
-
st.session_state['captured_files']
|
466 |
update_gallery()
|
467 |
return result
|
468 |
|
@@ -475,8 +479,8 @@ async def process_image_gen(prompt, output_file):
|
|
475 |
elapsed = int(time.time() - start_time)
|
476 |
status.text(f"Image Gen completed in {elapsed}s!")
|
477 |
gen_image.save(output_file)
|
478 |
-
if output_file not in st.session_state['captured_files']:
|
479 |
-
st.session_state['captured_files']
|
480 |
update_gallery()
|
481 |
return gen_image
|
482 |
|
@@ -492,8 +496,8 @@ async def process_custom_diffusion(images, output_file, model_name):
|
|
492 |
elapsed = int(time.time() - start_time)
|
493 |
status.text(f"{model_name} completed in {elapsed}s!")
|
494 |
upscaled_image.save(output_file)
|
495 |
-
if output_file not in st.session_state['captured_files']:
|
496 |
-
st.session_state['captured_files']
|
497 |
update_gallery()
|
498 |
return upscaled_image
|
499 |
|
@@ -504,17 +508,18 @@ st.title("AI Vision & SFT Titans 🚀")
|
|
504 |
st.sidebar.header("Captured Files 📜")
|
505 |
gallery_size = st.sidebar.slider("Gallery Size", 1, 10, 4)
|
506 |
def update_gallery():
|
507 |
-
media_files =
|
508 |
if media_files:
|
509 |
cols = st.sidebar.columns(2)
|
510 |
for idx, file in enumerate(media_files[:gallery_size * 2]):
|
511 |
-
|
512 |
-
|
513 |
-
|
514 |
-
|
515 |
-
|
516 |
-
|
517 |
-
|
|
|
518 |
update_gallery()
|
519 |
|
520 |
st.sidebar.subheader("Model Management 🗂️")
|
@@ -554,50 +559,25 @@ with tab1:
|
|
554 |
with cols[0]:
|
555 |
cam0_img = st.camera_input("Take a picture - Cam 0", key="cam0")
|
556 |
if cam0_img:
|
557 |
-
filename = generate_filename(
|
558 |
-
|
559 |
-
|
560 |
-
|
561 |
-
|
562 |
-
|
563 |
-
|
564 |
-
|
565 |
-
update_gallery()
|
566 |
with cols[1]:
|
567 |
cam1_img = st.camera_input("Take a picture - Cam 1", key="cam1")
|
568 |
if cam1_img:
|
569 |
-
filename = generate_filename(
|
570 |
-
|
571 |
-
|
572 |
-
|
573 |
-
|
574 |
-
|
575 |
-
|
576 |
-
|
577 |
-
update_gallery()
|
578 |
-
|
579 |
-
st.subheader("Burst Capture")
|
580 |
-
slice_count = st.number_input("Number of Frames", min_value=1, max_value=20, value=10, key="burst_count")
|
581 |
-
if st.button("Start Burst Capture 📸"):
|
582 |
-
st.session_state['burst_frames'] = []
|
583 |
-
placeholder = st.empty()
|
584 |
-
for i in range(slice_count):
|
585 |
-
with placeholder.container():
|
586 |
-
st.write(f"Capturing frame {i+1}/{slice_count}...")
|
587 |
-
img = st.camera_input(f"Frame {i}", key=f"burst_{i}_{time.time()}")
|
588 |
-
if img:
|
589 |
-
filename = generate_filename(f"burst_{i}")
|
590 |
-
if filename not in st.session_state['captured_files']:
|
591 |
-
with open(filename, "wb") as f:
|
592 |
-
f.write(img.getvalue())
|
593 |
-
st.session_state['burst_frames'].append(filename)
|
594 |
-
logger.info(f"Saved burst frame {i}: {filename}")
|
595 |
-
st.session_state['history'].append(f"Burst frame {i}: {filename}")
|
596 |
-
st.image(Image.open(filename), caption=filename, use_container_width=True)
|
597 |
-
time.sleep(0.5)
|
598 |
-
st.session_state['captured_files'].extend([f for f in st.session_state['burst_frames'] if f not in st.session_state['captured_files']])
|
599 |
-
update_gallery()
|
600 |
-
placeholder.success(f"Captured {len(st.session_state['burst_frames'])} frames!")
|
601 |
|
602 |
with tab2:
|
603 |
st.header("Download PDFs 📥")
|
@@ -671,9 +651,9 @@ with tab4:
|
|
671 |
st.markdown(get_download_link(zip_path, "application/zip", "Download Fine-Tuned Titan"), unsafe_allow_html=True)
|
672 |
st.rerun()
|
673 |
elif isinstance(st.session_state['builder'], DiffusionBuilder):
|
674 |
-
captured_files =
|
675 |
if len(captured_files) >= 2:
|
676 |
-
demo_data = [{"image": img, "text": f"Superhero {os.path.basename(img).split('.')[0]}"} for img in captured_files
|
677 |
edited_data = st.data_editor(pd.DataFrame(demo_data), num_rows="dynamic")
|
678 |
if st.button("Fine-Tune with Dataset 🔄"):
|
679 |
images = [Image.open(row["image"]) for _, row in edited_data.iterrows()]
|
@@ -726,7 +706,7 @@ with tab5:
|
|
726 |
image = st.session_state['builder'].generate(test_prompt)
|
727 |
output_file = generate_filename("diffusion_test", "png")
|
728 |
image.save(output_file)
|
729 |
-
st.session_state['captured_files']
|
730 |
st.session_state['history'].append(f"Diffusion Test: {test_prompt} -> {output_file}")
|
731 |
st.image(image, caption="Generated Image")
|
732 |
update_gallery()
|
@@ -754,54 +734,56 @@ with tab6:
|
|
754 |
image = agent.generate(row["Image Idea"])
|
755 |
output_file = generate_filename(f"cv_rag_{row['Theme'].lower()}", "png")
|
756 |
image.save(output_file)
|
757 |
-
st.session_state['captured_files'].
|
758 |
st.image(image, caption=f"{row['Theme']} - {row['Image Idea']}")
|
759 |
update_gallery()
|
760 |
|
761 |
with tab7:
|
762 |
st.header("Test OCR 🔍")
|
763 |
-
captured_files =
|
764 |
if captured_files:
|
765 |
-
selected_file = st.selectbox("Select Image", captured_files, key="ocr_select")
|
766 |
-
|
767 |
-
|
768 |
-
|
769 |
-
|
770 |
-
|
771 |
-
|
772 |
-
|
773 |
-
|
774 |
-
|
775 |
-
|
|
|
776 |
else:
|
777 |
st.warning("No images captured yet. Use Camera Snap or Download PDFs first!")
|
778 |
|
779 |
with tab8:
|
780 |
st.header("Test Image Gen 🎨")
|
781 |
-
captured_files =
|
782 |
if captured_files:
|
783 |
-
selected_file = st.selectbox("Select Image", captured_files, key="gen_select")
|
784 |
-
|
785 |
-
|
786 |
-
|
787 |
-
|
788 |
-
|
789 |
-
|
790 |
-
|
791 |
-
|
792 |
-
|
793 |
-
|
794 |
-
|
|
|
795 |
else:
|
796 |
st.warning("No images captured yet. Use Camera Snap or Download PDFs first!")
|
797 |
|
798 |
with tab9:
|
799 |
st.header("Custom Diffusion 🎨🤓")
|
800 |
st.write("Unleash your inner artist with our tiny diffusion models!")
|
801 |
-
captured_files =
|
802 |
if captured_files:
|
803 |
st.subheader("Select Images to Train")
|
804 |
-
selected_files = st.multiselect("Pick Images", captured_files, key="diffusion_select")
|
805 |
images = [Image.open(file) for file in selected_files]
|
806 |
|
807 |
model_options = [
|
@@ -822,7 +804,7 @@ with tab9:
|
|
822 |
builder.load_model(model_name)
|
823 |
result = builder.generate("A superhero scene inspired by captured images")
|
824 |
result.save(output_file)
|
825 |
-
st.session_state['captured_files']
|
826 |
st.session_state['history'].append(f"Custom Diffusion: {model_choice} -> {output_file}")
|
827 |
st.image(result, caption=f"{model_choice} Masterpiece", use_container_width=True)
|
828 |
st.success(f"Image saved to {output_file}")
|
|
|
54 |
|
55 |
# Initialize st.session_state
|
56 |
if 'captured_files' not in st.session_state:
|
57 |
+
st.session_state['captured_files'] = {'cam0': None, 'cam1': None} # Dictionary to store one file per camera
|
58 |
if 'builder' not in st.session_state:
|
59 |
st.session_state['builder'] = None
|
60 |
if 'model_loaded' not in st.session_state:
|
|
|
430 |
start_time = time.time()
|
431 |
status = st.empty()
|
432 |
status.text(f"Processing PDF Snapshot ({mode})... (0s)")
|
433 |
+
try:
|
434 |
+
images = convert_from_path(pdf_path, dpi=200)
|
435 |
+
output_files = []
|
436 |
+
if mode == "thumbnail":
|
437 |
+
img = images[0].resize((int(images[0].width * 0.5), int(images[0].height * 0.5)), Image.Resampling.LANCZOS)
|
438 |
+
output_file = generate_filename("thumbnail", "png")
|
439 |
+
img.save(output_file)
|
|
|
|
|
|
|
|
|
440 |
output_files.append(output_file)
|
441 |
+
elif mode == "twopage":
|
442 |
+
for i in range(min(2, len(images))):
|
443 |
+
output_file = generate_filename(f"twopage_{i}", "png")
|
444 |
+
images[i].save(output_file)
|
445 |
+
output_files.append(output_file)
|
446 |
+
elapsed = int(time.time() - start_time)
|
447 |
+
status.text(f"PDF Snapshot ({mode}) completed in {elapsed}s!")
|
448 |
+
for file in output_files:
|
449 |
+
if file not in st.session_state['captured_files'].values():
|
450 |
+
st.session_state['captured_files'][f"pdf_{len(output_files)}"] = file
|
451 |
+
update_gallery()
|
452 |
+
return output_files
|
453 |
+
except Exception as e:
|
454 |
+
status.error(f"Failed to process PDF: {str(e)}. Install poppler-utils (e.g., 'sudo apt-get install poppler-utils' on Ubuntu) and ensure it's in PATH.")
|
455 |
+
return []
|
456 |
|
457 |
async def process_ocr(image, output_file):
|
458 |
start_time = time.time()
|
|
|
465 |
status.text(f"GOT-OCR2_0 completed in {elapsed}s!")
|
466 |
async with aiofiles.open(output_file, "w") as f:
|
467 |
await f.write(result)
|
468 |
+
if output_file not in st.session_state['captured_files'].values():
|
469 |
+
st.session_state['captured_files']['ocr'] = output_file
|
470 |
update_gallery()
|
471 |
return result
|
472 |
|
|
|
479 |
elapsed = int(time.time() - start_time)
|
480 |
status.text(f"Image Gen completed in {elapsed}s!")
|
481 |
gen_image.save(output_file)
|
482 |
+
if output_file not in st.session_state['captured_files'].values():
|
483 |
+
st.session_state['captured_files']['gen'] = output_file
|
484 |
update_gallery()
|
485 |
return gen_image
|
486 |
|
|
|
496 |
elapsed = int(time.time() - start_time)
|
497 |
status.text(f"{model_name} completed in {elapsed}s!")
|
498 |
upscaled_image.save(output_file)
|
499 |
+
if output_file not in st.session_state['captured_files'].values():
|
500 |
+
st.session_state['captured_files']['diffusion'] = output_file
|
501 |
update_gallery()
|
502 |
return upscaled_image
|
503 |
|
|
|
508 |
st.sidebar.header("Captured Files 📜")
|
509 |
gallery_size = st.sidebar.slider("Gallery Size", 1, 10, 4)
|
510 |
def update_gallery():
    """Render captured files as a two-column gallery in the sidebar.

    Reads the values of ``st.session_state['captured_files']`` (a dict whose
    values are file paths or ``None``) and shows at most ``gallery_size * 2``
    of them, alternating between two sidebar columns. ``.png`` files are
    rendered as images captioned with their base name; ``.txt`` files are
    rendered as a text preview of at most 50 characters followed by "...".
    Files with any other extension are skipped silently.
    """
    # Drop unset slots (values that are still None) and paths that no longer
    # exist on disk *before* slicing, so placeholders neither consume gallery
    # slots nor disturb the left/right column alternation.
    media_files = [
        path
        for path in st.session_state['captured_files'].values()
        if path and os.path.exists(path)
    ]
    if not media_files:
        # Nothing displayable: avoid creating empty sidebar columns.
        return

    cols = st.sidebar.columns(2)
    for idx, file in enumerate(media_files[:gallery_size * 2]):
        with cols[idx % 2]:
            if file.endswith(".png"):
                st.image(Image.open(file), caption=os.path.basename(file), use_container_width=True)
            elif file.endswith(".txt"):
                with open(file, "r") as f:
                    # Read one character past the preview length: enough to
                    # know whether the text must be truncated, without
                    # loading the whole file.
                    content = f.read(51)
                st.text(content[:50] + "..." if len(content) > 50 else content)
|
523 |
update_gallery()
|
524 |
|
525 |
st.sidebar.subheader("Model Management 🗂️")
|
|
|
559 |
with cols[0]:
|
560 |
cam0_img = st.camera_input("Take a picture - Cam 0", key="cam0")
|
561 |
if cam0_img:
|
562 |
+
filename = generate_filename("cam0")
|
563 |
+
with open(filename, "wb") as f:
|
564 |
+
f.write(cam0_img.getvalue())
|
565 |
+
st.session_state['captured_files']['cam0'] = filename
|
566 |
+
st.image(Image.open(filename), caption="Camera 0", use_container_width=True)
|
567 |
+
logger.info(f"Saved snapshot from Camera 0: {filename}")
|
568 |
+
st.session_state['history'].append(f"Snapshot from Cam 0: {filename}")
|
569 |
+
update_gallery()
|
|
|
570 |
with cols[1]:
|
571 |
cam1_img = st.camera_input("Take a picture - Cam 1", key="cam1")
|
572 |
if cam1_img:
|
573 |
+
filename = generate_filename("cam1")
|
574 |
+
with open(filename, "wb") as f:
|
575 |
+
f.write(cam1_img.getvalue())
|
576 |
+
st.session_state['captured_files']['cam1'] = filename
|
577 |
+
st.image(Image.open(filename), caption="Camera 1", use_container_width=True)
|
578 |
+
logger.info(f"Saved snapshot from Camera 1: {filename}")
|
579 |
+
st.session_state['history'].append(f"Snapshot from Cam 1: {filename}")
|
580 |
+
update_gallery()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
581 |
|
582 |
with tab2:
|
583 |
st.header("Download PDFs 📥")
|
|
|
651 |
st.markdown(get_download_link(zip_path, "application/zip", "Download Fine-Tuned Titan"), unsafe_allow_html=True)
|
652 |
st.rerun()
|
653 |
elif isinstance(st.session_state['builder'], DiffusionBuilder):
|
654 |
+
captured_files = list(st.session_state['captured_files'].values())
|
655 |
if len(captured_files) >= 2:
|
656 |
+
demo_data = [{"image": img, "text": f"Superhero {os.path.basename(img).split('.')[0]}"} for img in captured_files if img]
|
657 |
edited_data = st.data_editor(pd.DataFrame(demo_data), num_rows="dynamic")
|
658 |
if st.button("Fine-Tune with Dataset 🔄"):
|
659 |
images = [Image.open(row["image"]) for _, row in edited_data.iterrows()]
|
|
|
706 |
image = st.session_state['builder'].generate(test_prompt)
|
707 |
output_file = generate_filename("diffusion_test", "png")
|
708 |
image.save(output_file)
|
709 |
+
st.session_state['captured_files']['diffusion_test'] = output_file
|
710 |
st.session_state['history'].append(f"Diffusion Test: {test_prompt} -> {output_file}")
|
711 |
st.image(image, caption="Generated Image")
|
712 |
update_gallery()
|
|
|
734 |
image = agent.generate(row["Image Idea"])
|
735 |
output_file = generate_filename(f"cv_rag_{row['Theme'].lower()}", "png")
|
736 |
image.save(output_file)
|
737 |
+
st.session_state['captured_files'][f"cv_rag_{row['Theme'].lower()}"] = output_file
|
738 |
st.image(image, caption=f"{row['Theme']} - {row['Image Idea']}")
|
739 |
update_gallery()
|
740 |
|
741 |
with tab7:
|
742 |
st.header("Test OCR 🔍")
|
743 |
+
captured_files = list(st.session_state['captured_files'].values())
|
744 |
if captured_files:
|
745 |
+
selected_file = st.selectbox("Select Image", [f for f in captured_files if f and f.endswith(".png")], key="ocr_select")
|
746 |
+
if selected_file:
|
747 |
+
image = Image.open(selected_file)
|
748 |
+
st.image(image, caption="Input Image", use_container_width=True)
|
749 |
+
if st.button("Run OCR 🚀", key="ocr_run"):
|
750 |
+
output_file = generate_filename("ocr_output", "txt")
|
751 |
+
st.session_state['processing']['ocr'] = True
|
752 |
+
result = asyncio.run(process_ocr(image, output_file))
|
753 |
+
st.session_state['history'].append(f"OCR Test: {selected_file} -> {output_file}")
|
754 |
+
st.text_area("OCR Result", result, height=200, key="ocr_result")
|
755 |
+
st.success(f"OCR output saved to {output_file}")
|
756 |
+
st.session_state['processing']['ocr'] = False
|
757 |
else:
|
758 |
st.warning("No images captured yet. Use Camera Snap or Download PDFs first!")
|
759 |
|
760 |
with tab8:
|
761 |
st.header("Test Image Gen 🎨")
|
762 |
+
captured_files = list(st.session_state['captured_files'].values())
|
763 |
if captured_files:
|
764 |
+
selected_file = st.selectbox("Select Image", [f for f in captured_files if f and f.endswith(".png")], key="gen_select")
|
765 |
+
if selected_file:
|
766 |
+
image = Image.open(selected_file)
|
767 |
+
st.image(image, caption="Reference Image", use_container_width=True)
|
768 |
+
prompt = st.text_area("Prompt", "Generate a similar superhero image", key="gen_prompt")
|
769 |
+
if st.button("Run Image Gen 🚀", key="gen_run"):
|
770 |
+
output_file = generate_filename("gen_output", "png")
|
771 |
+
st.session_state['processing']['gen'] = True
|
772 |
+
result = asyncio.run(process_image_gen(prompt, output_file))
|
773 |
+
st.session_state['history'].append(f"Image Gen Test: {prompt} -> {output_file}")
|
774 |
+
st.image(result, caption="Generated Image", use_container_width=True)
|
775 |
+
st.success(f"Image saved to {output_file}")
|
776 |
+
st.session_state['processing']['gen'] = False
|
777 |
else:
|
778 |
st.warning("No images captured yet. Use Camera Snap or Download PDFs first!")
|
779 |
|
780 |
with tab9:
|
781 |
st.header("Custom Diffusion 🎨🤓")
|
782 |
st.write("Unleash your inner artist with our tiny diffusion models!")
|
783 |
+
captured_files = list(st.session_state['captured_files'].values())
|
784 |
if captured_files:
|
785 |
st.subheader("Select Images to Train")
|
786 |
+
selected_files = st.multiselect("Pick Images", [f for f in captured_files if f and f.endswith(".png")], key="diffusion_select")
|
787 |
images = [Image.open(file) for file in selected_files]
|
788 |
|
789 |
model_options = [
|
|
|
804 |
builder.load_model(model_name)
|
805 |
result = builder.generate("A superhero scene inspired by captured images")
|
806 |
result.save(output_file)
|
807 |
+
st.session_state['captured_files']['diffusion'] = output_file
|
808 |
st.session_state['history'].append(f"Custom Diffusion: {model_choice} -> {output_file}")
|
809 |
st.image(result, caption=f"{model_choice} Masterpiece", use_container_width=True)
|
810 |
st.success(f"Image saved to {output_file}")
|