awacke1 committed on
Commit
794b68e
·
verified ·
1 Parent(s): c252fa6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -18
app.py CHANGED
@@ -597,16 +597,22 @@ with tab_ocr:
597
  st.header("Test OCR 🔍")
598
  all_files = get_gallery_files()
599
  if all_files:
 
 
600
  if st.button("OCR All Assets 🚀"):
601
  full_text = "# OCR Results\n\n"
602
- for file in all_files:
603
  if file.endswith('.png'):
604
  image = Image.open(file)
605
- else:
606
- doc = fitz.open(file)
607
- pix = doc[0].get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
608
- image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
609
- doc.close()
 
 
 
 
610
  output_file = generate_filename(f"ocr_{os.path.basename(file)}", "txt")
611
  result = asyncio.run(process_ocr(image, output_file))
612
  full_text += f"## {os.path.basename(file)}\n\n{result}\n\n"
@@ -616,21 +622,28 @@ with tab_ocr:
616
  f.write(full_text)
617
  st.success(f"Full OCR saved to {md_output_file}")
618
  st.markdown(get_download_link(md_output_file, "text/markdown", "Download Full OCR Markdown"), unsafe_allow_html=True)
619
- selected_file = st.selectbox("Select Image or PDF", all_files, key="ocr_select")
620
  if selected_file:
621
  if selected_file.endswith('.png'):
622
  image = Image.open(selected_file)
623
- else:
624
- doc = fitz.open(selected_file)
625
- pix = doc[0].get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
626
- image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
627
- doc.close()
628
- st.image(image, caption="Input Image", use_container_width=True)
629
- if st.button("Run OCR 🚀", key="ocr_run"):
630
- output_file = generate_filename("ocr_output", "txt")
631
- result = asyncio.run(process_ocr(image, output_file))
632
- st.text_area("OCR Result", result, height=200)
633
- st.session_state['history'].append(f"OCR Test: {selected_file} -> {output_file}")
 
 
 
 
 
 
 
634
 
635
  with tab_build:
636
  st.header("Build Titan 🌱")
 
597
  st.header("Test OCR 🔍")
598
  all_files = get_gallery_files()
599
  if all_files:
600
+ # Filter for only PNG and PDF files
601
+ ocr_files = [f for f in all_files if f.endswith(('.png', '.pdf'))]
602
  if st.button("OCR All Assets 🚀"):
603
  full_text = "# OCR Results\n\n"
604
+ for file in ocr_files:
605
  if file.endswith('.png'):
606
  image = Image.open(file)
607
+ else: # PDF
608
+ try:
609
+ doc = fitz.open(file)
610
+ pix = doc[0].get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
611
+ image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
612
+ doc.close()
613
+ except Exception as e:
614
+ st.error(f"Failed to process {file}: {str(e)}")
615
+ continue
616
  output_file = generate_filename(f"ocr_{os.path.basename(file)}", "txt")
617
  result = asyncio.run(process_ocr(image, output_file))
618
  full_text += f"## {os.path.basename(file)}\n\n{result}\n\n"
 
622
  f.write(full_text)
623
  st.success(f"Full OCR saved to {md_output_file}")
624
  st.markdown(get_download_link(md_output_file, "text/markdown", "Download Full OCR Markdown"), unsafe_allow_html=True)
625
+ selected_file = st.selectbox("Select Image or PDF", ocr_files, key="ocr_select")
626
  if selected_file:
627
  if selected_file.endswith('.png'):
628
  image = Image.open(selected_file)
629
+ else: # PDF
630
+ try:
631
+ doc = fitz.open(selected_file)
632
+ pix = doc[0].get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
633
+ image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
634
+ doc.close()
635
+ except Exception as e:
636
+ st.error(f"Cannot process {selected_file}: {str(e)}. Please select a PNG or PDF file.")
637
+ image = None
638
+ if image:
639
+ st.image(image, caption="Input Image", use_container_width=True)
640
+ if st.button("Run OCR 🚀", key="ocr_run"):
641
+ output_file = generate_filename("ocr_output", "txt")
642
+ result = asyncio.run(process_ocr(image, output_file))
643
+ st.text_area("OCR Result", result, height=200)
644
+ st.session_state['history'].append(f"OCR Test: {selected_file} -> {output_file}")
645
+ else:
646
+ st.warning("No assets in gallery yet. Use Camera Snap or Download PDFs!")
647
 
648
  with tab_build:
649
  st.header("Build Titan 🌱")