Spaces:

srimanth-d
/

OCR_app

Sleeping

App Files Files Community

srimanth-d committed on Apr 27

Commit

bcb8309

verified ·

1 Parent(s): 6ccd7a1

Upload app.py

Browse files

Files changed (1) hide show

app.py +104 -0

app.py ADDED Viewed

	@@ -0,0 +1,104 @@

+import re
+import streamlit as st
+from transformers import AutoModel, AutoTokenizer
+import io
+from PIL import Image
@st.cache_resource
def load_model():
    """Load the OCR model and its tokenizer from the Hugging Face Hub.

    The function is wrapped in ``st.cache_resource``, so Streamlit is expected
    to hand back the same objects on later calls instead of reloading them.

    Returns:
        A ``(model, tokenizer)`` tuple; the model has been switched to
        evaluation mode via ``model.eval()``.
    """
    repo_id = "srimanth-d/GOT_CPU"

    # NOTE(review): trust_remote_code=True lets the hub repository supply the
    # model/tokenizer implementation -- only keep it for a trusted repo.
    tokenizer = AutoTokenizer.from_pretrained(repo_id, trust_remote_code=True)

    model_options = {
        "trust_remote_code": True,
        "low_cpu_mem_usage": True,
        "use_safetensors": True,
        "pad_token_id": 151643,
    }
    model = AutoModel.from_pretrained(repo_id, **model_options)
    model.eval()
    return model, tokenizer
def handle_error(error_message):
    """Show *error_message* to the user in a Streamlit error box.

    Args:
        error_message: Text appended to the fixed "An error occurred: " prefix.
    """
    banner = f"An error occurred: {error_message}"
    st.error(banner)
def extract_text(image_bytes, ocr_type):
    """Run the OCR model on an in-memory image and return its output.

    The image is re-encoded as PNG into a private temporary directory because
    ``model.chat`` is called with a file path. A per-call directory (instead of
    a fixed ``temp_image.png`` in the working directory) keeps concurrent
    sessions from overwriting each other's image and removes the file once the
    call finishes.

    Args:
        image_bytes: Raw bytes of the uploaded image.
        ocr_type: OCR mode forwarded to ``model.chat`` (the UI offers
            ``"ocr"`` and ``"format"``).

    Returns:
        Whatever ``model.chat`` returns, or ``None`` when any step fails; the
        failure is reported to the user through ``handle_error``.
    """
    import os
    import tempfile

    try:
        model, tokenizer = load_model()
        with tempfile.TemporaryDirectory() as tmp_dir:
            image_path = os.path.join(tmp_dir, "temp_image.png")
            # Close the PIL handle as soon as the PNG copy is on disk.
            with Image.open(io.BytesIO(image_bytes)) as image:
                image.save(image_path, format="PNG")
            # The model must finish reading the file before the directory
            # is cleaned up, so the call stays inside the ``with`` block.
            return model.chat(tokenizer, image_path, ocr_type=ocr_type)
    except Exception as e:
        # Deliberate best-effort boundary: show the problem in the UI and let
        # the caller decide what to do with ``None``.
        handle_error(f"Error during OCR extraction: {str(e)}")
        return None
def search_keyword(extracted_text, keyword):
    """Highlight whole-word, case-insensitive occurrences of *keyword*.

    Improvements over a plain ``\\b(keyword)\\b`` substitution:

    * A word-boundary check is applied only on the side(s) of the keyword
      that start/end with a ``\\w`` character.  ``\\b`` never matches between
      two non-word characters, so keywords such as ``"C++"`` or Hindi words
      ending in a vowel sign (which ``re`` does not treat as ``\\w``) were
      never found.
    * The surrounding text and the matched text are HTML-escaped, because the
      caller renders the result with ``unsafe_allow_html=True`` and the text
      comes from an uploaded image.
    * An empty keyword yields zero occurrences instead of matching at every
      word boundary.

    Args:
        extracted_text: Text to search.
        keyword: Literal keyword (regex metacharacters are escaped).

    Returns:
        ``(highlighted_html, occurrences)`` where each match is wrapped in a
        red bold ``<span>`` and ``occurrences`` is the number of matches.
    """
    import html

    if not keyword:
        return html.escape(extracted_text, quote=False), 0

    # Only demand a boundary where the keyword edge is itself a word char;
    # for those edges the lookarounds are equivalent to ``\b``.
    prefix = r"(?<!\w)" if re.match(r"\w", keyword[0]) else ""
    suffix = r"(?!\w)" if re.match(r"\w", keyword[-1]) else ""
    pattern = re.compile(
        prefix + re.escape(keyword) + suffix, flags=re.IGNORECASE
    )

    pieces = []
    cursor = 0
    occurrences = 0
    for match in pattern.finditer(extracted_text):
        pieces.append(
            html.escape(extracted_text[cursor:match.start()], quote=False)
        )
        pieces.append(
            "<span style='color:red'><b>"
            + html.escape(match.group(0), quote=False)
            + "</b></span>"
        )
        cursor = match.end()
        occurrences += 1
    pieces.append(html.escape(extracted_text[cursor:], quote=False))
    return "".join(pieces), occurrences
@st.cache_data
def cache_image_ocr(image_bytes, ocr_type):
    """Call ``extract_text`` behind the ``st.cache_data`` decorator.

    Args:
        image_bytes: Raw bytes of the uploaded image.
        ocr_type: OCR mode forwarded unchanged to ``extract_text``.

    Returns:
        The value returned by ``extract_text`` for these arguments.
    """
    ocr_result = extract_text(image_bytes, ocr_type)
    return ocr_result
def app():
    """Render the two-column OCR page: upload/extract on the left, search on the right.

    Session state keeps the last extracted text together with a signature of
    the input that produced it (SHA-256 of the image bytes plus the selected
    OCR type).  When the user uploads a different image or switches the OCR
    type, the signature no longer matches and the text is extracted again;
    previously the first result was kept and shown for every later input
    until the uploader was cleared.
    """
    import hashlib

    st.set_page_config(page_title="OCR Tool", layout="wide", page_icon=":chart_with_upwards_trend:")
    st.header("Optical Character Recognition for English and Hindi Texts")
    st.write("Upload an image below for OCR:")

    if 'extracted_text' not in st.session_state:
        st.session_state.extracted_text = None
    if 'ocr_signature' not in st.session_state:
        st.session_state.ocr_signature = None

    col1, col2 = st.columns([1, 1])

    with col1:
        st.subheader("Upload and OCR Extraction")
        uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "png", "jpeg"], accept_multiple_files=False)
        # Add OCR type selection dropdown
        ocr_type = st.selectbox("Select OCR Type:", ["ocr", "format"])

        if uploaded_file is not None:
            st.image(uploaded_file, caption='Uploaded Image', use_column_width=True)
            # getvalue() returns the full buffer regardless of the current
            # stream position, so it is safe on every rerun (read() is not).
            image_bytes = uploaded_file.getvalue()
            signature = (hashlib.sha256(image_bytes).hexdigest(), ocr_type)

            # Drop a result that belongs to another image or OCR type.
            if st.session_state.ocr_signature != signature:
                st.session_state.extracted_text = None
                st.session_state.ocr_signature = None

            if st.session_state.extracted_text is None:
                with st.spinner("Extracting the text..."):
                    extracted_text = cache_image_ocr(image_bytes, ocr_type)
                    if extracted_text:
                        st.success("Text extraction completed!", icon="🎉")
                        st.session_state.extracted_text = extracted_text
                        st.session_state.ocr_signature = signature
                        st.write("Extracted Text:")
                        st.write(extracted_text)
                    else:
                        st.error("Failed to extract text. Please try with a different image.")
            else:
                st.write("Extracted Text:")
                st.write(st.session_state.extracted_text)
        else:
            # No file selected: forget any earlier result.
            st.session_state.extracted_text = None
            st.session_state.ocr_signature = None
            st.info("Please upload an image file to proceed.")

    with col2:
        st.subheader("Keyword Search")
        if st.session_state.extracted_text:
            keyword = st.text_input("Enter keyword to search")
            if keyword:
                with st.spinner(f"Searching for '{keyword}'..."):
                    highlighted_text, occurrences = search_keyword(st.session_state.extracted_text, keyword)
                    if occurrences > 0:
                        st.success(f"Found {occurrences} occurrences of the keyword '{keyword}'!")
                        # search_keyword returns HTML markup, hence unsafe_allow_html.
                        st.markdown(highlighted_text, unsafe_allow_html=True)
                    else:
                        st.warning(f"No occurrences of the keyword '{keyword}' were found.")
        else:
            st.info("Please upload an image and extract text first.")
def main():
    """Run the Streamlit page and surface any uncaught exception in the UI."""
    try:
        app()
    except Exception as exc:
        # Top-level boundary: report the failure through the shared helper
        # rather than letting the traceback reach the user.
        detail = str(exc)
        handle_error(f"Unexpected error in the main function: {detail}")


if __name__ == "__main__":
    main()