Mattral committed on
Commit 28cc9b7 · verified · 1 Parent(s): a6259d7

Update app.py

Files changed (1)
  1. app.py +226 -7
app.py CHANGED
@@ -1,12 +1,231 @@
  import os
  import sys

- # Add the 'app' directory to sys.path
- app_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'app')
- if app_dir not in sys.path:
-     sys.path.append(app_dir)

- # Import and run FinalApp.py
- import FinalApp

- FinalApp.main()  # Call the main function directly if defined
 
  import os
+ import cv2
+ import numpy as np
+ from PIL import Image
+ import streamlit as st
+ from typing import Tuple
+ import easyocr  # Import EasyOCR
+
+ from pathlib import Path
  import sys

+ # Add the 'app' directory to the sys.path
+ # Assuming 'app' is in the current working directory
+ sys.path.append(str(Path(__file__).parent / 'app'))
+ from app.dataloader_iam import Batch
+ from app.model import Model, DecoderType
+ from app.preprocessor import Preprocessor
+ from streamlit_drawable_canvas import st_canvas
+
+
+ # Set page config at the very beginning (only executed once)
+ st.set_page_config(
+     page_title="HTR App",
+     page_icon=":pencil:",
+     layout="centered",
+     initial_sidebar_state="auto",
+ )
+
+ ms = st.session_state
+ if "themes" not in ms:
+     ms.themes = {"current_theme": "light",
+                  "refreshed": True,
+
+                  "light": {"theme.base": "dark",
+                            "theme.backgroundColor": "black",
+                            "theme.primaryColor": "#c98bdb",
+                            "theme.secondaryBackgroundColor": "#5591f5",
+                            "theme.textColor": "white",
+                            "button_face": "🌜"},
+
+                  "dark": {"theme.base": "light",
+                           "theme.backgroundColor": "white",
+                           "theme.primaryColor": "#5591f5",
+                           "theme.secondaryBackgroundColor": "#82E1D7",
+                           "theme.textColor": "#0a1464",
+                           "button_face": "🌞"},
+                  }
+
+
+ def ChangeTheme():
+     previous_theme = ms.themes["current_theme"]
+     tdict = ms.themes["light"] if ms.themes["current_theme"] == "light" else ms.themes["dark"]
+     for vkey, vval in tdict.items():
+         if vkey.startswith("theme"): st._config.set_option(vkey, vval)
+
+     ms.themes["refreshed"] = False
+     if previous_theme == "dark": ms.themes["current_theme"] = "light"
+     elif previous_theme == "light": ms.themes["current_theme"] = "dark"
+
+
+ btn_face = ms.themes["light"]["button_face"] if ms.themes["current_theme"] == "light" else ms.themes["dark"]["button_face"]
+ st.button(btn_face, on_click=ChangeTheme)
+
+ if ms.themes["refreshed"] == False:
+     ms.themes["refreshed"] = True
+     st.rerun()
+
+
+ def get_img_size(line_mode: bool = False) -> Tuple[int, int]:
+     """
+     Auxiliary method that sets the height and width.
+     Height is fixed while the width is set according to the model used.
+     """
+     if line_mode:
+         return 256, get_img_height()
+     return 128, get_img_height()
+
+ def get_img_height() -> int:
+     """
+     Auxiliary method that sets the height, which is fixed for the Neural Network.
+     """
+     return 32
+
+ def infer(line_mode: bool, model: Model, fn_img: Path) -> list:
+     """
+     Auxiliary method that does inference using the pretrained models:
+     Recognizes text in an image given its path.
+     """
+     img = cv2.imread(fn_img, cv2.IMREAD_GRAYSCALE)
+     assert img is not None
+
+     preprocessor = Preprocessor(get_img_size(line_mode), dynamic_width=True, padding=16)
+     img = preprocessor.process_img(img)
+
+     batch = Batch([img], None, 1)
+     recognized, probability = model.infer_batch(batch, True)
+     return [recognized, probability]
+
+ def infer_super_model(image_path) -> Tuple[list, list]:
+     reader = easyocr.Reader(['en'])  # Initialize EasyOCR reader
+     result = reader.readtext(image_path)
+     recognized_texts = [text[1] for text in result]  # Extract recognized texts
+     probabilities = [text[2] for text in result]  # Extract probabilities
+     return recognized_texts, probabilities
+
+
+
+ def main():
+
+     st.title('Extract text from Image Demo')
+
+     st.markdown("""
+ Streamlit web interface for Handwritten Text Recognition (HTR) and Optical Character Recognition (OCR),
+ implemented with TensorFlow and trained on the IAM off-line HTR dataset.
+ The model takes images of single words or text lines (multiple words) as input and outputs the recognized text.
+ """, unsafe_allow_html=True)
+
+     st.markdown("""
+ Predictions can be made using one of the following models:
+ - Single_Model (Trained on Single Word Images)
+ - Line_Model (Trained on Text Line Images)
+ - Super_Model (Most Robust Option for English)
+ - Burmese (Link)
+ """, unsafe_allow_html=True)
+
+     st.subheader('Select a Model, Choose the Arguments and Draw in the box below or Upload an Image to obtain a prediction.')
+
+     # Selectors for the model and decoder
+     modelSelect = st.selectbox("Select a Model", ['Single_Model', 'Line_Model', 'Super_Model'])
+
+
+     if modelSelect != 'Super_Model':
+         decoderSelect = st.selectbox("Select a Decoder", ['Bestpath', 'Beamsearch', 'Wordbeamsearch'])
+
+
+     # Mappings (dictionaries) for the model and decoder. Assigns the directory or the DecoderType of the selected option.
+     modelMapping = {
+         "Single_Model": '../model/word-model',
+         "Line_Model": '../model/line-model'
+     }
+
+     decoderMapping = {
+         'Bestpath': DecoderType.BestPath,
+         'Beamsearch': DecoderType.BeamSearch,
+         'Wordbeamsearch': DecoderType.WordBeamSearch
+     }
+
+     # Slider for pencil width
+     strokeWidth = st.slider("Stroke Width: ", 1, 25, 6)
+
+     # Canvas/text box for user input. Background color must be white (#FFFFFF) or else text will not be properly recognised.
+     inputDrawn = st_canvas(
+         fill_color="rgba(255, 165, 0, 0.3)",
+         stroke_width=strokeWidth,
+         update_streamlit=True,
+         background_image=None,
+         height=200,
+         width=400,
+         drawing_mode='freedraw',
+         key="canvas",
+         background_color='#FFFFFF'
+     )
+
+     # Buffer for user input (images uploaded from the user's device)
+     inputBuffer = st.file_uploader("Upload an Image", type=["png"])
+
+     # Inference button
+     inferBool = st.button("Recognize Text")
+
+     # After clicking the "Recognize Text" button, check if the model selected is Super_Model
+     if inferBool:
+         if modelSelect == 'Super_Model':
+             inputArray = None  # Initialize inputArray to None
+
+             # Handling uploaded file
+             if inputBuffer is not None:
+                 with Image.open(inputBuffer).convert('RGB') as img:
+                     inputArray = np.array(img)
+
+             # Handling canvas data
+             elif inputDrawn.image_data is not None:
+                 # Convert RGBA to RGB
+                 inputArray = cv2.cvtColor(np.array(inputDrawn.image_data, dtype=np.uint8), cv2.COLOR_RGBA2RGB)
+
+             # Now check if inputArray has been set
+             if inputArray is not None:
+                 # Initialize EasyOCR Reader
+                 reader = easyocr.Reader(['en'])  # Assuming English language; adjust as necessary
+                 # Perform OCR
+                 results = reader.readtext(inputArray)
+
+                 # Display results
+                 all_text = ''
+                 for (bbox, text, prob) in results:
+                     all_text += f'{text} (confidence: {prob:.2f})\n'
+
+                 st.write("**Recognized Texts and their Confidence Scores:**")
+                 st.text(all_text)
+             else:
+                 st.write("No image data found. Please upload an image or draw on the canvas.")
+
+
+         else:
+             # Handle other model selections as before
+             if ((inputDrawn.image_data is not None or inputBuffer is not None) and inferBool == True):
+                 # We turn the input into a numpy array
+                 if inputDrawn.image_data is not None:
+                     inputArray = np.array(inputDrawn.image_data)
+
+                 if inputBuffer is not None:
+                     inputBufferImage = Image.open(inputBuffer)
+                     inputArray = np.array(inputBufferImage)
+
+                 # We turn this array into a PIL image (mode inferred from the array shape) and save it as a .png file
+                 inputImage = Image.fromarray(inputArray.astype('uint8'))
+                 inputImage.save('userInput.png')
+                 # We obtain the model directory and the decoder type from their mapping
+                 modelDir = modelMapping[modelSelect]
+                 decoderType = decoderMapping[decoderSelect]
+
+                 # Finally, we call the model with this image as attribute and display the Best Candidate and its probability on the Interface
+                 model = Model(list(open(modelDir + "/charList.txt").read()), modelDir, decoderType, must_restore=True)
+                 inferedText = infer(modelDir == '../model/line-model', model, 'userInput.png')

+                 st.write("**Best Candidate: **", inferedText[0][0])
+                 st.write("**Probability: **", str(inferedText[1][0]*100) + "%")

+ if __name__ == "__main__":
+     main()
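
For reference, a minimal standalone sketch of the EasyOCR call that the new infer_super_model() wraps. It assumes easyocr is installed and that a local test image named 'sample.png' exists (the file name is illustrative, not part of the commit); the (bounding box, text, confidence) tuple layout is EasyOCR's readtext output.

import easyocr

# Load the English recognition model once; weights are downloaded on first use.
reader = easyocr.Reader(['en'])

# readtext returns a list of (bounding_box, text, confidence) tuples.
for bbox, text, prob in reader.readtext('sample.png'):
    print(f'{text} (confidence: {prob:.2f})')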