srimanth-d committed on
Commit
bcb8309
·
verified ·
1 Parent(s): 6ccd7a1

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +104 -0
app.py ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import hashlib
import html
import io
import os
import re
import tempfile
import unicodedata

import streamlit as st
from PIL import Image
from transformers import AutoModel, AutoTokenizer
6
+
7
@st.cache_resource
def load_model():
    """Build the 'srimanth-d/GOT_CPU' model together with its tokenizer.

    The function is decorated with ``st.cache_resource``, so Streamlit
    keeps the returned pair and hands it back on later calls.

    Returns:
        tuple: ``(model, tokenizer)``; ``model.eval()`` has already been
        called on the model.
    """
    tokenizer = AutoTokenizer.from_pretrained(
        'srimanth-d/GOT_CPU',
        trust_remote_code=True,
    )
    model_options = {
        "trust_remote_code": True,
        "low_cpu_mem_usage": True,
        "use_safetensors": True,
        "pad_token_id": 151643,
    }
    model = AutoModel.from_pretrained("srimanth-d/GOT_CPU", **model_options)
    model.eval()
    return model, tokenizer
13
+
14
def handle_error(error_message):
    """Display an error to the user through ``st.error``.

    Args:
        error_message: Text shown after the "An error occurred: " prefix.
    """
    # NOTE: the message is only rendered in the UI; nothing is logged.
    banner = f"An error occurred: {error_message}"
    st.error(banner)
17
+
18
def extract_text(image_bytes, ocr_type):
    """Run OCR on an encoded image and return the recognised text.

    The image is decoded with PIL, re-encoded as a PNG file on disk and the
    file path is handed to ``model.chat`` together with the tokenizer.

    Args:
        image_bytes: Raw bytes of the uploaded image file.
        ocr_type: Value forwarded to ``model.chat`` as ``ocr_type``.

    Returns:
        Whatever ``model.chat`` returns, or ``None`` when any step raised
        (the failure is reported through ``handle_error``).
    """
    # Modes that Pillow's PNG writer accepts; anything else (for example a
    # CMYK JPEG) is converted to RGB first so the save cannot fail on it.
    png_modes = ("1", "L", "LA", "I", "I;16", "P", "RGB", "RGBA")
    try:
        model, tokenizer = load_model()
        # A private temporary directory keeps concurrent sessions from
        # overwriting each other's file and removes it once OCR has finished.
        with tempfile.TemporaryDirectory() as workdir:
            image_path = os.path.join(workdir, "temp_image.png")
            with Image.open(io.BytesIO(image_bytes)) as image:
                if image.mode not in png_modes:
                    image = image.convert("RGB")
                image.save(image_path, format="PNG")
            return model.chat(tokenizer, image_path, ocr_type=ocr_type)
    except Exception as e:
        # Deliberate catch-all: any failure is shown in the UI and reported
        # to the caller as None instead of aborting the page.
        handle_error(f"Error during OCR extraction: {str(e)}")
        return None
28
+
29
def _is_word_char(ch):
    """Return True when ``ch`` is a letter, digit, underscore or combining mark."""
    # Combining marks (e.g. Devanagari vowel signs) are not alphanumeric but
    # still belong to the word they are attached to.
    return ch == "_" or ch.isalnum() or unicodedata.category(ch).startswith("M")


def search_keyword(extracted_text, keyword):
    """Highlight whole-word, case-insensitive occurrences of ``keyword``.

    The result is an HTML fragment: every part of ``extracted_text`` is
    HTML-escaped and each hit is wrapped in a red, bold ``<span>``. Escaping
    matters because the caller renders the fragment with
    ``unsafe_allow_html=True`` and the text comes from an uploaded image.

    A hit counts only when it is not directly preceded or followed by a word
    character, so "cat" does not match inside "category". Unlike ``\\b``
    anchors, this also works for keywords that begin or end with a non-word
    character ("C++") or with a combining mark.

    Args:
        extracted_text: Text to search; ``None`` or "" yields no hits.
        keyword: Literal text to look for (not a regular expression).

    Returns:
        tuple: ``(highlighted_html, occurrences)``.
    """
    if not extracted_text or not keyword:
        # Nothing to search for: return the escaped text without highlights.
        return html.escape(extracted_text or "", quote=False), 0

    pattern = re.compile(re.escape(keyword), flags=re.IGNORECASE)
    text_len = len(extracted_text)
    pieces = []
    occurrences = 0
    copied_upto = 0  # end of the text already emitted into ``pieces``
    scan_from = 0    # where the next search starts

    while True:
        match = pattern.search(extracted_text, scan_from)
        if match is None:
            break
        start, end = match.span()
        preceded = start > 0 and _is_word_char(extracted_text[start - 1])
        followed = end < text_len and _is_word_char(extracted_text[end])
        if preceded or followed:
            # Part of a longer word; retry one character further on so an
            # overlapping valid occurrence is not skipped.
            scan_from = start + 1
            continue
        pieces.append(html.escape(extracted_text[copied_upto:start], quote=False))
        pieces.append(
            "<span style='color:red'><b>"
            + html.escape(match.group(0), quote=False)
            + "</b></span>"
        )
        occurrences += 1
        copied_upto = scan_from = end

    pieces.append(html.escape(extracted_text[copied_upto:], quote=False))
    return "".join(pieces), occurrences
35
+
36
@st.cache_data
def cache_image_ocr(image_bytes, ocr_type):
    """Call ``extract_text`` behind ``st.cache_data``.

    Args:
        image_bytes: Raw bytes of the image to read.
        ocr_type: OCR mode passed straight through to ``extract_text``.

    Returns:
        The value produced by ``extract_text`` for these arguments.
    """
    ocr_result = extract_text(image_bytes, ocr_type)
    return ocr_result
39
+
40
def app():
    """Render the OCR page: upload and extraction on the left, search on the right.

    The extracted text is kept in ``st.session_state.extracted_text`` so it
    survives Streamlit reruns. ``st.session_state.ocr_source_key`` remembers
    which image and OCR type that text belongs to; when either changes the
    stored text is discarded and OCR runs again, so the page never shows
    text from a previous upload or mode.
    """
    st.set_page_config(page_title="OCR Tool", layout="wide", page_icon=":chart_with_upwards_trend:")
    st.header("Optical Character Recognition for English and Hindi Texts")
    st.write("Upload an image below for OCR:")

    if 'extracted_text' not in st.session_state:
        st.session_state.extracted_text = None
    if 'ocr_source_key' not in st.session_state:
        st.session_state.ocr_source_key = None

    col1, col2 = st.columns([1, 1])

    with col1:
        st.subheader("Upload and OCR Extraction")
        uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "png", "jpeg"], accept_multiple_files=False)

        # Add OCR type selection dropdown
        ocr_type = st.selectbox("Select OCR Type:", ["ocr", "format"])

        if uploaded_file is not None:
            st.image(uploaded_file, caption='Uploaded Image', use_column_width=True)
            # getvalue() returns the whole buffer regardless of the current
            # stream position, unlike read().
            image_bytes = uploaded_file.getvalue()

            # Invalidate the stored text when the image or the OCR type
            # differs from the one it was extracted with.
            source_key = (hashlib.sha256(image_bytes).hexdigest(), ocr_type)
            if st.session_state.ocr_source_key != source_key:
                st.session_state.extracted_text = None
                st.session_state.ocr_source_key = source_key

            if st.session_state.extracted_text is None:
                with st.spinner("Extracting the text..."):
                    extracted_text = cache_image_ocr(image_bytes, ocr_type)

                if extracted_text:
                    st.success("Text extraction completed!", icon="🎉")
                    st.session_state.extracted_text = extracted_text
                    st.write("Extracted Text:")
                    st.write(extracted_text)
                else:
                    st.error("Failed to extract text. Please try with a different image.")
            else:
                st.write("Extracted Text:")
                st.write(st.session_state.extracted_text)
        else:
            # No file selected: forget any text from a previous upload.
            st.session_state.extracted_text = None
            st.session_state.ocr_source_key = None
            st.info("Please upload an image file to proceed.")

    with col2:
        st.subheader("Keyword Search")

        if st.session_state.extracted_text:
            keyword = st.text_input("Enter keyword to search")

            if keyword:
                with st.spinner(f"Searching for '{keyword}'..."):
                    highlighted_text, occurrences = search_keyword(st.session_state.extracted_text, keyword)

                if occurrences > 0:
                    st.success(f"Found {occurrences} occurrences of the keyword '{keyword}'!")
                    # The highlight markup is HTML, hence unsafe_allow_html.
                    st.markdown(highlighted_text, unsafe_allow_html=True)
                else:
                    st.warning(f"No occurrences of the keyword '{keyword}' were found.")
        else:
            st.info("Please upload an image and extract text first.")
96
+
97
def main():
    """Run ``app`` and show any exception it raises via ``handle_error``."""
    try:
        app()
    except Exception as main_error:
        # Top-level boundary: report the failure in the UI rather than crash.
        failure = f"Unexpected error in the main function: {str(main_error)}"
        handle_error(failure)
102
+
103
# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()