srimanth-d committed on
Commit
6ccd7a1
·
verified ·
1 Parent(s): ba82074

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -147
app.py DELETED
@@ -1,147 +0,0 @@
1
- import re
2
- import streamlit as st # Importing required libraries
3
- from transformers import AutoModel, AutoTokenizer
4
- import io
5
- #import logging
6
- from PIL import Image
7
-
8
- # Configure logging for error handling
9
- #logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
10
-
11
- # Helper function for logging and displaying errors
12
def handle_error(error_message):
    """Show an error banner in the Streamlit UI.

    Args:
        error_message: Text describing what went wrong; it is appended to
            the fixed ``"An error occurred: "`` prefix.
    """
    banner = f"An error occurred: {error_message}"
    st.error(banner)
15
-
16
- # Cache the model and tokenizer to avoid reloading on every run
17
@st.cache_resource
def load_model():
    """Load the OCR model and its tokenizer from the Hugging Face Hub.

    The function is wrapped in ``st.cache_resource`` so the loaded objects
    are reused instead of being rebuilt on every Streamlit rerun.

    Returns:
        A ``(model, tokenizer)`` tuple; ``model.eval()`` has already been
        called on the model.
    """
    repo_id = "srimanth-d/GOT_CPU"

    tokenizer = AutoTokenizer.from_pretrained(repo_id, trust_remote_code=True)

    model_options = {
        "trust_remote_code": True,
        "low_cpu_mem_usage": True,
        "use_safetensors": True,
        "pad_token_id": 151643,
    }
    model = AutoModel.from_pretrained(repo_id, **model_options)
    model.eval()

    return model, tokenizer
23
-
24
- # OCR function using the cached model
25
def extract_text(image_bytes):
    """Run OCR on an in-memory image and return the recognised text.

    The image is re-encoded as PNG and handed to ``model.chat`` as a file
    path. The PNG lives in a private temporary directory rather than a fixed
    ``temp_image.png`` in the working directory, so concurrent sessions
    cannot overwrite each other's image and nothing is left on disk.

    Args:
        image_bytes: Raw bytes of the uploaded image file.

    Returns:
        The value returned by ``model.chat`` for the image, or ``None`` when
        any step fails (the failure is reported through ``handle_error``).
    """
    # Local imports keep this block self-contained without touching the
    # file-level import section.
    import os
    import tempfile

    try:
        # Load the cached model and tokenizer
        model, tokenizer = load_model()

        with tempfile.TemporaryDirectory() as scratch_dir:
            png_path = os.path.join(scratch_dir, "temp_image.png")

            # Close the PIL handle as soon as the PNG has been written.
            with Image.open(io.BytesIO(image_bytes)) as image:
                image.save(png_path, format="PNG")

            # The model must run while the temporary directory still exists.
            return model.chat(tokenizer, png_path, ocr_type='ocr')

    except Exception as e:
        handle_error(f"Error during OCR extraction: {str(e)}")
        return None
41
-
42
- # Function to search for the keyword in the extracted text and highlight it in red
43
def search_keyword(extracted_text, keyword):
    """Find whole-word, case-insensitive matches of *keyword* and highlight them.

    Differences from a plain ``\\b(keyword)\\b`` substitution:

    * Word edges are checked against the neighbouring characters instead of
      ``\\b``, so keywords that start or end with a non-word character
      (``C++``) or with a combining mark (Hindi vowel signs) can match.
    * Combining marks count as word characters, so a bare consonant does not
      match inside a longer Devanagari word.
    * The text is HTML-escaped (``<``, ``>``, ``&``) because the result is
      meant to be rendered as HTML; only the highlight markup stays raw.
    * An empty keyword yields zero matches instead of matching every word
      boundary.

    Args:
        extracted_text: Text to search.
        keyword: Literal keyword to look for (not a regex).

    Returns:
        A ``(highlighted_text, occurrences)`` tuple: the escaped text with
        each match wrapped in a red, bold ``<span>``, and the match count.
    """
    import html
    import unicodedata

    def _is_word_char(ch):
        # Letters, digits, underscore, plus combining marks (category M*),
        # which belong to the preceding letter in scripts such as Devanagari.
        return ch == "_" or ch.isalnum() or unicodedata.category(ch).startswith("M")

    if not keyword:
        return html.escape(extracted_text, quote=False), 0

    pattern = re.compile(re.escape(keyword), flags=re.IGNORECASE)
    text_length = len(extracted_text)

    pieces = []
    occurrences = 0
    emitted_upto = 0  # end of the text already copied into ``pieces``
    scan_from = 0     # where the next search starts

    while True:
        match = pattern.search(extracted_text, scan_from)
        if match is None:
            break

        start, end = match.span()
        left_ok = start == 0 or not _is_word_char(extracted_text[start - 1])
        right_ok = end == text_length or not _is_word_char(extracted_text[end])

        if left_ok and right_ok:
            pieces.append(html.escape(extracted_text[emitted_upto:start], quote=False))
            matched = html.escape(match.group(0), quote=False)
            pieces.append(f"<span style='color:red'><b>{matched}</b></span>")
            occurrences += 1
            emitted_upto = scan_from = end
        else:
            # Step one character so an overlapping candidate is not skipped.
            scan_from = start + 1

    pieces.append(html.escape(extracted_text[emitted_upto:], quote=False))
    return "".join(pieces), occurrences
55
-
56
- # Cache the image and OCR results
57
@st.cache_data
def cache_image_ocr(image_bytes):
    """Return the OCR result for *image_bytes* via ``extract_text``.

    The ``st.cache_data`` decorator memoises the result, so repeated calls
    with the same bytes do not re-run the OCR step.
    """
    ocr_result = extract_text(image_bytes)
    return ocr_result
60
-
61
- # Main function for setting up the Streamlit app
62
def app():
    """Build the Streamlit page: image upload + OCR on the left, keyword search on the right.

    Session state holds two values:

    * ``extracted_text`` - OCR text for the current image, or ``None``.
    * ``ocr_image_digest`` - SHA-256 of the image that text belongs to.

    The digest lets the app notice when the user replaces the uploaded image
    without removing it first; without it the previous image's text would
    keep being shown for the new image.
    """
    # Local import keeps this block self-contained without touching the
    # file-level import section.
    import hashlib

    st.set_page_config(
        page_title="OCR Tool",
        layout="wide",
        page_icon=":chart_with_upwards_trend:"
    )

    st.header("Optical Character Recognition for English and Hindi Texts")
    st.write("Upload an image below for OCR:")

    # Initialize session state to store the extracted text and its source image
    if 'extracted_text' not in st.session_state:
        st.session_state.extracted_text = None
    if 'ocr_image_digest' not in st.session_state:
        st.session_state.ocr_image_digest = None

    # Create a two-column layout
    col1, col2 = st.columns([1, 1])  # Equal width columns

    with col1:
        st.subheader("Upload and OCR Extraction")
        uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "png", "jpeg"], accept_multiple_files=False)

        if uploaded_file is not None:
            # Displaying uploaded image
            st.image(uploaded_file, caption='Uploaded Image', use_column_width=True)

            # getvalue() returns the whole buffer regardless of the current
            # read position, unlike read().
            image_bytes = uploaded_file.getvalue()

            # A different image invalidates any text kept from the previous one.
            image_digest = hashlib.sha256(image_bytes).hexdigest()
            if st.session_state.ocr_image_digest != image_digest:
                st.session_state.extracted_text = None
                st.session_state.ocr_image_digest = image_digest

            if st.session_state.extracted_text is None:
                with st.spinner("Extracting the text..."):
                    # Cache the OCR result
                    extracted_text = cache_image_ocr(image_bytes)

                if extracted_text:
                    st.success("Text extraction completed!", icon="🎉")

                    # Store the extracted text in session state so it doesn't re-run
                    st.session_state.extracted_text = extracted_text

                    st.write("Extracted Text:")
                    st.write(extracted_text)

                else:
                    st.error("Failed to extract text. Please try with a different image.")

            else:
                # If text is already in session state, just display it
                st.write("Extracted Text:")
                st.write(st.session_state.extracted_text)

        else:
            # Clear extracted text when the image is removed
            st.session_state.extracted_text = None
            st.session_state.ocr_image_digest = None
            st.info("Please upload an image file to proceed.")

    # Keyword search functionality (only after text is extracted)
    with col2:
        st.subheader("Keyword Search")

        if st.session_state.extracted_text:
            keyword = st.text_input("Enter keyword to search")

            if keyword:
                with st.spinner(f"Searching for '{keyword}'..."):
                    highlighted_text, occurrences = search_keyword(st.session_state.extracted_text, keyword)

                if occurrences > 0:
                    st.success(f"Found {occurrences} occurrences of the keyword '{keyword}'!")
                    # Display the text with red-colored highlights
                    st.markdown(highlighted_text, unsafe_allow_html=True)
                else:
                    st.warning(f"No occurrences of the keyword '{keyword}' were found.")
        else:
            st.info("Please upload an image and extract text first.")
138
-
139
- # Main function to launch the app
140
def main():
    """Run the app and surface any uncaught exception through ``handle_error``."""
    try:
        app()
    except Exception as exc:
        failure_text = f"Unexpected error in the main function: {str(exc)}"
        handle_error(failure_text)


# Only launch the app when this file is executed as the entry script.
if __name__ == "__main__":
    main()