srimanth-d committed on
Commit
0a20e12
·
verified ·
1 Parent(s): bcb8309

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -104
app.py DELETED
@@ -1,104 +0,0 @@
1
- import re
2
- import streamlit as st
3
- from transformers import AutoModel, AutoTokenizer
4
- import io
5
- from PIL import Image
6
-
7
@st.cache_resource
def load_model():
    """Load the OCR model and its tokenizer, ready for inference.

    Both objects are fetched from the ``srimanth-d/GOT_CPU`` repository with
    ``trust_remote_code=True``, so code shipped in that repository is
    executed on load. The ``st.cache_resource`` decorator is applied so the
    loaded objects can be reused instead of being reloaded on every call.

    Returns:
        A ``(model, tokenizer)`` tuple; ``model.eval()`` has already been
        called on the model.
    """
    repo_id = "srimanth-d/GOT_CPU"

    tokenizer = AutoTokenizer.from_pretrained(repo_id, trust_remote_code=True)
    model = AutoModel.from_pretrained(
        repo_id,
        trust_remote_code=True,
        low_cpu_mem_usage=True,
        use_safetensors=True,
        pad_token_id=151643,
    )
    model.eval()

    return model, tokenizer
13
-
14
def handle_error(error_message):
    """Display *error_message* to the user in a Streamlit error box.

    Args:
        error_message: Text describing what went wrong; it is shown after
            the fixed prefix "An error occurred: ".
    """
    banner = f"An error occurred: {error_message}"
    st.error(banner)
17
-
18
def extract_text(image_bytes, ocr_type):
    """Run OCR on an in-memory image and return the model's output.

    The image is decoded with PIL, re-encoded as PNG into a private
    temporary directory, and the resulting path is handed to ``model.chat``.
    A per-call temporary directory is used (instead of a fixed file name in
    the working directory) so that concurrent sessions cannot overwrite each
    other's image and no file is left behind afterwards.

    Args:
        image_bytes: Raw bytes of the uploaded image file.
        ocr_type: OCR mode forwarded to ``model.chat`` as ``ocr_type``.

    Returns:
        Whatever ``model.chat`` returns, or ``None`` if any step fails (the
        failure is reported to the user through ``handle_error``).
    """
    # Local imports keep this block self-contained with respect to the
    # file's existing import section.
    import os
    import tempfile

    try:
        model, tokenizer = load_model()
        with tempfile.TemporaryDirectory() as tmp_dir:
            image_path = os.path.join(tmp_dir, "temp_image.png")
            # Close the decoded image as soon as it has been written out.
            with Image.open(io.BytesIO(image_bytes)) as image:
                image.save(image_path, format="PNG")
            # The model call must happen while the temp directory still exists.
            return model.chat(tokenizer, image_path, ocr_type=ocr_type)
    except Exception as e:
        # Best-effort boundary: surface the problem in the UI and signal
        # failure to the caller with None rather than crashing the app.
        handle_error(f"Error during OCR extraction: {str(e)}")
        return None
28
-
29
def search_keyword(extracted_text, keyword):
    """Highlight whole-word, case-insensitive matches of *keyword*.

    Args:
        extracted_text: Text to search (``None`` is treated as empty).
        keyword: Literal keyword to look for; regex metacharacters in it are
            matched literally.

    Returns:
        A ``(highlighted_text, occurrences)`` tuple. ``highlighted_text`` is
        an HTML fragment in which every match is wrapped in a red, bold
        ``<span>`` and all other text has ``&``, ``<`` and ``>`` escaped;
        ``occurrences`` is the number of matches.
    """
    # Local import keeps this block self-contained.
    import html

    text = extracted_text or ""

    # An empty keyword would otherwise "match" at every word boundary.
    if not keyword:
        return html.escape(text, quote=False), 0

    # Lookarounds instead of \b: \b only fires next to a word character, so
    # keywords that start or end with a non-word character (e.g. "C++", or
    # Hindi words ending in a vowel sign) could never match with \b.
    pattern = re.compile(
        rf"(?<!\w){re.escape(keyword)}(?!\w)",
        flags=re.IGNORECASE,
    )

    pieces = []
    occurrences = 0
    cursor = 0
    for match in pattern.finditer(text):
        # Escape the surrounding text so it cannot inject markup when the
        # fragment is rendered as HTML.
        pieces.append(html.escape(text[cursor:match.start()], quote=False))
        matched = html.escape(match.group(0), quote=False)
        pieces.append(f"<span style='color:red'><b>{matched}</b></span>")
        cursor = match.end()
        occurrences += 1
    pieces.append(html.escape(text[cursor:], quote=False))

    return "".join(pieces), occurrences
35
-
36
@st.cache_data
def cache_image_ocr(image_bytes, ocr_type):
    """Cached front door to ``extract_text``.

    Wrapped in ``st.cache_data`` so that a repeated call with the same
    arguments can be served from Streamlit's cache instead of re-running OCR.

    Args:
        image_bytes: Raw bytes of the image to process.
        ocr_type: OCR mode passed straight through to ``extract_text``.

    Returns:
        The value returned by ``extract_text`` for these arguments.
    """
    ocr_result = extract_text(image_bytes, ocr_type)
    return ocr_result
39
-
40
def app():
    """Render the two-column OCR page: upload/extract on the left, search on the right.

    The extracted text is kept in ``st.session_state.extracted_text`` so it
    survives reruns. ``st.session_state.ocr_source`` remembers which image
    content and OCR type produced that text, so replacing the image or
    changing the OCR type triggers a fresh extraction instead of showing
    stale text.
    """
    st.set_page_config(page_title="OCR Tool", layout="wide", page_icon=":chart_with_upwards_trend:")
    st.header("Optical Character Recognition for English and Hindi Texts")
    st.write("Upload an image below for OCR:")

    if 'extracted_text' not in st.session_state:
        st.session_state.extracted_text = None
    if 'ocr_source' not in st.session_state:
        st.session_state.ocr_source = None

    col1, col2 = st.columns([1, 1])

    with col1:
        st.subheader("Upload and OCR Extraction")
        uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "png", "jpeg"], accept_multiple_files=False)

        # Add OCR type selection dropdown
        ocr_type = st.selectbox("Select OCR Type:", ["ocr", "format"])

        if uploaded_file is None:
            # Nothing uploaded: forget any previous result.
            st.session_state.extracted_text = None
            st.session_state.ocr_source = None
            st.info("Please upload an image file to proceed.")
        else:
            st.image(uploaded_file, caption='Uploaded Image', use_column_width=True)
            # getvalue() returns the full content regardless of the stream
            # position; read() would return b"" once the stream is consumed.
            image_bytes = uploaded_file.getvalue()

            # Invalidate the stored text when the image or OCR type changed.
            source_key = (hash(image_bytes), ocr_type)
            if st.session_state.ocr_source != source_key:
                st.session_state.extracted_text = None
                st.session_state.ocr_source = source_key

            if st.session_state.extracted_text is None:
                with st.spinner("Extracting the text..."):
                    extracted_text = cache_image_ocr(image_bytes, ocr_type)

                if extracted_text:
                    st.success("Text extraction completed!", icon="🎉")
                    st.session_state.extracted_text = extracted_text
                    st.write("Extracted Text:")
                    st.write(extracted_text)
                else:
                    st.error("Failed to extract text. Please try with a different image.")
            else:
                st.write("Extracted Text:")
                st.write(st.session_state.extracted_text)

    with col2:
        st.subheader("Keyword Search")

        if st.session_state.extracted_text:
            keyword = st.text_input("Enter keyword to search")

            if keyword:
                with st.spinner(f"Searching for '{keyword}'..."):
                    highlighted_text, occurrences = search_keyword(st.session_state.extracted_text, keyword)

                if occurrences > 0:
                    st.success(f"Found {occurrences} occurrences of the keyword '{keyword}'!")
                    # The highlight markup is HTML, hence unsafe_allow_html.
                    st.markdown(highlighted_text, unsafe_allow_html=True)
                else:
                    st.warning(f"No occurrences of the keyword '{keyword}' were found.")
        else:
            st.info("Please upload an image and extract text first.")
96
-
97
def main():
    """Entry point: run the app and report any uncaught exception in the UI.

    This is the top-level boundary, so every ``Exception`` is caught and
    passed to ``handle_error`` instead of propagating.
    """
    try:
        app()
    except Exception as main_error:
        details = str(main_error)
        handle_error(f"Unexpected error in the main function: {details}")
102
-
103
# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()