PRIYANSHUDHAKED committed on
Commit
d1a52ca
·
verified ·
1 Parent(s): 35f540b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +54 -30
app.py CHANGED
@@ -17,9 +17,12 @@ model = genai.GenerativeModel("gemini-1.5-flash")
17
  def extract_text_with_gemini(image, keyword=None):
18
  if keyword:
19
  prompt = f"""
20
- Extract all text from this image. Provide the output as HTML, maintaining the general layout and structure of the document. Include all visible text, headings, and any important information.
21
- Highlight all instances of the keyword '{keyword}' (case-insensitive) with a yellow background using HTML span tags.
 
 
22
  For example: <span style="background-color: yellow;">keyword</span>
 
23
  """
24
  else:
25
  prompt = """
@@ -35,6 +38,18 @@ def extract_text_with_gemini(image, keyword=None):
35
 
36
  return text
37
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  def search_and_highlight(full_text, keyword):
39
  pattern = re.compile(re.escape(keyword), re.IGNORECASE)
40
  matches = list(pattern.finditer(full_text))
@@ -67,7 +82,7 @@ def search_and_highlight(full_text, keyword):
67
  return results, highlighted_text
68
 
69
  def app():
70
- st.title("Image OCR and Search")
71
  uploaded_file = st.file_uploader("Upload an image", type=["png", "jpg", "jpeg"])
72
 
73
  if uploaded_file is not None:
@@ -78,36 +93,45 @@ def app():
78
  # Select search method
79
  search_method = st.radio("Choose search method:",
80
  ("Extract text first, then search",
81
- "Search while extracting text"))
82
 
83
  search_keyword = st.text_input("Enter a keyword to search (or press Enter to exit)")
84
 
85
- if st.button("Process Image"):
86
- if search_method == "Extract text first, then search":
87
- print("Extracting text from the image...")
88
- extracted_text = extract_text_with_gemini(image)
89
- st.subheader("Extracted Text:")
90
- st.write(extracted_text)
91
-
92
- if search_keyword:
93
- results, highlighted_text = search_and_highlight(extracted_text, search_keyword)
94
- if results:
95
- st.subheader(f"Keyword '{search_keyword}' found in the extracted text:")
96
- for i, result in enumerate(results, 1):
97
- st.markdown(f"{i}. ...{result}...", unsafe_allow_html=True)
98
-
99
- st.subheader("Full Text with Highlighted Keywords:")
100
- st.markdown(highlighted_text, unsafe_allow_html=True)
101
- else:
102
- st.write(f"Keyword '{search_keyword}' not found in the extracted text.")
103
-
104
- else: # Search while extracting text
105
- print("Extracting text and highlighting keyword...")
106
- highlighted_text = extract_text_with_gemini(image, search_keyword)
107
- st.subheader("Extracted Text with Highlighted Keyword:")
108
- st.markdown(highlighted_text, unsafe_allow_html=True)
109
-
110
- st.write("OCR and search completed.")
 
 
 
 
 
 
 
 
 
111
 
112
  if __name__ == "__main__":
113
  app()
 
17
  def extract_text_with_gemini(image, keyword=None):
18
  if keyword:
19
  prompt = f"""
20
+ 1. Extract all text from this image.
21
+ 2. Search for the keyword '{keyword}' (case-insensitive) in the extracted text.
22
+ 3. Provide the output as HTML, maintaining the general layout and structure of the document.
23
+ 4. Highlight all instances of the keyword '{keyword}' with a yellow background using HTML span tags.
24
  For example: <span style="background-color: yellow;">keyword</span>
25
+ 5. If the keyword is not found, simply return the extracted text without highlighting.
26
  """
27
  else:
28
  prompt = """
 
38
 
39
  return text
40
 
41
def extract_ner_with_gemini(image):
    """Ask the Gemini model for the named entities visible in an image.

    A fixed instruction prompt is sent together with ``image`` in a single
    ``generate_content`` call on the ``model`` object defined elsewhere in
    this file.

    Args:
        image: The image to analyse; it is forwarded to the model unchanged.

    Returns:
        The ``text`` attribute of the model's response.
    """
    ner_prompt = """
    Analyze this image and extract all Named Entities (NER) present in the text.
    Categorize them into types such as Person, Organization, Location, Date, etc.
    Provide the output as a formatted list with categories and entities.
    """

    # Prompt and image travel together as one multimodal request.
    return model.generate_content([ner_prompt, image]).text
52
+
53
  def search_and_highlight(full_text, keyword):
54
  pattern = re.compile(re.escape(keyword), re.IGNORECASE)
55
  matches = list(pattern.finditer(full_text))
 
82
  return results, highlighted_text
83
 
84
  def app():
85
+ st.title("Image OCR, Search, and NER Extraction")
86
  uploaded_file = st.file_uploader("Upload an image", type=["png", "jpg", "jpeg"])
87
 
88
  if uploaded_file is not None:
 
93
  # Select search method
94
  search_method = st.radio("Choose search method:",
95
  ("Extract text first, then search",
96
+ "Search while extracting text (using Gemini API)"))
97
 
98
  search_keyword = st.text_input("Enter a keyword to search (or press Enter to exit)")
99
 
100
+ col1, col2 = st.columns(2)
101
+ with col1:
102
+ if st.button("Process Image"):
103
+ if search_method == "Extract text first, then search":
104
+ print("Extracting text from the image...")
105
+ extracted_text = extract_text_with_gemini(image)
106
+ st.subheader("Extracted Text:")
107
+ st.write(extracted_text)
108
+
109
+ if search_keyword:
110
+ results, highlighted_text = search_and_highlight(extracted_text, search_keyword)
111
+ if results:
112
+ st.subheader(f"Keyword '{search_keyword}' found in the extracted text:")
113
+ for i, result in enumerate(results, 1):
114
+ st.markdown(f"{i}. ...{result}...", unsafe_allow_html=True)
115
+
116
+ st.subheader("Full Text with Highlighted Keywords:")
117
+ st.markdown(highlighted_text, unsafe_allow_html=True)
118
+ else:
119
+ st.write(f"Keyword '{search_keyword}' not found in the extracted text.")
120
+
121
+ else: # Search while extracting text using Gemini API
122
+ print("Extracting text and searching keyword using Gemini API...")
123
+ highlighted_text = extract_text_with_gemini(image, search_keyword)
124
+ st.subheader("Extracted Text with Highlighted Keyword:")
125
+ st.markdown(highlighted_text, unsafe_allow_html=True)
126
+
127
+ st.write("OCR and search completed.")
128
+
129
+ with col2:
130
+ if st.button("Extract NER"):
131
+ print("Extracting Named Entities...")
132
+ ner_results = extract_ner_with_gemini(image)
133
+ st.subheader("Named Entities Extracted:")
134
+ st.write(ner_results)
135
 
136
  if __name__ == "__main__":
137
  app()