# Scraped from the Hugging Face file viewer: app.py, "Update app.py",
# commit a9c96c1 (verified) by PRIYANSHUDHAKED, 3.17 kB.
import hashlib
import html
import io
import os
import re

import google.generativeai as genai
import streamlit as st
from PIL import Image
# Google Gemini API key, read from the GOOGLE_API_KEY environment variable
# (for example a deployment secret). os.getenv() takes the *name* of a
# variable, so the key itself must never appear in this file.
# SECURITY: a literal key used to be written on this line; treat that key as
# compromised and revoke/rotate it.
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
# Configure Google Gemini with your API key
genai.configure(api_key=GOOGLE_API_KEY)
# Create a GenerativeModel instance
model = genai.GenerativeModel("gemini-1.5-flash")
def extract_text_with_gemini(image):
    """Ask the module-level Gemini ``model`` to transcribe the text in ``image``.

    Sends a fixed OCR instruction together with ``image`` to
    ``model.generate_content`` and returns the ``text`` attribute of the
    response it gets back.
    """
    ocr_instructions = (
        "\n"
        "Extract all text from this image. Provide the output as plain text,\n"
        "maintaining the general layout and structure of the document.\n"
        "Include all visible text, headings, and any important information.\n"
    )
    gemini_reply = model.generate_content([ocr_instructions, image])
    return gemini_reply.text
def search_and_highlight(full_text, keyword):
    """Find every case-insensitive occurrence of ``keyword`` in ``full_text``.

    Args:
        full_text: Text to search (in this app, the OCR output).
        keyword: Literal string to look for. It is passed through
            ``re.escape``, so regex metacharacters are matched literally.

    Returns:
        A ``(results, html_text)`` tuple.

        ``results`` has one snippet per match, in document order: up to 50
        characters of context on each side, with the match wrapped in the
        ANSI escape codes ``\\033[43m`` ... ``\\033[0m`` for console output.

        ``html_text`` is ``full_text`` HTML-escaped, with every match wrapped
        in ``<mark>`` tags. When nothing matches, or ``keyword`` is empty,
        ``results`` is empty and ``html_text`` is just the escaped text.
    """
    if not keyword:
        # An empty pattern matches at every position; treat it as "no search".
        return [], html.escape(full_text)

    pattern = re.compile(re.escape(keyword), re.IGNORECASE)
    text_length = len(full_text)
    results = []
    html_parts = []
    cursor = 0  # end of the previous match within full_text
    for match in pattern.finditer(full_text):
        start, end = match.span()
        context_start = max(0, start - 50)
        context_end = min(text_length, end + 50)
        # Highlight for console output
        results.append(
            full_text[context_start:start]
            + "\033[43m" + full_text[start:end] + "\033[0m"
            + full_text[end:context_end]
        )
        # Highlight for HTML output. The text comes from an uploaded image,
        # so every piece is escaped before it is placed next to real markup.
        html_parts.append(html.escape(full_text[cursor:start]))
        html_parts.append(f"<mark>{html.escape(full_text[start:end])}</mark>")
        cursor = end
    html_parts.append(html.escape(full_text[cursor:]))
    return results, "".join(html_parts)
def app():
    """Render the Streamlit page: upload an image, OCR it, search the text.

    The uploaded image is shown, its text is extracted with
    ``extract_text_with_gemini`` and displayed, and an optional keyword is
    searched with ``search_and_highlight``. Matching snippets are listed and
    the full text is rendered with the matches highlighted.
    """
    st.title("Image OCR and Search")
    uploaded_file = st.file_uploader("Upload an image", type=["png", "jpg", "jpeg"])
    if uploaded_file is not None:
        # Open and display the image
        image = Image.open(uploaded_file)
        st.image(image, caption="Uploaded Image", use_column_width=True)

        # Streamlit reruns this function on every widget interaction (for
        # example typing a keyword), so keep the OCR result in session state
        # keyed by the image content instead of calling Gemini on each rerun.
        image_digest = hashlib.sha256(uploaded_file.getvalue()).hexdigest()
        if st.session_state.get("ocr_image_digest") != image_digest:
            try:
                with st.spinner("Extracting text from the image..."):
                    fresh_text = extract_text_with_gemini(image)
            except Exception as exc:  # UI boundary: report instead of crashing
                st.error(f"Text extraction failed: {exc}")
                return
            st.session_state["ocr_text"] = fresh_text
            st.session_state["ocr_image_digest"] = image_digest
        extracted_text = st.session_state["ocr_text"]

        st.subheader("Extracted Text:")
        st.write(extracted_text)

        # Search functionality
        search_keyword = st.text_input("Enter a keyword to search (or press Enter to exit)")
        if search_keyword:
            results, html_text = search_and_highlight(extracted_text, search_keyword)
            if results:
                st.subheader(f"Keyword '{search_keyword}' found in the extracted text:")
                # The snippets carry ANSI colour codes meant for a terminal;
                # a browser would show them as stray characters, so drop them.
                ansi_codes = re.compile(r"\x1b\[[0-9;]*m")
                for i, result in enumerate(results, 1):
                    st.write(f"{i}. ...{ansi_codes.sub('', result)}...")
                # Display HTML with highlighted text
                st.markdown(f"<p>{html_text}</p>", unsafe_allow_html=True)
            else:
                st.write(f"Keyword '{search_keyword}' not found in the extracted text.")
        st.write("OCR and search completed.")
# Build the page when this file is executed as a script rather than imported.
if __name__ == "__main__":
    app()