Spaces:

bulentsoykan
/

streamlit-OCR-app

Running

App Files Files Community

streamlit-OCR-app / app.py

bulentsoykan

Update app.py

feae4d7 verified 4 months ago

raw

history blame contribute delete

5.53 kB

	import streamlit as st
	from PIL import Image
	import io
	import base64
	import requests
	import os

	# Browser-tab title/icon and overall layout; this is the first st.* call in the script
	st.set_page_config(
	    page_title="Vision OCR",
	    page_icon="🔎",
	    layout="wide",
	    initial_sidebar_state="expanded",
	)

	# Hugging Face API token: the environment variable wins, Streamlit secrets are the fallback
	HF_API_KEY = os.environ.get("HF_API_KEY", "")
	if not HF_API_KEY:
	    try:
	        HF_API_KEY = st.secrets.get("HF_API_KEY", "")
	    except FileNotFoundError:
	        # NOTE(review): Streamlit reports a missing secrets file with a
	        # FileNotFoundError-derived exception — confirm for the pinned version.
	        # Leave the key empty so the "API key is missing" message further down
	        # is shown instead of the app failing at startup.
	        HF_API_KEY = ""

	# Hugging Face Inference API helpers

	# Instruction sent with every image; shared by both payload shapes below.
	_OCR_PROMPT = (
	    "Analyze the text in the provided image. Extract all readable content\n"
	    "\tand present it in a structured Markdown format that is clear, concise,\n"
	    "\tand well-organized. Ensure proper formatting (e.g., headings, lists, or\n"
	    "\tcode blocks) as necessary to represent the content effectively."
	)


	def _extract_generated_text(response_json):
	    """Return the generated text from a decoded API response, or its str() form."""
	    candidate = response_json
	    # List responses carry the result in their first element; an empty list
	    # has nothing to unwrap and falls through to the str() fallback.
	    if isinstance(response_json, list) and response_json:
	        candidate = response_json[0]
	    if isinstance(candidate, dict):
	        for key in ("generated_text", "text"):
	            if key in candidate:
	                return candidate[key]
	    return str(response_json)


	def process_image_with_hf(image_bytes, model_id, timeout=120):
	    """Send an image to the Hugging Face Inference API and return the model's text.

	    Args:
	        image_bytes: Raw image bytes; they are base64-encoded into the JSON body.
	        model_id: Model id appended to the inference endpoint URL. Ids that
	            contain "llava" (case-insensitive) get a "prompt" field plus a
	            "max_new_tokens" parameter; every other id gets a "text" field.
	        timeout: Seconds passed to ``requests.post`` as its ``timeout`` so a
	            stalled connection cannot block the app indefinitely.

	    Returns:
	        The "generated_text" (or "text") value found in the response, taken
	        from the first element when the response is a list; otherwise the
	        whole decoded response converted with ``str()``.

	    Raises:
	        RuntimeError: If the API answers with a status code other than 200.
	        Exceptions raised by ``requests`` itself (connection errors,
	        timeouts, undecodable JSON) propagate unchanged.
	    """
	    api_url = f"https://api-inference.huggingface.co/models/{model_id}"
	    headers = {"Authorization": f"Bearer {HF_API_KEY}"}
	    image_b64 = base64.b64encode(image_bytes).decode("utf-8")

	    if "llava" in model_id.lower():
	        payload = {
	            "inputs": {"image": image_b64, "prompt": _OCR_PROMPT},
	            "parameters": {"max_new_tokens": 1024},
	        }
	    else:
	        # Generic payload format for other models
	        payload = {"inputs": {"image": image_b64, "text": _OCR_PROMPT}}

	    response = requests.post(api_url, headers=headers, json=payload, timeout=timeout)
	    if response.status_code != 200:
	        raise RuntimeError(
	            f"API request failed with status code {response.status_code}: {response.text}"
	        )

	    return _extract_generated_text(response.json())

	# Title and description in main area
	try:
	    # Read the logo through a context manager so the file handle is always closed.
	    with open("./assets/gemma3.png", "rb") as logo_file:
	        logo_b64 = base64.b64encode(logo_file.read()).decode()
	except FileNotFoundError:
	    # Fallback if image doesn't exist
	    st.title("Vision OCR")
	else:
	    # Heading with the logo embedded inline as a base64 data URI.
	    st.markdown("""
	# <img src="data:image/png;base64,{}" width="50" style="vertical-align: -12px;"> Vision OCR
	""".format(logo_b64), unsafe_allow_html=True)

	# Clear control lives in the narrow right-hand column
	col1, col2 = st.columns([6, 1])
	with col2:
	    clear_clicked = st.button("Clear 🗑️")
	    if clear_clicked:
	        # Forget any stored OCR output, then redraw the page.
	        st.session_state.pop('ocr_result', None)
	        st.rerun()

	# Tagline and divider under the title
	st.markdown(
	    '<p style="margin-top: -20px;">Extract structured text from images using advanced vision models!</p>',
	    unsafe_allow_html=True,
	)
	st.markdown("---")

	# Sidebar: model picker, image upload and the extraction trigger
	with st.sidebar:
	    st.header("Settings")

	    # Display name -> Hugging Face model id; the keys double as dropdown options
	    model_mapping = {
	        "LLaVA-1.5-7B": "llava-hf/llava-1.5-7b-hf",
	        "MiniGPT-4": "Vision-CAIR/MiniGPT-4",
	        "Idefics": "HuggingFaceM4/idefics-9b-instruct",
	    }
	    model_option = st.selectbox(
	        "Select Vision Model",
	        list(model_mapping),
	        index=0,
	    )
	    selected_model = model_mapping[model_option]

	    st.header("Upload Image")
	    uploaded_file = st.file_uploader("Choose an image...", type=['png', 'jpg', 'jpeg'])

	    if uploaded_file is not None:
	        # Preview the upload
	        image = Image.open(uploaded_file)
	        st.image(image, caption="Uploaded Image")

	        if not HF_API_KEY:
	            # Without a token there is no point offering the extract button
	            st.error("Hugging Face API key is missing. Please set it as an environment variable or in Streamlit secrets.")
	        elif st.button("Extract Text 🔍", type="primary"):
	            with st.spinner(f"Processing image with {model_option}..."):
	                try:
	                    img_bytes = uploaded_file.getvalue()
	                    # Keep the result in session state so it is still available on the next rerun
	                    result = process_image_with_hf(img_bytes, selected_model)
	                    st.session_state['ocr_result'] = result
	                except Exception as e:
	                    st.error(f"Error processing image: {e!s}")
	                    st.info("Try selecting a different model from the dropdown.")

	# Main area: show the stored OCR text, or a hint while there is none
	if 'ocr_result' not in st.session_state:
	    st.info("Upload an image and click 'Extract Text' to see the results here.")
	else:
	    st.markdown(st.session_state['ocr_result'])

	# Footer
	st.markdown("---")
	# Plain "|" separator: a backslash before "|" is not a valid Python string escape.
	st.markdown("Made with ❤️ using Hugging Face Vision Models | [Report an Issue](https://github.com/bulentsoykan/streamlit-OCR-app/issues)")