Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -7,7 +7,7 @@ import os
|
|
7 |
|
8 |
# Page configuration
|
9 |
st.set_page_config(
|
10 |
-
page_title="
|
11 |
page_icon="π",
|
12 |
layout="wide",
|
13 |
initial_sidebar_state="expanded"
|
@@ -20,7 +20,8 @@ if not HF_API_KEY:
|
|
20 |
|
21 |
# Hugging Face API function
|
22 |
def process_image_with_hf(image_bytes):
|
23 |
-
|
|
|
24 |
headers = {"Authorization": f"Bearer {HF_API_KEY}"}
|
25 |
|
26 |
# Convert image to base64
|
@@ -43,17 +44,23 @@ def process_image_with_hf(image_bytes):
|
|
43 |
if response.status_code != 200:
|
44 |
raise Exception(f"API request failed with status code {response.status_code}: {response.text}")
|
45 |
|
46 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
47 |
|
48 |
# Title and description in main area
|
49 |
try:
|
50 |
# Try to load the image from assets folder
|
51 |
st.markdown("""
|
52 |
-
# <img src="data:image/png;base64,{}" width="50" style="vertical-align: -12px;">
|
53 |
""".format(base64.b64encode(open("./assets/gemma3.png", "rb").read()).decode()), unsafe_allow_html=True)
|
54 |
except FileNotFoundError:
|
55 |
# Fallback if image doesn't exist
|
56 |
-
st.title("
|
57 |
|
58 |
# Add clear button to top right
|
59 |
col1, col2 = st.columns([6,1])
|
@@ -63,11 +70,27 @@ with col2:
|
|
63 |
del st.session_state['ocr_result']
|
64 |
st.rerun()
|
65 |
|
66 |
-
st.markdown('<p style="margin-top: -20px;">Extract structured text from images using
|
67 |
st.markdown("---")
|
68 |
|
69 |
-
#
|
70 |
with st.sidebar:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
71 |
st.header("Upload Image")
|
72 |
uploaded_file = st.file_uploader("Choose an image...", type=['png', 'jpg', 'jpeg'])
|
73 |
|
@@ -81,8 +104,11 @@ with st.sidebar:
|
|
81 |
st.error("Hugging Face API key is missing. Please set it as an environment variable or in Streamlit secrets.")
|
82 |
else:
|
83 |
if st.button("Extract Text π", type="primary"):
|
84 |
-
with st.spinner("Processing image..."):
|
85 |
try:
|
|
|
|
|
|
|
86 |
# Get image bytes
|
87 |
img_bytes = uploaded_file.getvalue()
|
88 |
|
@@ -91,6 +117,7 @@ with st.sidebar:
|
|
91 |
st.session_state['ocr_result'] = result
|
92 |
except Exception as e:
|
93 |
st.error(f"Error processing image: {str(e)}")
|
|
|
94 |
|
95 |
# Main content area for results
|
96 |
if 'ocr_result' in st.session_state:
|
@@ -100,4 +127,4 @@ else:
|
|
100 |
|
101 |
# Footer
|
102 |
st.markdown("---")
|
103 |
-
st.markdown("Made with using
|
|
|
7 |
|
8 |
# Page configuration
|
9 |
st.set_page_config(
|
10 |
+
page_title="Vision OCR",
|
11 |
page_icon="🔍",
|
12 |
layout="wide",
|
13 |
initial_sidebar_state="expanded"
|
|
|
20 |
|
21 |
# Hugging Face API function
|
22 |
def process_image_with_hf(image_bytes):
|
23 |
+
# Use an available multimodal model that can handle images and text
|
24 |
+
API_URL = "https://api-inference.huggingface.co/models/llava-hf/llava-1.5-7b-hf"
|
25 |
headers = {"Authorization": f"Bearer {HF_API_KEY}"}
|
26 |
|
27 |
# Convert image to base64
|
|
|
44 |
if response.status_code != 200:
|
45 |
raise Exception(f"API request failed with status code {response.status_code}: {response.text}")
|
46 |
|
47 |
+
# Handle different response formats
|
48 |
+
if isinstance(response.json(), list):
|
49 |
+
return response.json()[0]["generated_text"]
|
50 |
+
elif isinstance(response.json(), dict) and "generated_text" in response.json():
|
51 |
+
return response.json()["generated_text"]
|
52 |
+
else:
|
53 |
+
return str(response.json())
|
54 |
|
55 |
# Title and description in main area
|
56 |
try:
|
57 |
# Try to load the image from assets folder
|
58 |
st.markdown("""
|
59 |
+
# <img src="data:image/png;base64,{}" width="50" style="vertical-align: -12px;"> Vision OCR
|
60 |
""".format(base64.b64encode(open("./assets/gemma3.png", "rb").read()).decode()), unsafe_allow_html=True)
|
61 |
except FileNotFoundError:
|
62 |
# Fallback if image doesn't exist
|
63 |
+
st.title("Vision OCR")
|
64 |
|
65 |
# Add clear button to top right
|
66 |
col1, col2 = st.columns([6,1])
|
|
|
70 |
del st.session_state['ocr_result']
|
71 |
st.rerun()
|
72 |
|
73 |
+
st.markdown('<p style="margin-top: -20px;">Extract structured text from images using advanced vision models!</p>', unsafe_allow_html=True)
|
74 |
st.markdown("---")
|
75 |
|
76 |
+
# Add model selection
|
77 |
with st.sidebar:
|
78 |
+
st.header("Settings")
|
79 |
+
model_option = st.selectbox(
|
80 |
+
"Select Vision Model",
|
81 |
+
["LLaVA 1.5 (7B)", "CLIP-ViT", "BLIP-2"],
|
82 |
+
index=0
|
83 |
+
)
|
84 |
+
|
85 |
+
# Map selection to model ID
|
86 |
+
model_mapping = {
|
87 |
+
"LLaVA 1.5 (7B)": "llava-hf/llava-1.5-7b-hf",
|
88 |
+
"CLIP-ViT": "openai/clip-vit-base-patch32",
|
89 |
+
"BLIP-2": "Salesforce/blip2-opt-2.7b"
|
90 |
+
}
|
91 |
+
|
92 |
+
selected_model = model_mapping[model_option]
|
93 |
+
|
94 |
st.header("Upload Image")
|
95 |
uploaded_file = st.file_uploader("Choose an image...", type=['png', 'jpg', 'jpeg'])
|
96 |
|
|
|
104 |
st.error("Hugging Face API key is missing. Please set it as an environment variable or in Streamlit secrets.")
|
105 |
else:
|
106 |
if st.button("Extract Text 🔍", type="primary"):
|
107 |
+
with st.spinner(f"Processing image with {model_option}..."):
|
108 |
try:
|
109 |
+
# Update the model URL
|
110 |
+
API_URL = f"https://api-inference.huggingface.co/models/{selected_model}"
|
111 |
+
|
112 |
# Get image bytes
|
113 |
img_bytes = uploaded_file.getvalue()
|
114 |
|
|
|
117 |
st.session_state['ocr_result'] = result
|
118 |
except Exception as e:
|
119 |
st.error(f"Error processing image: {str(e)}")
|
120 |
+
st.info("Try selecting a different model from the dropdown.")
|
121 |
|
122 |
# Main content area for results
|
123 |
if 'ocr_result' in st.session_state:
|
|
|
127 |
|
128 |
# Footer
|
129 |
st.markdown("---")
|
130 |
+
st.markdown("Made with ❤️ using Hugging Face Vision Models | [Report an Issue](https://github.com/bulentsoykan/streamlit-OCR-app/issues)")
|