ProfessorLeVesseur committed on
Commit
a293fc1
·
verified ·
1 Parent(s): 4551af5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -10
app.py CHANGED
@@ -2,9 +2,9 @@ import streamlit as st
2
  import base64
3
  from huggingface_hub import InferenceClient
4
 
5
- # Function to encode the image to base64
6
- def encode_image(image_file):
7
- return base64.b64encode(image_file.getvalue()).decode("utf-8")
8
 
9
  # Streamlit page setup
10
  st.set_page_config(page_title="MTSS Image Accessibility Alt Text Generator", layout="centered", initial_sidebar_state="auto")
@@ -63,8 +63,8 @@ complex_image_prompt_text = (
63
  if uploaded_file is not None and analyze_button:
64
 
65
  with st.spinner("Analyzing the image ..."):
66
- # Encode the image
67
- base64_image = encode_image(uploaded_file)
68
 
69
  # Determine which prompt to use based on the complexity of the image
70
  if complex_image:
@@ -89,10 +89,10 @@ if uploaded_file is not None and analyze_button:
89
  "content": [
90
  {"type": "text", "text": prompt_text},
91
  {
92
- "type": "image_url",
93
- "image_url": {
94
- # Since we have the image in base64, we need to use a data URL
95
- "url": f"data:image/jpeg;base64,{base64_image}"
96
  },
97
  },
98
  ],
@@ -105,7 +105,7 @@ if uploaded_file is not None and analyze_button:
105
  completion = client.chat.completions.create(
106
  model="meta-llama/Llama-3.2-11B-Vision-Instruct",
107
  messages=messages,
108
- max_tokens=500 # Adjusted to match the template
109
  )
110
 
111
  # Extract the assistant's response
 
2
  import base64
3
  from huggingface_hub import InferenceClient
4
 
5
+ # Function to read the image file as bytes
6
+ def get_image_bytes(image_file):
7
+ return image_file.read()
8
 
9
  # Streamlit page setup
10
  st.set_page_config(page_title="MTSS Image Accessibility Alt Text Generator", layout="centered", initial_sidebar_state="auto")
 
63
  if uploaded_file is not None and analyze_button:
64
 
65
  with st.spinner("Analyzing the image ..."):
66
+ # Read the image bytes
67
+ image_bytes = get_image_bytes(uploaded_file)
68
 
69
  # Determine which prompt to use based on the complexity of the image
70
  if complex_image:
 
89
  "content": [
90
  {"type": "text", "text": prompt_text},
91
  {
92
+ "type": "image",
93
+ "image": {
94
+ # Provide the image bytes directly
95
+ "bytes": image_bytes
96
  },
97
  },
98
  ],
 
105
  completion = client.chat.completions.create(
106
  model="meta-llama/Llama-3.2-11B-Vision-Instruct",
107
  messages=messages,
108
+ max_tokens=500
109
  )
110
 
111
  # Extract the assistant's response