ProfessorLeVesseur committed on
Commit
a293fc1
·
verified ·
1 Parent(s): 4551af5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -10
app.py CHANGED
@@ -2,9 +2,9 @@ import streamlit as st
2
  import base64
3
  from huggingface_hub import InferenceClient
4
 
5
- # Function to encode the image to base64
6
- def encode_image(image_file):
7
- return base64.b64encode(image_file.getvalue()).decode("utf-8")
8
 
9
  # Streamlit page setup
10
  st.set_page_config(page_title="MTSS Image Accessibility Alt Text Generator", layout="centered", initial_sidebar_state="auto")
@@ -63,8 +63,8 @@ complex_image_prompt_text = (
63
  if uploaded_file is not None and analyze_button:
64
 
65
  with st.spinner("Analyzing the image ..."):
66
- # Encode the image
67
- base64_image = encode_image(uploaded_file)
68
 
69
  # Determine which prompt to use based on the complexity of the image
70
  if complex_image:
@@ -89,10 +89,10 @@ if uploaded_file is not None and analyze_button:
89
  "content": [
90
  {"type": "text", "text": prompt_text},
91
  {
92
- "type": "image_url",
93
- "image_url": {
94
- # Since we have the image in base64, we need to use a data URL
95
- "url": f"data:image/jpeg;base64,{base64_image}"
96
  },
97
  },
98
  ],
@@ -105,7 +105,7 @@ if uploaded_file is not None and analyze_button:
105
  completion = client.chat.completions.create(
106
  model="meta-llama/Llama-3.2-11B-Vision-Instruct",
107
  messages=messages,
108
- max_tokens=500 # Adjusted to match the template
109
  )
110
 
111
  # Extract the assistant's response
 
2
  import base64
3
  from huggingface_hub import InferenceClient
4
 
5
+ # Function to read the image file as bytes
6
+ def get_image_bytes(image_file):
7
+ return image_file.read()
8
 
9
  # Streamlit page setup
10
  st.set_page_config(page_title="MTSS Image Accessibility Alt Text Generator", layout="centered", initial_sidebar_state="auto")
 
63
  if uploaded_file is not None and analyze_button:
64
 
65
  with st.spinner("Analyzing the image ..."):
66
+ # Read the image bytes
67
+ image_bytes = get_image_bytes(uploaded_file)
68
 
69
  # Determine which prompt to use based on the complexity of the image
70
  if complex_image:
 
89
  "content": [
90
  {"type": "text", "text": prompt_text},
91
  {
92
+ "type": "image",
93
+ "image": {
94
+ # Provide the image bytes directly
95
+ "bytes": image_bytes
96
  },
97
  },
98
  ],
 
105
  completion = client.chat.completions.create(
106
  model="meta-llama/Llama-3.2-11B-Vision-Instruct",
107
  messages=messages,
108
+ max_tokens=500
109
  )
110
 
111
  # Extract the assistant's response