Spaces:
Sleeping
Sleeping
File size: 6,565 Bytes
bd07176 d2e6583 bd07176 d2e6583 4880a2a bd07176 d2e6583 bd07176 d2e6583 bd07176 d2e6583 bd07176 d2e6583 bd07176 d2e6583 bd07176 d2e6583 bd07176 d2e6583 bd07176 d2e6583 e6bd973 d2e6583 bd07176 d2e6583 bd07176 d2e6583 bd07176 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 |
import os
import base64
import requests
import streamlit as st
import json
# Streaming defaults to on until the session records a different preference.
if "stream" not in st.session_state:
    st.session_state.stream = True

# Model identifier, plus the credentials and endpoint used to reach it.
MODEL_ID = "meta/llama-3.2-90b-vision-instruct"
api_key = os.environ.get("NVIDIA_VISION_API_KEY")
invoke_url = f"https://ai.api.nvidia.com/v1/gr/{MODEL_ID}/chat/completions"
def encode_image(image_path):
    """Read the file at ``image_path`` and return its bytes as base64 text."""
    with open(image_path, mode="rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")
def extract_content(chunk):
    """Return the text fragment carried by one server-sent-event line.

    Args:
        chunk: One raw line of the streaming response, as UTF-8 bytes,
            expected to look like ``data: {...json...}``.

    Returns:
        The string found at ``choices[0].delta.content`` in the JSON
        payload, or ``""`` when the line has no ``data: `` marker, the
        payload is not valid JSON (for example the ``[DONE]`` sentinel),
        or the payload carries no text content.
    """
    decoded_chunk = chunk.decode('utf-8')
    # Cut only at the FIRST "data: " marker; the generated text inside the
    # JSON payload may itself contain that substring and must stay intact.
    _, marker, json_data = decoded_chunk.partition('data: ')
    if not marker:
        return ""
    try:
        parsed_data = json.loads(json_data)
    except json.JSONDecodeError:
        # Non-JSON payloads are deliberately ignored.
        return ""
    try:
        content = parsed_data['choices'][0]['delta']['content']
    except (KeyError, IndexError, TypeError):
        # Events without a text delta (role-only, empty choices, ...).
        return ""
    return content if content is not None else ""
# (connect, read) timeouts in seconds so a stalled request cannot hang the app.
_REQUEST_TIMEOUT = (10, 120)


def _delta_text(event):
    """Return the text fragment of one parsed streaming event, or "" if none."""
    try:
        text = event['choices'][0]['delta']['content']
    except (KeyError, IndexError, TypeError):
        return ""
    return text if isinstance(text, str) else ""


def _message_text(body):
    """Return the assistant message text of a non-streaming reply, or ""."""
    try:
        text = body['choices'][0]['message']['content']
    except (KeyError, IndexError, TypeError):
        return ""
    return text if text is not None else ""


def _render_stream(response):
    """Show a streaming reply incrementally in a single placeholder."""
    response_container = st.empty()
    content = ""
    for chunk in response.iter_lines():
        if not chunk:
            continue
        chunk_str = chunk.decode('utf-8')
        # Remove the "data: " prefix
        if chunk_str.startswith("data: "):
            chunk_str = chunk_str[6:]
        chunk_str = chunk_str.strip()
        if chunk_str == "[DONE]":
            break
        if not chunk_str:
            continue
        try:
            chunk_dict = json.loads(chunk_str)
        except json.JSONDecodeError as e:
            # Skip lines that are not JSON but keep a trace of them.
            print(f"Error parsing JSON: {e}")
            print(f"Invalid JSON string: {chunk_str}")
            continue
        # Some events carry no text (role-only or final chunks); skip them
        # instead of failing on a missing key.
        piece = _delta_text(chunk_dict)
        if piece:
            content += piece
            response_container.markdown(content)


def main():
    """Render the image-analysis page and query the vision model.

    The page lets the user upload a JPG/PNG image, pick one of the
    predefined analysis tasks, choose whether the answer is streamed,
    and press "Generate Response". The task text and the base64-encoded
    image are then posted to ``invoke_url`` and the answer is displayed,
    either incrementally (streaming) or in one piece. Request failures are
    reported with ``st.error`` instead of raising.
    """
    st.title("Multimodal Image Analysis with " + MODEL_ID)
    text = """Prof. Louie F. Cervantes, M. Eng. (Information Engineering)
CCS 229 - Intelligent Systems
Department of Computer Science
College of Information and Communications Technology
West Visayas State University
"""
    with st.expander("About"):
        st.text(text)
    st.write("Upload an image and select the image analysis task.")

    # File upload for image
    uploaded_image = st.file_uploader("Upload an Image", type=["jpg", "jpeg", "png"])
    if uploaded_image is not None:
        # Display the uploaded image
        st.image(uploaded_image, caption="Uploaded Image", use_container_width=True)

    # List of image analysis tasks
    analysis_tasks = [
        "Scene Analysis: Describe the scene depicted in the image. Identify the objects present, their spatial relationships, and any actions taking place.",
        "Object Detection and Classification: Identify and classify all objects present in the image. Provide detailed descriptions of each object, including its size, shape, color, and texture.",
        "Image Captioning: Generate a concise and accurate caption that describes the content of the image.",
        "Visual Question Answering: Answer specific questions about the image, such as 'What color is the car?' or 'How many people are in the image?'",
        "Image Similarity Search: Given a query image, find similar images from a large dataset based on visual features.",
        "Image Segmentation: Segment the image into different regions corresponding to objects or areas of interest.",
        "Optical Character Recognition (OCR): Extract text from the image, such as printed or handwritten text.",
        "Diagram Understanding: Analyze a diagram (e.g., flowchart, circuit diagram) and extract its structure and meaning.",
        "Art Analysis: Describe the artistic style, subject matter, and emotional impact of an image.",
        "Medical Image Analysis: Analyze medical images (e.g., X-rays, MRIs) to detect abnormalities or diagnose diseases.",
    ]

    # Task selection dropdown
    selected_task = st.selectbox("Select an image analysis task:", analysis_tasks)

    # The checkbox must be rendered on every run, outside the button branch:
    # a widget created inside `if st.button(...)` exists only on the rerun
    # triggered by the click, so the user could never change its value.
    stream = st.checkbox("Begin streaming the AI response as soon as it is available.", value=True)
    st.session_state.stream = stream

    if not st.button("Generate Response"):
        return

    if uploaded_image is None or selected_task == "":
        st.error("Please upload an image and select a task.")
        return
    if not api_key:
        # Without a key the request would be sent as "Bearer None".
        st.error("The NVIDIA_VISION_API_KEY environment variable is not set.")
        return

    # Encode the uploaded image to base64 and label it with its real MIME
    # type (uploads may be JPEG, not only PNG).
    base64_image = base64.b64encode(uploaded_image.getvalue()).decode('utf-8')
    mime_type = getattr(uploaded_image, "type", None) or "image/png"

    headers = {
        "Authorization": f"Bearer {api_key}",
        "Accept": "text/event-stream" if stream else "application/json",
    }
    # Prepare the multimodal prompt
    payload = {
        "model": MODEL_ID,
        "messages": [
            {
                "role": "user",
                "content": f'{selected_task} <img src="data:{mime_type};base64,{base64_image}" />',
            }
        ],
        "max_tokens": 512,
        "temperature": 1.00,
        "top_p": 1.00,
        "stream": stream,
    }

    with st.spinner("Processing..."):
        try:
            # The context manager releases the connection even when the
            # stream is abandoned early at the "[DONE]" marker.
            with requests.post(
                invoke_url,
                headers=headers,
                json=payload,
                stream=stream,  # Important for streaming
                timeout=_REQUEST_TIMEOUT,
            ) as response:
                # Surface HTTP errors instead of parsing an error body as
                # if it were a model answer.
                response.raise_for_status()
                if stream:
                    _render_stream(response)
                else:
                    content_string = _message_text(response.json())
                    st.write(f"AI Response: {content_string}")
                    st.success("Response generated!")
        except (requests.RequestException, ValueError) as e:
            # Covers connection failures, timeouts, HTTP error statuses and
            # a reply body that is not valid JSON.
            st.error(f"An error occurred: {e}")
# Start the app only when this file is executed as the main module.
if __name__ == "__main__":
    main()