louiecerv committed on
Commit
d2e6583
·
1 Parent(s): bd07176

added error handling

Browse files
Files changed (1) hide show
  1. app.py +81 -39
app.py CHANGED
@@ -1,28 +1,35 @@
1
  import os
2
- import time
3
  import base64
4
  import requests
5
  import streamlit as st
 
6
 
7
- # Access the secret API key
8
- # if the app is running locally, you can set the API key as an environment variable
9
- api_key = os.getenv("NVIDIA_APP_KEY")
 
 
 
10
 
11
  # Function to encode the image
12
  def encode_image(image_path):
13
  with open(image_path, "rb") as image_file:
14
  return base64.b64encode(image_file.read()).decode('utf-8')
15
 
16
- # stream the response
17
- stream = True
18
-
19
- headers = {
20
- "Authorization": f"Bearer {api_key}",
21
- "Accept": "text/event-stream" if stream else "application/json"
22
- }
 
 
 
 
23
 
24
  def main():
25
- st.title("Multimodal using GPT 4 Turbo Model")
26
 
27
  text = """Prof. Louie F. Cervantes, M. Eng. (Information Engineering)
28
  CCS 229 - Intelligent Systems
@@ -60,44 +67,79 @@ def main():
60
 
61
  # Task selection dropdown
62
  selected_task = st.selectbox("Select an image analysis task:", analysis_tasks)
 
 
63
 
64
- # Button to generate response
65
  if st.button("Generate Response"):
 
 
 
66
  if uploaded_image is None or selected_task == "":
67
- st.error("Please upload an image and sekect a task.")
 
 
68
  else:
 
 
 
 
 
69
  # Prepare the multimodal prompt
70
  payload = {
71
- "model": 'meta/llama-3.2-90b-vision-instruct',
72
- "messages": [
73
- {
74
- "role": "user",
75
- "content": f'{selected_task} <img src="data:image/png;base64,{base64_image}" />'
76
- }
77
- ],
78
- "max_tokens": 512,
79
- "temperature": 1.00,
80
- "top_p": 1.00,
81
- "stream": stream
82
  }
83
 
84
  with st.spinner("Processing..."):
 
 
 
 
 
 
 
 
85
  try:
86
- # Generate response
87
- response = requests.post("https://ai.api.nvidia.com/v1/gr/meta/llama-3.2-90b-vision-instruct/chat/completions", headers=headers, json=payload)
88
- # Display the response if streaming
89
- if stream:
90
- for line in response.iter_lines():
91
- if line:
92
- st.write(line.decode("utf-8"))
93
- else:
94
- # Show the response content
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
  content = response.json()
96
- contentstring = content['choices'][0]['message']['content']
97
- st.write(f"AI Response: {contentstring}")
98
- st.success("Response generated!")
99
- except Exception as e:
100
- st.error(f"An error occurred: {e}")
101
 
 
 
 
102
  if __name__ == "__main__":
103
  main()
 
1
  import os
 
2
  import base64
3
  import requests
4
  import streamlit as st
5
+ import json
6
 
7
+ if "stream" not in st.session_state:
8
+ st.session_state.stream = True
9
+
10
+ api_key = os.getenv("NVIDIA_VISION_KEY")
11
+ MODEL_ID = "llama-3.2-11b-vision-instruct"
12
+ invoke_url = "https://ai.api.nvidia.com/v1/gr/meta/llama-3.2-11b-vision-instruct/chat/completions"
13
 
14
  # Function to encode the image
15
  def encode_image(image_path):
16
  with open(image_path, "rb") as image_file:
17
  return base64.b64encode(image_file.read()).decode('utf-8')
18
 
19
+ def extract_content(chunk):
20
+ try:
21
+ decoded_chunk = chunk.decode('utf-8')
22
+ json_data = decoded_chunk.split('data: ')[1]
23
+ parsed_data = json.loads(json_data)
24
+ content = parsed_data['choices'][0]['delta']['content']
25
+ return content
26
+ except json.JSONDecodeError as e:
27
+ #ignore the error
28
+ return ""
29
+
30
 
31
  def main():
32
+ st.title("Multimodal Image Analysis with " + MODEL_ID)
33
 
34
  text = """Prof. Louie F. Cervantes, M. Eng. (Information Engineering)
35
  CCS 229 - Intelligent Systems
 
67
 
68
  # Task selection dropdown
69
  selected_task = st.selectbox("Select an image analysis task:", analysis_tasks)
70
+
71
+
72
 
 
73
  if st.button("Generate Response"):
74
+ st.session_state.stream = st.checkbox("Begin streaming the AI response as soon as it is available.", value=True)
75
+ stream = st.session_state.stream
76
+
77
  if uploaded_image is None or selected_task == "":
78
+ st.error("Please upload an image and select a task.")
79
+ return
80
+
81
  else:
82
+ headers = {
83
+ "Authorization": f"Bearer {api_key}",
84
+ "Accept": "text/event-stream" if stream else "application/json"
85
+ }
86
+
87
  # Prepare the multimodal prompt
88
  payload = {
89
+ "model": MODEL_ID,
90
+ "messages": [
91
+ {
92
+ "role": "user",
93
+ "content": f'{selected_task} <img src="data:image/png;base64,{base64_image}" />'
94
+ }
95
+ ],
96
+ "max_tokens": 512,
97
+ "temperature": 1.00,
98
+ "top_p": 1.00,
99
+ "stream": stream
100
  }
101
 
102
  with st.spinner("Processing..."):
103
+ response = requests.post(
104
+ invoke_url,
105
+ headers=headers,
106
+ json=payload,
107
+ stream=stream # Important for streaming
108
+ )
109
+
110
+ #handle if the AI refused to connect
111
  try:
112
+ if (
113
+ json.loads(response).get("type") == "about:blank"
114
+ and json.loads(response).get("status") == 404):
115
+ st.error("Resource not found. Please check the URL.")
116
+ return
117
+ except json.JSONDecodeError as e:
118
+ st.error("Resource not found. Please check the URL.")
119
+ pass
120
+
121
+ if stream:
122
+ print(f"response: {response.text}")
123
+
124
+ response_container = st.empty()
125
+ content = ""
126
+ # Efficiently handle streaming response
127
+ for chunk in response.iter_lines():
128
+
129
+ if len(chunk) > 0:
130
+ content += extract_content(chunk)
131
+ response_container.markdown(content)
132
+
133
+ else:
134
+ try:
135
  content = response.json()
136
+ content_string = content.get('choices', [{}])[0].get('message', {}).get('content', '')
137
+ st.write(f"AI Response: {content_string}")
138
+
139
+ st.success("Response generated!")
 
140
 
141
+ except Exception as e:
142
+ st.error(f"An error occurred: {e}")
143
+
144
  if __name__ == "__main__":
145
  main()