louiecerv committed on
Commit
bd07176
·
1 Parent(s): 16951c5

save changes

Browse files
Files changed (2) hide show
  1. app.py +103 -0
  2. requirements.txt +2 -0
app.py ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import time
3
+ import base64
4
+ import requests
5
+ import streamlit as st
6
+
7
+ # Access the secret API key
8
+ # if the app is running locally, you can set the API key as an environment variable
9
+ api_key = os.getenv("NVIDIA_APP_KEY")
10
+
11
def encode_image(image_path):
    """Return the base64 encoding of the file at *image_path* as a UTF-8 string."""
    with open(image_path, "rb") as fh:
        raw = fh.read()
    return base64.b64encode(raw).decode('utf-8')
15
+
16
# Toggle server-sent-event streaming of the model response.
stream = True

# The Accept header must match the streaming mode chosen above.
if stream:
    _accept_type = "text/event-stream"
else:
    _accept_type = "application/json"

# Request headers: bearer-token auth plus the streaming-aware Accept header.
headers = {
    "Authorization": f"Bearer {api_key}",
    "Accept": _accept_type
}
23
+
24
def main():
    """Streamlit app entry point.

    Lets the user upload an image, pick one of ten predefined image-analysis
    tasks, and sends a multimodal prompt (task text + base64-embedded image)
    to the NVIDIA-hosted Llama 3.2 90B Vision model, displaying the response.

    Relies on module-level globals: ``api_key``, ``headers``, ``stream``.
    """
    st.title("Multimodal using GPT 4 Turbo Model")

    text = """Prof. Louie F. Cervantes, M. Eng. (Information Engineering)
    CCS 229 - Intelligent Systems
    Department of Computer Science
    College of Information and Communications Technology
    West Visayas State University
    """
    with st.expander("About"):
        st.text(text)

    st.write("Upload an image and select the image analysis task.")

    # File upload for image
    uploaded_image = st.file_uploader("Upload an Image", type=["jpg", "jpeg", "png"])
    if uploaded_image is not None:
        # Encode the uploaded image to base64 so it can be inlined in the prompt.
        base64_image = base64.b64encode(uploaded_image.getvalue()).decode('utf-8')

        # Display the uploaded image
        st.image(uploaded_image, caption="Uploaded Image", use_container_width=True)

        # List of image analysis tasks
        analysis_tasks = [
            "Scene Analysis: Describe the scene depicted in the image. Identify the objects present, their spatial relationships, and any actions taking place.",
            "Object Detection and Classification: Identify and classify all objects present in the image. Provide detailed descriptions of each object, including its size, shape, color, and texture.",
            "Image Captioning: Generate a concise and accurate caption that describes the content of the image.",
            "Visual Question Answering: Answer specific questions about the image, such as 'What color is the car?' or 'How many people are in the image?'",
            "Image Similarity Search: Given a query image, find similar images from a large dataset based on visual features.",
            "Image Segmentation: Segment the image into different regions corresponding to objects or areas of interest.",
            "Optical Character Recognition (OCR): Extract text from the image, such as printed or handwritten text.",
            "Diagram Understanding: Analyze a diagram (e.g., flowchart, circuit diagram) and extract its structure and meaning.",
            "Art Analysis: Describe the artistic style, subject matter, and emotional impact of an image.",
            "Medical Image Analysis: Analyze medical images (e.g., X-rays, MRIs) to detect abnormalities or diagnose diseases."
        ]

        # Task selection dropdown
        selected_task = st.selectbox("Select an image analysis task:", analysis_tasks)

        # Button to generate response
        if st.button("Generate Response"):
            # Defensive check only: selectbox always returns one of the tasks,
            # and uploaded_image is guaranteed non-None inside this branch.
            # (Fixed typo in the message: "sekect" -> "select".)
            if selected_task == "":
                st.error("Please upload an image and select a task.")
            else:
                # Prepare the multimodal prompt: task text plus the image
                # embedded as a base64 data URI inside an <img> tag.
                payload = {
                    "model": 'meta/llama-3.2-90b-vision-instruct',
                    "messages": [
                        {
                            "role": "user",
                            "content": f'{selected_task} <img src="data:image/png;base64,{base64_image}" />'
                        }
                    ],
                    "max_tokens": 512,
                    "temperature": 1.00,
                    "top_p": 1.00,
                    "stream": stream
                }

                with st.spinner("Processing..."):
                    try:
                        # Generate response. A timeout is required: without one,
                        # requests can block indefinitely and freeze the app.
                        response = requests.post(
                            "https://ai.api.nvidia.com/v1/gr/meta/llama-3.2-90b-vision-instruct/chat/completions",
                            headers=headers,
                            json=payload,
                            timeout=120
                        )
                        # Display the response if streaming
                        if stream:
                            # Write raw server-sent-event lines as they arrive.
                            for line in response.iter_lines():
                                if line:
                                    st.write(line.decode("utf-8"))
                        else:
                            # Show the response content
                            content = response.json()
                            contentstring = content['choices'][0]['message']['content']
                            st.write(f"AI Response: {contentstring}")
                        st.success("Response generated!")
                    except Exception as e:
                        # Broad catch is acceptable here: this is the top-level
                        # UI boundary, and the error is surfaced to the user.
                        st.error(f"An error occurred: {e}")
101
+
102
# Run the app only when this file is executed as a script.
if __name__ == "__main__":
    main()
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ streamlit
2
+ openai
+ requests