Update gemini_vision.py
Browse files- gemini_vision.py +43 -0
gemini_vision.py
CHANGED
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import google.generativeai as genai
|
2 |
+
from PIL import Image
|
3 |
+
import os
|
4 |
+
import cv2
|
5 |
+
|
6 |
+
# Used to securely store your API key
|
7 |
+
|
8 |
+
|
9 |
+
import os
|
10 |
+
import google.generativeai as genai
|
11 |
+
|
12 |
+
# SECURITY: the API key must never be hard-coded in source control.
# Supply it through the GOOGLE_API_KEY environment variable before importing
# this module (e.g. `export GOOGLE_API_KEY=...` or your secret manager).
# NOTE(review): a key was previously committed at this spot; treat it as
# compromised and revoke it.

# Configure the Gemini API. Called without arguments, configure() picks the
# key up from the GOOGLE_API_KEY environment variable.
genai.configure()

# Shared multimodal model used by process_frame_with_gemini.
model = genai.GenerativeModel('gemini-1.5-flash')
|
17 |
+
|
18 |
+
def process_frame_with_gemini(frame, mode="image"):
    """Describe a video frame or transcribe an audio file with Gemini.

    Args:
        frame: For ``mode="image"``, an OpenCV frame in BGR channel order
            (it is converted with ``cv2.COLOR_BGR2RGB``). For
            ``mode="audio"``, the path of the audio file to transcribe.
        mode: ``"image"`` (default) or ``"audio"``.

    Returns:
        The text of the model's response.

    Raises:
        ValueError: If ``mode`` is neither ``"image"`` nor ``"audio"``.
    """
    # Reject unknown modes up front; previously they fell through to the
    # audio branch and crashed with a NameError on an unassigned variable.
    if mode not in ("image", "audio"):
        raise ValueError(
            f"Unsupported mode {mode!r}; expected 'image' or 'audio'"
        )

    if mode == "image":
        # OpenCV stores pixels as BGR, while PIL expects RGB.
        image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        response = model.generate_content(
            ["Describe the content of this frame", image]
        )
    else:
        # The model cannot open a local path given as plain text, so the
        # audio itself is uploaded and the resulting file handle is sent
        # alongside the prompt (instead of a path string and a dummy image).
        audio_file = genai.upload_file(path=frame)
        response = model.generate_content(
            ["Transcribe this audio file", audio_file]
        )

    return response.text
|
34 |
+
|
35 |
+
def summarize_with_gemini(text, max_words):
    """Request a summary of ``text`` of roughly ``max_words`` words.

    Args:
        text: The text to summarize.
        max_words: Target length, interpolated into the prompt.

    Returns:
        The text of the model's response.
    """
    prompt = f"Summarize the following text in about {max_words} words: {text}"
    # A fresh text-only model handle is created on every call.
    summarizer = genai.GenerativeModel('gemini-pro')
    return summarizer.generate_content(prompt).text
|
39 |
+
|
40 |
+
def extract_code_with_gemini(text):
    """Request the code snippets found in ``text``, extracted and formatted.

    Args:
        text: The text to search for code snippets.

    Returns:
        The text of the model's response.
    """
    prompt = f"Extract and format any code snippets from the following text: {text}"
    # A fresh text-only model handle is created on every call.
    extractor = genai.GenerativeModel('gemini-pro')
    return extractor.generate_content(prompt).text
|