# PRIYANSHUDHAKED's picture
# Update gemini_vision.py
# 49e7178 verified
import google.generativeai as genai
from PIL import Image
import os
import cv2
# Used to securely store your API key
import os
import google.generativeai as genai
# Configure the Gemini API.
# SECURITY: the API key is read from the GOOGLE_API_KEY environment variable
# and must never be committed to source control. A key was previously
# hard-coded on this line; treat it as leaked and revoke it.
genai.configure(api_key=os.environ.get("GOOGLE_API_KEY"))
# Initialize the Gemini model shared by the frame/audio helper below
model = genai.GenerativeModel('gemini-1.5-flash')
def process_frame_with_gemini(frame, mode="image"):
    """Send a video frame or an audio file to Gemini and return the reply text.

    Args:
        frame: When ``mode`` is ``"image"``, an OpenCV frame; it is converted
            from BGR to RGB before being wrapped in a PIL image. When ``mode``
            is ``"audio"``, the path of the audio file to transcribe.
        mode: Either ``"image"`` (describe the frame) or ``"audio"``
            (transcribe the file).

    Returns:
        The ``text`` attribute of the model's response.

    Raises:
        ValueError: If ``mode`` is neither ``"image"`` nor ``"audio"``.
    """
    # Reject unknown modes up front instead of failing later on an
    # unassigned local variable.
    if mode not in ("image", "audio"):
        raise ValueError(f"mode must be 'image' or 'audio', got {mode!r}")

    if mode == "image":
        # Convert OpenCV frame (BGR channel order) to an RGB PIL Image
        image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        response = model.generate_content(["Describe the content of this frame", image])
    else:
        # The model cannot read a local path passed as plain text; the audio
        # has to be uploaded so its content is part of the request.
        audio_file = genai.upload_file(path=frame)
        response = model.generate_content(["Transcribe this audio file", audio_file])
    return response.text
def summarize_with_gemini(text, max_words):
    """Ask the 'gemini-pro' model to summarize ``text``.

    Args:
        text: The text to summarize; interpolated into the prompt.
        max_words: Approximate word count requested in the prompt.

    Returns:
        The ``text`` attribute of the model's response.
    """
    summarizer = genai.GenerativeModel('gemini-pro')
    prompt = f"Summarize the following text in about {max_words} words: {text}"
    reply = summarizer.generate_content(prompt)
    return reply.text
def extract_code_with_gemini(text):
    """Ask the 'gemini-pro' model to pull out and format code found in ``text``.

    Args:
        text: The text to search for code snippets; interpolated into the
            prompt.

    Returns:
        The ``text`` attribute of the model's response.
    """
    extractor = genai.GenerativeModel('gemini-pro')
    prompt = f"Extract and format any code snippets from the following text: {text}"
    reply = extractor.generate_content(prompt)
    return reply.text