File size: 1,617 Bytes
6ef8dec
 
 
 
 
 
 
 
 
 
 
49e7178
6ef8dec
 
 
 
 
 
 
 
 
 
 
 
 
 
c87f855
6ef8dec
c87f855
6ef8dec
c87f855
 
 
 
 
 
 
 
 
 
 
 
 
256df3b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import google.generativeai as genai
from PIL import Image
import os
import cv2

# The Gemini API key is supplied through the GOOGLE_API_KEY environment variable.


import os  
import google.generativeai as genai  

# SECURITY: an API key must never be hard-coded in source control. The key that
# was previously committed on this line is compromised and must be revoked and
# rotated; supply its replacement through the GOOGLE_API_KEY environment
# variable (shell export, .env loader, or the deployment's secret store).
genai.configure(api_key=os.environ.get("GOOGLE_API_KEY"))

# Shared multimodal model used by process_frame_with_gemini.
model = genai.GenerativeModel('gemini-1.5-flash')

def process_frame_with_gemini(frame, mode="image"):
    """Describe a video frame or transcribe an audio file with Gemini.

    Args:
        frame: In ``"image"`` mode, an OpenCV frame; it is converted from BGR
            to RGB and wrapped in a PIL image before being sent. In
            ``"audio"`` mode, the path of the audio file to transcribe.
        mode: ``"image"`` (default) or ``"audio"``.

    Returns:
        The text of the model's response.

    Raises:
        ValueError: If ``mode`` is neither ``"image"`` nor ``"audio"``.
    """
    if mode == "image":
        # OpenCV frames are BGR; PIL expects RGB.
        image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        contents = ["Describe the content of this frame", image]
    elif mode == "audio":
        # The model cannot open a local path that is merely mentioned in the
        # prompt, so the file itself is uploaded and the returned handle is
        # sent alongside the instruction.
        audio_file = genai.upload_file(path=frame)
        contents = ["Transcribe this audio", audio_file]
    else:
        # An unknown mode used to fall through to the audio branch and fail
        # on an unbound local; reject it explicitly instead.
        raise ValueError(
            f"Unsupported mode {mode!r}; expected 'image' or 'audio'"
        )

    response = model.generate_content(contents)
    # NOTE(review): response.text may raise if the response carries no text
    # (e.g. a blocked prompt) - confirm how callers want that handled.
    return response.text

def summarize_with_gemini(text, max_words):
    """Ask the 'gemini-pro' model to summarize ``text``.

    Args:
        text: The text to summarize; interpolated into the prompt.
        max_words: Approximate word count requested in the prompt.

    Returns:
        The text of the model's response.
    """
    # A fresh model handle is created on every call, then queried in one chain.
    return genai.GenerativeModel('gemini-pro').generate_content(
        f"Summarize the following text in about {max_words} words: {text}"
    ).text

def extract_code_with_gemini(text):
    """Ask the 'gemini-pro' model to pull code snippets out of ``text``.

    Args:
        text: The text to search; interpolated into the prompt.

    Returns:
        The text of the model's response.
    """
    extractor = genai.GenerativeModel('gemini-pro')
    prompt = f"Extract and format any code snippets from the following text: {text}"
    reply = extractor.generate_content(prompt)
    return reply.text