PRIYANSHUDHAKED committed on
Commit
6ef8dec
·
verified ·
1 Parent(s): 8251c15

Update gemini_vision.py

Browse files
Files changed (1) hide show
  1. gemini_vision.py +43 -0
gemini_vision.py CHANGED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import google.generativeai as genai
2
+ from PIL import Image
3
+ import os
4
+ import cv2
5
+
6
+ # Used to securely store your API key
7
+
8
+
9
+ import os
10
+ import google.generativeai as genai
11
+
12
# Configure the Gemini API.
# SECURITY: an API key must never be committed to source control. The key is
# read from the GOOGLE_API_KEY environment variable (for example a deployment
# secret). Any key that was previously hard-coded in this file is exposed in
# the repository history and should be revoked and rotated.
genai.configure(api_key=os.environ.get("GOOGLE_API_KEY"))

# Multimodal model used by process_frame_with_gemini.
model = genai.GenerativeModel('gemini-1.5-flash')
17
+
18
def process_frame_with_gemini(frame, mode="image"):
    """Describe a video frame or transcribe an audio file with Gemini.

    Args:
        frame: In "image" mode, an OpenCV frame; it is converted from BGR to
            RGB before being sent to the model. In "audio" mode, the path of
            the audio file to transcribe.
        mode: Either "image" (default) or "audio".

    Returns:
        The text of the model's response.

    Raises:
        ValueError: If ``mode`` is neither "image" nor "audio".
    """
    if mode == "image":
        # OpenCV frames are BGR; PIL expects RGB channel order.
        image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        response = model.generate_content(["Describe the content of this frame", image])
    elif mode == "audio":
        # The model cannot open a local path mentioned in the prompt text, so
        # the audio bytes are uploaded through the File API and the returned
        # file handle is passed as prompt content.
        audio_file = genai.upload_file(frame)
        response = model.generate_content(["Transcribe this audio file", audio_file])
    else:
        # Previously an unknown mode fell through to the audio branch with
        # unbound locals; fail with a clear message instead.
        raise ValueError(f"Unsupported mode {mode!r}; expected 'image' or 'audio'")

    return response.text
34
+
35
def summarize_with_gemini(text, max_words):
    """Summarize ``text`` in roughly ``max_words`` words using Gemini.

    Args:
        text: The text to summarize.
        max_words: Approximate word budget, interpolated into the prompt.

    Returns:
        The model's summary, or an empty string when ``text`` is empty,
        whitespace-only or None (no API call is made in that case).
    """
    # Nothing to summarize: skip the API call rather than let the model
    # invent a summary for empty input.
    if not text or not text.strip():
        return ""

    text_model = genai.GenerativeModel('gemini-pro')
    response = text_model.generate_content(f"Summarize the following text in about {max_words} words: {text}")
    return response.text
39
+
40
def extract_code_with_gemini(text):
    """Ask Gemini to extract and format code snippets found in ``text``.

    Args:
        text: The text to search for code snippets.

    Returns:
        The model's response text, or an empty string when ``text`` is empty,
        whitespace-only or None (no API call is made in that case).
    """
    # Empty input cannot contain code: skip the API call.
    if not text or not text.strip():
        return ""

    text_model = genai.GenerativeModel('gemini-pro')
    response = text_model.generate_content(f"Extract and format any code snippets from the following text: {text}")
    return response.text