Spaces:

PRIYANSHUDHAKED
/

Intelligent_QA_with_VIDEO_System

Sleeping

Intelligent_QA_with_VIDEO_System / gemini_vision.py

Update gemini_vision.py

49e7178 verified 9 months ago

1.62 kB

	import google.generativeai as genai
	from PIL import Image
	import os
	import cv2

	# Used to securely store your API key


	import os
	import google.generativeai as genai

	# SECURITY: API keys must never be committed to source control. The key is
	# taken from the GOOGLE_API_KEY environment variable (for example a secret
	# configured on the hosting platform). The key that used to be hard-coded on
	# this line is exposed in the repository history and should be revoked and
	# replaced.
	genai.configure(api_key=os.environ.get("GOOGLE_API_KEY"))

	# Shared multimodal Gemini model used by process_frame_with_gemini.
	model = genai.GenerativeModel('gemini-1.5-flash')

	def process_frame_with_gemini(frame, mode="image"):
	    """Send a video frame or an audio file to the shared Gemini model.

	    Args:
	        frame: In ``"image"`` mode, an OpenCV frame (converted here from BGR
	            to RGB before being wrapped in a PIL image). In ``"audio"``
	            mode, the path of the audio file to transcribe.
	        mode: ``"image"`` (default) to describe a frame, or ``"audio"`` to
	            transcribe an audio file.

	    Returns:
	        The ``text`` attribute of the model response.

	    Raises:
	        ValueError: If ``mode`` is neither ``"image"`` nor ``"audio"``.
	    """
	    if mode == "image":
	        # OpenCV stores channels as BGR; PIL expects RGB.
	        image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
	        response = model.generate_content(["Describe the content of this frame", image])
	        return response.text

	    if mode == "audio":
	        # In this mode 'frame' is actually the audio file path. The model
	        # cannot read local files from a path string, so the audio is
	        # uploaded first and the resulting file handle is sent with the
	        # prompt instead of a placeholder image.
	        audio_file = genai.upload_file(path=frame)
	        response = model.generate_content(["Transcribe this audio file", audio_file])
	        return response.text

	    # Previously an unknown mode fell through and crashed on an unbound
	    # local variable; fail with an explicit, descriptive error instead.
	    raise ValueError(f"Unsupported mode: {mode!r} (expected 'image' or 'audio')")

	def summarize_with_gemini(text, max_words):
	    """Request a summary of ``text`` of about ``max_words`` words.

	    A 'gemini-pro' model instance is created for the call and the ``text``
	    attribute of its response is returned.
	    """
	    summarizer = genai.GenerativeModel('gemini-pro')
	    prompt = f"Summarize the following text in about {max_words} words: {text}"
	    return summarizer.generate_content(prompt).text

	def extract_code_with_gemini(text):
	    """Ask a 'gemini-pro' model to pull out and format code found in ``text``.

	    Returns the ``text`` attribute of the model response.
	    """
	    extractor = genai.GenerativeModel('gemini-pro')
	    prompt = f"Extract and format any code snippets from the following text: {text}"
	    result = extractor.generate_content(prompt)
	    return result.text