import base64
import logging
import os

import requests
from dotenv import load_dotenv

# Load environment variables (e.g. from a local .env file) before reading them.
load_dotenv()

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
    handlers=[
        logging.StreamHandler(),  # Log to console
        logging.FileHandler("api_request_logs.log"),  # Log to a file
    ],
)

# Get the API key from the environment.
# SECURITY: a key was previously hard-coded here; it must be treated as leaked
# and revoked. Secrets belong in the environment / .env file, never in source.
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
if not GROQ_API_KEY:
    raise ValueError("GROQ_API_KEY is not set in the .env file")

_GROQ_CHAT_COMPLETIONS_URL = "https://api.groq.com/openai/v1/chat/completions"

_IMAGE_ANALYSIS_PROMPT = (
    "Analyze the image to identify what is happening, describe the overall context, "
    "and perform OCR to extract any visible text. "
    "Additionally, specify whether the subject is a human, animal, or object, "
    "and provide a detailed description of any object the human is holding "
    "or their specific actions."
)


def _encode_image(image_path):
    """Return the base64 text for *image_path*, or None if the file is unreadable.

    If *image_path* names an existing file, its bytes are read and
    base64-encoded. Otherwise the value is returned unchanged, on the
    assumption that the caller already passed base64-encoded image data
    (this keeps earlier call sites that pre-encoded the image working).
    """
    if not os.path.isfile(image_path):
        return image_path
    try:
        with open(image_path, "rb") as image_file:
            encoded = base64.b64encode(image_file.read()).decode("utf-8")
    except OSError as e:
        logging.error("Error encoding the image: %s", e)
        return None
    logging.info("Successfully encoded the image to base64.")
    return encoded


def process_image_and_get_description(image_path, model="llama-3.2-90b-vision-preview", retries=3):
    """
    Process the image using the Groq API and get a description. Retries in case of failure.

    Args:
        image_path (str): Path to the image. A string that is not an existing
            file is sent as-is and assumed to already be base64 image data.
        model (str): Model to use for processing.
        retries (int): Number of attempts before giving up.

    Returns:
        str: Description of the image or an error message. This function does
        not raise for request, HTTP, or response-format failures; it logs them
        and returns an error string instead.
    """
    encoded_image = _encode_image(image_path)
    if encoded_image is None:
        return "Error encoding the image."

    # Prepare the request once; it is identical for every attempt.
    payload = {
        "model": model,
        "messages": [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": _IMAGE_ANALYSIS_PROMPT},
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:image/jpeg;base64,{encoded_image}"},
                    },
                ],
            }
        ],
        "max_tokens": 4096,
        "stop": None,
        "stream": False,
    }
    headers = {
        "Authorization": f"Bearer {GROQ_API_KEY}",
        "Content-Type": "application/json",
    }

    for attempt in range(1, retries + 1):
        logging.info("Attempt %s to process the image with Groq API.", attempt)
        try:
            response = requests.post(
                _GROQ_CHAT_COMPLETIONS_URL,
                json=payload,
                headers=headers,
                timeout=30,
            )
        except requests.RequestException as e:
            logging.error("RequestException on attempt %s: %s", attempt, e)
            continue

        if response.status_code != 200:
            logging.warning(
                "Received error response: %s - %s", response.status_code, response.text
            )
            continue

        # A 200 with an unexpected body must not crash the caller; treat it
        # as a failed attempt like any other error.
        try:
            answer = response.json()["choices"][0]["message"]["content"]
        except (ValueError, KeyError, IndexError, TypeError) as e:
            logging.error("Unexpected response format on attempt %s: %s", attempt, e)
            continue

        logging.info("Successfully processed the image and received a response.")
        return answer

    logging.error("All attempts to process the image failed.")
    return "Error: Unable to process the image after multiple attempts."


# # Example usage
# image_path = r"/content/temp.jpeg"
# description = process_image_and_get_description(image_path)
# print(description)