NEXAS committed on
Commit 03d82bf · verified · parent: 1f61ea3

Upload 9 files

initate.py ADDED
@@ -0,0 +1,10 @@
+ from utils.ingest_image import extract_and_store_images
+ from utils.ingest_text import create_vector_database
+ from utils.ingest_video import intiate_video
+
+ def process_pdf(pdf_path):
+     image_collection = extract_and_store_images(pdf_path)
+     text_collection = create_vector_database(pdf_path)
+     video_collection = intiate_video()
+     return image_collection, text_collection, video_collection
+
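A minimal driver for the function above (a sketch; the PDF path manual.pdf is hypothetical). process_pdf simply chains the three ingest helpers and hands back the resulting Chroma collections:

    from initate import process_pdf

    # Hypothetical input; any local PDF path works.
    images, texts, videos = process_pdf("manual.pdf")
    print(images.count(), texts.count(), videos.count())  # entries per collection
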
user.py ADDED
@@ -0,0 +1,250 @@
+ import os
+ import io
+ import base64
+ import chromadb
+ import streamlit as st
+ from PIL import Image as PILImage
+ from initate import process_pdf
+ from utils.llm_ag import intiate_convo
+ from utils.doi import process_image_and_get_description
+
+ path = "mm_vdb2"
+ client = chromadb.PersistentClient(path=path)
+
+ def display_images(image_collection, query_text, max_distance=None, debug=False):
+     """
+     Display images in a Streamlit app based on a query.
+
+     Args:
+         image_collection: The image collection object for querying.
+         query_text (str): The text query for images.
+         max_distance (float, optional): Maximum allowable distance for filtering.
+         debug (bool, optional): Whether to print debug information.
+     """
+     results = image_collection.query(
+         query_texts=[query_text],
+         n_results=10,
+         include=['uris', 'distances']
+     )
+
+     uris = results['uris'][0]
+     distances = results['distances'][0]
+
+     # Combine URIs and distances, then sort by URI in ascending order
+     sorted_results = sorted(zip(uris, distances), key=lambda x: x[0])
+
+     # Filter and display images
+     for uri, distance in sorted_results:
+         if max_distance is None or distance <= max_distance:
+             if debug:
+                 st.write(f"URI: {uri} - Distance: {distance}")
+             try:
+                 img = PILImage.open(uri)
+                 st.image(img, width=300)
+             except Exception as e:
+                 st.error(f"Error loading image {uri}: {e}")
+         else:
+             if debug:
+                 st.write(f"URI: {uri} - Distance: {distance} (Filtered out)")
+
+
+ def display_videos_streamlit(video_collection, query_text, max_distance=None, max_results=5, debug=False):
+     """
+     Display videos in a Streamlit app based on a query.
+
+     Args:
+         video_collection: The video collection object for querying.
+         query_text (str): The text query for videos.
+         max_distance (float, optional): Maximum allowable distance for filtering.
+         max_results (int, optional): Maximum number of results to display.
+         debug (bool, optional): Whether to print debug information.
+     """
+     # Deduplication set
+     displayed_videos = set()
+
+     # Query the video collection with the specified text
+     results = video_collection.query(
+         query_texts=[query_text],
+         n_results=max_results,
+         include=['uris', 'distances', 'metadatas']
+     )
+
+     # Extract URIs, distances, and metadatas from the result
+     uris = results['uris'][0]
+     distances = results['distances'][0]
+     metadatas = results['metadatas'][0]
+
+     # Display the videos that meet the distance criteria
+     for uri, distance, metadata in zip(uris, distances, metadatas):
+         video_uri = metadata['video_uri']
+
+         # Apply the distance filter and skip videos that were already shown
+         if (max_distance is None or distance <= max_distance) and video_uri not in displayed_videos:
+             if debug:
+                 st.write(f"URI: {uri} - Video URI: {video_uri} - Distance: {distance}")
+             st.video(video_uri)  # Display video in Streamlit
+             displayed_videos.add(video_uri)  # Prevent duplicates
+         else:
+             if debug:
+                 st.write(f"URI: {uri} - Video URI: {video_uri} - Distance: {distance} (Filtered out)")
+
+
+ def image_uris(image_collection, query_text, max_distance=None, max_results=5):
+     results = image_collection.query(
+         query_texts=[query_text],
+         n_results=max_results,
+         include=['uris', 'distances']
+     )
+
+     filtered_uris = []
+     for uri, distance in zip(results['uris'][0], results['distances'][0]):
+         if max_distance is None or distance <= max_distance:
+             filtered_uris.append(uri)
+
+     return filtered_uris
+
+ def text_uris(text_collection, query_text, max_distance=None, max_results=5):
+     results = text_collection.query(
+         query_texts=[query_text],
+         n_results=max_results,
+         include=['documents', 'distances']
+     )
+
+     filtered_texts = []
+     for doc, distance in zip(results['documents'][0], results['distances'][0]):
+         if max_distance is None or distance <= max_distance:
+             filtered_texts.append(doc)
+
+     return filtered_texts
+
+ def frame_uris(video_collection, query_text, max_distance=None, max_results=5):
+     results = video_collection.query(
+         query_texts=[query_text],
+         n_results=max_results,
+         include=['uris', 'distances']
+     )
+
+     filtered_uris = []
+     seen_folders = set()
+
+     # Keep at most one frame per video folder
+     for uri, distance in zip(results['uris'][0], results['distances'][0]):
+         if max_distance is None or distance <= max_distance:
+             folder = os.path.dirname(uri)
+             if folder not in seen_folders:
+                 filtered_uris.append(uri)
+                 seen_folders.add(folder)
+
+             if len(filtered_uris) == max_results:
+                 break
+
+     return filtered_uris
+
+ def image_uris2(image_collection2, query_text, max_distance=None, max_results=5):
+     results = image_collection2.query(
+         query_texts=[query_text],
+         n_results=max_results,
+         include=['uris', 'distances']
+     )
+
+     filtered_uris = []
+     for uri, distance in zip(results['uris'][0], results['distances'][0]):
+         if max_distance is None or distance <= max_distance:
+             filtered_uris.append(uri)
+
+     return filtered_uris
+
+
+ def format_prompt_inputs(image_collection, text_collection, video_collection, user_query):
+     frame_candidates = frame_uris(video_collection, user_query, max_distance=1.55)
+     image_candidates = image_uris(image_collection, user_query, max_distance=1.5)
+     texts = text_uris(text_collection, user_query, max_distance=1.3)
+
+     inputs = {"query": user_query, "texts": texts}
+     frame = frame_candidates[0] if frame_candidates else ""
+     inputs["frame"] = frame
+
+     if image_candidates:
+         image = image_candidates[0]
+         with PILImage.open(image) as img:
+             # Downscale and convert to grayscale to shrink the payload
+             img = img.resize((img.width // 6, img.height // 6))
+             img = img.convert("L")
+             with io.BytesIO() as output:
+                 img.save(output, format="JPEG", quality=60)
+                 compressed_image_data = output.getvalue()
+
+         inputs["image_data_1"] = base64.b64encode(compressed_image_data).decode('utf-8')
+     else:
+         inputs["image_data_1"] = ""
+
+     return inputs
+
+ def page_1():
+     st.title("Page 1: Upload and Process PDF")
+
+     uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])
+     if uploaded_file:
+         pdf_path = f"/tmp/{uploaded_file.name}"
+         with open(pdf_path, "wb") as f:
+             f.write(uploaded_file.getbuffer())
+
+         try:
+             image_collection, text_collection, video_collection = process_pdf(pdf_path)
+             st.session_state.image_collection = image_collection
+             st.session_state.text_collection = text_collection
+             st.session_state.video_collection = video_collection
+
+             st.success("PDF processed successfully! Collections saved to session state.")
+         except Exception as e:
+             st.error(f"Error processing PDF: {e}")
+
+ def page_2():
+     st.title("Page 2: Query and Use Processed Collections")
+
+     if "image_collection" in st.session_state and "text_collection" in st.session_state and "video_collection" in st.session_state:
+         image_collection = st.session_state.image_collection
+         text_collection = st.session_state.text_collection
+         video_collection = st.session_state.video_collection
+         st.success("Collections loaded successfully.")
+
+         query = st.text_input("Enter your query", value="Example Query")
+         if query:
+             inputs = format_prompt_inputs(image_collection, text_collection, video_collection, query)
+             texts = inputs["texts"]
+             image_data_1 = inputs["image_data_1"]
+
+             if image_data_1:
+                 image_data_1 = process_image_and_get_description(image_data_1)
+
+             response = intiate_convo(query, image_data_1, texts)
+             st.write("Response:", response)
+
+             st.markdown("### Images")
+             display_images(image_collection, query, max_distance=1.55, debug=True)
+
+             st.markdown("### Videos")
+             frame = inputs["frame"]
+             if frame:
+                 # Frames are stored in a folder named after their source video
+                 video_name = os.path.basename(os.path.dirname(frame))
+                 video_path = f"StockVideos-CC0/{video_name}.mp4"
+                 if os.path.exists(video_path):
+                     st.video(video_path)
+             else:
+                 st.write("No related videos found.")
+     else:
+         st.error("Collections not found in session state. Please process the PDF on Page 1.")
+
+ # --- Navigation ---
+
+ PAGES = {
+     "Upload and Process PDF": page_1,
+     "Query and Use Processed Collections": page_2
+ }
+
+ # Select page
+ selected_page = st.sidebar.selectbox("Choose a page", options=list(PAGES.keys()))
+
+ # Render selected page
+ PAGES[selected_page]()
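Note: user.py is a Streamlit entry point, so it is launched with "streamlit run user.py" rather than executed directly. The collections stored in st.session_state on Page 1 are the same objects returned by process_pdf above, which is what lets Page 2 query them without re-ingesting the PDF.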
utils/doi.py ADDED
@@ -0,0 +1,97 @@
+ import base64
+ import requests
+ import os
+ import logging
+ from dotenv import load_dotenv
+
+ # Load environment variables
+ load_dotenv()
+
+ # Configure logging
+ logging.basicConfig(
+     level=logging.INFO,
+     format="%(asctime)s - %(levelname)s - %(message)s",
+     handlers=[
+         logging.StreamHandler(),                     # Log to console
+         logging.FileHandler("api_request_logs.log")  # Log to a file
+     ]
+ )
+
+ # Get the API key from the environment (loaded from the .env file)
+ GROQ_API_KEY = os.getenv("GROQ_API_KEY")
+ if not GROQ_API_KEY:
+     raise ValueError("GROQ_API_KEY is not set in the .env file")
+
+ def process_image_and_get_description(encoded_image, model="llama-3.2-90b-vision-preview", retries=3):
+     """
+     Process the image using the Groq API and get a description.
+     Retries in case of failure.
+
+     Args:
+         encoded_image (str): Base64-encoded JPEG data for the image.
+         model (str): Model to use for processing.
+         retries (int): Number of retries before giving up.
+
+     Returns:
+         str: Description of the image or an error message.
+     """
+     # Prepare the message payload; the caller passes already base64-encoded data
+     messages = [
+         {
+             "role": "user",
+             "content": [
+                 {"type": "text", "text": "Analyze the image to identify what is happening, describe the overall context, and perform OCR to extract any visible text. Additionally, specify whether the subject is a human, animal, or object, and provide a detailed description of any object the human is holding or their specific actions."},
+                 {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{encoded_image}"}}
+             ]
+         }
+     ]
+
+     for attempt in range(1, retries + 1):
+         try:
+             logging.info(f"Attempt {attempt} to process the image with Groq API.")
+
+             # Make the API request
+             response = requests.post(
+                 "https://api.groq.com/openai/v1/chat/completions",
+                 json={
+                     "model": model,
+                     "messages": messages,
+                     "max_tokens": 4096,
+                     "stop": None,
+                     "stream": False
+                 },
+                 headers={
+                     "Authorization": f"Bearer {GROQ_API_KEY}",
+                     "Content-Type": "application/json"
+                 },
+                 timeout=30
+             )
+
+             # Process the response
+             if response.status_code == 200:
+                 result = response.json()
+                 answer = result["choices"][0]["message"]["content"]
+                 logging.info("Successfully processed the image and received a response.")
+                 return answer
+             else:
+                 logging.warning(f"Received error response: {response.status_code} - {response.text}")
+         except requests.RequestException as e:
+             logging.error(f"RequestException on attempt {attempt}: {e}")
+
+     logging.error("All attempts to process the image failed.")
+     return "Error: Unable to process the image after multiple attempts."
+
+ # # Example usage
+ # with open("/content/temp.jpeg", "rb") as f:
+ #     encoded = base64.b64encode(f.read()).decode("utf-8")
+ # print(process_image_and_get_description(encoded))
utils/ingest_image.py ADDED
@@ -0,0 +1,51 @@
+ # Ingest images
+ import os
+ import fitz  # PyMuPDF
+ import chromadb
+ from chromadb.utils.data_loaders import ImageLoader
+ from chromadb.utils.embedding_functions import OpenCLIPEmbeddingFunction  # type: ignore
+
+ path = "mm_vdb2"
+ client = chromadb.PersistentClient(path=path)
+
+ def extract_and_store_images(pdf_path, images_dir=r'extracted_images'):
+     # Step 1: Extract images from the PDF
+     pdf_document = fitz.open(pdf_path)
+     os.makedirs(images_dir, exist_ok=True)
+
+     for page_num in range(len(pdf_document)):
+         page = pdf_document.load_page(page_num)
+         image_list = page.get_images(full=True)
+
+         for image_index, img in enumerate(image_list):
+             xref = img[0]
+             base_image = pdf_document.extract_image(xref)
+             image_bytes = base_image["image"]
+             image_ext = base_image["ext"]
+             image_filename = f"{images_dir}/page_{page_num+1}_img_{image_index+1}.{image_ext}"
+
+             with open(image_filename, "wb") as image_file:
+                 image_file.write(image_bytes)
+                 print(f"Saved: {image_filename}")
+
+     print("Image extraction complete.")
+
+     # Step 2: Add the extracted images to ChromaDB
+     image_loader = ImageLoader()
+     CLIP = OpenCLIPEmbeddingFunction()
+     image_collection = client.get_or_create_collection(name="image", embedding_function=CLIP, data_loader=image_loader)
+
+     ids = []
+     uris = []
+
+     for i, filename in enumerate(sorted(os.listdir(images_dir))):
+         if filename.lower().endswith(('.jpg', '.jpeg', '.png')):
+             file_path = os.path.join(images_dir, filename)
+             ids.append(str(i))
+             uris.append(file_path)
+
+     image_collection.add(ids=ids, uris=uris)
+     print("Images added to the database.")
+     return image_collection
+
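A brief usage sketch, assuming the dependencies above are installed and a local PDF at the hypothetical path manual.pdf; the query mirrors the include fields user.py passes when displaying results:

    from utils.ingest_image import extract_and_store_images

    collection = extract_and_store_images("manual.pdf")  # hypothetical input
    hits = collection.query(query_texts=["wiring diagram"], n_results=3,
                            include=['uris', 'distances'])
    for uri, dist in zip(hits['uris'][0], hits['distances'][0]):
        print(uri, dist)  # smaller distance = closer CLIP match
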
utils/ingest_image2.py ADDED
@@ -0,0 +1,50 @@
+ # Ingest page screenshots
+ import os
+ import chromadb
+ from pdf2image import convert_from_path
+ from chromadb.utils.data_loaders import ImageLoader
+ from chromadb.utils.embedding_functions import OpenCLIPEmbeddingFunction  # type: ignore
+
+ path = "mm_vdb2"
+ client = chromadb.PersistentClient(path=path)
+
+ def extract_and_store_images2(pdf_path, images_dir=r'extracted_images2'):
+     # Ensure the output directory exists
+     os.makedirs(images_dir, exist_ok=True)
+
+     # Convert the PDF to a list of images (one per page)
+     pages = convert_from_path(pdf_path, 300)  # 300 dpi is a good resolution
+
+     # Save each page as an image (screenshot)
+     for i, page in enumerate(pages):
+         output_path = os.path.join(images_dir, f'page_{i + 1}.png')
+         page.save(output_path, 'PNG')
+         print(f"Saved: {output_path}")
+
+     print("Image extraction complete.")
+
+     # Step 2: Add the extracted images to ChromaDB
+     image_loader = ImageLoader()
+     CLIP = OpenCLIPEmbeddingFunction()
+     image_collection2 = client.get_or_create_collection(name="image2", embedding_function=CLIP, data_loader=image_loader)
+
+     ids = []
+     uris = []
+
+     for i, filename in enumerate(sorted(os.listdir(images_dir))):
+         if filename.lower().endswith(('.jpg', '.jpeg', '.png')):
+             file_path = os.path.join(images_dir, filename)
+             ids.append(str(i))
+             uris.append(file_path)
+
+     image_collection2.add(ids=ids, uris=uris)
+     print("Images added to the database.")
+     return image_collection2
+
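Note: unlike utils/ingest_image.py, which extracts the images embedded inside the PDF, this variant rasterizes every page into a full-page screenshot with pdf2image (which requires the poppler utilities on the system) and indexes those screenshots in the separate "image2" collection queried by image_uris2 in user.py.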
utils/ingest_text.py ADDED
@@ -0,0 +1,106 @@
+ import os
+ import pickle
+ import chromadb
+ import nest_asyncio
+
+ # Ingest text
+ from llama_parse import LlamaParse
+ from langchain.document_loaders import PyMuPDFLoader
+ from langchain.text_splitter import CharacterTextSplitter
+
+ nest_asyncio.apply()
+
+ path = "mm_vdb2"
+ client = chromadb.PersistentClient(path=path)
+
+ llamaparse_api_key = "llx-qXMliHH4UOphFaahO8HEqR5wOj1U6T7oxqC4DoLiik7UvKkJ"
+ groq_api_key = "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"
+
+ parsed_data_file = r"parsed_data.pkl"
+ output_md = r"output.md"
+ loki = r"data"
+
+ # Load parsed data if available, or parse if not
+ def load_or_parse_data(loc):
+     data_file = parsed_data_file
+
+     if os.path.exists(data_file):
+         # Load the parsed data from the file
+         with open(data_file, "rb") as f:
+             parsed_data = pickle.load(f)
+     else:
+         # Perform the parsing step and store the result in llama_parse_documents
+         parsingInstructiontest10k = """The provided document is a user guide or a manual.
+ It contains many images and tables.
+ Try to be precise while answering the questions."""
+         parser = LlamaParse(api_key=llamaparse_api_key, result_type="markdown", parsing_instruction=parsingInstructiontest10k)  # type: ignore
+         llama_parse_documents = parser.load_data(loc)
+
+         # Save the parsed data to a file
+         with open(data_file, "wb") as f:
+             pickle.dump(llama_parse_documents, f)
+
+         # Set the parsed data to the variable
+         parsed_data = llama_parse_documents
+
+     return parsed_data
+
+
+ # Create vector database
+ def create_vector_database(loc):
+     """
+     Creates a vector database from a PDF.
+
+     Loads the PDF with PyMuPDFLoader, splits each page into overlapping
+     chunks, and persists the chunks into a Chroma collection (embedded
+     with the collection's default embedding function).
+     """
+     loader = PyMuPDFLoader(file_path=loc)
+     docs = loader.load()  # Returns a list of pages/documents
+
+     print(f"Number of documents: {len(docs)}")
+     print("Vector DB started!")
+
+     # Initialize lists for chunk contents and IDs
+     document_contents = []
+     ids = []
+
+     # Split page text into overlapping chunks
+     text_splitter = CharacterTextSplitter(separator="\n", chunk_size=1000, chunk_overlap=200)
+
+     # Generate unique IDs for each chunk, with the PDF page number first
+     for i, doc in enumerate(docs):
+         # Print metadata to understand its structure
+         print(f"Metadata for document {i+1}: {doc.metadata}")
+
+         # Extract the page number from metadata or fall back to a default;
+         # PyMuPDFLoader usually exposes the zero-based index under 'page'
+         page_num = doc.metadata.get('page', doc.metadata.get('page_number', f'unknown_{i+1}'))
+
+         # Extract the text of the page and split it into chunks
+         page_content = doc.page_content
+         doc_chunks = text_splitter.split_text(page_content)
+
+         # Add chunk contents and corresponding page-based IDs
+         for chunk_idx, chunk in enumerate(doc_chunks):
+             document_contents.append(chunk)
+             ids.append(f"page_{page_num}_chunk_{i+1}_{chunk_idx+1}")  # Unique chunk ID
+
+     # Ensure the number of IDs matches the number of chunk contents
+     assert len(ids) == len(document_contents), "Mismatch between number of ids and document contents"
+
+     # Create or get the text collection
+     text_collection = client.get_or_create_collection(name="text_collection")
+
+     # Add the chunks to the collection (embeddings are computed by Chroma)
+     text_collection.add(
+         documents=document_contents,  # All the chunk-level content
+         ids=ids                       # Matching IDs for each chunk
+     )
+
+     print('Vector DB created successfully!')
+     return text_collection
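A small retrieval sketch against the collection this builds (the PDF path and query string are hypothetical); Chroma embeds the query with the same default embedding function it used at ingest time:

    from utils.ingest_text import create_vector_database

    text_collection = create_vector_database("manual.pdf")  # hypothetical input
    res = text_collection.query(query_texts=["battery replacement"], n_results=2)
    for chunk_id, doc in zip(res['ids'][0], res['documents'][0]):
        print(chunk_id, doc[:80])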
utils/ingest_video.py ADDED
@@ -0,0 +1,122 @@
+ import gdown
+ import zipfile
+ import os
+ import chromadb
+ from chromadb.utils.embedding_functions import OpenCLIPEmbeddingFunction
+ from chromadb.utils.data_loaders import ImageLoader
+
+ import cv2
+
+ path = "mm_vdb2"
+ client = chromadb.PersistentClient(path=path)
+
+ image_loader = ImageLoader()
+ CLIP = OpenCLIPEmbeddingFunction()
+ video_collection = client.get_or_create_collection(
+     name='video_collection',
+     embedding_function=CLIP,
+     data_loader=image_loader
+ )
+
+ def unzip_file(zip_path, extract_to):
+     """
+     Unzips a zip file to the specified directory.
+
+     Args:
+         zip_path (str): Path to the zip file.
+         extract_to (str): Directory where the contents should be extracted.
+     """
+     try:
+         # Ensure the destination directory exists
+         os.makedirs(extract_to, exist_ok=True)
+
+         # Open the zip file and extract all of its contents
+         with zipfile.ZipFile(zip_path, 'r') as zip_ref:
+             zip_ref.extractall(extract_to)
+         print(f"Successfully extracted {zip_path} to {extract_to}")
+     except Exception as e:
+         print(f"An error occurred: {e}")
+
+
+ def extract_frames(video_folder, output_folder):
+     # Sample one frame every five seconds, plus the first and last frames
+     if not os.path.exists(output_folder):
+         os.makedirs(output_folder)
+
+     for video_filename in os.listdir(video_folder):
+         if video_filename.endswith('.mp4'):
+             video_path = os.path.join(video_folder, video_filename)
+             video_capture = cv2.VideoCapture(video_path)
+             fps = video_capture.get(cv2.CAP_PROP_FPS)
+             frame_count = int(video_capture.get(cv2.CAP_PROP_FRAME_COUNT))
+             duration = frame_count / fps
+
+             # One subfolder of frames per video, named after the video
+             output_subfolder = os.path.join(output_folder, os.path.splitext(video_filename)[0])
+             if not os.path.exists(output_subfolder):
+                 os.makedirs(output_subfolder)
+
+             success, image = video_capture.read()
+             frame_number = 0
+             while success:
+                 if frame_number == 0 or frame_number % int(fps * 5) == 0 or frame_number == frame_count - 1:
+                     frame_time = frame_number / fps
+                     output_frame_filename = os.path.join(output_subfolder, f'frame_{int(frame_time)}.jpg')
+                     cv2.imwrite(output_frame_filename, image)
+
+                 success, image = video_capture.read()
+                 frame_number += 1
+
+             video_capture.release()
+
+ def add_frames_to_chromadb(video_dir, frames_dir):
+     # Dictionary to hold video titles and their corresponding frames
+     video_frames = {}
+
+     # Process each video and associate its frames
+     for video_file in os.listdir(video_dir):
+         if video_file.endswith('.mp4'):
+             video_title = video_file[:-4]
+             frame_folder = os.path.join(frames_dir, video_title)
+             if os.path.exists(frame_folder):
+                 # List all jpg files in the folder
+                 video_frames[video_title] = [f for f in os.listdir(frame_folder) if f.endswith('.jpg')]
+
+     # Prepare ids, uris, and metadatas
+     ids = []
+     uris = []
+     metadatas = []
+
+     # Each frame points back to its parent video via 'video_uri'
+     for video_title, frames in video_frames.items():
+         video_path = os.path.join(video_dir, f"{video_title}.mp4")
+         for frame in frames:
+             frame_id = f"{frame[:-4]}_{video_title}"
+             frame_path = os.path.join(frames_dir, video_title, frame)
+             ids.append(frame_id)
+             uris.append(frame_path)
+             metadatas.append({'video_uri': video_path})
+
+     video_collection.add(ids=ids, uris=uris, metadatas=metadatas)
+
+
+ def intiate_video():
+     # Download, unzip, and index the stock-video set
+     file_id = "1Fm8Cge1VM4w8fmE0cZfRKhIQV0UgBXzp"
+     output_file = os.path.join("video", "StockVideos-CC01.zip")
+     os.makedirs("video", exist_ok=True)  # gdown does not create the directory
+     gdown.download(f"https://drive.google.com/uc?id={file_id}", output_file, quiet=False)
+
+     print(f"File downloaded successfully: {output_file}")
+
+     zip_file_path = output_file
+     destination_folder = "video"
+     unzip_file(zip_file_path, destination_folder)
+     print("Unzipped")
+
+     video_folder_path = os.path.join('video', 'StockVideos-CC0')
+     output_folder_path = os.path.join('video', 'StockVideos-CC0-frames')
+
+     extract_frames(video_folder_path, output_folder_path)
+     add_frames_to_chromadb(video_folder_path, output_folder_path)
+     return video_collection
+
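A sketch of the frame-to-video lookup this enables (the query string is hypothetical, and calling intiate_video triggers the full download and frame extraction): each indexed frame carries a video_uri metadata entry, so a text query over frames resolves back to playable videos, as display_videos_streamlit in user.py does:

    from utils.ingest_video import intiate_video

    vc = intiate_video()  # downloads, extracts frames, and indexes them
    res = vc.query(query_texts=["ocean waves"], n_results=3,
                   include=['uris', 'distances', 'metadatas'])
    for meta in res['metadatas'][0]:
        print(meta['video_uri'])
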
utils/llm_ag.py ADDED
@@ -0,0 +1,60 @@
+ import requests
+ import os
+ from dotenv import load_dotenv
+
+ load_dotenv()
+
+ # Get the API key from the environment (loaded from the .env file)
+ GROQ_API_KEY = os.getenv("GROQ_API_KEY")
+ if not GROQ_API_KEY:
+     raise ValueError("GROQ_API_KEY is not set in the .env file")
+
+ def intiate_convo(user_query, image_description, additional_text, model="mixtral-8x7b-32768"):
+     # Prepare the message payload
+     messages = [
+         {
+             "role": "system",
+             "content": """You are an AI assistant for training. Given an image description, additional context, and a user query, respond with a detailed, step-by-step answer, and be polite.
+ IMPORTANT: When referring to the image, subtly acknowledge it by saying "as I see here" rather than explicitly mentioning "image" or "photo."
+ Your tone should be natural and conversational. Keep it detailed, engaging, and relevant to the query, using both the image description and the additional context as reference points."""
+         },
+         {
+             "role": "user",
+             "content": f"Image description: {image_description}. Additional context: {additional_text}. User query: {user_query}. Provide a detailed response like an AI assistant."
+         }
+     ]
+
+     # Make the API request
+     response = requests.post(
+         "https://api.groq.com/openai/v1/chat/completions",
+         json={
+             "model": model,
+             "messages": messages,
+             "max_tokens": 32768,
+             "stop": None,
+             "stream": False
+         },
+         headers={
+             "Authorization": f"Bearer {GROQ_API_KEY}",
+             "Content-Type": "application/json"
+         },
+         timeout=60
+     )
+
+     # Process the response
+     if response.status_code == 200:
+         result = response.json()
+         answer = result["choices"][0]["message"]["content"]
+         return answer
+     else:
+         return f"Error from LLM API: {response.status_code} - {response.text}"
+
+ # # Example usage
+ # user_query = "Can you tell me more about the person in this description?"
+ # image_description = """The main subject of the image is a person with dark complexion, short black hair, and white-framed glasses, wearing a dark-colored shirt or jacket. They are looking directly at the camera with a subtle expression."""
+ # additional_text = """This individual is a software engineer specializing in AI development. They are known for their expertise in computer vision and enjoy photography as a hobby."""
+
+ # # Get the LLM response
+ # response = intiate_convo(user_query, image_description, additional_text)
+ # print(response)
video/temp.py ADDED
File without changes