autism exp minicpm
- README.md +8 -7
- app.py +71 -0
- images/bird.jpg +0 -0
- images/cat.jpg +0 -0
- images/dog.jpg +0 -0
- requirements.txt +17 -0
- src/__init__.py +0 -0
- src/app/__init__.py +0 -0
- src/app/model.py +66 -0
- src/app/response.py +119 -0
- src/config.py +8 -0
- src/exception.py +50 -0
- src/logger.py +21 -0
README.md
CHANGED
@@ -1,13 +1,14 @@
 ---
-title:
-emoji:
-colorFrom:
-colorTo:
+title: PicQ
+emoji: π’
+colorFrom: green
+colorTo: red
 sdk: gradio
-sdk_version: 5.
+sdk_version: 5.11.0
 app_file: app.py
 pinned: false
-license:
+license: mit
+short_description: Demo for MiniCPM-V 2.6 to answer questions about images
 ---

 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py
ADDED
@@ -0,0 +1,71 @@
# Importing the requirements
# import warnings
# warnings.filterwarnings("ignore")

import gradio as gr
from src.app.response import describe_image


# Image, text query, and input parameters
# image = gr.Image(type="pil", label="Image")
image = gr.Video()
question = gr.Textbox(label="prompts ", placeholder="Enter your prompts here")
temperature = gr.Slider(
    minimum=0.01, maximum=1.99, step=0.01, value=0.7, label="Temperature"
)
top_p = gr.Slider(minimum=0, maximum=1, step=0.01, value=0.8, label="Top P")
top_k = gr.Slider(minimum=0, maximum=1000, step=1, value=100, label="Top K")
max_new_tokens = gr.Slider(minimum=1, maximum=4096, step=1, value=512, label="Max Tokens")

# Output for the interface
answer = gr.Textbox(label="Predicted answer", show_label=True, show_copy_button=True)

# Examples for the interface
examples = [
    [
        "images/cat.jpg",
        "How many cats are there?",
        0.7,
        0.8,
        100,
        512,
    ],
    [
        "images/dog.jpg",
        "¿De qué color es el perro?",
        0.7,
        0.8,
        100,
        512,
    ],
    [
        "images/bird.jpg",
        "Que fait l'oiseau ?",
        0.7,
        0.8,
        100,
        512,
    ],
]

# Title, description, and article for the interface
title = "Visual Question Answering"
description = "Gradio Demo for the MiniCPM-V 2.6 Vision Language Understanding and Generation model. This model can answer questions about images in natural language. To use it, upload your image, type a question, select associated parameters, use the default values, click 'Submit', or click one of the examples to load them. You can read more at the links below."
article = "<p style='text-align: center'><a href='https://github.com/OpenBMB/MiniCPM-V' target='_blank'>Model GitHub Repo</a> | <a href='https://huggingface.co/openbmb/MiniCPM-V-2_6' target='_blank'>Model Page</a></p>"


# Launch the interface
interface = gr.Interface(
    fn=describe_image,
    inputs=[image, question, temperature, top_p, top_k, max_new_tokens],
    outputs=answer,
    # examples=examples,
    cache_examples=True,
    cache_mode="lazy",
    title=title,
    description=description,
    article=article,
    theme="Glass",
    flagging_mode="never",
)
interface.launch(debug=False)

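For local debugging outside the Space, the handler behind the interface can be exercised directly. This is a minimal sketch and not part of the commit: the clip path samples/demo.mp4 is a placeholder, and because the `image` input is a `gr.Video`, `describe_image` receives a video file path. Importing `src.app.response` loads the model at import time, so this assumes a CUDA machine with the `spaces` package installed.

```python
# Hypothetical smoke test (not part of the commit); samples/demo.mp4 is a placeholder path
from src.app.response import describe_image

if __name__ == "__main__":
    answer = describe_image(
        "samples/demo.mp4",            # video path, as produced by the gr.Video input
        "What happens in this clip?",  # prompt
        0.7,                           # temperature
        0.8,                           # top_p
        100,                           # top_k
        512,                           # max_new_tokens
    )
    print(answer)
```
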
images/bird.jpg
ADDED
images/cat.jpg
ADDED
images/dog.jpg
ADDED
requirements.txt
ADDED
@@ -0,0 +1,17 @@
python-dotenv==1.0.1
numpy==1.26.4
Pillow==10.1.0
torch==2.2.0
torchaudio==2.2.0
torchvision==0.17.0
transformers==4.44.2
sentencepiece==0.1.99
https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.2/flash_attn-2.6.2+cu123torch2.2cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
decord
librosa==0.9.0
soundfile==0.12.1
vector-quantize-pytorch==1.18.5
vocos==0.1.0
moviepy
gradio
accelerate

src/__init__.py
ADDED
File without changes
src/app/__init__.py
ADDED
File without changes
src/app/model.py
ADDED
@@ -0,0 +1,66 @@
# Necessary imports
import os
import sys
from dotenv import load_dotenv
from typing import Any
import torch
from transformers import AutoModel, AutoTokenizer, AutoProcessor

# Local imports
from src.logger import logging
from src.exception import CustomExceptionHandling


# Load the Environment Variables from .env file
load_dotenv()

# Access token for using the model
access_token = os.environ.get("ACCESS_TOKEN")


def load_model_tokenizer_and_processor(model_name: str, device: str) -> Any:
    """
    Load the model, tokenizer and processor.

    Args:
        - model_name (str): The name of the model to load.
        - device (str): The device to load the model onto.

    Returns:
        - model: The loaded model.
        - tokenizer: The loaded tokenizer.
        - processor: The loaded processor.
    """
    # try:
    # Load the model, tokenizer and processor
    model = AutoModel.from_pretrained(
        model_name,
        # device_map = 'auto',
        trust_remote_code=True,
        attn_implementation="sdpa",
        torch_dtype=torch.bfloat16,
        # init_vision=True,
        # init_audio=False,
        # init_tts=False,
        # token=access_token
    ).to('cuda')
    # model = model
    tokenizer = AutoTokenizer.from_pretrained(
        model_name, trust_remote_code=True,
        # token=access_token
    )
    processor = AutoProcessor.from_pretrained(
        model_name, trust_remote_code=True,
        # token=access_token
    )

    # Log the successful loading of the model, tokenizer and processor
    logging.info("Model, tokenizer and processor loaded successfully.")

    # Return the model, tokenizer and processor
    return model, tokenizer, processor

    # Handle exceptions that may occur during model, tokenizer and processor loading
    # except Exception as e:
    #     # Custom exception handling
    #     raise CustomExceptionHandling(e, sys) from e

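The loader above is consumed once, at import time, in src/app/response.py. As a standalone sketch (an illustration, not part of the commit; it assumes a CUDA device, since the function moves the model to 'cuda' regardless of the `device` argument):

```python
# Usage sketch mirroring the call in src/app/response.py (requires a CUDA device)
from src.config import model_name, device
from src.app.model import load_model_tokenizer_and_processor

model, tokenizer, processor = load_model_tokenizer_and_processor(model_name, device)
print(type(model).__name__)  # the model class loaded via trust_remote_code
```
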
src/app/response.py
ADDED
@@ -0,0 +1,119 @@
# Necessary imports
import sys
import gradio as gr
import spaces
from decord import VideoReader, cpu
from PIL import Image

# Local imports
from src.config import (
    device,
    model_name,
    sampling,
    stream,
    repetition_penalty,
)
from src.app.model import load_model_tokenizer_and_processor
from src.logger import logging
from src.exception import CustomExceptionHandling


# Model, tokenizer and processor
# try:
model, tokenizer, processor = load_model_tokenizer_and_processor(model_name, device)
# except Exception as e:
#     # Custom exception handling
#     raise CustomExceptionHandling(e, sys) from e

MAX_NUM_FRAMES = 64


def encode_video(video_path):
    def uniform_sample(l, n):
        gap = len(l) / n
        idxs = [int(i * gap + gap / 2) for i in range(n)]
        return [l[i] for i in idxs]

    vr = VideoReader(video_path, ctx=cpu(0))
    sample_fps = round(vr.get_avg_fps() / 1)  # FPS
    frame_idx = [i for i in range(0, len(vr), sample_fps)]
    if len(frame_idx) > MAX_NUM_FRAMES:
        frame_idx = uniform_sample(frame_idx, MAX_NUM_FRAMES)
    frames = vr.get_batch(frame_idx).asnumpy()
    frames = [Image.fromarray(v.astype('uint8')) for v in frames]
    print('num frames:', len(frames))
    return frames


@spaces.GPU()
def describe_image(
    image: str,
    question: str,
    temperature: float,
    top_p: float,
    top_k: int,
    max_new_tokens: int,
) -> str:
    """
    Generates an answer to a given question based on the provided image and question.

    Args:
        - image (str): The path to the image file.
        - question (str): The question text.
        - temperature (float): The temperature parameter for the model.
        - top_p (float): The top_p parameter for the model.
        - top_k (int): The top_k parameter for the model.
        - max_new_tokens (int): The max tokens to be generated by the model.

    Returns:
        str: The generated answer to the question.
    """
    try:
        # Check if image or question is None
        if not image or not question:
            gr.Warning("Please provide an image and a question.")

        frames = encode_video(image)
        msgs = [
            {'role': 'user', 'content': frames + [question]},
        ]

        # Message format for the model
        # msgs = [{"role": "user", "content": [image, question]}]

        # Generate the answer
        # answer = model.chat(
        #     image=None,
        #     msgs=msgs,
        #     tokenizer=tokenizer,
        #     processor=processor,
        #     sampling=sampling,
        #     stream=stream,
        #     top_p=top_p,
        #     top_k=top_k,
        #     temperature=temperature,
        #     repetition_penalty=repetition_penalty,
        #     max_new_tokens=max_new_tokens,
        # )
        answer = model.chat(
            msgs=msgs,
            tokenizer=tokenizer,
            processor=processor,
            sampling=sampling,
            stream=stream,
            top_p=top_p,
            top_k=top_k,
            temperature=temperature,
            repetition_penalty=repetition_penalty,
            max_new_tokens=max_new_tokens
        )

        # Log the successful generation of the answer
        logging.info("Answer generated successfully.")
        logging.info("".join(answer))

        # Return the answer
        return "".join(answer)

    # Handle exceptions that may occur during answer generation
    except Exception as e:
        # Custom exception handling
        raise CustomExceptionHandling(e, sys) from e

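encode_video samples roughly one frame per second (sample_fps is the rounded average FPS) and caps the result at MAX_NUM_FRAMES with uniform_sample. A small self-contained check of that capping behaviour, illustrative only and not part of the commit:

```python
# Illustrative check of the uniform sampling used in encode_video (logic copied from above)
def uniform_sample(l, n):
    gap = len(l) / n
    idxs = [int(i * gap + gap / 2) for i in range(n)]
    return [l[i] for i in idxs]

candidates = list(range(600))          # e.g. 600 one-per-second frame indices from a 10-minute clip
picked = uniform_sample(candidates, 64)
assert len(picked) == 64               # capped at MAX_NUM_FRAMES
print(picked[:3], "...", picked[-3:])  # indices spread evenly across the clip
```
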
src/config.py
ADDED
@@ -0,0 +1,8 @@
# Model settings
device = "cuda"
model_name = "openbmb/MiniCPM-o-2_6"

# Decoding settings
sampling = True
stream = True
repetition_penalty = 1.05

src/exception.py
ADDED
@@ -0,0 +1,50 @@
"""
This module defines a custom exception handling class and a function to get error message with details of the error.
"""

# Standard Library
import sys

# Local imports
from src.logger import logging


# Function Definition to get error message with details of the error (file name and line number) when an error occurs in the program
def get_error_message(error, error_detail: sys):
    """
    Get error message with details of the error.

    Args:
        - error (Exception): The error that occurred.
        - error_detail (sys): The details of the error.

    Returns:
        str: A string containing the error message along with the file name and line number where the error occurred.
    """
    _, _, exc_tb = error_detail.exc_info()

    # Get error details
    file_name = exc_tb.tb_frame.f_code.co_filename
    return "Error occured in python script name [{0}] line number [{1}] error message[{2}]".format(
        file_name, exc_tb.tb_lineno, str(error)
    )


# Custom Exception Handling Class Definition
class CustomExceptionHandling(Exception):
    """
    Custom Exception Handling:
    This class defines a custom exception that can be raised when an error occurs in the program.
    It takes an error message and an error detail as input and returns a formatted error message when the exception is raised.
    """

    # Constructor
    def __init__(self, error_message, error_detail: sys):
        """Initialize the exception"""
        super().__init__(error_message)

        self.error_message = get_error_message(error_message, error_detail=error_detail)

    def __str__(self):
        """String representation of the exception"""
        return self.error_message

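CustomExceptionHandling is paired with `sys` so that get_error_message can pull the failing file name and line number from sys.exc_info(); src/app/response.py re-raises it from the original error. A minimal sketch of the same pattern, not part of the commit:

```python
# Sketch: wrapping an error the way src/app/response.py does
import sys
from src.exception import CustomExceptionHandling

try:
    1 / 0
except Exception as e:
    # response.py re-raises instead: raise CustomExceptionHandling(e, sys) from e
    print(CustomExceptionHandling(e, sys))  # includes the file name and line number of the failure
```
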
src/logger.py
ADDED
@@ -0,0 +1,21 @@
# Importing the required modules
import os
import logging
from datetime import datetime

# Creating a log file with the current date and time as the name of the file
LOG_FILE = f"{datetime.now().strftime('%m_%d_%Y_%H_%M_%S')}.log"

# Creating a logs folder if it does not exist
logs_path = os.path.join(os.getcwd(), "logs", LOG_FILE)
os.makedirs(logs_path, exist_ok=True)

# Setting the log file path and the log level
LOG_FILE_PATH = os.path.join(logs_path, LOG_FILE)

# Configuring the logger
logging.basicConfig(
    filename=LOG_FILE_PATH,
    format="[ %(asctime)s ] %(lineno)d %(name)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)

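Importing src.logger configures the root logger as a side effect, which is how src/app/model.py and src/app/response.py use it. Note that logs_path already contains LOG_FILE, so the configured file lands at logs/<timestamp>.log/<timestamp>.log (a directory named like the log file, with the log file inside it). A minimal usage sketch, not part of the commit:

```python
# Usage sketch: the import alone configures logging for the process
from src.logger import logging

logging.info("Model, tokenizer and processor loaded successfully.")  # written to logs/<timestamp>.log/<timestamp>.log
```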