mohan007 committed on
Commit
841ff1d
·
1 Parent(s): 3f249f0

autism exp minicpm

Browse files
README.md CHANGED
@@ -1,13 +1,14 @@
1
  ---
2
- title: Autism Exp With Minicpm O 2 6
3
- emoji: πŸ“ˆ
4
- colorFrom: purple
5
- colorTo: blue
6
  sdk: gradio
7
- sdk_version: 5.13.2
8
  app_file: app.py
9
  pinned: false
10
- license: cc-by-nc-4.0
 
11
  ---
12
 
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: PicQ
3
+ emoji: 🐢
4
+ colorFrom: green
5
+ colorTo: red
6
  sdk: gradio
7
+ sdk_version: 5.11.0
8
  app_file: app.py
9
  pinned: false
10
+ license: mit
11
+ short_description: Demo for MiniCPM-o 2.6 to answer questions about videos
12
  ---
13
 
14
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Importing the requirements
import gradio as gr

from src.app.response import describe_image


# Input components: a video, a free-form prompt, and the decoding parameters.
# NOTE(review): the handler is named `describe_image` but the input is a video
# (gr.Video); the model consumes sampled video frames.
image = gr.Video()
question = gr.Textbox(label="prompts ", placeholder="Enter your prompts here")
temperature = gr.Slider(
    minimum=0.01, maximum=1.99, step=0.01, value=0.7, label="Temperature"
)
top_p = gr.Slider(minimum=0, maximum=1, step=0.01, value=0.8, label="Top P")
top_k = gr.Slider(minimum=0, maximum=1000, step=1, value=100, label="Top K")
max_new_tokens = gr.Slider(
    minimum=1, maximum=4096, step=1, value=512, label="Max Tokens"
)

# Output component for the generated answer (with a copy button)
answer = gr.Textbox(label="Predicted answer", show_label=True, show_copy_button=True)

# Title, description, and article for the interface.
# The description now says "video" to match the gr.Video input above.
title = "Visual Question Answering"
description = "Gradio Demo for the MiniCPM-V 2.6 Vision Language Understanding and Generation model. This model can answer questions about videos in natural language. To use it, upload your video, type a question, select associated parameters, use the default values, click 'Submit', or click one of the examples to load them. You can read more at the links below."
article = "<p style='text-align: center'><a href='https://github.com/OpenBMB/MiniCPM-V' target='_blank'>Model GitHub Repo</a> | <a href='https://huggingface.co/openbmb/MiniCPM-V-2_6' target='_blank'>Model Page</a></p>"


# Build the interface.
# The image-based `examples` (and their lazy caching) were removed: they
# pointed at still images while the input component is a video, and enabling
# `cache_examples` without any `examples` caches nothing.
interface = gr.Interface(
    fn=describe_image,
    inputs=[image, question, temperature, top_p, top_k, max_new_tokens],
    outputs=answer,
    title=title,
    description=description,
    article=article,
    theme="Glass",
    flagging_mode="never",
)

# Launch the interface
interface.launch(debug=False)
images/bird.jpg ADDED
images/cat.jpg ADDED
images/dog.jpg ADDED
requirements.txt ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ python-dotenv==1.0.1
2
+ numpy==1.26.4
3
+ Pillow==10.1.0
4
+ torch==2.2.0
5
+ torchaudio==2.2.0
6
+ torchvision==0.17.0
7
+ transformers==4.44.2
8
+ sentencepiece==0.1.99
9
+ https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.2/flash_attn-2.6.2+cu123torch2.2cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
10
+ decord
11
+ librosa==0.9.0
12
+ soundfile==0.12.1
13
+ vector-quantize-pytorch==1.18.5
14
+ vocos==0.1.0
15
+ moviepy
16
+ gradio
17
+ accelerate
src/__init__.py ADDED
File without changes
src/app/__init__.py ADDED
File without changes
src/app/model.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Necessary imports
import os
import sys
from dotenv import load_dotenv
from typing import Any
import torch
from transformers import AutoModel, AutoTokenizer, AutoProcessor

# Local imports
from src.logger import logging
from src.exception import CustomExceptionHandling


# Load the environment variables from the .env file
load_dotenv()

# Access token for gated models.
# NOTE(review): currently unused — the `token=` arguments stayed disabled in
# the original code; kept so it can be re-enabled without touching callers.
access_token = os.environ.get("ACCESS_TOKEN")


def load_model_tokenizer_and_processor(model_name: str, device: str) -> Any:
    """
    Load the model, tokenizer and processor.

    Args:
    - model_name (str): The name of the model to load (Hugging Face Hub id).
    - device (str): The device to move the model onto (e.g. "cuda").

    Returns:
    - model: The loaded model, moved to `device`.
    - tokenizer: The loaded tokenizer.
    - processor: The loaded processor.
    """
    # trust_remote_code is required because MiniCPM ships custom model code.
    model = AutoModel.from_pretrained(
        model_name,
        trust_remote_code=True,
        attn_implementation="sdpa",
        torch_dtype=torch.bfloat16,
    ).to(device)  # was hard-coded to 'cuda', silently ignoring `device`
    tokenizer = AutoTokenizer.from_pretrained(
        model_name,
        trust_remote_code=True,
    )
    processor = AutoProcessor.from_pretrained(
        model_name,
        trust_remote_code=True,
    )

    # Log the successful loading of the model, tokenizer and processor
    logging.info("Model, tokenizer and processor loaded successfully.")

    # Return the model, tokenizer and processor
    return model, tokenizer, processor
src/app/response.py ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Necessary imports
2
+ import sys
3
+ import gradio as gr
4
+ import spaces
5
+ from decord import VideoReader, cpu
6
+ from PIL import Image
7
+
8
+ # Local imports
9
+ from src.config import (
10
+ device,
11
+ model_name,
12
+ sampling,
13
+ stream,
14
+ repetition_penalty,
15
+ )
16
+ from src.app.model import load_model_tokenizer_and_processor
17
+ from src.logger import logging
18
+ from src.exception import CustomExceptionHandling
19
+
20
+
21
# Load the model, tokenizer and processor once at import time so every
# request reuses the same weights.
model, tokenizer, processor = load_model_tokenizer_and_processor(model_name, device)

# Maximum number of sampled frames fed to the model per video
MAX_NUM_FRAMES = 64
29
+
30
def encode_video(video_path):
    """
    Sample frames from a video file for the model.

    Args:
    - video_path (str): Path to the video file.

    Returns:
    list[PIL.Image.Image]: At most MAX_NUM_FRAMES frames, sampled roughly
    one per second and spread uniformly if the video is longer than that.
    """

    def uniform_sample(seq, n):
        # Pick n indices evenly spread over seq, centered in each stride.
        gap = len(seq) / n
        idxs = [int(i * gap + gap / 2) for i in range(n)]
        return [seq[i] for i in idxs]

    vr = VideoReader(video_path, ctx=cpu(0))
    # ~1 frame per second; clamp to 1 so fps < 0.5 can't round the range
    # step down to 0 (range() raises on a zero step).
    sample_fps = max(1, round(vr.get_avg_fps()))
    frame_idx = list(range(0, len(vr), sample_fps))
    if len(frame_idx) > MAX_NUM_FRAMES:
        frame_idx = uniform_sample(frame_idx, MAX_NUM_FRAMES)
    frames = vr.get_batch(frame_idx).asnumpy()
    frames = [Image.fromarray(f.astype("uint8")) for f in frames]
    # Use the project logger instead of a stray debug print.
    logging.info("num frames: %s", len(frames))
    return frames
45
+
46
@spaces.GPU()
def describe_image(
    image: str,
    question: str,
    temperature: float,
    top_p: float,
    top_k: int,
    max_new_tokens: int,
) -> str:
    """
    Generates an answer to a given question based on the provided video.

    Args:
    - image (str): The path to the uploaded video file (the Gradio input is
      a video despite the parameter name).
    - question (str): The question text.
    - temperature (float): The temperature parameter for the model.
    - top_p (float): The top_p parameter for the model.
    - top_k (int): The top_k parameter for the model.
    - max_new_tokens (int): The max tokens to be generated by the model.

    Returns:
    str: The generated answer, or an empty string when input is missing.
    """
    try:
        # Warn and bail out early: previously the code only warned and then
        # crashed inside encode_video(None).
        if not image or not question:
            gr.Warning("Please provide an image and a question.")
            return ""

        # Sample video frames and build the multimodal chat message.
        frames = encode_video(image)
        msgs = [
            {'role': 'user', 'content': frames + [question]},
        ]

        # Generate the answer
        answer = model.chat(
            msgs=msgs,
            tokenizer=tokenizer,
            processor=processor,
            sampling=sampling,
            stream=stream,
            top_p=top_p,
            top_k=top_k,
            temperature=temperature,
            repetition_penalty=repetition_penalty,
            max_new_tokens=max_new_tokens,
        )

        # With stream=True the answer is an iterator of text chunks: join it
        # exactly once. (The original joined twice — once for logging, once
        # for the return — so the second join consumed an already-exhausted
        # iterator and returned an empty string.)
        result = "".join(answer)

        # Log the successful generation of the answer
        logging.info("Answer generated successfully.")
        logging.info(result)

        # Return the answer
        return result

    # Handle exceptions that may occur during answer generation
    except Exception as e:
        # Wrap in the project's custom exception with file/line details
        raise CustomExceptionHandling(e, sys) from e
src/config.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
# Model settings
device = "cuda"  # device the model weights are moved onto
model_name = "openbmb/MiniCPM-o-2_6"  # Hugging Face Hub id of the model

# Decoding settings (passed through to model.chat in src/app/response.py)
sampling = True  # sampling-based decoding rather than greedy
stream = True  # stream generated text chunks instead of one final string
repetition_penalty = 1.05  # >1 discourages repeating tokens
src/exception.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ This module defines a custom exception handling class and a function to get error message with details of the error.
3
+ """
4
+
5
+ # Standard Library
6
+ import sys
7
+
8
+ # Local imports
9
+ from src.logger import logging
10
+
11
+
12
# Function to build an error message with the file name and line number where the error occurred
def get_error_message(error, error_detail: sys):
    """
    Get error message with details of the error.

    Args:
    - error (Exception): The error that occurred.
    - error_detail (sys): The sys module, used to fetch the active traceback.

    Returns:
    str: A string containing the error message along with the file name and line number where the error occurred.
    """
    # Traceback of the exception currently being handled.
    _, _, exc_tb = error_detail.exc_info()

    # Get error details (fixed message typos: "occured", missing space)
    file_name = exc_tb.tb_frame.f_code.co_filename
    return "Error occurred in python script name [{0}] line number [{1}] error message [{2}]".format(
        file_name, exc_tb.tb_lineno, str(error)
    )
31
+
32
+
33
# Custom Exception Handling Class Definition
class CustomExceptionHandling(Exception):
    """
    Custom exception raised when an error occurs in the program.

    Given the original error and the sys module, it formats a detailed
    message (file name and line number) via `get_error_message`, and returns
    that message as its string representation.
    """

    def __init__(self, error_message, error_detail: sys):
        """Initialize the exception"""
        super().__init__(error_message)

        # Resolve the detailed message once, at construction time.
        self.error_message = get_error_message(
            error_message, error_detail=error_detail
        )

    def __str__(self):
        """String representation of the exception"""
        return self.error_message
src/logger.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Importing the required modules
import os
import logging
from datetime import datetime

# Name the log file after the current date and time
LOG_FILE = f"{datetime.now().strftime('%m_%d_%Y_%H_%M_%S')}.log"

# Create the logs folder if it does not exist.
# Fix: previously makedirs() was called on the full file path, which created
# a *directory* named "<timestamp>.log" and produced the layout
# "logs/<timestamp>.log/<timestamp>.log".
logs_dir = os.path.join(os.getcwd(), "logs")
os.makedirs(logs_dir, exist_ok=True)

# Full path of the log file inside the logs folder
LOG_FILE_PATH = os.path.join(logs_dir, LOG_FILE)

# Configuring the logger
logging.basicConfig(
    filename=LOG_FILE_PATH,
    format="[ %(asctime)s ] %(lineno)d %(name)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)