import gradio as gr
import torch
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from PIL import Image
import warnings

# Silence transformers progress bars and generic warnings
transformers.logging.set_verbosity_error()
transformers.logging.disable_progress_bar()
warnings.filterwarnings('ignore')

# Set device to GPU if available, else CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

model_name = 'cognitivecomputations/dolphin-vision-72b'
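# NOTE: a 72B-parameter model in float16 needs on the order of 140 GB of accelerator
# memory; with device_map="auto" it will be sharded across whatever GPUs (and CPU RAM) are available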

# Load the model in half precision; device_map="auto" dispatches its layers across available devices
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto",
    trust_remote_code=True
)

tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    trust_remote_code=True
)

def inference(prompt, image, temperature, beam_size, system_instruction):
    messages = [
        {"role": "system", "content": system_instruction},
        {"role": "user", "content": f'<image>\n{prompt}'}
    ]
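    # Render the conversation with the tokenizer's chat template; the <image> tag marks
    # where the image features will be spliced into the prompt below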
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )

    # Split the prompt at the <image> tag and insert -200, the placeholder index that the
    # model's custom (trust_remote_code) generation code replaces with image features
    text_chunks = [tokenizer(chunk).input_ids for chunk in text.split('<image>')]
    input_ids = torch.tensor(text_chunks[0] + [-200] + text_chunks[1], dtype=torch.long).unsqueeze(0).to(device)

    image_tensor = model.process_images([image], model.config).to(device)
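    # process_images is provided by the model's trust_remote_code implementation; it runs the
    # vision preprocessor and returns a batched image tensor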

    # Debug: confirm device placement of the model and inputs
    print(f"Device of model: {next(model.parameters()).device}")
    print(f"Device of input_ids: {input_ids.device}")
    print(f"Device of image_tensor: {image_tensor.device}")

    # generate
    with torch.cuda.amp.autocast():
        output_ids = model.generate(
            input_ids,
            images=image_tensor,
            max_new_tokens=1024,
            do_sample=True,  # without sampling, generate() ignores temperature
            temperature=temperature,
            num_beams=beam_size,
            use_cache=True
        )[0]

    # Strip the prompt tokens and decode only the newly generated continuation
    return tokenizer.decode(output_ids[input_ids.shape[1]:], skip_special_tokens=True).strip()

with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            system_instruction = gr.Textbox(
                label="System Instruction",
                value="You are Dolphin, a helpful AI assistant",
                lines=2
            )
            prompt_input = gr.Textbox(label="Prompt", placeholder="Describe this image in detail")
            image_input = gr.Image(label="Image", type="pil")
            temperature_input = gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature")
            beam_size_input = gr.Slider(minimum=1, maximum=10, value=4, step=1, label="Beam Size")
            submit_button = gr.Button("Submit")
        with gr.Column():
            output_text = gr.Textbox(label="Output")

    submit_button.click(
        fn=inference, 
        inputs=[prompt_input, image_input, temperature_input, beam_size_input, system_instruction], 
        outputs=output_text
    )

demo.launch(share=True)  # share=True also requests a temporary public gradio.live link