import gradio as gr
import torch
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from PIL import Image
import warnings
# Suppress library warnings and progress bars
transformers.logging.set_verbosity_error()
transformers.logging.disable_progress_bar()
warnings.filterwarnings('ignore')
# Set device to GPU if available, else CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
model_name = 'cognitivecomputations/dolphin-vision-72b'
# Create the model; device_map="auto" spreads the fp16 weights across
# the available GPUs (the `device` above is used for input tensors)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto",
    trust_remote_code=True
)
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    trust_remote_code=True
)
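# Run a single image + text round trip through the model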
def inference(prompt, image, temperature, beam_size, system_instruction):
    messages = [
        {"role": "system", "content": system_instruction},
        {"role": "user", "content": f'<image>\n{prompt}'}
    ]
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )
    # Tokenize the text on either side of the <image> placeholder, then splice
    # in -200, the sentinel image-token index the model expands into vision features
    text_chunks = [tokenizer(chunk).input_ids for chunk in text.split('<image>')]
    input_ids = torch.tensor(text_chunks[0] + [-200] + text_chunks[1], dtype=torch.long).unsqueeze(0).to(device)
    # Preprocess the image and match the model's fp16 dtype
    image_tensor = model.process_images([image], model.config).to(dtype=model.dtype, device=device)
    # Debug prints to confirm everything lives on the same device
    print(f"Device of model: {next(model.parameters()).device}")
    print(f"Device of input_ids: {input_ids.device}")
    print(f"Device of image_tensor: {image_tensor.device}")
    # generate
    with torch.cuda.amp.autocast():
        output_ids = model.generate(
            input_ids,
            images=image_tensor,
            max_new_tokens=1024,
            do_sample=True,  # sampling must be on for the temperature slider to take effect
            temperature=temperature,
            num_beams=beam_size,
            use_cache=True
        )[0]
    # Decode only the newly generated tokens, skipping the prompt
    return tokenizer.decode(output_ids[input_ids.shape[1]:], skip_special_tokens=True).strip()
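# Gradio UI: inputs (system instruction, prompt, image, sampling controls)
# on the left, generated text on the right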
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            system_instruction = gr.Textbox(
                label="System Instruction",
                value="You are Dolphin, a helpful AI assistant",
                lines=2
            )
            prompt_input = gr.Textbox(label="Prompt", placeholder="Describe this image in detail")
            image_input = gr.Image(label="Image", type="pil")
            temperature_input = gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature")
            beam_size_input = gr.Slider(minimum=1, maximum=10, value=4, step=1, label="Beam Size")
            submit_button = gr.Button("Submit")
        with gr.Column():
            output_text = gr.Textbox(label="Output")
    submit_button.click(
        fn=inference,
        inputs=[prompt_input, image_input, temperature_input, beam_size_input, system_instruction],
        outputs=output_text
    )

demo.launch(share=True)