Dolphin-Inference

Build error

Ketengan-Diffusion-Lab committed on Sep 14, 2024

Commit

cd44f8b

verified ·

1 Parent(s): 4f9f0e6

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -10,8 +10,8 @@ transformers.logging.set_verbosity_error()
 transformers.logging.disable_progress_bar()
 warnings.filterwarnings('ignore')
-# set device to a specific GPU (e.g., GPU 0)
-device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 model_name = 'cognitivecomputations/dolphin-vision-7b'
@@ -19,9 +19,9 @@ model_name = 'cognitivecomputations/dolphin-vision-7b'
 model = AutoModelForCausalLM.from_pretrained(
     model_name,
     torch_dtype=torch.float16,
-    # device_map='auto',  # Remove auto device mapping
     trust_remote_code=True
-).to(device) # Load the model to the specified device
 tokenizer = AutoTokenizer.from_pretrained(
     model_name,
@@ -39,15 +39,14 @@ def inference(prompt, image):
     )
     text_chunks = [tokenizer(chunk).input_ids for chunk in text.split('<image>')]
-    input_ids = torch.tensor(text_chunks[0] + [-200] + text_chunks[1], dtype=torch.long).unsqueeze(0)
-    image_tensor = model.process_images([image], model.config).to(dtype=model.dtype, device=device)
-    # Generate with autocast for mixed precision on the specified GPU
-    with torch.cuda.amp.autocast():
         output_ids = model.generate(
-            input_ids.to(device),
             images=image_tensor,
             max_new_tokens=2048,
             use_cache=True

 transformers.logging.disable_progress_bar()
 warnings.filterwarnings('ignore')
+# set device
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 model_name = 'cognitivecomputations/dolphin-vision-7b'
 model = AutoModelForCausalLM.from_pretrained(
     model_name,
     torch_dtype=torch.float16,
+    device_map='auto', # Keep auto device mapping
     trust_remote_code=True
+).to(device)  # Explicitly move the model to the device
 tokenizer = AutoTokenizer.from_pretrained(
     model_name,
     )
     text_chunks = [tokenizer(chunk).input_ids for chunk in text.split('<image>')]
+    input_ids = torch.tensor(text_chunks[0] + [-200] + text_chunks[1], dtype=torch.long).unsqueeze(0).to(device) # Move input_ids to device
+    image_tensor = model.process_images([image], model.config).to(dtype=model.dtype, device=device) # Move image_tensor to device
+    # generate
+    with torch.cuda.amp.autocast(): # Use autocast for mixed precision
         output_ids = model.generate(
+            input_ids,
             images=image_tensor,
             max_new_tokens=2048,
             use_cache=True