Dolphin-Inference

Build error

Ketengan-Diffusion-Lab committed on Sep 14, 2024

Commit

225c3f2

verified ·

1 Parent(s): f4dc684

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -10,24 +10,19 @@ transformers.logging.set_verbosity_error()
 transformers.logging.disable_progress_bar()
 warnings.filterwarnings('ignore')
-# set device
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 model_name = 'cognitivecomputations/dolphin-vision-7b'
-# create model and load it to the specified device
 model = AutoModelForCausalLM.from_pretrained(
     model_name,
-    torch_dtype=torch.float16,
     trust_remote_code=True
 )
-model.to(device)  # Explicitly move the model to the device
-# Ensure all model components are on the same device
-for param in model.parameters():
-    param.data = param.data.to(device)
-for buffer in model.buffers():
-    buffer.data = buffer.data.to(device)
 tokenizer = AutoTokenizer.from_pretrained(
     model_name,
@@ -45,18 +40,22 @@ def inference(prompt, image):
     )
     text_chunks = [tokenizer(chunk).input_ids for chunk in text.split('<image>')]
-    input_ids = torch.tensor(text_chunks[0] + [-200] + text_chunks[1], dtype=torch.long).unsqueeze(0).to(device)
-    image_tensor = model.process_images([image], model.config).to(device)
     # generate
-    with torch.cuda.amp.autocast():
-        output_ids = model.generate(
-            input_ids,
-            images=image_tensor,
-            max_new_tokens=2048,
-            use_cache=True
-        )[0]
     return tokenizer.decode(output_ids[input_ids.shape[1]:], skip_special_tokens=True).strip()

 transformers.logging.disable_progress_bar()
 warnings.filterwarnings('ignore')
+# Force CPU usage
+device = torch.device("cpu")
+torch.set_default_tensor_type(torch.FloatTensor)
 model_name = 'cognitivecomputations/dolphin-vision-7b'
+# create model and load it to CPU
 model = AutoModelForCausalLM.from_pretrained(
     model_name,
+    torch_dtype=torch.float32,  # Use float32 for CPU
+    device_map={'': device},
     trust_remote_code=True
 )
 tokenizer = AutoTokenizer.from_pretrained(
     model_name,
     )
     text_chunks = [tokenizer(chunk).input_ids for chunk in text.split('<image>')]
+    input_ids = torch.tensor(text_chunks[0] + [-200] + text_chunks[1], dtype=torch.long).unsqueeze(0)
+    image_tensor = model.process_images([image], model.config)
+    # Add debug prints
+    print(f"Device of model: {next(model.parameters()).device}")
+    print(f"Device of input_ids: {input_ids.device}")
+    print(f"Device of image_tensor: {image_tensor.device}")
     # generate
+    output_ids = model.generate(
+        input_ids,
+        images=image_tensor,
+        max_new_tokens=2048,
+        use_cache=True
+    )[0]
     return tokenizer.decode(output_ids[input_ids.shape[1]:], skip_special_tokens=True).strip()