Spaces:

danilohssantana
/

qwen2.5-VL-api

Runtime error

danilohssantana committed on Feb 20

Commit

62d1e32

1 Parent(s): e58d036

same model

Files changed (2) hide show

Dockerfile CHANGED Viewed

@@ -22,7 +22,6 @@ RUN pip install --no-cache-dir --upgrade pip && \
         fastapi \
         uvicorn[standard] \
         python-multipart \
-        optimum
 # Copy application files
 COPY --chown=user . /app

         fastapi \
         uvicorn[standard] \
         python-multipart \
 # Copy application files
 COPY --chown=user . /app

main.py CHANGED Viewed

@@ -36,15 +36,14 @@ class PredictRequest(BaseModel):
 #     # attn_implementation="flash_attention_2",
 # )
 # checkpoint = "Qwen/Qwen2.5-VL-7B-Instruct"
-checkpoint = "Qwen/Qwen2-VL-72B-Instruct-GPTQ-Int4"
 min_pixels = 256 * 28 * 28
 max_pixels = 1280 * 28 * 28
 processor = AutoProcessor.from_pretrained(
     checkpoint, min_pixels=min_pixels, max_pixels=max_pixels
 )
-model = Qwen2VLForConditionalGeneration.from_pretrained(
     checkpoint,
-    # torch_dtype=torch.bfloat16,
     torch_dtype="auto",
     device_map="auto",
     # attn_implementation="flash_attention_2",

 #     # attn_implementation="flash_attention_2",
 # )
 # checkpoint = "Qwen/Qwen2.5-VL-7B-Instruct"
+checkpoint = "Qwen/Qwen2.5-VL-7B-Instruct"
 min_pixels = 256 * 28 * 28
 max_pixels = 1280 * 28 * 28
 processor = AutoProcessor.from_pretrained(
     checkpoint, min_pixels=min_pixels, max_pixels=max_pixels
 )
+model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
     checkpoint,
     torch_dtype="auto",
     device_map="auto",
     # attn_implementation="flash_attention_2",