danilohssantana committed on
Commit
62d1e32
·
1 Parent(s): e58d036

same model

Browse files
Files changed (2) hide show
  1. Dockerfile +0 -1
  2. main.py +2 -3
Dockerfile CHANGED
@@ -22,7 +22,6 @@ RUN pip install --no-cache-dir --upgrade pip && \
22
  fastapi \
23
  uvicorn[standard] \
24
  python-multipart \
25
- optimum
26
 
27
  # Copy application files
28
  COPY --chown=user . /app
 
22
  fastapi \
23
  uvicorn[standard] \
24
  python-multipart \
 
25
 
26
  # Copy application files
27
  COPY --chown=user . /app
main.py CHANGED
@@ -36,15 +36,14 @@ class PredictRequest(BaseModel):
36
  # # attn_implementation="flash_attention_2",
37
  # )
38
  # checkpoint = "Qwen/Qwen2.5-VL-7B-Instruct"
39
- checkpoint = "Qwen/Qwen2-VL-72B-Instruct-GPTQ-Int4"
40
  min_pixels = 256 * 28 * 28
41
  max_pixels = 1280 * 28 * 28
42
  processor = AutoProcessor.from_pretrained(
43
  checkpoint, min_pixels=min_pixels, max_pixels=max_pixels
44
  )
45
- model = Qwen2VLForConditionalGeneration.from_pretrained(
46
  checkpoint,
47
- # torch_dtype=torch.bfloat16,
48
  torch_dtype="auto",
49
  device_map="auto",
50
  # attn_implementation="flash_attention_2",
 
36
  # # attn_implementation="flash_attention_2",
37
  # )
38
  # checkpoint = "Qwen/Qwen2.5-VL-7B-Instruct"
39
+ checkpoint = "Qwen/Qwen2.5-VL-7B-Instruct"
40
  min_pixels = 256 * 28 * 28
41
  max_pixels = 1280 * 28 * 28
42
  processor = AutoProcessor.from_pretrained(
43
  checkpoint, min_pixels=min_pixels, max_pixels=max_pixels
44
  )
45
+ model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
46
  checkpoint,
 
47
  torch_dtype="auto",
48
  device_map="auto",
49
  # attn_implementation="flash_attention_2",