sounar committed · Commit 2629ae5 · verified · 1 parent: 768d9ee

Update app.py

Files changed (1): app.py (+6, −3)
app.py CHANGED
@@ -7,7 +7,8 @@ from PIL import Image
 from transformers import AutoModel, AutoTokenizer, BitsAndBytesConfig
 import gradio as gr
 
-# Load the model and tokenizer
+
+# Configuration for 4-bit quantization
 bnb_config = BitsAndBytesConfig(
     load_in_4bit=True,
     bnb_4bit_quant_type="nf4",
@@ -15,20 +16,22 @@ bnb_config = BitsAndBytesConfig(
     bnb_4bit_compute_dtype=torch.float16,
 )
 
+# Load the model without flash-attn
 model = AutoModel.from_pretrained(
     "ContactDoctor/Bio-Medical-MultiModal-Llama-3-8B-V1",
     quantization_config=bnb_config,
     device_map="auto",
     torch_dtype=torch.float16,
     trust_remote_code=True,
-    attn_implementation="flash_attention_2",
+    attn_implementation=None,  # Disable flash-attn
 )
 
 tokenizer = AutoTokenizer.from_pretrained(
-    "ContactDoctor/Bio-Medical-MultiModal-Llama-3-8B-V1",
+    "ContactDoctor/Bio-Medical-MultiModal-Llama-3-8B-V1",
     trust_remote_code=True
 )
 
+
 # Define the function to handle the input
 def process_input(image, question):
     image = Image.open(image).convert("RGB")
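
The key change is `attn_implementation=None`, which lets Transformers pick its default attention backend instead of requiring the flash-attn package. A minimal sketch of the same load with the non-flash backend named explicitly rather than left as None; it assumes a PyTorch 2.x environment where Transformers' "sdpa" backend is available:

# Sketch: equivalent model load naming the non-flash backend explicitly.
# Assumes PyTorch >= 2.0, so the "sdpa" (scaled dot-product attention)
# backend is available; no flash-attn install is needed.
import torch
from transformers import AutoModel, BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

model = AutoModel.from_pretrained(
    "ContactDoctor/Bio-Medical-MultiModal-Llama-3-8B-V1",
    quantization_config=bnb_config,
    device_map="auto",
    torch_dtype=torch.float16,
    trust_remote_code=True,
    attn_implementation="sdpa",  # PyTorch-native attention instead of flash_attention_2
)

Passing None, as the commit does, has the same practical effect: Transformers falls back to its default backend ("sdpa" when available, otherwise "eager"), so the import error from a missing flash-attn dependency goes away.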