jatingocodeo committed (verified)
Commit d70db54 · 1 Parent(s): 6cfe9be

Update app.py

Files changed (1)
  1. app.py +37 -11
app.py CHANGED
@@ -4,21 +4,43 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
 from peft import PeftModel, PeftConfig
 from PIL import Image
 import torchvision.datasets as datasets
+import numpy as np
+import os
 
 def load_model():
-    # Load base Phi model
+    # Create offload directory
+    os.makedirs("offload", exist_ok=True)
+
+    # Configure device map for memory efficiency
+    device_map = {
+        'base_model.model.model.embed_tokens': 0,
+        'base_model.model.model.layers.0': 0,
+        'base_model.model.model.layers.1': 0,
+        'base_model.model.model.layers.2': 0,
+        'base_model.model.model.layers.3': 0,
+        'base_model.model.model.layers.4': 'cpu',
+        'base_model.model.model.layers.5': 'cpu',
+        'base_model.model.model.layers.6': 'cpu',
+        'base_model.model.model.layers.7': 'cpu',
+        'base_model.model.model.layers.8': 'cpu',
+        'base_model.model.model.norm': 'cpu',
+        'base_model.model.lm_head': 0,
+    }
+
     base_model = AutoModelForCausalLM.from_pretrained(
         "microsoft/Phi-3-mini-4k-instruct",
         trust_remote_code=True,
-        device_map="auto",
-        torch_dtype=torch.float32
+        device_map=device_map,  # Use custom device map
+        torch_dtype=torch.float32,
+        attn_implementation='eager',
+        offload_folder="offload"
     )
 
-    # Load our fine-tuned LoRA adapter
     model = PeftModel.from_pretrained(
         base_model,
-        "jatingocodeo/phi-vlm",  # Your uploaded model
-        device_map="auto"
+        "jatingocodeo/phi-vlm",
+        device_map=device_map,
+        offload_folder="offload"
     )
 
     tokenizer = AutoTokenizer.from_pretrained("jatingocodeo/phi-vlm")
@@ -30,13 +52,16 @@ def generate_description(image, model, tokenizer):
     if image.mode != "RGB":
         image = image.convert("RGB")
 
-    # Resize image to match training size
+    # Resize image to match training size (32x32)
     image = image.resize((32, 32))
 
-    # Prepare prompt
-    prompt = """Below is an image. Please describe it in detail.
+    # Convert image to tensor and normalize
+    image_tensor = torch.FloatTensor(np.array(image)).permute(2, 0, 1) / 255.0
+
+    # Prepare prompt with image tensor
+    prompt = f"""Below is an image. Please describe it in detail.
 
-Image: <image>
+Image: {image_tensor}
 Description: """
 
     # Tokenize input
@@ -51,7 +76,8 @@ Description: """
     # Generate description
     with torch.no_grad():
         outputs = model.generate(
-            **inputs,
+            input_ids=inputs.input_ids,
+            attention_mask=inputs.attention_mask,
             max_new_tokens=100,
             temperature=0.7,
             do_sample=True,
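
For context, this commit only touches load_model() and generate_description(); the UI wiring of app.py is not part of the diff. The sketch below is an assumption of how such a Space is typically hooked up with Gradio, and it further assumes load_model() returns both the model and the tokenizer.

```python
# Hypothetical wiring, not part of this commit: a minimal Gradio front end
# around the two functions shown in the diff above.
import gradio as gr

# Assumption: load_model() returns (model, tokenizer)
model, tokenizer = load_model()

def describe(image):
    # The Gradio Image component hands us a PIL image directly
    return generate_description(image, model, tokenizer)

demo = gr.Interface(
    fn=describe,
    inputs=gr.Image(type="pil"),
    outputs="text",
    title="phi-vlm image description",
)

if __name__ == "__main__":
    demo.launch()
```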