howard-hou committed on
Commit
88c85c4
·
1 Parent(s): 9ee77be

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -2
app.py CHANGED
@@ -1,6 +1,7 @@
1
  import os
2
  os.environ["RWKV_JIT_ON"] = '1'
3
  os.environ["RWKV_CUDA_ON"] = '1' # if '1' then use CUDA kernel for seq mode (much faster)
 
4
  from modeling_rwkv import RWKV
5
 
6
  import gc
@@ -14,6 +15,7 @@ from huggingface_hub import hf_hub_download
14
  from pynvml import *
15
  nvmlInit()
16
  gpu_h = nvmlDeviceGetHandleByIndex(0)
 
17
 
18
  ctx_limit = 3500
19
  title = 'ViusualRWKV-v5'
@@ -36,8 +38,7 @@ vision_local_path = hf_hub_download(repo_id="howard-hou/visualrwkv-5", filename=
36
  vision_state_dict = torch.load(vision_local_path, map_location='cpu')
37
  visual_encoder.load_state_dict(vision_state_dict)
38
  image_processor = CLIPImageProcessor.from_pretrained(vision_tower_name)
39
- if torch.cuda.is_available():
40
- visual_encoder = visual_encoder.cuda()
41
  ##########################################################################
42
  def generate_prompt(instruction):
43
  instruction = instruction.strip().replace('\r\n','\n').replace('\n\n','\n')
@@ -135,6 +136,7 @@ def compute_image_state(image):
135
  image_state = image_cache[base64_image]
136
  else:
137
  image = image_processor(images=image.convert('RGB'), return_tensors='pt')['pixel_values']
 
138
  image_features = visual_encoder.encode_images(image.unsqueeze(0)).squeeze(0) # [L, D]
139
  # apply layer norm to image feature, very important
140
  image_features = F.layer_norm(image_features,
 
1
  import os
2
  os.environ["RWKV_JIT_ON"] = '1'
3
  os.environ["RWKV_CUDA_ON"] = '1' # if '1' then use CUDA kernel for seq mode (much faster)
4
+ # make sure the cuda dir is at the same level as modeling_rwkv.py
5
  from modeling_rwkv import RWKV
6
 
7
  import gc
 
15
  from pynvml import *
16
  nvmlInit()
17
  gpu_h = nvmlDeviceGetHandleByIndex(0)
18
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
19
 
20
  ctx_limit = 3500
21
  title = 'ViusualRWKV-v5'
 
38
  vision_state_dict = torch.load(vision_local_path, map_location='cpu')
39
  visual_encoder.load_state_dict(vision_state_dict)
40
  image_processor = CLIPImageProcessor.from_pretrained(vision_tower_name)
41
+ visual_encoder = visual_encoder.to(device)
 
42
  ##########################################################################
43
  def generate_prompt(instruction):
44
  instruction = instruction.strip().replace('\r\n','\n').replace('\n\n','\n')
 
136
  image_state = image_cache[base64_image]
137
  else:
138
  image = image_processor(images=image.convert('RGB'), return_tensors='pt')['pixel_values']
139
+ image = image.to(device)
140
  image_features = visual_encoder.encode_images(image.unsqueeze(0)).squeeze(0) # [L, D]
141
  # apply layer norm to image feature, very important
142
  image_features = F.layer_norm(image_features,