Spaces:
Runtime error
Runtime error
Commit
·
ccbfd85
1
Parent(s):
b2806b7
Update app.py
Browse files
app.py
CHANGED
@@ -14,14 +14,13 @@ vision_tower_name = 'openai/clip-vit-large-patch14-336'
|
|
14 |
os.environ["RWKV_JIT_ON"] = '1'
|
15 |
os.environ["RWKV_CUDA_ON"] = '0' # if '1' then use CUDA kernel for seq mode (much faster)
|
16 |
|
17 |
-
from
|
18 |
model_path = hf_hub_download(repo_id="howard-hou/visualrwkv-5", filename=f"{title}.pth")
|
19 |
-
model =
|
20 |
from rwkv.utils import PIPELINE, PIPELINE_ARGS
|
21 |
pipeline = PIPELINE(model, "rwkv_vocab_v20230424")
|
22 |
|
23 |
##########################################################################
|
24 |
-
from modeling import VisualEncoder, EmbeddingMixer, VisualEncoderConfig
|
25 |
emb_mixer = EmbeddingMixer(model.w["emb.weight"],
|
26 |
num_image_embeddings=num_image_embeddings)
|
27 |
config = VisualEncoderConfig(n_embd=model.args.n_embd,
|
@@ -102,9 +101,7 @@ def chatbot(image, question):
|
|
102 |
image = image_processor(images=image.convert('RGB'), return_tensors='pt')['pixel_values']
|
103 |
image_features = visual_encoder.encode_images(image.unsqueeze(0))
|
104 |
emb_mixer.set_image_embeddings(image_features.squeeze(0))
|
105 |
-
|
106 |
-
model.w["emb.weight"] = emb_mixer.get_input_embeddings()
|
107 |
-
print(model.w["emb.weight"].shape)
|
108 |
image_ids = [i for i in range(emb_mixer.image_start_index, emb_mixer.image_start_index + len(image_features))]
|
109 |
input_text = generate_prompt(question)
|
110 |
for output in generate(input_text, image_ids):
|
|
|
14 |
os.environ["RWKV_JIT_ON"] = '1'
|
15 |
os.environ["RWKV_CUDA_ON"] = '0' # if '1' then use CUDA kernel for seq mode (much faster)
|
16 |
|
17 |
+
from modeling import UpdatableRWKV, VisualEncoder, EmbeddingMixer, VisualEncoderConfig
|
18 |
model_path = hf_hub_download(repo_id="howard-hou/visualrwkv-5", filename=f"{title}.pth")
|
19 |
+
model = UpdatableRWKV(model=model_path, strategy='cpu fp32')
|
20 |
from rwkv.utils import PIPELINE, PIPELINE_ARGS
|
21 |
pipeline = PIPELINE(model, "rwkv_vocab_v20230424")
|
22 |
|
23 |
##########################################################################
|
|
|
24 |
emb_mixer = EmbeddingMixer(model.w["emb.weight"],
|
25 |
num_image_embeddings=num_image_embeddings)
|
26 |
config = VisualEncoderConfig(n_embd=model.args.n_embd,
|
|
|
101 |
image = image_processor(images=image.convert('RGB'), return_tensors='pt')['pixel_values']
|
102 |
image_features = visual_encoder.encode_images(image.unsqueeze(0))
|
103 |
emb_mixer.set_image_embeddings(image_features.squeeze(0))
|
104 |
+
model.update_emb_weight(emb_mixer.get_input_embeddings())
|
|
|
|
|
105 |
image_ids = [i for i in range(emb_mixer.image_start_index, emb_mixer.image_start_index + len(image_features))]
|
106 |
input_text = generate_prompt(question)
|
107 |
for output in generate(input_text, image_ids):
|