Spaces:

TongkunGuan
/

Token-level_Text_Image_Foundation_Model

Running

TongkunGuan committed on Mar 12

Commit

97be351

verified ·

1 Parent(s): cdb5235

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -77,9 +77,10 @@ def process_image(model, tokenizer, transform, device, check_type, image, text):
         if 'R50' in check_type:
             text_embeds = model.language_embedding(input_ids)
         else:
-            text_embeds = model.tok_embeddings(input_ids)
         vit_embeds, size1 = model.forward_tokenocr(pixel_values.to(torch.bfloat16).to(device))
         print("vit_embeds",vit_embeds)
         print("vit_embeds,shape",vit_embeds.shape)
         print("target_ratio",target_ratio)
@@ -107,11 +108,9 @@ def process_image(model, tokenizer, transform, device, check_type, image, text):
     current_bpe = [tokenizer.decode([i]) for i in input_ids]
     # current_bpe[-1] = 'Input text'
-    print("len1 ",len(current_vis))
-    print("len2 ",len(current_bpe))
-    print("current_bpe!!!!!!!!!!!!!!!!!!!",current_bpe)
-    current_bpe.append(text)
     return image, current_vis, current_bpe

         if 'R50' in check_type:
             text_embeds = model.language_embedding(input_ids)
         else:
+            text_embeds = model.tok_embeddings(input_ids).clone()
         vit_embeds, size1 = model.forward_tokenocr(pixel_values.to(torch.bfloat16).to(device))
         print("vit_embeds",vit_embeds)
         print("vit_embeds,shape",vit_embeds.shape)
         print("target_ratio",target_ratio)
     current_bpe = [tokenizer.decode([i]) for i in input_ids]
     # current_bpe[-1] = 'Input text'
+    # current_bpe.append(text)
     return image, current_vis, current_bpe