IntelLabs
/

LlavaOLMoBitnet1B

English

Model card Files Files and versions Community

naveensp committed on Aug 22, 2024

Commit

5076284

verified ·

1 Parent(s): 0cc08b4

Upload llava_olmo.py with huggingface_hub

Browse files

Files changed (1) hide show

llava_olmo.py +98 -0

llava_olmo.py ADDED Viewed

	@@ -0,0 +1,98 @@

+import json
+from transformers import AutoTokenizer
+import torch
+import llava.model.language_model.llava_olmo1p58b as llava_olmo ##
+import llava.model.language_model.llava_llama as llava_llama
+from OLMo_Bitnet_1B.modeling_olmo import OLMoForCausalLM
+from PIL import Image
+import requests
+from llava.mm_utils import tokenizer_image_token, process_images, get_model_name_from_path
+from llava.conversation import conv_templates
+device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
+DEFAULT_IMAGE_TOKEN = "<image>"
+IMAGE_TOKEN_INDEX = -200
+# Define Image and Text inputs..
+text = "What are the four major tournaments of the sport shown in the image?"
+url = "https://farm3.staticflickr.com/2157/2439959136_d932f4e816_z.jpg"
+image = Image.open(requests.get(url, stream=True).raw)
+# LOAD MODEL FROM CHECKPOINT
+with open('./checkpoints/llava-LlavaOLMoBitnet1B-Run3-finetune/config.json') as json_file:
+    data = json.load(json_file)
+config_class = llava_olmo.LlavaOLMoBitnet1BConfig(**data)
+model = llava_olmo.LlavaOLMoBitnet1BForCausalLM(config_class).to(device)
+weight_checkpoint = torch.load('./checkpoints/llava-LlavaOLMoBitnet1B-Run3-finetune/pytorch_model.bin')
+model.load_state_dict(weight_checkpoint)
+# pre-process image; Apply chat template and tokenize text
+image_processor = model.model.vision_tower.image_processor
+tokenizer = AutoTokenizer.from_pretrained(
+            "NousResearch/OLMo-Bitnet-1B",
+            model_max_length=2048,
+            padding_side="right",
+            pad_token_id=1,
+            use_fast=True,
+            legacy=False,
+            unk_token='<|padding|>',
+            )
+image_tensor = process_images([image], image_processor, model.config)[0]
+text = DEFAULT_IMAGE_TOKEN + '\n' + text
+conv = conv_templates['llava_v1'].copy()
+conv.append_message(conv.roles[0], text)
+conv.append_message(conv.roles[1], None)
+prompt = conv.get_prompt()
+text_tokens = tokenizer_image_token(prompt, tokenizer, IMAGE_TOKEN_INDEX, return_tensors='pt').unsqueeze(0).to(device)
+# Generate response from the model
+response = model.generate(images=image_tensor.unsqueeze(0).to(device), inputs=text_tokens, max_new_tokens=400)
+decoded_text = tokenizer.batch_decode(response, skip_special_tokens=True)[0]
+print("\n\n", "-"*100)
+print(decoded_text[:decoded_text.find('</s>')].replace('|||IP_ADDRESS|||', '')) # The replace part is due to unwanted token introduction at start
+print("-"*100)
+# ---------------------------------------------------------------------------
+# Disabled legacy snippet. Everything below is a bare triple-quoted string
+# that is never assigned or used, so none of it runs. It holds the earlier
+# text-only OLMo experiment. NOTE(review): it refers to OLMoTokenizerFast,
+# which is not imported in the visible part of this file, so it would need
+# an extra import before it could be re-enabled.
+# ---------------------------------------------------------------------------
+'''
+# ORIGINAL CODE WITH ONLY OLMO:
+with open('llava/config.json') as json_file:
+    data = json.load(json_file)
+text = "Paris is a historic city with architectural marvels. It is also "
+# text = ["Language modeling is "]
+config_class = llava_olmo.LlavaOLMoBitnet1BConfig(**data)
+lolmo = llava_olmo.LlavaOLMoBitnet1BForCausalLM(config_class).to(device)
+lolmo.load_state_dict(torch.load('OLMo_Bitnet_1B/pytorch_model.bin'), strict=False)
+olmo = OLMoForCausalLM(config_class).to(device)
+olmo.load_state_dict(torch.load('OLMo_Bitnet_1B/pytorch_model.bin'))
+actual_olmo = OLMoForCausalLM.from_pretrained("allenai/OLMo-1B").to(device)
+actual_olmo_tokenizer = OLMoTokenizerFast.from_pretrained("allenai/OLMo-1B")
+olmo_tokenizer = AutoTokenizer.from_pretrained("NousResearch/OLMo-Bitnet-1B")
+olmo_tokens = olmo_tokenizer(text, return_tensors='pt', return_token_type_ids=False).to(device)
+# olmo_tokens = actual_olmo_tokenizer(text, return_tensors='pt', return_token_type_ids=False).to(device)
+response = lolmo.generate(inputs=olmo_tokens['input_ids'], attention_mask=olmo_tokens['attention_mask'], max_new_tokens=100, do_sample=True, top_k=50, top_p=0.95)
+# response = olmo.generate(inputs=olmo_tokens['input_ids'], attention_mask=olmo_tokens['attention_mask'], max_new_tokens=100, do_sample=True, top_k=50, top_p=0.95)
+print(olmo_tokenizer.batch_decode(response, skip_special_tokens=True)[0])
+'''