internlm
/

internlm-xcomposer-7b

Text Generation

Transformers

PyTorch

InternLMXComposer

feature-extraction

custom_code

Model card Files Files and versions Community

myownskyW7 committed on Oct 12, 2023

Commit

7a51eae

1 Parent(s): 8974c5f

add meta instruct

Browse files

Files changed (1) hide show

modeling_InternLM_XComposer.py +25 -10

modeling_InternLM_XComposer.py CHANGED Viewed

@@ -26,6 +26,13 @@ class InternLMXComposerForCausalLM(PreTrainedModel):
     config_class = InternLMXComposerConfig
     _auto_class = "AutoModelForCausalLM"
     gen_config = dict(
         num_beams=5,
         do_sample=False,
@@ -33,7 +40,7 @@ class InternLMXComposerForCausalLM(PreTrainedModel):
         repetition_penalty=1.5,
         length_penalty=1.0,
         temperature=1.0,
-        max_new_tokens=200,
     )
     def __init__(self, config):
@@ -74,13 +81,14 @@ class InternLMXComposerForCausalLM(PreTrainedModel):
             # speed up init llm
             with torch.device('meta'):
                 self.internlm_model = InternLMForCausalLM._from_config(config)
-            self.internlm_model.to_empty(device=config.device).to(torch.float16)
         for n, m in self.internlm_model.named_modules():
             if 'lora' in n:
                 m.float()
         self.internlm_proj = nn.Linear(self.Qformer.config.hidden_size,
-                                    self.internlm_model.config.hidden_size)
         print('Done')
         self.vis_processor = transforms.Compose([
@@ -154,13 +162,14 @@ class InternLMXComposerForCausalLM(PreTrainedModel):
                 encoder_attention_mask=image_atts,
                 return_dict=True,
             )
-            inputs_internlm = self.internlm_proj(query_output.last_hidden_state)
             inputs_internlm = torch.cat([
                 self.flag_image_start.expand(inputs_internlm.shape[0], -1, -1),
                 inputs_internlm,
                 self.flag_image_end.expand(inputs_internlm.shape[0], -1, -1)
             ],
-                                      dim=1)
         return inputs_internlm
     def encode_text(self, text, add_special_tokens=False):
@@ -195,8 +204,8 @@ class InternLMXComposerForCausalLM(PreTrainedModel):
         text_embeds = self.encode_text(text)
         img_embeds = self.encode_img(image)
         prompt_embeds = self.wrap_prompt(text_embeds, img_embeds)
-        out_embeds = self.internlm_model.generate(inputs_embeds=prompt_embeds,
-                                                **self.get_gen_args(**kwargs))
         out_text = self.decode_text(out_embeds)
         return out_text
@@ -206,8 +215,8 @@ class InternLMXComposerForCausalLM(PreTrainedModel):
         prompt_embeds = self.wrap_prompt(text_embeds,
                                          img_embeds,
                                          history=history)
-        out_embeds = self.internlm_model.generate(inputs_embeds=prompt_embeds,
-                                                **self.get_gen_args(**kwargs))
         out_text = self.decode_text(out_embeds)
         # trunc at eoh and eoa
@@ -231,7 +240,13 @@ class InternLMXComposerForCausalLM(PreTrainedModel):
                     history=None,
                     add_special=True):
         if add_special:
-            prompt_segs = [' <|User|>:', f'\n{self.eoh} <|Bot|>:']
         else:
             prompt_segs = [' <|User|>:', ' <|Bot|>:']  # used in wrap history
         prompt_seg_embeds = []

     config_class = InternLMXComposerConfig
     _auto_class = "AutoModelForCausalLM"
+    meta_instruction = """meta instruction
+You are an AI assistant whose name is 浦语.
+- 浦语 is a conversational language model that is developed by Shanghai AI Laboratory (上海人工智能实验室). It is designed to be helpful, honest, and harmless.
+- 浦语 can understand and communicate fluently in the language chosen by the user such as English and 中文.
+conversation
+"""
     gen_config = dict(
         num_beams=5,
         do_sample=False,
         repetition_penalty=1.5,
         length_penalty=1.0,
         temperature=1.0,
+        max_new_tokens=500,
     )
     def __init__(self, config):
             # speed up init llm
             with torch.device('meta'):
                 self.internlm_model = InternLMForCausalLM._from_config(config)
+            self.internlm_model.to_empty(device=config.device).to(
+                torch.float16)
         for n, m in self.internlm_model.named_modules():
             if 'lora' in n:
                 m.float()
         self.internlm_proj = nn.Linear(self.Qformer.config.hidden_size,
+                                       self.internlm_model.config.hidden_size)
         print('Done')
         self.vis_processor = transforms.Compose([
                 encoder_attention_mask=image_atts,
                 return_dict=True,
             )
+            inputs_internlm = self.internlm_proj(
+                query_output.last_hidden_state)
             inputs_internlm = torch.cat([
                 self.flag_image_start.expand(inputs_internlm.shape[0], -1, -1),
                 inputs_internlm,
                 self.flag_image_end.expand(inputs_internlm.shape[0], -1, -1)
             ],
+                                        dim=1)
         return inputs_internlm
     def encode_text(self, text, add_special_tokens=False):
         text_embeds = self.encode_text(text)
         img_embeds = self.encode_img(image)
         prompt_embeds = self.wrap_prompt(text_embeds, img_embeds)
+        out_embeds = self.internlm_model.generate(
+            inputs_embeds=prompt_embeds, **self.get_gen_args(**kwargs))
         out_text = self.decode_text(out_embeds)
         return out_text
         prompt_embeds = self.wrap_prompt(text_embeds,
                                          img_embeds,
                                          history=history)
+        out_embeds = self.internlm_model.generate(
+            inputs_embeds=prompt_embeds, **self.get_gen_args(**kwargs))
         out_text = self.decode_text(out_embeds)
         # trunc at eoh and eoa
                     history=None,
                     add_special=True):
         if add_special:
+            if history is None:
+                prompt_segs = [
+                    self.meta_instruction + ' <|User|>:',
+                    f'\n{self.eoh} <|Bot|>:'
+                ]
+            else:
+                prompt_segs = [' <|User|>:', f'\n{self.eoh} <|Bot|>:']
         else:
             prompt_segs = [' <|User|>:', ' <|Bot|>:']  # used in wrap history
         prompt_seg_embeds = []