Spaces:

dar-tau
/

selfie

Sleeping

App Files Community

dar-tau committed on Apr 11, 2024

Commit

9a230a0

verified ·

1 Parent(s): 7673f3b

Update configs.py

Browse files

Files changed (1) hide show

configs.py +25 -17

configs.py CHANGED Viewed

@@ -1,5 +1,8 @@
 import os
 dataset_info = [
                 {'name': 'Common Sense', 'hf_repo': 'tau/commonsense_qa', 'text_col': 'question'},
@@ -12,29 +15,34 @@ dataset_info = [
 model_info = {
     'LLAMA2-7B': dict(model_path='meta-llama/Llama-2-7b-chat-hf', token=os.environ['hf_token'],
-                                          original_prompt_template='<s>{prompt}',
-                                          interpretation_prompt_template='<s>[INST] [X] [/INST] {prompt}',
-                                         ), # , load_in_8bit=True
-    # 'Gemma-2B': dict(model_path='google/gemma-2b', device_map='cpu', token=os.environ['hf_token'],
-    #                         original_prompt_template='<bos>{prompt}',
-    #                         interpretation_prompt_template='<bos>User: [X]\n\nAnswer: {prompt}',
-    #                        ),
     'GPT-2 Small': dict(model_path='gpt2', original_prompt_template='{prompt}',
-                     interpretation_prompt_template='User: [X]\n\nAnswer: {prompt}'),
     'GPT-2 Medium': dict(model_path='gpt2-medium', original_prompt_template='{prompt}',
-                     interpretation_prompt_template='User: [X]\n\nAnswer: {prompt}'),
     'GPT-2 Large': dict(model_path='gpt2-large', original_prompt_template='{prompt}',
-                     interpretation_prompt_template='User: [X]\n\nAnswer: {prompt}'),
     'GPT-2 XL': dict(model_path='gpt2-xl', original_prompt_template='{prompt}',
-                     interpretation_prompt_template='User: [X]\n\nAnswer: {prompt}'),
     'GPT-J 6B': dict(model_path='EleutherAI/gpt-j-6b', original_prompt_template='{prompt}',
-                     interpretation_prompt_template='User: [X]\n\nAnswer: {prompt}'),
     'Mistral-7B Instruct': dict(model_path='mistralai/Mistral-7B-Instruct-v0.2', device_map='cpu',
-                                               original_prompt_template='<s>{prompt}',
-                                               interpretation_prompt_template='<s>[INST] [X] [/INST] {prompt}',
-                                              ),
     # 'TheBloke/Mistral-7B-Instruct-v0.2-GGUF': dict(model_file='mistral-7b-instruct-v0.2.Q5_K_S.gguf',
     #                                                tokenizer='mistralai/Mistral-7B-Instruct-v0.2',
     #                                                model_type='llama', hf=True, ctransformers=True,

 import os
+llama_layers_format = 'model.layers.{k}'
+gpt_layers_format = 'transformer.h.{k}'
 dataset_info = [
                 {'name': 'Common Sense', 'hf_repo': 'tau/commonsense_qa', 'text_col': 'question'},
 model_info = {
     'LLAMA2-7B': dict(model_path='meta-llama/Llama-2-7b-chat-hf', token=os.environ['hf_token'],
+                      original_prompt_template='<s>{prompt}',
+                      interpretation_prompt_template='<s>[INST] [X] [/INST] {prompt}',
+                      layers_format=llama_layers_format), # , load_in_8bit=True
     'GPT-2 Small': dict(model_path='gpt2', original_prompt_template='{prompt}',
+                     interpretation_prompt_template='User: [X]\n\nAnswer: {prompt}',
+                     layers_format=gpt_layers_format),
     'GPT-2 Medium': dict(model_path='gpt2-medium', original_prompt_template='{prompt}',
+                     interpretation_prompt_template='User: [X]\n\nAnswer: {prompt}',
+                     layers_format=gpt_layers_format),
     'GPT-2 Large': dict(model_path='gpt2-large', original_prompt_template='{prompt}',
+                     interpretation_prompt_template='User: [X]\n\nAnswer: {prompt}',
+                     layers_format=gpt_layers_format),
     'GPT-2 XL': dict(model_path='gpt2-xl', original_prompt_template='{prompt}',
+                     interpretation_prompt_template='User: [X]\n\nAnswer: {prompt}',
+                     layers_format=gpt_layers_format),
     'GPT-J 6B': dict(model_path='EleutherAI/gpt-j-6b', original_prompt_template='{prompt}',
+                     interpretation_prompt_template='User: [X]\n\nAnswer: {prompt}',
+                     layers_format=gpt_layers_format),
     'Mistral-7B Instruct': dict(model_path='mistralai/Mistral-7B-Instruct-v0.2', device_map='cpu',
+                                original_prompt_template='<s>{prompt}',
+                                interpretation_prompt_template='<s>[INST] [X] [/INST] {prompt}',
+                                layers_format=llama_layers_format),
+    # 'Gemma-2B': dict(model_path='google/gemma-2b', device_map='cpu', token=os.environ['hf_token'],
+    #                         original_prompt_template='<bos>{prompt}',
+    #                         interpretation_prompt_template='<bos>User: [X]\n\nAnswer: {prompt}',
+    #                        ),
     # 'TheBloke/Mistral-7B-Instruct-v0.2-GGUF': dict(model_file='mistral-7b-instruct-v0.2.Q5_K_S.gguf',
     #                                                tokenizer='mistralai/Mistral-7B-Instruct-v0.2',
     #                                                model_type='llama', hf=True, ctransformers=True,