aorogat committed on
Commit
afbe3a2
·
1 Parent(s): 8593e60

Re-add large JSON file using Git LFS after cleaning history

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ meta_model_0.pt filter=lfs diff=lfs merge=lfs -text
adapter_0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6411aa1e628e188a16b4fc212b80c09fdfd18224f15c6f22b5f29a3462df3edc
3
+ size 6857850
custom_config.yaml ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Config for single device LoRA finetuning in lora_finetune_single_device.py
2
+ # using a Llama3 8B model
3
+ #
4
+ # This config assumes that you've run the following command before launching
5
+ # this run:
6
+ # tune download meta-llama/Meta-Llama-3-8B --output-dir /tmp/Meta-Llama-3-8B --hf-token <HF_TOKEN>
7
+ #
8
+ # To launch on a single device, run the following command from root:
9
+ # tune run lora_finetune_single_device --config llama3/8B_lora_single_device
10
+ #
11
+ # You can add specific overrides through the command line. For example
12
+ # to override the checkpointer directory while launching training
13
+ # you can run:
14
+ # tune run lora_finetune_single_device --config llama3/8B_lora_single_device checkpointer.checkpoint_dir=<YOUR_CHECKPOINT_DIR>
15
+ #
16
+ # This config works only for training on single device.
17
+
18
+
19
+ # Model Arguments
20
+ model:
21
+ _component_: torchtune.models.llama3.lora_llama3_8b
22
+ lora_attn_modules: ['q_proj', 'v_proj']
23
+ apply_lora_to_mlp: False
24
+ apply_lora_to_output: False
25
+ lora_rank: 8
26
+ lora_alpha: 16
27
+
28
+ # Tokenizer
29
+ tokenizer:
30
+ _component_: torchtune.models.llama3.llama3_tokenizer
31
+ path: /home/aorogat/Meta-Llama-3-8B/original/tokenizer.model
32
+
33
+ checkpointer:
34
+ _component_: torchtune.utils.FullModelMetaCheckpointer
35
+ checkpoint_dir: /home/aorogat/Meta-Llama-3-8B/original/
36
+ checkpoint_files: [
37
+ consolidated.00.pth
38
+ ]
39
+ recipe_checkpoint: null
40
+ output_dir: /home/aorogat/q_to_template/
41
+ model_type: LLAMA3
42
+ resume_from_checkpoint: False
43
+
44
+ # Dataset and Sampler
45
+ dataset:
46
+ _component_: torchtune.datasets.instruct_dataset
47
+ split: train
48
+ source: /home/aorogat/q_to_template/data
49
+ template: AlpacaInstructTemplate
50
+ train_on_input: False
51
+ seed: null
52
+ shuffle: True
53
+ batch_size: 1
54
+
55
+ # Optimizer and Scheduler
56
+ optimizer:
57
+ _component_: torch.optim.AdamW
58
+ weight_decay: 0.01
59
+ lr: 3e-4
60
+ lr_scheduler:
61
+ _component_: torchtune.modules.get_cosine_schedule_with_warmup
62
+ num_warmup_steps: 100
63
+
64
+ loss:
65
+ _component_: torch.nn.CrossEntropyLoss
66
+
67
+ # Training
68
+ epochs: 1
69
+ max_steps_per_epoch: null
70
+ gradient_accumulation_steps: 64
71
+ compile: False
72
+
73
+ # Logging
74
+ output_dir: /home/aorogat/lora_finetune_output
75
+ metric_logger:
76
+ _component_: torchtune.utils.metric_logging.DiskLogger
77
+ log_dir: ${output_dir}
78
+ log_every_n_steps: null
79
+
80
+ # Environment
81
+ device: cuda
82
+ dtype: bf16
83
+ enable_activation_checkpointing: True
84
+
85
+ # Profiler (disabled)
86
+ profiler:
87
+ _component_: torchtune.utils.profiler
88
+ enabled: False
custom_generation_config_bigModel.yaml ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Config for running the InferenceRecipe in generate.py to generate output from an LLM
2
+ #
3
+ # To launch, run the following command from root torchtune directory:
4
+ # tune run generate --config generation
5
+
6
+ # Model arguments
7
+ model:
8
+ _component_: torchtune.models.llama3.llama3_8b
9
+
10
+ checkpointer:
11
+ _component_: torchtune.utils.FullModelMetaCheckpointer
12
+
13
+ checkpoint_dir: /home/aorogat/q_to_template/
14
+ checkpoint_files: [
15
+ meta_model_0.pt
16
+ ]
17
+ output_dir: /home/aorogat/q_to_template/
18
+ model_type: LLAMA3
19
+
20
+ device: cuda
21
+ dtype: bf16
22
+
23
+ seed: 1234
24
+
25
+ # Tokenizer arguments
26
+ tokenizer:
27
+ _component_: torchtune.models.llama3.llama3_tokenizer
28
+ path: /home/aorogat/Meta-Llama-3-8B/original/tokenizer.model
29
+
30
+ # Generation arguments; defaults taken from gpt-fast
31
+ #prompt: "### Instruction: \nYou are a powerful model trained to convert questions to tagged questions. Use the tags as follows: \n<qt> to surround question keywords like 'What', 'Who', 'Which', 'How many', 'Return' or any word that represents requests. \n<o> to surround entities as an object like person name, place name, etc. It must be a noun or a noun phrase. \n<s> to surround entities as a subject like person name, place name, etc. The difference between <s> and <o>, <s> only appear in yes/no questions as in the training data you saw before. \n<cc> to surround coordinating conjunctions that connect two or more phrases like 'and', 'or', 'nor', etc. \n<p> to surround predicates that may be an entity attribute or a relationship between two entities. It can be a verb phrase or a noun phrase. The question must contain at least one predicate. \n<off> for offset in questions asking for the second, third, etc. For example, the question 'What is the second largest country?', <off> will be located as follows. 'What is the <off>second</off> largest country?' \n<t> to surround entity types like person, place, etc. \n<op> to surround operators that compare quantities or values, like 'greater than', 'more than', etc. \n<ref> to indicate a reference within the question that requires a cycle to refer back to an entity (e.g., 'Who is the CEO of a company founded by himself?' where 'himself' would be tagged as <ref>himself</ref>). Then, convert the tagged question to a sparql query template with placeholdes []. \nInput: How many persons live in the capital of Canada? \nTagged Question: \n```html"
32
+ prompt: "### Instruction: \nYou are a powerful model trained to convert questions to tagged questions. Use the tags as follows: \n<qt> to surround question keywords like 'What', 'Who', 'Which', 'How many', 'Return' or any word that represents requests. \n<o> to surround entities as an object like person name, place name, etc. It must be a noun or a noun phrase. \n<s> to surround entities as a subject like person name, place name, etc. The difference between <s> and <o>, <s> only appear in yes/no questions as in the training data you saw before. \n<cc> to surround coordinating conjunctions that connect two or more phrases like 'and', 'or', 'nor', etc. \n<p> to surround predicates that may be an entity attribute or a relationship between two entities. It can be a verb phrase or a noun phrase. The question must contain at least one predicate. \n<off> for offset in questions asking for the second, third, etc. For example, the question 'What is the second largest country?', <off> will be located as follows. 'What is the <off>second</off> largest country?' \n<t> to surround entity types like person, place, etc. \n<op> to surround operators that compare quantities or values, like 'greater than', 'more than', etc. \n<ref> to indicate a reference within the question that requires a cycle to refer back to an entity (e.g., 'Who is the CEO of a company founded by himself?' where 'himself' would be tagged as <ref>himself</ref>). Then, convert the tagged question to a sparql query template with placeholders []. \nInput: Which film directed by Garry Marshall, starring both Julia Roberts and Richard Gere, has a runtime of over 100 minutes? \nTagged Question: \n```html"
33
+ max_new_tokens: 250
34
+ temperature: 0.6 # 0.8 and 0.6 are popular values to try
35
+ top_k: 1
36
+
37
+ quantizer: null
meta_model_0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:473f2f92e9b03755cb30743b5393b2795e5697a48e9b8622ca9dff419c662715
3
+ size 16060616659
readme.txt ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ To finetune the model
2
+ - download the model
3
+ - prepare the config file
4
+ - run the finetune
5
+ tune run lora_finetune_single_device --config /home/aorogat/q_to_template/custom_config.yaml
6
+ tune run generate --config /home/aorogat/q_to_template/custom_generation_config_bigModel.yaml
7
+
8
+ ----
9
+ tmux new -s q_to_template_session
10
+ source myenv/bin/activate
11
+ Ctrl+b Then d
12
+ tmux attach-session -t q_to_template_session