Re-add large JSON file using Git LFS after cleaning history
- .gitattributes +1 -0
- adapter_0.pt +3 -0
- custom_config.yaml +88 -0
- custom_generation_config_bigModel.yaml +37 -0
- meta_model_0.pt +3 -0
- readme.txt +12 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+meta_model_0.pt filter=lfs diff=lfs merge=lfs -text
adapter_0.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6411aa1e628e188a16b4fc212b80c09fdfd18224f15c6f22b5f29a3462df3edc
+size 6857850
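Both adapter_0.pt and meta_model_0.pt are committed as Git LFS pointer files rather than raw weights: three `key value` lines giving the spec version, the SHA-256 object id, and the true file size in bytes. As a quick illustration (a hypothetical helper, not part of this commit), such a pointer can be parsed like this:

```python
def parse_lfs_pointer(text: str) -> dict:
    """Parse a Git LFS pointer file into its key/value fields."""
    fields = {}
    for line in text.strip().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    # A valid pointer always carries these three keys.
    assert {"version", "oid", "size"} <= fields.keys()
    fields["size"] = int(fields["size"])  # size is the real payload size in bytes
    return fields

pointer = """\
version https://git-lfs.github.com/spec/v1
oid sha256:6411aa1e628e188a16b4fc212b80c09fdfd18224f15c6f22b5f29a3462df3edc
size 6857850
"""
info = parse_lfs_pointer(pointer)
```

The `size` field is why LFS is needed here at all: the adapter is ~6.8 MB, but the full model pointer below records ~16 GB, far past ordinary Git hosting limits.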
custom_config.yaml ADDED
@@ -0,0 +1,88 @@
+# Config for single device LoRA finetuning in lora_finetune_single_device.py
+# using a Llama3 8B model
+#
+# This config assumes that you've run the following command before launching
+# this run:
+# tune download meta-llama/Meta-Llama-3-8B --output-dir /tmp/Meta-Llama-3-8B --hf-token <HF_TOKEN>
+#
+# To launch on a single device, run the following command from root:
+# tune run lora_finetune_single_device --config llama3/8B_lora_single_device
+#
+# You can add specific overrides through the command line. For example
+# to override the checkpointer directory while launching training
+# you can run:
+# tune run lora_finetune_single_device --config llama3/8B_lora_single_device checkpointer.checkpoint_dir=<YOUR_CHECKPOINT_DIR>
+#
+# This config works only for training on single device.
+
+
+# Model Arguments
+model:
+  _component_: torchtune.models.llama3.lora_llama3_8b
+  lora_attn_modules: ['q_proj', 'v_proj']
+  apply_lora_to_mlp: False
+  apply_lora_to_output: False
+  lora_rank: 8
+  lora_alpha: 16
+
+# Tokenizer
+tokenizer:
+  _component_: torchtune.models.llama3.llama3_tokenizer
+  path: /home/aorogat/Meta-Llama-3-8B/original/tokenizer.model
+
+checkpointer:
+  _component_: torchtune.utils.FullModelMetaCheckpointer
+  checkpoint_dir: /home/aorogat/Meta-Llama-3-8B/original/
+  checkpoint_files: [
+    consolidated.00.pth
+  ]
+  recipe_checkpoint: null
+  output_dir: /home/aorogat/q_to_template/
+  model_type: LLAMA3
+resume_from_checkpoint: False
+
+# Dataset and Sampler
+dataset:
+  _component_: torchtune.datasets.instruct_dataset
+  split: train
+  source: /home/aorogat/q_to_template/data
+  template: AlpacaInstructTemplate
+  train_on_input: False
+seed: null
+shuffle: True
+batch_size: 1
+
+# Optimizer and Scheduler
+optimizer:
+  _component_: torch.optim.AdamW
+  weight_decay: 0.01
+  lr: 3e-4
+lr_scheduler:
+  _component_: torchtune.modules.get_cosine_schedule_with_warmup
+  num_warmup_steps: 100
+
+loss:
+  _component_: torch.nn.CrossEntropyLoss
+
+# Training
+epochs: 1
+max_steps_per_epoch: null
+gradient_accumulation_steps: 64
+compile: False
+
+# Logging
+output_dir: /home/aorogat/lora_finetune_output
+metric_logger:
+  _component_: torchtune.utils.metric_logging.DiskLogger
+  log_dir: ${output_dir}
+log_every_n_steps: null
+
+# Environment
+device: cuda
+dtype: bf16
+enable_activation_checkpointing: True
+
+# Profiler (disabled)
+profiler:
+  _component_: torchtune.utils.profiler
+  enabled: False
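The `lr_scheduler` in custom_config.yaml pairs a 100-step linear warmup with cosine decay. Assuming `torchtune.modules.get_cosine_schedule_with_warmup` follows the usual warmup-then-cosine formula (an assumption here, not verified against this commit), the multiplier applied to the base `lr` of 3e-4 can be sketched as:

```python
import math

def cosine_schedule_with_warmup(step, num_warmup_steps, num_training_steps):
    """Multiplier on the base lr: linear ramp over the warmup steps,
    then cosine decay from 1.0 down to 0.0 over the remaining steps."""
    if step < num_warmup_steps:
        return step / max(1, num_warmup_steps)
    progress = (step - num_warmup_steps) / max(1, num_training_steps - num_warmup_steps)
    return 0.5 * (1.0 + math.cos(math.pi * progress))

base_lr = 3e-4  # optimizer.lr from the config
# lr at a few points of a hypothetical 1000-step run with the config's 100 warmup steps
lrs = [base_lr * cosine_schedule_with_warmup(s, 100, 1000) for s in (0, 50, 100, 1000)]
```

Note also that with `batch_size: 1` and `gradient_accumulation_steps: 64`, the effective batch size is 64 examples per optimizer step.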
custom_generation_config_bigModel.yaml ADDED
@@ -0,0 +1,37 @@
+# Config for running the InferenceRecipe in generate.py to generate output from an LLM
+#
+# To launch, run the following command from root torchtune directory:
+# tune run generate --config generation
+
+# Model arguments
+model:
+  _component_: torchtune.models.llama3.llama3_8b
+
+checkpointer:
+  _component_: torchtune.utils.FullModelMetaCheckpointer
+
+  checkpoint_dir: /home/aorogat/q_to_template/
+  checkpoint_files: [
+    meta_model_0.pt
+  ]
+  output_dir: /home/aorogat/q_to_template/
+  model_type: LLAMA3
+
+device: cuda
+dtype: bf16
+
+seed: 1234
+
+# Tokenizer arguments
+tokenizer:
+  _component_: torchtune.models.llama3.llama3_tokenizer
+  path: /home/aorogat/Meta-Llama-3-8B/original/tokenizer.model
+
+# Generation arguments; defaults taken from gpt-fast
+#prompt: "### Instruction: \nYou are a powerful model trained to convert questions to tagged questions. Use the tags as follows: \n<qt> to surround question keywords like 'What', 'Who', 'Which', 'How many', 'Return' or any word that represents requests. \n<o> to surround entities as an object like person name, place name, etc. It must be a noun or a noun phrase. \n<s> to surround entities as a subject like person name, place name, etc. The difference between <s> and <o>, <s> only appear in yes/no questions as in the training data you saw before. \n<cc> to surround coordinating conjunctions that connect two or more phrases like 'and', 'or', 'nor', etc. \n<p> to surround predicates that may be an entity attribute or a relationship between two entities. It can be a verb phrase or a noun phrase. The question must contain at least one predicate. \n<off> for offset in questions asking for the second, third, etc. For example, the question 'What is the second largest country?', <off> will be located as follows. 'What is the <off>second</off> largest country?' \n<t> to surround entity types like person, place, etc. \n<op> to surround operators that compare quantities or values, like 'greater than', 'more than', etc. \n<ref> to indicate a reference within the question that requires a cycle to refer back to an entity (e.g., 'Who is the CEO of a company founded by himself?' where 'himself' would be tagged as <ref>himself</ref>). Then, convert the tagged question to a sparql query template with placeholdes []. \nInput: How many persons live in the capital of Canada? \nTagged Question: \n```html"
+prompt: "### Instruction: \nYou are a powerful model trained to convert questions to tagged questions. Use the tags as follows: \n<qt> to surround question keywords like 'What', 'Who', 'Which', 'How many', 'Return' or any word that represents requests. \n<o> to surround entities as an object like person name, place name, etc. It must be a noun or a noun phrase. \n<s> to surround entities as a subject like person name, place name, etc. The difference between <s> and <o>, <s> only appear in yes/no questions as in the training data you saw before. \n<cc> to surround coordinating conjunctions that connect two or more phrases like 'and', 'or', 'nor', etc. \n<p> to surround predicates that may be an entity attribute or a relationship between two entities. It can be a verb phrase or a noun phrase. The question must contain at least one predicate. \n<off> for offset in questions asking for the second, third, etc. For example, the question 'What is the second largest country?', <off> will be located as follows. 'What is the <off>second</off> largest country?' \n<t> to surround entity types like person, place, etc. \n<op> to surround operators that compare quantities or values, like 'greater than', 'more than', etc. \n<ref> to indicate a reference within the question that requires a cycle to refer back to an entity (e.g., 'Who is the CEO of a company founded by himself?' where 'himself' would be tagged as <ref>himself</ref>). Then, convert the tagged question to a sparql query template with placeholdes []. \nInput: Which film directed by Garry Marshall, starring both Julia Roberts and Richard Gere, has a runtime of over 100 minutes? \nTagged Question: \n```html"
+max_new_tokens: 250
+temperature: 0.6 # 0.8 and 0.6 are popular values to try
+top_k: 1
+
+quantizer: null
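With `top_k: 1`, generation from this config is effectively greedy regardless of `temperature`: only the single highest-logit token survives the filter. A minimal sketch of temperature-plus-top-k sampling (the standard technique, not torchtune's actual implementation) makes this concrete:

```python
import math
import random

def sample_top_k(logits, temperature=0.6, top_k=1, rng=random.Random(1234)):
    """Scale logits by 1/temperature, keep the top_k candidates,
    then sample one index from the renormalized candidate set."""
    scaled = [l / temperature for l in logits]
    # indices of the k largest scaled logits
    top = sorted(range(len(scaled)), key=lambda i: scaled[i], reverse=True)[:top_k]
    # softmax weights over the surviving candidates (shifted for stability)
    m = max(scaled)
    weights = [math.exp(scaled[i] - m) for i in top]
    return rng.choices(top, weights=weights, k=1)[0]

logits = [0.1, 2.3, -1.0, 0.7]
token = sample_top_k(logits)  # with top_k=1 this is always the argmax, index 1
```

Raising `top_k` (or `temperature`) reintroduces randomness; with `top_k: 1` the `seed: 1234` above only matters for any other stochastic parts of the recipe.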
meta_model_0.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:473f2f92e9b03755cb30743b5393b2795e5697a48e9b8622ca9dff419c662715
+size 16060616659
readme.txt ADDED
@@ -0,0 +1,12 @@
+To finetune the model
+- download the model
+- prepare the config file
+- run the finetune
+tune run lora_finetune_single_device --config /home/aorogat/q_to_template/custom_config.yaml
+tune run generate --config /home/aorogat/tq_to_template/custom_generation_config_bigModel.yaml
+
+----
+tmux new -s q_to_template_session
+source myenv/bin/activate
+Ctrl+b Then d
+tmux attach-session -t q_to_template_session