# NOTE(review): the three lines below are Hugging Face Hub page residue
# (avatar caption, commit message, commit hash), not configuration keys.
# Kept as comments so the file parses as YAML.
# jijihuny's picture
# Training in progress, step 413
# afd196e verified
---
# Fine-tuning configuration: completion-only QLoRA training of a Korean
# Llama-3 8B model on an economics QA dataset.
# NOTE(review): structural indentation was stripped in the scraped copy; the
# hierarchy below was reconstructed from key grouping and the alphabetical
# key order within each mapping — confirm against the consuming script.

# Training dataset source and split handling.
dataset:
  include_answer: true
  name: train
  path: jijihuny/economics_qa
  shuffle: true
  test_size: null

# Decoding parameters for text generation during evaluation/inference.
generation:
  do_sample: false
  dola_layers: null
  length_penalty: null
  max_new_tokens: 50
  num_beams: null
  penalty_alpha: null
  repetition_penalty: null
  return_full_text: false
  top_k: 1

# Evaluation metric source.
metric:
  only_inference: false
  path: jijihuny/ecqa

# Base model and loading options.
model:
  attn_implementation: sdpa
  device_map: auto
  path: MLP-KTLim/llama-3-Korean-Bllossom-8B
  # Decoded from the original \uXXXX escape sequences for readability; the
  # folded scalar yields the identical single-line string.
  # (English gist: "You are a chatbot that finds the Answer to the Question
  # in the given Context; copy the answering span verbatim, short-form only.")
  system_prompt: >-
    너는 주어진 Context에서 Question에 대한 Answer를 찾는 챗봇이야.
    Context에서 Answer가 될 수 있는 부분을 찾아서 그대로 적어줘. 단,
    Answer는 주관식이 아니라 단답형으로 적어야 해.
  task: text-generation
  torch_dtype: auto

seed: 42

# Trainer / SFT settings.
train:
  # HF Trainer / SFT arguments (consumed by the training script).
  args:
    bf16: true
    bf16_full_eval: true
    dataloader_num_workers: 12
    eval_accumulation_steps: 1
    eval_on_start: false
    eval_steps: 0.1  # float < 1 — fraction of total training steps
    eval_strategy: steps
    gradient_accumulation_steps: 1
    learning_rate: 0.0001
    logging_steps: 1
    lr_scheduler_kwargs:
      num_cycles: 5
    lr_scheduler_type: cosine
    max_grad_norm: 1.2
    max_seq_length: 2048
    num_train_epochs: 1
    optim: paged_adamw_8bit
    output_dir: llama3-qlora-completion-only
    per_device_eval_batch_size: 32
    per_device_train_batch_size: 16
    push_to_hub: true
    report_to: wandb
    run_name: llama3-qlora-completion-only
    save_steps: 0.2  # float < 1 — fraction of total training steps
    torch_compile: true
    torch_empty_cache_steps: 5
    warmup_ratio: 0.005
    weight_decay: 0.01
  # Llama-3 chat-template marker for the user turn; quoted so the leading
  # '<' can never be misread by a YAML parser.
  instruction_template: '<|start_header_id|>user<|end_header_id|>'
  # PEFT LoRA adapter configuration.
  lora:
    bias: none
    lora_alpha: 32
    lora_dropout: 0.05
    r: 16
    target_modules:
      - up_proj
      - down_proj
      - gate_proj
      - k_proj
      - q_proj
      - v_proj
      - o_proj
      - lm_head
    task_type: CAUSAL_LM
  # bitsandbytes 4-bit quantization (NF4 + double quantization) — the
  # QLoRA loading recipe for the base model.
  quantization:
    bnb_4bit_compute_dtype: bfloat16
    bnb_4bit_quant_type: nf4
    bnb_4bit_use_double_quant: true
    load_in_4bit: true
  # Llama-3 chat-template marker for the assistant turn (response prefix
  # used by the completion-only collator to mask the prompt from the loss).
  response_template: '<|start_header_id|>assistant<|end_header_id|>'
  use_completion_only_data_collator: true