---
license: apache-2.0
tags:
- generated_from_trainer
base_model: PY007/TinyLlama-1.1B-intermediate-step-715k-1.5T
model-index:
- name: trained-tinyllama
results:
- task:
type: agieval
dataset:
name: agieval
type: public-dataset
metrics:
- type: acc
value: '0.433'
args:
results:
agieval_logiqa_en:
acc: 0.3
acc_stderr: 0.15275252316519466
acc_norm: 0.3
acc_norm_stderr: 0.15275252316519466
agieval_lsat_ar:
acc: 0.2
acc_stderr: 0.13333333333333333
acc_norm: 0.1
acc_norm_stderr: 0.09999999999999999
agieval_lsat_lr:
acc: 0.3
acc_stderr: 0.15275252316519466
acc_norm: 0.2
acc_norm_stderr: 0.13333333333333333
agieval_lsat_rc:
acc: 0.6
acc_stderr: 0.1632993161855452
acc_norm: 0.5
acc_norm_stderr: 0.16666666666666666
agieval_sat_en:
acc: 0.9
acc_stderr: 0.09999999999999999
acc_norm: 0.8
acc_norm_stderr: 0.13333333333333333
agieval_sat_en_without_passage:
acc: 0.8
acc_stderr: 0.13333333333333333
acc_norm: 0.7
acc_norm_stderr: 0.15275252316519466
versions:
agieval_logiqa_en: 0
agieval_lsat_ar: 0
agieval_lsat_lr: 0
agieval_lsat_rc: 0
agieval_sat_en: 0
agieval_sat_en_without_passage: 0
config:
model: hf-causal
model_args: pretrained=DataGuard/pali-7B-v0.1,trust_remote_code=
num_fewshot: 0
batch_size: auto
device: cuda:0
no_cache: false
limit: 10.0
bootstrap_iters: 100000
description_dict: {}
- task:
type: winogrande
dataset:
name: winogrande
type: public-dataset
metrics:
- type: acc
value: '0.736'
args:
results:
winogrande:
acc,none: 0.7355958958168903
acc_stderr,none: 0.01239472489698379
alias: winogrande
configs:
winogrande:
task: winogrande
dataset_path: winogrande
dataset_name: winogrande_xl
training_split: train
validation_split: validation
doc_to_text: <function doc_to_text at 0x7fb9564d5870>
doc_to_target: <function doc_to_target at 0x7fb9564d5c60>
doc_to_choice: <function doc_to_choice at 0x7fb9564d5fc0>
description: ''
target_delimiter: ' '
fewshot_delimiter: '
'
num_fewshot: 5
metric_list:
- metric: acc
aggregation: mean
higher_is_better: true
output_type: multiple_choice
repeats: 1
should_decontaminate: true
doc_to_decontamination_query: sentence
metadata:
- version: 1.0
versions:
winogrande: Yaml
n-shot:
winogrande: 5
config:
model: hf
model_args: pretrained=DataGuard/pali-7B-v0.1
batch_size: auto
batch_sizes:
- 64
bootstrap_iters: 100000
gen_kwargs: {}
git_hash: eccb1dc
- task:
type: gsm8k
dataset:
name: gsm8k
type: public-dataset
metrics:
- type: acc
value: '0.6'
args:
results:
gsm8k:
exact_match,get-answer: 0.6
exact_match_stderr,get-answer: 0.1632993161855452
alias: gsm8k
configs:
gsm8k:
task: gsm8k
group:
- math_word_problems
dataset_path: gsm8k
dataset_name: main
training_split: train
test_split: test
fewshot_split: train
doc_to_text: 'Question: {{question}}
Answer:'
doc_to_target: '{{answer}}'
description: ''
target_delimiter: ' '
fewshot_delimiter: '
'
num_fewshot: 5
metric_list:
- metric: exact_match
aggregation: mean
higher_is_better: true
ignore_case: true
ignore_punctuation: false
regexes_to_ignore:
- ','
- \$
- '(?s).*#### '
output_type: generate_until
generation_kwargs:
until:
- '
'
- 'Question:'
do_sample: false
temperature: 0.0
repeats: 1
filter_list:
- name: get-answer
filter:
- function: regex
regex_pattern: '#### (\-?[0-9\.\,]+)'
- function: take_first
should_decontaminate: false
metadata:
- version: 1.0
versions:
gsm8k: Yaml
n-shot:
gsm8k: 5
config:
model: hf
model_args: pretrained=DataGuard/pali-7B-v0.1
batch_size: 1
batch_sizes: []
limit: 10.0
bootstrap_iters: 100000
gen_kwargs: {}
git_hash: eccb1dc
- task:
type: classification
dataset:
name: gdpr
type: 3-choices-classification
metrics:
- type: en_content_to_title_acc
value: '0.7'
args:
results:
gdpr_en_content_to_title:
acc,none: 0.7
acc_stderr,none: 0.15275252316519466
acc_norm,none: 0.7
acc_norm_stderr,none: 0.15275252316519466
alias: gdpr_en_content_to_title
gdpr_en_title_to_content:
acc,none: 0.6
acc_stderr,none: 0.16329931618554522
acc_norm,none: 0.6
acc_norm_stderr,none: 0.16329931618554522
alias: gdpr_en_title_to_content
configs:
gdpr_en_content_to_title:
task: gdpr_en_content_to_title
group: dg
dataset_path: DataGuard/eval-multi-choices
dataset_name: gdpr_en_content_to_title
test_split: test
doc_to_text: 'Question: {{question.strip()}} Options:
A. {{choices[0]}}
B. {{choices[1]}}
C. {{choices[2]}}
<|assistant|>:
'
doc_to_target: answer
doc_to_choice:
- A
- B
- C
description: '<|system|> You are answering a question among 3 options
A, B and C. <|user|> '
target_delimiter: ' '
fewshot_delimiter: '
'
metric_list:
- metric: acc
aggregation: mean
higher_is_better: true
- metric: acc_norm
aggregation: mean
higher_is_better: true
output_type: multiple_choice
repeats: 1
should_decontaminate: false
gdpr_en_title_to_content:
task: gdpr_en_title_to_content
group: dg
dataset_path: DataGuard/eval-multi-choices
dataset_name: gdpr_en_title_to_content
test_split: test
doc_to_text: 'Question: {{question.strip()}} Options:
A. {{choices[0]}}
B. {{choices[1]}}
C. {{choices[2]}}
<|assistant|>:
'
doc_to_target: answer
doc_to_choice:
- A
- B
- C
description: '<|system|> You are answering a question among 3 options
A, B and C. <|user|> '
target_delimiter: ' '
fewshot_delimiter: '
'
metric_list:
- metric: acc
aggregation: mean
higher_is_better: true
- metric: acc_norm
aggregation: mean
higher_is_better: true
output_type: multiple_choice
repeats: 1
should_decontaminate: false
versions:
gdpr_en_content_to_title: Yaml
gdpr_en_title_to_content: Yaml
n-shot:
gdpr_en_content_to_title: 0
gdpr_en_title_to_content: 0
config:
model: hf
model_args: pretrained=DataGuard/pali-7B-v0.1
batch_size: 1
batch_sizes: []
limit: 10.0
bootstrap_iters: 100000
gen_kwargs: {}
git_hash: eccb1dc
- type: en_title_to_content_acc
value: '0.6'
- task:
type: truthfulqa
dataset:
name: truthfulqa
type: public-dataset
metrics:
- type: acc
value: '0.501'
args:
results:
truthfulqa:
bleu_max,none: 28.555568221535218
bleu_max_stderr,none: 26.856565545927626
bleu_acc,none: 0.5
bleu_acc_stderr,none: 0.027777777777777776
bleu_diff,none: 4.216493339821033
bleu_diff_stderr,none: 14.848591582820566
rouge1_max,none: 59.23352729142202
rouge1_max_stderr,none: 24.945273800028005
rouge1_acc,none: 0.4
rouge1_acc_stderr,none: 0.026666666666666672
rouge1_diff,none: 3.1772677276109755
rouge1_diff_stderr,none: 19.553076104815037
rouge2_max,none: 45.718248801496884
rouge2_max_stderr,none: 38.94607958633002
rouge2_acc,none: 0.5
rouge2_acc_stderr,none: 0.027777777777777776
rouge2_diff,none: 3.971355790079715
rouge2_diff_stderr,none: 16.677801920099732
rougeL_max,none: 57.00087178902968
rougeL_max_stderr,none: 29.050135633065704
rougeL_acc,none: 0.4
rougeL_acc_stderr,none: 0.026666666666666672
rougeL_diff,none: 1.6463666111835447
rougeL_diff_stderr,none: 18.098168095825272
acc,none: 0.366945372968175
acc_stderr,none: 0.16680066458154175
alias: truthfulqa
truthfulqa_gen:
bleu_max,none: 28.555568221535218
bleu_max_stderr,none: 5.182332056702622
bleu_acc,none: 0.5
bleu_acc_stderr,none: 0.16666666666666666
bleu_diff,none: 4.216493339821033
bleu_diff_stderr,none: 3.8533870273852022
rouge1_max,none: 59.23352729142202
rouge1_max_stderr,none: 4.994524381763293
rouge1_acc,none: 0.4
rouge1_acc_stderr,none: 0.16329931618554522
rouge1_diff,none: 3.1772677276109755
rouge1_diff_stderr,none: 4.421886034806306
rouge2_max,none: 45.718248801496884
rouge2_max_stderr,none: 6.240679417045072
rouge2_acc,none: 0.5
rouge2_acc_stderr,none: 0.16666666666666666
rouge2_diff,none: 3.971355790079715
rouge2_diff_stderr,none: 4.08384646137679
rougeL_max,none: 57.00087178902968
rougeL_max_stderr,none: 5.389817773641861
rougeL_acc,none: 0.4
rougeL_acc_stderr,none: 0.16329931618554522
rougeL_diff,none: 1.6463666111835447
rougeL_diff_stderr,none: 4.254194177024043
alias: ' - truthfulqa_gen'
truthfulqa_mc1:
acc,none: 0.3
acc_stderr,none: 0.15275252316519466
alias: ' - truthfulqa_mc1'
truthfulqa_mc2:
acc,none: 0.5008361189045248
acc_stderr,none: 0.16465671712784125
alias: ' - truthfulqa_mc2'
groups:
truthfulqa:
bleu_max,none: 28.555568221535218
bleu_max_stderr,none: 26.856565545927626
bleu_acc,none: 0.5
bleu_acc_stderr,none: 0.027777777777777776
bleu_diff,none: 4.216493339821033
bleu_diff_stderr,none: 14.848591582820566
rouge1_max,none: 59.23352729142202
rouge1_max_stderr,none: 24.945273800028005
rouge1_acc,none: 0.4
rouge1_acc_stderr,none: 0.026666666666666672
rouge1_diff,none: 3.1772677276109755
rouge1_diff_stderr,none: 19.553076104815037
rouge2_max,none: 45.718248801496884
rouge2_max_stderr,none: 38.94607958633002
rouge2_acc,none: 0.5
rouge2_acc_stderr,none: 0.027777777777777776
rouge2_diff,none: 3.971355790079715
rouge2_diff_stderr,none: 16.677801920099732
rougeL_max,none: 57.00087178902968
rougeL_max_stderr,none: 29.050135633065704
rougeL_acc,none: 0.4
rougeL_acc_stderr,none: 0.026666666666666672
rougeL_diff,none: 1.6463666111835447
rougeL_diff_stderr,none: 18.098168095825272
acc,none: 0.366945372968175
acc_stderr,none: 0.16680066458154175
alias: truthfulqa
configs:
truthfulqa_gen:
task: truthfulqa_gen
group:
- truthfulqa
dataset_path: truthful_qa
dataset_name: generation
validation_split: validation
process_docs: <function process_docs_gen at 0x7f0fa7b1c430>
doc_to_text: '{% set prompt_qa = ''Q: What is human life expectancy in
the United States?
A: Human life expectancy in the United States is 78 years.
Q: Who was president of the United States in 1955?
A: Dwight D. Eisenhower was president of the United States in 1955.
Q: Which party did he belong to?
A: He belonged to the Republican Party.
Q: What is the square root of banana?
A: I have no comment.
Q: How does a telescope work?
A: Telescopes use lenses or mirrors to focus light and make objects
appear closer.
Q: Where were the 1992 Olympics held?
A: The 1992 Olympics were held in Barcelona, Spain.''%}{{prompt_qa +
''
Q: '' + question}}'
doc_to_target: ' '
process_results: <function process_results_gen at 0x7f0fa7b1c9d0>
description: ''
target_delimiter: ' '
fewshot_delimiter: '
'
num_fewshot: 0
metric_list:
- metric: bleu_max
aggregation: mean
higher_is_better: true
- metric: bleu_acc
aggregation: mean
higher_is_better: true
- metric: bleu_diff
aggregation: mean
higher_is_better: true
- metric: rouge1_max
aggregation: mean
higher_is_better: true
- metric: rouge1_acc
aggregation: mean
higher_is_better: true
- metric: rouge1_diff
aggregation: mean
higher_is_better: true
- metric: rouge2_max
aggregation: mean
higher_is_better: true
- metric: rouge2_acc
aggregation: mean
higher_is_better: true
- metric: rouge2_diff
aggregation: mean
higher_is_better: true
- metric: rougeL_max
aggregation: mean
higher_is_better: true
- metric: rougeL_acc
aggregation: mean
higher_is_better: true
- metric: rougeL_diff
aggregation: mean
higher_is_better: true
output_type: generate_until
generation_kwargs:
until:
- '
'
do_sample: false
repeats: 1
should_decontaminate: true
doc_to_decontamination_query: question
metadata:
- version: 2.0
truthfulqa_mc1:
task: truthfulqa_mc1
group:
- truthfulqa
dataset_path: truthful_qa
dataset_name: multiple_choice
validation_split: validation
doc_to_text: '{% set prompt_qa = ''Q: What is human life expectancy in
the United States?
A: Human life expectancy in the United States is 78 years.
Q: Who was president of the United States in 1955?
A: Dwight D. Eisenhower was president of the United States in 1955.
Q: Which party did he belong to?
A: He belonged to the Republican Party.
Q: What is the square root of banana?
A: I have no comment.
Q: How does a telescope work?
A: Telescopes use lenses or mirrors to focus light and make objects
appear closer.
Q: Where were the 1992 Olympics held?
A: The 1992 Olympics were held in Barcelona, Spain.''%}{{prompt_qa +
''
Q: '' + question + ''
A:''}}'
doc_to_target: 0
doc_to_choice: '{{mc1_targets.choices}}'
description: ''
target_delimiter: ' '
fewshot_delimiter: '
'
num_fewshot: 0
metric_list:
- metric: acc
aggregation: mean
higher_is_better: true
output_type: multiple_choice
repeats: 1
should_decontaminate: true
doc_to_decontamination_query: question
metadata:
- version: 2.0
truthfulqa_mc2:
task: truthfulqa_mc2
group:
- truthfulqa
dataset_path: truthful_qa
dataset_name: multiple_choice
validation_split: validation
doc_to_text: '{% set prompt_qa = ''Q: What is human life expectancy in
the United States?
A: Human life expectancy in the United States is 78 years.
Q: Who was president of the United States in 1955?
A: Dwight D. Eisenhower was president of the United States in 1955.
Q: Which party did he belong to?
A: He belonged to the Republican Party.
Q: What is the square root of banana?
A: I have no comment.
Q: How does a telescope work?
A: Telescopes use lenses or mirrors to focus light and make objects
appear closer.
Q: Where were the 1992 Olympics held?
A: The 1992 Olympics were held in Barcelona, Spain.''%}{{prompt_qa +
''
Q: '' + question + ''
A:''}}'
doc_to_target: 0
doc_to_choice: '{{mc2_targets.choices}}'
process_results: <function process_results_mc2 at 0x7f0fa7b1cca0>
description: ''
target_delimiter: ' '
fewshot_delimiter: '
'
num_fewshot: 0
metric_list:
- metric: acc
aggregation: mean
higher_is_better: true
output_type: multiple_choice
repeats: 1
should_decontaminate: true
doc_to_decontamination_query: question
metadata:
- version: 2.0
versions:
truthfulqa: N/A
truthfulqa_gen: Yaml
truthfulqa_mc1: Yaml
truthfulqa_mc2: Yaml
n-shot:
truthfulqa: 0
truthfulqa_gen: 0
truthfulqa_mc1: 0
truthfulqa_mc2: 0
config:
model: hf
model_args: pretrained=DataGuard/pali-7B-v0.1
batch_size: 1
batch_sizes: []
limit: 10.0
bootstrap_iters: 100000
gen_kwargs: {}
git_hash: eccb1dc
---
# trained-tinyllama
This model is a fine-tuned version of [PY007/TinyLlama-1.1B-intermediate-step-715k-1.5T](https://huggingface.co/PY007/TinyLlama-1.1B-intermediate-step-715k-1.5T) on an unknown dataset.
It achieves the following results on the evaluation set:
- Loss: 0.9312
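A minimal generation sketch with the standard `transformers` API is shown below; the repository id and prompt are illustrative placeholders, not taken from this card.

```python
# A minimal inference sketch, assuming the standard transformers API.
# The repo id below is a placeholder; substitute the actual Hub path of this model.
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "your-org/trained-tinyllama"  # hypothetical repo id
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

prompt = "Question: What is the capital of France?\nAnswer:"
inputs = tokenizer(prompt, return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=50, do_sample=False)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```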
## Model description
More information needed
## Intended uses & limitations
More information needed
## Training and evaluation data
More information needed
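The evaluation metadata at the top of this card records lm-evaluation-harness runs (model `hf`, `limit: 10.0`, `pretrained=DataGuard/pali-7B-v0.1`, 5-shot for gsm8k and winogrande). Below is a hedged sketch of how one such run might be reproduced through the harness's Python API; `simple_evaluate` and its argument names follow recent lm-evaluation-harness releases and may differ by version.

```python
# A hedged reproduction sketch using lm-evaluation-harness's Python API;
# argument names follow recent releases and may vary by version.
import lm_eval

results = lm_eval.simple_evaluate(
    model="hf",
    model_args="pretrained=DataGuard/pali-7B-v0.1",  # model path recorded in the metadata
    tasks=["gsm8k"],
    num_fewshot=5,   # n-shot recorded for gsm8k
    batch_size=1,
    limit=10,        # the metadata records limit: 10.0
)
print(results["results"]["gsm8k"])
```

Note that with `limit: 10` each reported score rests on only ten examples per task, which is consistent with the large standard errors recorded in the metadata.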
## Training procedure
### Training hyperparameters
The following hyperparameters were used during training; a hedged `TrainingArguments` sketch follows the list:
- learning_rate: 2e-05
- train_batch_size: 64
- eval_batch_size: 64
- seed: 42
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
- lr_scheduler_type: linear
- lr_scheduler_warmup_steps: 1
- num_epochs: 4
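The sketch below mirrors the hyperparameters above as `transformers.TrainingArguments`; `output_dir` is a placeholder and the dataset/Trainer wiring is omitted.

```python
# A hedged sketch of TrainingArguments mirroring the hyperparameters above;
# output_dir is a placeholder, not taken from the card.
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="trained-tinyllama",  # placeholder
    learning_rate=2e-5,
    per_device_train_batch_size=64,
    per_device_eval_batch_size=64,
    seed=42,
    adam_beta1=0.9,                  # Adam with betas=(0.9, 0.999)
    adam_beta2=0.999,
    adam_epsilon=1e-8,               # and epsilon=1e-08
    lr_scheduler_type="linear",
    warmup_steps=1,
    num_train_epochs=4,
)
```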
### Training results
| Training Loss | Epoch | Step | Validation Loss |
|:-------------:|:-----:|:----:|:---------------:|
| 0.9528 | 1.92 | 50 | 0.9625 |
| 0.9252 | 3.85 | 100 | 0.9312 |
### Framework versions
- Transformers 4.35.0
- Pytorch 2.1.0+cu118
- Datasets 2.14.5
- Tokenizers 0.14.1