mistral_two_record_id / description.json
sushruthsam's picture
Upload 11 files
7c170bc verified
{
"command": "/usr/local/lib/python3.10/dist-packages/colab_kernel_launcher.py -f /root/.local/share/jupyter/runtime/kernel-3aca3dc1-4d01-4666-980c-0422f6452525.json",
"compute": {
"arch_list": [
"sm_50",
"sm_60",
"sm_70",
"sm_75",
"sm_80",
"sm_86",
"sm_90"
],
"devices": {
"0": {
"device_capability": [
8,
0
],
"device_properties": "_CudaDeviceProperties(name='NVIDIA A100-SXM4-40GB', major=8, minor=0, total_memory=40513MB, multi_processor_count=108)",
"gpu_type": "NVIDIA A100-SXM4-40GB"
}
},
"gencode_flags": "-gencode compute=compute_50,code=sm_50 -gencode compute=compute_60,code=sm_60 -gencode compute=compute_70,code=sm_70 -gencode compute=compute_75,code=sm_75 -gencode compute=compute_80,code=sm_80 -gencode compute=compute_86,code=sm_86 -gencode compute=compute_90,code=sm_90",
"gpus_per_node": 1,
"num_nodes": 1
},
"config": {
"adapter": {
"alpha": 16,
"bias_type": "none",
"dropout": 0.05,
"postprocessor": {
"merge_adapter_into_base_model": false,
"progressbar": false
},
"pretrained_adapter_weights": null,
"r": 8,
"target_modules": null,
"type": "lora",
"use_dora": false,
"use_rslora": false
},
"backend": null,
"base_model": "mistralai/Mistral-7B-Instruct-v0.2",
"defaults": {
"text": {
"decoder": {
"fc_activation": "relu",
"fc_bias_initializer": "zeros",
"fc_dropout": 0.0,
"fc_layers": null,
"fc_norm": null,
"fc_norm_params": null,
"fc_output_size": 256,
"fc_use_bias": true,
"fc_weights_initializer": "xavier_uniform",
"input_size": null,
"max_new_tokens": null,
"num_fc_layers": 0,
"pretrained_model_name_or_path": "",
"tokenizer": "hf_tokenizer",
"type": "text_extractor",
"vocab_file": ""
},
"encoder": {
"skip": false,
"type": "passthrough"
},
"loss": {
"class_similarities": null,
"class_similarities_temperature": 0,
"class_weights": null,
"confidence_penalty": 0,
"robust_lambda": 0,
"type": "next_token_softmax_cross_entropy",
"unique": false,
"weight": 1.0
},
"preprocessing": {
"cache_encoder_embeddings": false,
"compute_idf": false,
"computed_fill_value": "<UNK>",
"fill_value": "<UNK>",
"lowercase": false,
"max_sequence_length": 256,
"missing_value_strategy": "fill_with_const",
"most_common": 20000,
"ngram_size": 2,
"padding": "right",
"padding_symbol": "<PAD>",
"pretrained_model_name_or_path": null,
"prompt": {
"retrieval": {
"index_name": null,
"k": 0,
"model_name": null,
"type": null
},
"task": null,
"template": null
},
"sequence_length": null,
"tokenizer": "space_punct",
"unknown_symbol": "<UNK>",
"vocab_file": null
}
}
},
"generation": {
"bad_words_ids": null,
"begin_suppress_tokens": null,
"bos_token_id": null,
"diversity_penalty": 0.0,
"do_sample": true,
"early_stopping": false,
"encoder_repetition_penalty": 1.0,
"eos_token_id": null,
"epsilon_cutoff": 0.0,
"eta_cutoff": 0.0,
"exponential_decay_length_penalty": null,
"force_words_ids": null,
"forced_bos_token_id": null,
"forced_decoder_ids": null,
"forced_eos_token_id": null,
"guidance_scale": null,
"length_penalty": 1.0,
"max_length": 32,
"max_new_tokens": 512,
"max_time": null,
"min_length": 0,
"min_new_tokens": null,
"no_repeat_ngram_size": 0,
"num_beam_groups": 1,
"num_beams": 1,
"pad_token_id": null,
"penalty_alpha": null,
"prompt_lookup_num_tokens": null,
"remove_invalid_values": false,
"renormalize_logits": false,
"repetition_penalty": 1.0,
"sequence_bias": null,
"suppress_tokens": null,
"temperature": 0.1,
"top_k": 50,
"top_p": 1.0,
"typical_p": 1.0,
"use_cache": true
},
"hyperopt": null,
"input_features": [
{
"active": true,
"column": "question",
"encoder": {
"skip": false,
"type": "passthrough"
},
"name": "question",
"preprocessing": {
"cache_encoder_embeddings": false,
"compute_idf": false,
"computed_fill_value": "<UNK>",
"fill_value": "<UNK>",
"lowercase": false,
"max_sequence_length": null,
"missing_value_strategy": "fill_with_const",
"most_common": 20000,
"ngram_size": 2,
"padding": "left",
"padding_symbol": "<PAD>",
"pretrained_model_name_or_path": "mistralai/Mistral-7B-Instruct-v0.2",
"sequence_length": null,
"tokenizer": "hf_tokenizer",
"unknown_symbol": "<UNK>",
"vocab_file": null
},
"proc_column": "question_Nlu_HO",
"tied": null,
"type": "text"
}
],
"ludwig_version": "0.10.2",
"model_parameters": null,
"model_type": "llm",
"output_features": [
{
"active": true,
"class_similarities": null,
"column": "record_id",
"decoder": {
"fc_activation": "relu",
"fc_bias_initializer": "zeros",
"fc_dropout": 0.0,
"fc_layers": null,
"fc_norm": null,
"fc_norm_params": null,
"fc_output_size": 256,
"fc_use_bias": true,
"fc_weights_initializer": "xavier_uniform",
"input_size": null,
"max_new_tokens": 512,
"num_fc_layers": 0,
"pretrained_model_name_or_path": "mistralai/Mistral-7B-Instruct-v0.2",
"tokenizer": "hf_tokenizer",
"type": "text_extractor",
"vocab_file": ""
},
"default_validation_metric": "loss",
"dependencies": [],
"input_size": null,
"loss": {
"class_similarities": null,
"class_similarities_temperature": 0,
"class_weights": null,
"confidence_penalty": 0,
"robust_lambda": 0,
"type": "next_token_softmax_cross_entropy",
"unique": false,
"weight": 1.0
},
"name": "record_id",
"num_classes": null,
"preprocessing": {
"cache_encoder_embeddings": false,
"compute_idf": false,
"computed_fill_value": "<UNK>",
"fill_value": "<UNK>",
"lowercase": false,
"max_sequence_length": null,
"missing_value_strategy": "drop_row",
"most_common": 20000,
"ngram_size": 2,
"padding": "left",
"padding_symbol": "<PAD>",
"pretrained_model_name_or_path": "mistralai/Mistral-7B-Instruct-v0.2",
"sequence_length": null,
"tokenizer": "hf_tokenizer",
"unknown_symbol": "<UNK>",
"vocab_file": null
},
"proc_column": "record_id_D_Znvc",
"reduce_dependencies": "sum",
"reduce_input": "sum",
"type": "text"
}
],
"preprocessing": {
"global_max_sequence_length": 512,
"oversample_minority": null,
"sample_ratio": 1.0,
"sample_size": null,
"split": {
"probabilities": [
1.0,
0.0,
0.0
],
"type": "random"
},
"undersample_majority": null
},
"prompt": {
"retrieval": {
"index_name": null,
"k": 0,
"model_name": null,
"type": null
},
"task": null,
"template": "Below is an instruction that describes a task, paired with an input that provides further context with language code and country code. Write a answer that appropriately answers the question with respect to the country code and language code.if the input provided consists of combination of two questions, then provide two record id or else only provide one record id only.\n### country_code: {country_code}\n### language_code: {language_code}\n### Instruction: {question}\n### answer:"
},
"quantization": {
"bits": 4,
"bnb_4bit_compute_dtype": "float16",
"bnb_4bit_quant_type": "nf4",
"bnb_4bit_use_double_quant": true,
"llm_int8_has_fp16_weight": false,
"llm_int8_threshold": 6.0
},
"trainer": {
"base_learning_rate": 0.0,
"batch_size": 1,
"bucketing_field": null,
"checkpoints_per_epoch": 0,
"compile": false,
"early_stop": 5,
"effective_batch_size": "auto",
"enable_gradient_checkpointing": false,
"enable_profiling": false,
"epochs": 25,
"eval_batch_size": 2,
"eval_steps": null,
"evaluate_training_set": false,
"gradient_accumulation_steps": 16,
"gradient_clipping": {
"clipglobalnorm": 0.5,
"clipnorm": null,
"clipvalue": null
},
"increase_batch_size_eval_metric": "loss",
"increase_batch_size_eval_split": "training",
"increase_batch_size_on_plateau": 0,
"increase_batch_size_on_plateau_patience": 5,
"increase_batch_size_on_plateau_rate": 2.0,
"learning_rate": 0.0004,
"learning_rate_scaling": "linear",
"learning_rate_scheduler": {
"decay": "cosine",
"decay_rate": 0.96,
"decay_steps": 10000,
"eta_min": 0,
"reduce_eval_metric": "loss",
"reduce_eval_split": "training",
"reduce_on_plateau": 0,
"reduce_on_plateau_patience": 10,
"reduce_on_plateau_rate": 0.1,
"staircase": false,
"t_0": null,
"t_mult": 1,
"warmup_evaluations": 0,
"warmup_fraction": 0.03
},
"max_batch_size": 1099511627776,
"optimizer": {
"amsgrad": false,
"betas": [
0.9,
0.999
],
"block_wise": true,
"eps": 1e-08,
"percentile_clipping": 100,
"type": "paged_adam",
"weight_decay": 0.0
},
"profiler": {
"active": 3,
"repeat": 5,
"skip_first": 0,
"wait": 1,
"warmup": 1
},
"regularization_lambda": 0.0,
"regularization_type": "l2",
"should_shuffle": true,
"skip_all_evaluation": false,
"steps_per_checkpoint": 0,
"train_steps": null,
"type": "finetune",
"use_mixed_precision": false,
"validation_field": "record_id",
"validation_metric": "loss"
}
},
"data_format": "<class 'pandas.core.frame.DataFrame'>",
"ludwig_version": "0.10.2",
"random_seed": 42,
"torch_version": "2.2.1+cu121"
}