{ "command": "/usr/local/lib/python3.10/dist-packages/colab_kernel_launcher.py -f /root/.local/share/jupyter/runtime/kernel-7aca3806-fa5e-4ad1-ad15-080bec30928b.json", "compute": { "arch_list": [ "sm_50", "sm_60", "sm_70", "sm_75", "sm_80", "sm_86", "sm_90" ], "devices": { "0": { "device_capability": [ 8, 0 ], "device_properties": "_CudaDeviceProperties(name='NVIDIA A100-SXM4-40GB', major=8, minor=0, total_memory=40513MB, multi_processor_count=108)", "gpu_type": "NVIDIA A100-SXM4-40GB" } }, "gencode_flags": "-gencode compute=compute_50,code=sm_50 -gencode compute=compute_60,code=sm_60 -gencode compute=compute_70,code=sm_70 -gencode compute=compute_75,code=sm_75 -gencode compute=compute_80,code=sm_80 -gencode compute=compute_86,code=sm_86 -gencode compute=compute_90,code=sm_90", "gpus_per_node": 1, "num_nodes": 1 }, "config": { "adapter": { "alpha": 16, "bias_type": "none", "dropout": 0.05, "postprocessor": { "merge_adapter_into_base_model": false, "progressbar": false }, "pretrained_adapter_weights": null, "r": 8, "target_modules": null, "type": "lora" }, "backend": null, "base_model": "mistralai/Mistral-7B-Instruct-v0.2", "defaults": { "text": { "decoder": { "fc_activation": "relu", "fc_bias_initializer": "zeros", "fc_dropout": 0.0, "fc_layers": null, "fc_norm": null, "fc_norm_params": null, "fc_output_size": 256, "fc_use_bias": true, "fc_weights_initializer": "xavier_uniform", "input_size": null, "max_new_tokens": null, "num_fc_layers": 0, "pretrained_model_name_or_path": "", "tokenizer": "hf_tokenizer", "type": "text_extractor", "vocab_file": "" }, "encoder": { "skip": false, "type": "passthrough" }, "loss": { "class_similarities": null, "class_similarities_temperature": 0, "class_weights": null, "confidence_penalty": 0, "robust_lambda": 0, "type": "next_token_softmax_cross_entropy", "unique": false, "weight": 1.0 }, "preprocessing": { "cache_encoder_embeddings": false, "compute_idf": false, "computed_fill_value": "", "fill_value": "", "lowercase": false, "max_sequence_length": 256, "missing_value_strategy": "fill_with_const", "most_common": 20000, "ngram_size": 2, "padding": "right", "padding_symbol": "", "pretrained_model_name_or_path": null, "prompt": { "retrieval": { "index_name": null, "k": 0, "model_name": null, "type": null }, "task": null, "template": null }, "sequence_length": null, "tokenizer": "space_punct", "unknown_symbol": "", "vocab_file": null } } }, "generation": { "bad_words_ids": null, "begin_suppress_tokens": null, "bos_token_id": null, "diversity_penalty": 0.0, "do_sample": true, "early_stopping": false, "encoder_repetition_penalty": 1.0, "eos_token_id": null, "epsilon_cutoff": 0.0, "eta_cutoff": 0.0, "exponential_decay_length_penalty": null, "force_words_ids": null, "forced_bos_token_id": null, "forced_decoder_ids": null, "forced_eos_token_id": null, "guidance_scale": null, "length_penalty": 1.0, "max_length": 32, "max_new_tokens": 512, "max_time": null, "min_length": 0, "min_new_tokens": null, "no_repeat_ngram_size": 0, "num_beam_groups": 1, "num_beams": 1, "pad_token_id": null, "penalty_alpha": null, "prompt_lookup_num_tokens": null, "remove_invalid_values": false, "renormalize_logits": false, "repetition_penalty": 1.0, "sequence_bias": null, "suppress_tokens": null, "temperature": 0.1, "top_k": 50, "top_p": 1.0, "typical_p": 1.0, "use_cache": true }, "hyperopt": null, "input_features": [ { "active": true, "column": "question", "encoder": { "skip": false, "type": "passthrough" }, "name": "question", "preprocessing": { "cache_encoder_embeddings": false, "compute_idf": false, "computed_fill_value": "", "fill_value": "", "lowercase": false, "max_sequence_length": null, "missing_value_strategy": "fill_with_const", "most_common": 20000, "ngram_size": 2, "padding": "left", "padding_symbol": "", "pretrained_model_name_or_path": "mistralai/Mistral-7B-Instruct-v0.2", "sequence_length": null, "tokenizer": "hf_tokenizer", "unknown_symbol": "", "vocab_file": null }, "proc_column": "question_Nlu_HO", "tied": null, "type": "text" } ], "ludwig_version": "0.10.1", "model_parameters": null, "model_type": "llm", "output_features": [ { "active": true, "class_similarities": null, "column": "answer", "decoder": { "fc_activation": "relu", "fc_bias_initializer": "zeros", "fc_dropout": 0.0, "fc_layers": null, "fc_norm": null, "fc_norm_params": null, "fc_output_size": 256, "fc_use_bias": true, "fc_weights_initializer": "xavier_uniform", "input_size": null, "max_new_tokens": 512, "num_fc_layers": 0, "pretrained_model_name_or_path": "mistralai/Mistral-7B-Instruct-v0.2", "tokenizer": "hf_tokenizer", "type": "text_extractor", "vocab_file": "" }, "default_validation_metric": "loss", "dependencies": [], "input_size": null, "loss": { "class_similarities": null, "class_similarities_temperature": 0, "class_weights": null, "confidence_penalty": 0, "robust_lambda": 0, "type": "next_token_softmax_cross_entropy", "unique": false, "weight": 1.0 }, "name": "answer", "num_classes": null, "preprocessing": { "cache_encoder_embeddings": false, "compute_idf": false, "computed_fill_value": "", "fill_value": "", "lowercase": false, "max_sequence_length": null, "missing_value_strategy": "drop_row", "most_common": 20000, "ngram_size": 2, "padding": "left", "padding_symbol": "", "pretrained_model_name_or_path": "mistralai/Mistral-7B-Instruct-v0.2", "sequence_length": null, "tokenizer": "hf_tokenizer", "unknown_symbol": "", "vocab_file": null }, "proc_column": "answer_D_Znvc", "reduce_dependencies": "sum", "reduce_input": "sum", "type": "text" } ], "preprocessing": { "global_max_sequence_length": 512, "oversample_minority": null, "sample_ratio": 1.0, "sample_size": null, "split": { "probabilities": [ 1.0, 0.0, 0.0 ], "type": "random" }, "undersample_majority": null }, "prompt": { "retrieval": { "index_name": null, "k": 0, "model_name": null, "type": null }, "task": null, "template": "Below is an instruction that describes a task, paired with an input that provides further context with language code and country code. Write a answer that appropriately answers the question with respect to the country code and language code.\n### country_code: {country_code}\n### language_code: {language_code}\n### Instruction: {question}\n### answer:" }, "quantization": { "bits": 4, "bnb_4bit_compute_dtype": "float16", "bnb_4bit_quant_type": "nf4", "bnb_4bit_use_double_quant": true, "llm_int8_has_fp16_weight": false, "llm_int8_threshold": 6.0 }, "trainer": { "base_learning_rate": 0.0, "batch_size": 1, "bucketing_field": null, "checkpoints_per_epoch": 0, "compile": false, "early_stop": 5, "effective_batch_size": "auto", "enable_gradient_checkpointing": false, "enable_profiling": false, "epochs": 50, "eval_batch_size": 2, "eval_steps": null, "evaluate_training_set": false, "gradient_accumulation_steps": 16, "gradient_clipping": { "clipglobalnorm": 0.5, "clipnorm": null, "clipvalue": null }, "increase_batch_size_eval_metric": "loss", "increase_batch_size_eval_split": "training", "increase_batch_size_on_plateau": 0, "increase_batch_size_on_plateau_patience": 5, "increase_batch_size_on_plateau_rate": 2.0, "learning_rate": 0.0004, "learning_rate_scaling": "linear", "learning_rate_scheduler": { "decay": "cosine", "decay_rate": 0.96, "decay_steps": 10000, "eta_min": 0, "reduce_eval_metric": "loss", "reduce_eval_split": "training", "reduce_on_plateau": 0, "reduce_on_plateau_patience": 10, "reduce_on_plateau_rate": 0.1, "staircase": false, "t_0": null, "t_mult": 1, "warmup_evaluations": 0, "warmup_fraction": 0.03 }, "max_batch_size": 1099511627776, "optimizer": { "amsgrad": false, "betas": [ 0.9, 0.999 ], "block_wise": true, "eps": 1e-08, "percentile_clipping": 100, "type": "paged_adam", "weight_decay": 0.0 }, "profiler": { "active": 3, "repeat": 5, "skip_first": 0, "wait": 1, "warmup": 1 }, "regularization_lambda": 0.0, "regularization_type": "l2", "should_shuffle": true, "skip_all_evaluation": false, "steps_per_checkpoint": 0, "train_steps": null, "type": "finetune", "use_mixed_precision": false, "validation_field": "answer", "validation_metric": "loss" } }, "data_format": "", "ludwig_version": "0.10.1", "random_seed": 42, "torch_version": "2.2.1+cu121" }