---
description: All the settings needed for creating an experiment are described on this page.
---

import GeneralSettingsDataset from '../../tooltips/experiments/_dataset.mdx'; import GeneralSettingsProblemType from '../../tooltips/experiments/_problem-type.mdx'; import GSImportConfigFromYaml from '../../tooltips/experiments/_import-config-from-yaml.mdx'; import GSExperimentName from '../../tooltips/experiments/_experiment-name.mdx'; import GSLLMBackbone from '../../tooltips/experiments/_llm-backbone.mdx'; import DSTrainDataframe from '../../tooltips/experiments/_train-dataframe.mdx'; import DSvalidationStrategy from '../../tooltips/experiments/_validation-strategy.mdx'; import DSvalidationSize from '../../tooltips/experiments/_validation-size.mdx'; import DSdataSample from '../../tooltips/experiments/_data-sample.mdx'; import DSpromptColumn from '../../tooltips/experiments/_prompt-column.mdx'; import DSPromptColumnSeparator from '../../tooltips/experiments/_prompt-column-separator.mdx'; import DSsystemColumn from '../../tooltips/experiments/_system-column.mdx'; import DSanswerColumn from '../../tooltips/experiments/_answer-column.mdx'; import DSparentIdColumn from '../../tooltips/experiments/_parent-id-column.mdx'; import DStextPromptStart from '../../tooltips/experiments/_text-prompt-start.mdx'; import DStextAnswerSeparator from '../../tooltips/experiments/_text-answer-separator.mdx'; import DSaddEosTokentoprompt from '../../tooltips/experiments/_add-eos-token-to-prompt.mdx'; import DSaddEosTokentoanswer from '../../tooltips/experiments/_add-eos-token-to-answer.mdx'; import DSmaskPromptlabels from '../../tooltips/experiments/_mask-prompt-labels.mdx'; import TSmaxLength from '../../tooltips/experiments/_max-length.mdx'; import TSaddpromptanswertokens from '../../tooltips/experiments/_add-prompt-answer-tokens.mdx'; import TSpaddingQuantile from '../../tooltips/experiments/_padding-quantile.mdx'; import ASBackboneDtype from 
'../../tooltips/experiments/_backbone-dtype.mdx'; import ASGradientcheckpointing from '../../tooltips/experiments/_gradient-checkpointing.mdx'; import ASintermediateDropout from '../../tooltips/experiments/_intermediate-dropout.mdx'; import ASpretrainedWeights from '../../tooltips/experiments/_pretrained-weights.mdx'; import TSoptimizer from '../../tooltips/experiments/_optimizer.mdx'; import TSlossfunction from '../../tooltips/experiments/_loss-function.mdx'; import TSlearningRate from '../../tooltips/experiments/_learning-rate.mdx'; import TSdifferentialLearningRateLayers from '../../tooltips/experiments/_differential-learning-rate-layers.mdx'; import TSfreezeLayers from '../../tooltips/experiments/_freeze-layers.mdx'; import TSattentionImplementation from '../../tooltips/experiments/_attention-implementation.mdx'; import TSbatchSize from '../../tooltips/experiments/_batch-size.mdx'; import TSepochs from '../../tooltips/experiments/_epochs.mdx'; import TSschedule from '../../tooltips/experiments/_schedule.mdx'; import TSminLearningRateRatio from '../../tooltips/experiments/_min-learning-rate-ratio.mdx'; import TSwarmupEpochs from '../../tooltips/experiments/_warmup-epochs.mdx'; import TSweightDecay from '../../tooltips/experiments/_weight-decay.mdx'; import TSGradientclip from '../../tooltips/experiments/_gradient-clip.mdx'; import TSgradAccumulation from '../../tooltips/experiments/_grad-accumulation.mdx'; import TSlora from '../../tooltips/experiments/_lora.mdx'; import TSuseDora from '../../tooltips/experiments/_use-dora.mdx'; import TSloraR from '../../tooltips/experiments/_lora-r.mdx'; import TSloraAlpha from '../../tooltips/experiments/_lora-alpha.mdx'; import TSloraDropout from '../../tooltips/experiments/_lora-dropout.mdx'; import TSuseRSlora from '../../tooltips/experiments/_use-rslora.mdx'; import TSloraTargetModules from '../../tooltips/experiments/_lora-target-modules.mdx'; import TSloraUnfreezeLayers from 
'../../tooltips/experiments/_lora-unfreeze-layers.mdx'; import TSsavecheckpoint from '../../tooltips/experiments/_save-checkpoint.mdx'; import TSevaluationepochs from '../../tooltips/experiments/_evaluation-epochs.mdx'; import TSevaluationbeforetraining from '../../tooltips/experiments/_evaluate-before-training.mdx'; import TStrainvalidationdata from '../../tooltips/experiments/_train-validation-data.mdx'; import AStokenmaskprobability from '../../tooltips/experiments/_token-mask-probability.mdx'; import ASskipParentprobability from '../../tooltips/experiments/_skip-parent-probability.mdx'; import ASrandomparentprobability from '../../tooltips/experiments/_random-parent-probability.mdx'; import ASneftunenoisealpha from '../../tooltips/experiments/_neftune_noise_alpha.mdx'; import PSmetric from '../../tooltips/experiments/_metric.mdx'; import PSmetricgptmodel from '../../tooltips/experiments/_metric-gpt-model.mdx'; import PSmetricgpttemplate from '../../tooltips/experiments/_metric-gpt-template.mdx'; import PSminlengthinference from '../../tooltips/experiments/_min-length-inference.mdx'; import PSmaxlengthinference from '../../tooltips/experiments/_max-length-inference.mdx'; import PSbatchsizeinference from '../../tooltips/experiments/_batch-size-inference.mdx'; import PSdosample from '../../tooltips/experiments/_do-sample.mdx'; import PSnumbeams from '../../tooltips/experiments/_num-beams.mdx'; import PStemperature from '../../tooltips/experiments/_temperature.mdx'; import PSrepetitionpenalty from '../../tooltips/experiments/_repetition-penalty.mdx'; import PSstoptokens from '../../tooltips/experiments/_stop-tokens.mdx'; import PStopk from '../../tooltips/experiments/_top-k.mdx'; import PStopp from '../../tooltips/experiments/_top-p.mdx'; import ESgpus from '../../tooltips/experiments/_gpus.mdx'; import ESmixedprecision from '../../tooltips/experiments/_mixed-precision.mdx'; import EScompilemodel from '../../tooltips/experiments/_compile-model.mdx'; import 
ESfindunusedparameters from '../../tooltips/experiments/_find-unused-parameters.mdx'; import EStrustremotecode from '../../tooltips/experiments/_trust-remote-code.mdx'; import EShuggingfacebranch from '../../tooltips/experiments/_huggingface-branch.mdx'; import ESnumofworkers from '../../tooltips/experiments/_number-of-workers.mdx'; import ESseed from '../../tooltips/experiments/_seed.mdx'; import LSlogstepsize from '../../tooltips/experiments/_log-step-size.mdx'; import LSlogallranks from '../../tooltips/experiments/_log-all-ranks.mdx'; import LSlogger from '../../tooltips/experiments/_logger.mdx'; import LSneptuneproject from '../../tooltips/experiments/_neptune-project.mdx'; import LSwandbproject from '../../tooltips/experiments/_wandb-project.mdx'; import LSwandbentity from '../../tooltips/experiments/_wandb-entity.mdx'; import NumClasses from '../../tooltips/experiments/_num-classes.mdx';

# Experiment settings

The settings for creating an experiment are grouped into the following sections:

- [General settings](#general-settings)
- [Dataset settings](#dataset-settings)
- [Tokenizer settings](#tokenizer-settings)
- [Architecture settings](#architecture-settings)
- [Training settings](#training-settings)
- [Augmentation settings](#augmentation-settings)
- [Prediction settings](#prediction-settings)
- [Environment settings](#environment-settings)
- [Logging settings](#logging-settings)

The settings under each category are listed and described below.

## General settings

### Dataset

### Problem type

### Import config from YAML

### Experiment name

### LLM backbone

## Dataset settings

### Train dataframe

### Validation strategy

### Validation size

### Data sample

### System column

### Prompt column

### Prompt column separator

### Answer column

### Parent ID column

### ID column

### Text prompt start

### Text answer separator

### Add EOS token to prompt

### Add EOS token to answer

### Mask prompt labels

### Num classes

The **Num classes** field should be set to the total number of classes in the [answer column](../datasets/import-dataset.md#answer-column) of the dataset.

## Tokenizer settings

### Max length

### Add prompt answer tokens

### Padding quantile

## Architecture settings

### Backbone Dtype

### Gradient Checkpointing

### Intermediate dropout

### Pretrained weights

## Training settings

### Loss function

For multiclass classification problems, set the loss function to **Cross-entropy**.

### Optimizer

### Learning rate

### Differential learning rate layers

By default, H2O LLM Studio applies **Differential learning rate layers**, with the learning rate for the `classification_head` being 10 times smaller than the learning rate for the rest of the model.

### Freeze layers

### Attention Implementation

### Batch size

### Epochs

### Schedule

### Min Learning Rate Ratio

### Warmup epochs

### Weight decay

### Gradient clip

### Grad accumulation

### Lora

### Use Dora

### Lora R

### Lora Alpha

### Lora dropout

### Use RS Lora

### Lora target modules

### Lora unfreeze layers

### Save checkpoint

### Evaluation epochs

### Evaluate before training

### Train validation data

## Augmentation settings

### Token mask probability

### Skip parent probability

### Random parent probability

### Neftune noise alpha

## Prediction settings

### Metric

### Metric GPT model

### Metric GPT template

### Min length inference

### Max length inference

### Batch size inference

### Do sample

### Num beams

### Temperature

### Repetition penalty

### Stop tokens

### Top K

### Top P

## Environment settings

### GPUs

### Mixed precision

### Compile model

### Find unused parameters

### Trust remote code

### Hugging Face branch

### Number of workers

### Seed

## Logging settings

### Log step size

### Log all ranks

### Logger

### Neptune project

### W&B project

### W&B entity