Mila_Global_Moth_Classifier / job_split_dataset.sh
adityajain07's picture
Upload folder using huggingface_hub
d6c6696 verified
raw
history blame
821 Bytes
#!/bin/bash
#SBATCH --job-name=split_dataset
#SBATCH --ntasks=1
#SBATCH --time=2:00:00
#SBATCH --partition=long-cpu # Ask for long-cpu job
#SBATCH --cpus-per-task=2 # Ask for 2 CPUs
#SBATCH --mem=6G # Ask for 6 GB of RAM
#SBATCH --output=split_dataset_%j.out

# Slurm batch job that runs `ami-dataset split-dataset` on the cleaned dataset.
#
# Required variables (loaded from ./.env when present, otherwise taken from
# the inherited environment):
#   FINAL_CLEAN_DATASET  value passed to --dataset-csv
#   SPLIT_PREFIX         value passed to --split-prefix
#
# The job exits with the status of `ami-dataset`, so a failed split is
# reported to Slurm as a failed job.
#
# NOTE: keep every #SBATCH directive above the first executable command;
# sbatch stops parsing directives once it reaches one.

# Print a message to stderr and abort the job.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

# 1. Load the required modules
module load miniconda/3 || die "failed to load module miniconda/3"

# 2. Load your environment
conda activate ami-ml || die "failed to activate conda environment ami-ml"

# 3. Load the environment variables outside of python script
# allexport makes every assignment in .env an exported variable, so the
# values are visible to the child process as well as to this shell.
if [[ -f .env ]]; then
  set -o allexport
  # shellcheck source=/dev/null
  source .env || die "failed to source .env in ${PWD}"
  set +o allexport
else
  printf 'warning: no .env in %s; relying on inherited environment\n' \
    "${PWD}" >&2
fi

# Fail early with a clear message instead of passing empty arguments
# to the CLI.
: "${FINAL_CLEAN_DATASET:?must be set (e.g. in .env)}"
: "${SPLIT_PREFIX:?must be set (e.g. in .env)}"

# Keep track of time (bash increments SECONDS once per second after assignment)
SECONDS=0

# 4. Launch your script
# Expansions are quoted so values containing spaces or glob characters reach
# the CLI as single arguments.
ami-dataset split-dataset \
  --dataset-csv "$FINAL_CLEAN_DATASET" \
  --split-prefix "$SPLIT_PREFIX" \
  --max-instances 1000 \
  --min-instances 4
status=$?

# Print time taken to execute the script
echo "Time taken to split the dataset: $SECONDS seconds"

# Propagate the CLI's exit code; otherwise the echo above would make the job
# always exit 0.
exit "$status"