Virt-io
/

SillyTavern-Presets

Model card Files Files and versions Community

SillyTavern-Presets / Scripts /kobold-server.sh

Virt-io

LLAMA-3 v2.0

9a16162 10 months ago

history blame contribute delete

2.49 kB

	#!/usr/bin/bash


	# requires the gum package
	# GUM: https://github.com/charmbracelet/gum#installation
	# Uses conda & koboldcpp built from source
	# Assumes you are using nvidia

	# Activates the conda env named "kobold" so the `python` call at the end of
	# this script runs inside it.
	# if you get a conda error try this: `conda install conda-forge::libstdcxx-ng`
	if ! command -v conda >/dev/null 2>&1; then
		echo "conda was not found in PATH; install conda or fix PATH first." >&2
		exit 1
	fi
	# `conda shell.bash hook` prints shell code; eval'ing it makes `conda activate`
	# usable in this non-interactive shell.
	eval "$(conda shell.bash hook)"
	# Stop here rather than building/running with whatever python is on PATH.
	if ! conda activate kobold; then
		echo "Could not activate the 'kobold' conda env." >&2
		exit 1
	fi

	# USER variables -- CHANGE these to match your machine.

	# Address and port handed to koboldcpp through --host / --port.
	API_HOST="192.168.1.20"
	API_PORT="8001"

	# Thread counts handed to koboldcpp through --threads / --blasthreads.
	THREADS="8"
	BLAS_THREADS="16"

	# Folder whose entries are offered in the model picker.
	# (Tilde is left unquoted on purpose so the shell expands it.)
	MODEL_FOLDER_DIR=~/Downloads/GGUF

	# koboldcpp checkout that the script changes into before building/running.
	KOBOLD_DIR=~/Git/koboldcpp/

	# Credit banner: one gum-styled box, options listed one per line.
	gum style \
		--foreground 212 \
		--border-foreground 212 \
		--border double \
		--align center \
		--width 100 \
		--margin "1 2" \
		--padding "2 4" \
		--bold \
		'Beep...Boop... Script created by Virt-io'

	# refer to https://github.com/LostRuins/koboldcpp?tab=readme-ov-file#osx-and-linux-manual-compiling for dependencies

	# CD to koboldcpp dir.
	# Quoted so a path containing spaces stays a single argument, and checked
	# because the git / make / python steps that follow only make sense inside
	# that checkout.
	cd "$KOBOLD_DIR" || {
		echo "Cannot cd to KOBOLD_DIR: $KOBOLD_DIR" >&2
		exit 1
	}

	# Optional update + rebuild of koboldcpp.
	# edit make flags to fit your needs
	echo "Git pull & build?"
	KOBOLD_BUILD=$(gum choose --selected.bold --selected.underline "NO" "YES")

	if [ "$KOBOLD_BUILD" = "YES" ]; then
		# As before, `make clean` runs only after a successful pull; a failed pull
		# is now reported instead of passing silently.
		# NOTE(review): relies on `gum spin` returning the wrapped command's exit status.
		if gum spin --spinner monkey --title "Pulling latest changes..." -- git pull; then
			make clean
		else
			echo "git pull failed; rebuilding the current checkout without 'make clean'." >&2
		fi

		# Abort on a failed build so "Rebuild completed" is never printed for a
		# broken build and the server is not started from it.
		if ! gum spin --show-output --spinner monkey --title "Re-building Koboldcpp..." -- \
			make -j8 \
			LLAMA_NO_LLAMAFILE=0 LLAMA_CUDA=1 LLAMA_CUBLAS=1 LLAMA_OPENBLAS=1 \
			LLAMA_FAST=1 LLAMA_NO_CCACHE=1 LLAMA_CUDA_MMV_Y=2 LLAMA_CUDA_DMMV_X=64 \
			LLAMA_CUDA_DMMV_F16=1 LLAMA_CUDA_F16=1 LLAMA_NATIVE=1 LLAMA_LTO=1 \
			LLAMA_AVX=1 LLAMA_AVX2=1 LLAMA_FMA=1 LLAMA_F16C=1; then
			echo "Rebuild failed" >&2
			exit 1
		fi
		echo "Rebuild completed"
	elif [ "$KOBOLD_BUILD" = "NO" ]; then
		echo "Skipping rebuild."
	fi

	# lists models
	echo "Select Model"
	# Glob instead of parsing `ls`, so names containing spaces or glob characters
	# reach the picker as single entries.
	MODEL_PATHS=("$MODEL_FOLDER_DIR"/*)
	# With no match the pattern stays literal, so the first element does not exist.
	if [ ! -e "${MODEL_PATHS[0]}" ]; then
		echo "No models found in $MODEL_FOLDER_DIR" >&2
		exit 1
	fi
	# ${...##*/} strips the directory part so the picker shows bare names, as `ls` did.
	MODEL=$(gum choose --height=30 --selected.bold --selected.underline "${MODEL_PATHS[@]##*/}")
	# An empty result means the picker was cancelled or failed.
	if [ -z "$MODEL" ]; then
		echo "No model selected" >&2
		exit 1
	fi
	# uncomment if you use sharded models. It will take the first file in a dir and load it.
	#SHARDED_MODEL=$(ls -p "$MODEL_FOLDER_DIR/$MODEL" | grep -v / | head -1)
	#MODEL=$MODEL/$SHARDED_MODEL
	echo "$MODEL has been selected"

	# Ask how many layers to pass to --gpulayers.
	echo "Layers to Offload"
	# --placeholder only shows hint text; pressing Enter on an empty prompt yields "".
	LAYERS=$(gum input --placeholder "99")
	# Fall back to the advertised 99 so --gpulayers never receives an empty value
	# (an empty value would make it consume the next flag instead).
	LAYERS=${LAYERS:-99}
	echo "$LAYERS layers have been offloaded"

	# Pick the value later passed to --contextsize from a fixed list.
	printf '%s\n' 'Context Size'
	CONTEXT="$(
		gum choose --height=10 --selected.bold --selected.underline \
			"4096" "8192" "12288" "16384" "32768"
	)"
	printf 'Using a context size of %s\n' "$CONTEXT"

	# combined user flags
	# Kept as an array (one element per argument) so a model path containing
	# spaces, or an empty value, cannot shift the remaining arguments.
	USER_FLAGS=(
		--host "$API_HOST"
		--port "$API_PORT"
		--flashattention
		--blasbatchsize 2048
		--threads "$THREADS"
		--blasthreads "$BLAS_THREADS"
		--usecublas normal
		--contextsize "$CONTEXT"
		--gpulayers "$LAYERS"
		--model "$MODEL_FOLDER_DIR/$MODEL"
	)

	# runs koboldcpp
	python koboldcpp.py --skiplauncher "${USER_FLAGS[@]}"