########################
# Install nginx (one-time setup; kept commented out for reference)
#sudo apt update
#sudo apt install nginx
#sudo vi /etc/nginx/sites-available/default  # edit
#sudo systemctl start nginx

###########################
# Launch local servers
# NOTE(review): every server command below runs in the foreground, so
# presumably each one is meant to be started in its own terminal — confirm.

# Abort if the working directory is missing: the llama.cpp commands below use
# paths relative to it (../llama-server and the .gguf model files).
cd /data/2024/1018chatbotarena/llama.cpp/download || exit 1

# vLLM: calm3-22b-chat, CUDA device 0, port 8011
# ${VLLM_API_KEY:?...} fails early with a clear message instead of passing an
# empty/missing value to --api-key.
export CUDA_VISIBLE_DEVICES=0
python -m vllm.entrypoints.openai.api_server --model cyberagent/calm3-22b-chat \
  --max-model-len 4096 --port 8011 \
  --gpu-memory-utilization 0.4 --trust-remote-code \
  --quantization bitsandbytes --load-format bitsandbytes \
  --api-key "${VLLM_API_KEY:?VLLM_API_KEY must be set}"

# vLLM: Tanuki-8B, CUDA device 0, port 8012
export CUDA_VISIBLE_DEVICES=0
python -m vllm.entrypoints.openai.api_server --model weblab-GENIAC/Tanuki-8B-dpo-v1.0 \
  --max-model-len 4096 --port 8012 \
  --gpu-memory-utilization 0.2 --trust-remote-code \
  --quantization bitsandbytes --load-format bitsandbytes \
  --api-key "${VLLM_API_KEY:?VLLM_API_KEY must be set}"

# llama.cpp: Swallow 8B (Q8_0 GGUF), CUDA device 0, port 8010
export CUDA_VISIBLE_DEVICES=0
../llama-server -m tokyotech-llm-Llama-3.1-Swallow-8B-Instruct-v0.1-Q8_0.gguf \
  --n_gpu_layers 100 --port 8010

# llama.cpp: llm-jp-3 13B (Q8_0 GGUF), CUDA device 0, port 8016
export CUDA_VISIBLE_DEVICES=0
../llama-server -m llm-jp-3-13b-instruct-Q8_0.gguf \
  --n_gpu_layers 100 --port 8016

# vLLM: Swallow 70B, CUDA device 1, port 8019
export CUDA_VISIBLE_DEVICES=1
python -m vllm.entrypoints.openai.api_server --model tokyotech-llm/Llama-3.1-Swallow-70B-Instruct-v0.1 \
  --max-model-len 4096 --port 8019 \
  --gpu-memory-utilization 0.6 --trust-remote-code \
  --quantization bitsandbytes --load-format bitsandbytes \
  --api-key "${VLLM_API_KEY:?VLLM_API_KEY must be set}"

#########################
# Launch ngrok: open a tunnel to http://localhost:8765
# NOTE(review): 8765 is not one of the server ports above; presumably the
# nginx instance configured earlier listens there — confirm.
ngrok http http://localhost:8765