import torch
# Offload all model layers to GPU when CUDA is available; otherwise run CPU-only
if torch.cuda.is_available():
    torch_device = "cuda"
    gpu_layers = 100
else:
    torch_device = "cpu"
    gpu_layers = 0
print("Running on device:", torch_device)
threads = 8  # torch.get_num_threads()
print("CPU threads:", threads)
# Qwen 2 0.5B (small, fast) model parameters
temperature: float = 0.1
top_k: int = 3
top_p: float = 1.0
repetition_penalty: float = 1.15
#flan_alpaca_repetition_penalty: float = 1.3
last_n_tokens: int = 64
max_new_tokens: int = 1024
seed: int = 42
reset: bool = False
stream: bool = True
threads: int = threads
batch_size: int = 128
context_length: int = 4096
sample = True
# Bedrock parameters
max_tokens = 4096
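
# A minimal sketch of where the Bedrock max_tokens cap would be applied,
# assuming calls go through boto3's bedrock-runtime Converse API. boto3,
# call_bedrock_example and the model ID are assumptions for illustration.
def call_bedrock_example(prompt: str,
                         model_id: str = "anthropic.claude-3-haiku-20240307-v1:0"):
    import boto3  # assumed dependency

    client = boto3.client("bedrock-runtime")
    response = client.converse(
        modelId=model_id,
        messages=[{"role": "user", "content": [{"text": prompt}]}],
        inferenceConfig={"maxTokens": max_tokens, "temperature": temperature},
    )
    # The Converse API returns the reply under output.message.content
    return response["output"]["message"]["content"][0]["text"]
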
class CtransInitConfig_gpu:
    """Model-loading settings (GPU-enabled by default)."""

    def __init__(self,
                 last_n_tokens=last_n_tokens,
                 seed=seed,
                 n_threads=threads,
                 n_batch=batch_size,
                 n_ctx=context_length,  # context window size, distinct from the Bedrock max_tokens output cap
                 n_gpu_layers=gpu_layers):
        self.last_n_tokens = last_n_tokens
        self.seed = seed
        self.n_threads = n_threads
        self.n_batch = n_batch
        self.n_ctx = n_ctx
        self.n_gpu_layers = n_gpu_layers
        # self.stop: list[str] = field(default_factory=lambda: [stop_string])

    def update_gpu(self, new_value):
        self.n_gpu_layers = new_value

class CtransInitConfig_cpu(CtransInitConfig_gpu):
    """Same settings as the GPU config, but with all layers kept on CPU."""

    def __init__(self):
        super().__init__()
        self.n_gpu_layers = 0

gpu_config = CtransInitConfig_gpu()
cpu_config = CtransInitConfig_cpu()
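
# A minimal sketch of how an init config could drive model loading, assuming
# llama-cpp-python as the backend: its Llama constructor accepts n_ctx,
# n_batch, n_threads, n_gpu_layers and seed. load_model_example is a
# hypothetical helper; last_n_tokens is not forwarded because Llama names
# the equivalent argument last_n_tokens_size.
def load_model_example(model_path: str, config=gpu_config):
    from llama_cpp import Llama  # assumed dependency

    return Llama(
        model_path=model_path,  # path to a local GGUF file
        n_ctx=config.n_ctx,
        n_batch=config.n_batch,
        n_threads=config.n_threads,
        n_gpu_layers=config.n_gpu_layers,
        seed=config.seed,
    )
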
class CtransGenGenerationConfig:
    """Per-request sampling settings passed at generation time."""

    def __init__(self, temperature=temperature,
                 top_k=top_k,
                 top_p=top_p,
                 repeat_penalty=repetition_penalty,
                 seed=seed,
                 stream=stream,
                 max_tokens=max_new_tokens):
        self.temperature = temperature
        self.top_k = top_k
        self.top_p = top_p
        self.repeat_penalty = repeat_penalty
        self.seed = seed
        self.max_tokens = max_tokens
        self.stream = stream

    def update_temp(self, new_value):
        self.temperature = new_value
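
# A minimal end-to-end sketch, again assuming llama-cpp-python: the generation
# config's attribute names (temperature, top_k, top_p, repeat_penalty, seed,
# stream, max_tokens) match Llama.create_completion keyword arguments, so
# vars() can unpack an instance directly. The model path is a hypothetical
# placeholder.
if __name__ == "__main__":
    config = gpu_config if torch_device == "cuda" else cpu_config
    llm = load_model_example("models/qwen2-0_5b-instruct.gguf", config)

    gen_config = CtransGenGenerationConfig()
    gen_config.update_temp(0.3)  # runtime temperature adjustment

    # With stream=True the call yields completion chunks; print text as it arrives
    for chunk in llm("What is a context window?", **vars(gen_config)):
        print(chunk["choices"][0]["text"], end="", flush=True)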