kajdun committed
Commit 0d80971 · 1 Parent(s): 0d585c4

Update handler.py

Files changed (1):
handler.py +3 -3
handler.py CHANGED
@@ -7,7 +7,7 @@ os.environ['FORCE_CMAKE'] = "1"
 import sys
 import subprocess
 subprocess.check_call([sys.executable, '-m', 'pip', 'install',
-    'llama-cpp-python==0.1.78'])
+    'llama-cpp-python'])
 
 from llama_cpp import Llama
 
@@ -22,13 +22,13 @@ cpu_count: int = int(_) if _ else 1
 
 
 
-MAX_INPUT_TOKEN_LENGTH = 4000
+MAX_INPUT_TOKEN_LENGTH = 3072
 MAX_MAX_NEW_TOKENS = 2048
 DEFAULT_MAX_NEW_TOKENS = 1024
 
 class EndpointHandler():
     def __init__(self, path=""):
-        self.llm = Llama(model_path="/repository/iubaris-13b-v3_ggml_Q4_K_S.bin", n_ctx=4000, n_gpu_layers=50, n_threads=cpu_count, verbose=True)
+        self.llm = Llama(model_path="/repository/iubaris-13b-v3_ggml_Q4_K_M.gguf", n_ctx=MAX_INPUT_TOKEN_LENGTH, n_gpu_layers=50, n_threads=cpu_count, verbose=True)
 
     def get_input_token_length(self, message: str) -> int:
         input_ids = self.model([message.encode('utf-8')])
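A note on the unchanged context lines: get_input_token_length reads self.model, while __init__ only assigns self.llm, so the token-length check would raise AttributeError as written. Below is a minimal sketch, not part of this commit, of how the count could be obtained through llama-cpp-python's Llama.tokenize; the model path and the 3072 context size are taken from the diff, and the standalone llm variable and function are illustrative assumptions.

# Illustrative sketch (not part of this commit): token counting via
# llama-cpp-python's Llama.tokenize. Path and context size mirror the diff above.
from llama_cpp import Llama

llm = Llama(
    model_path="/repository/iubaris-13b-v3_ggml_Q4_K_M.gguf",  # path from the diff
    n_ctx=3072,        # MAX_INPUT_TOKEN_LENGTH after this commit
    n_gpu_layers=50,
    verbose=True,
)

def get_input_token_length(message: str) -> int:
    # Llama.tokenize expects UTF-8 bytes and returns a list of token ids.
    input_ids = llm.tokenize(message.encode('utf-8'))
    return len(input_ids)

With MAX_INPUT_TOKEN_LENGTH now matching n_ctx, a handler could reject any prompt where get_input_token_length(message) exceeds MAX_INPUT_TOKEN_LENGTH before invoking the model.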