"""Construct a LangChain ``LlamaCpp`` model whose weights path comes from config.yml."""

import box
import yaml
from langchain.llms import CTransformers
from langchain.llms import LlamaCpp

# NOTE(review): this dict is not referenced anywhere in this module; it is kept
# because other modules may import it. Confirm and remove if it is unused.
config = {'max_new_tokens': 2000, 'temperature': 0.01, "context_length": 4000}

# Import config vars. This runs at import time, so config.yml must be
# resolvable from the current working directory when the module is imported.
with open('config.yml', 'r', encoding='utf8') as ymlfile:
    cfg = box.Box(yaml.safe_load(ymlfile))


def setup_llm(
    *,
    temperature: float = 0.75,
    top_p: float = 1,
    n_ctx: int = 4096,
    streaming: bool = True,
    verbose: bool = True,
) -> LlamaCpp:
    """Create the ``LlamaCpp`` model described by ``cfg.MODEL_BIN_PATH``.

    Every argument is keyword-only and is forwarded unchanged to the
    ``LlamaCpp`` constructor under the same name. The defaults are the values
    this function previously hard-coded, so ``setup_llm()`` with no arguments
    builds the same model as before.

    Args:
        temperature: Forwarded as ``LlamaCpp(temperature=...)``.
        top_p: Forwarded as ``LlamaCpp(top_p=...)``.
        n_ctx: Forwarded as ``LlamaCpp(n_ctx=...)``.
        streaming: Forwarded as ``LlamaCpp(streaming=...)``.
        verbose: Forwarded as ``LlamaCpp(verbose=...)``.

    Returns:
        The newly constructed ``LlamaCpp`` instance.
    """
    return LlamaCpp(
        streaming=streaming,
        # Path is read from config.yml; an earlier inline note gave
        # "mistral-7b-instruct-v0.1.Q4_K_M.gguf" as an example value.
        model_path=cfg.MODEL_BIN_PATH,
        temperature=temperature,
        top_p=top_p,
        verbose=verbose,
        n_ctx=n_ctx,
    )