from langchain.llms import CTransformers
import box
import yaml
from langchain.llms import LlamaCpp
# Module-level generation defaults. Not read by setup_llm() in this file;
# kept as a public name in case other modules import it.
config = {
    'max_new_tokens': 2000,
    'temperature': 0.01,
    "context_length": 4000,
}
# Read config.yml (resolved against the current working directory) once at
# import time. The parsed YAML is wrapped in a Box so settings can be read
# as attributes, e.g. cfg.MODEL_BIN_PATH.
with open('config.yml', 'r', encoding='utf8') as ymlfile:
    _parsed_settings = yaml.safe_load(ymlfile)
cfg = box.Box(_parsed_settings)


def setup_llm(
    *,
    temperature: float = 0.75,
    top_p: float = 1,
    n_ctx: int = 4096,
    streaming: bool = True,
    verbose: bool = True,
) -> "LlamaCpp":
    """Construct and return a LlamaCpp instance for the configured model file.

    The model file path is read from ``cfg.MODEL_BIN_PATH``. Every other
    setting is a keyword-only argument whose default equals the value that
    used to be hard-coded here, so calling ``setup_llm()`` with no arguments
    builds the same object as before.

    Args:
        temperature: Forwarded to ``LlamaCpp(temperature=...)``.
        top_p: Forwarded to ``LlamaCpp(top_p=...)``.
        n_ctx: Forwarded to ``LlamaCpp(n_ctx=...)``.
        streaming: Forwarded to ``LlamaCpp(streaming=...)``.
        verbose: Forwarded to ``LlamaCpp(verbose=...)``.

    Returns:
        The newly constructed ``LlamaCpp`` object.
    """
    # NOTE(review): the module-level `config` dict lists temperature 0.01 and
    # an earlier CTransformers-based variant of this function read
    # cfg.TEMPERATURE / cfg.MAX_NEW_TOKENS. The 0.75 default is kept here for
    # backward compatibility -- confirm which value is actually intended.
    return LlamaCpp(
        # Path comes from config.yml,
        # e.g. "mistral-7b-instruct-v0.1.Q4_K_M.gguf".
        model_path=cfg.MODEL_BIN_PATH,
        temperature=temperature,
        top_p=top_p,
        n_ctx=n_ctx,
        streaming=streaming,
        verbose=verbose,
    )