Namitg02 committed
Commit 76d7181 (verified) · 1 Parent(s): 1042c4f

Update app.py

Files changed (1): app.py (+7 -7)
app.py CHANGED
@@ -54,20 +54,20 @@ llm_model = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"
  tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
  #initiate model and tokenizer
 
- generation_config = AutoConfig.from_pretrained(
- "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
- max_new_tokens= 300,
+ #generation_config = AutoConfig.from_pretrained(
+ # "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
+ # max_new_tokens= 300,
  # do_sample=True,
  # stream = streamer,
- top_p=0.95,
- temperature=0.4,
- stream = True
+ # top_p=0.95,
+ # temperature=0.4,
+ # stream = True
  # eos_token_id=terminators
  )
  # send additional parameters to model for generation
 
  #model = llama_cpp.Llama(model_path = tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf AutoModelForCausalLM.from_pretrained(llm_model, model_file = "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf", model_type="llama", gpu_layers=0, config = generation_config)
- model = Llama(
+ model = Llama.from_pretrained(
  model_path="./tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf",
  chat_format="llama-2",
  n_gpu_layers = 0,
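
The change comments out the transformers AutoConfig-based generation_config and switches the loader from the plain Llama(...) constructor to Llama.from_pretrained(...). For reference, a minimal sketch of that pattern with llama-cpp-python is below; in current releases, Llama.from_pretrained takes a Hub repo_id and filename (and downloads the GGUF) rather than a local model_path, and sampling options such as max_tokens, temperature, and top_p are passed per generation call. The repo_id, filename, and prompt shown are illustrative, not part of the commit.

# Sketch only (not the committed code): load the quantized GGUF via
# llama-cpp-python's Llama.from_pretrained and pass the former
# generation_config values at generation time instead.
from llama_cpp import Llama

llm = Llama.from_pretrained(
    repo_id="TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",  # Hub repo holding the GGUF files
    filename="tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf",   # quantized weights to fetch
    chat_format="llama-2",
    n_gpu_layers=0,  # CPU only, as in the commit
)

# Sampling parameters formerly set via AutoConfig are supplied per call.
out = llm.create_chat_completion(
    messages=[{"role": "user", "content": "Hello!"}],  # illustrative prompt
    max_tokens=300,
    temperature=0.4,
    top_p=0.95,
)
print(out["choices"][0]["message"]["content"])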