Namitg02 committed
Commit 3376a6f · verified · 1 Parent(s): bfcdf28

Update app.py

Files changed (1):
  1. app.py +1 -31
app.py CHANGED
@@ -8,13 +8,7 @@ import json
 import pandas as pd
 from llama_cpp import Llama
 from langchain_community.llms import LlamaCpp
-
-from transformers import AutoTokenizer, GenerationConfig #, AutoModelForCausalLM
-#from transformers import AutoModelForCausalLM, AutoModel
-from transformers import TextIteratorStreamer
 from threading import Thread
-from ctransformers import AutoModelForCausalLM, AutoConfig, Config #, AutoTokenizer
-
 from huggingface_hub import Repository, upload_file
 import os
 
@@ -50,27 +44,6 @@ If you don't know the answer, just say "I do not know." Don't make up an answer.
 llm_model = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"
 # TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF and tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf not working, TinyLlama/TinyLlama-1.1B-Chat-v0.6, andrijdavid/TinyLlama-1.1B-Chat-v1.0-GGUF"
 
-#tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
-#initiate model and tokenizer
-
-#generation_config = AutoConfig.from_pretrained(
-#    "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
-#    max_new_tokens= 300,
-#    do_sample=True,
-#    stream = streamer,
-#    top_p=0.95,
-#    temperature=0.4,
-#    stream = True
-#    eos_token_id=terminators
-#)
-# send additional parameters to model for generation
-#terminators = [
-#    tokenizer.eos_token_id, # End-of-Sequence Token that indicates where the model should consider the text sequence to be complete
-#    tokenizer.convert_tokens_to_ids("<|eot_id|>") # Converts a token strings in a single/ sequence of integer id using the vocabulary
-#]
-# indicates the end of a sequence
-
-#model = llama_cpp.Llama(model_path = tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf AutoModelForCausalLM.from_pretrained(llm_model, model_file = "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf", model_type="llama", gpu_layers=0, config = generation_config)
 model = Llama(
     model_path="./tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf",
     # chat_format="llama-2",
@@ -82,6 +55,7 @@ model = Llama(
     # callback_manager=callback_manager,
     # verbose=True, # Verbose is required to pass to the callback manager
 )
+#initiate model and tokenizer
 
 def search(query: str, k: int = 2 ):
     """a function that embeds a new query and returns the most probable results"""
@@ -136,15 +110,11 @@ def talk(prompt, history):
     # formatted_prompt_with_history = formatted_prompt_with_history[:600] # to avoid memory issue
     # print(formatted_prompt_with_history)
     messages = [{"role":"system","content":SYS_PROMPT},{"role":"user","content":formatted_prompt}]
-    # messages = "role":"system","content":SYS_PROMPT},{"role":"user","content":formatted_prompt}]
-    print(messages)
     # binding the system context and new prompt for LLM
     # the chat template structure should be based on text generation model format
-    print("check6")
 
     # indicates the end of a sequence
     stream = model.create_chat_completion(messages = messages, max_tokens=1000, stop=["</s>"], stream=False)
-    # stream = model(prompt = messages, max_tokens=1000, stop=["</s>"],echo=True, stream=False)
     print(f"{stream}")
     print("check 7")
     print(stream['choices'][0]['message']['content'])
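
Note that create_chat_completion with stream=False returns a single OpenAI-style dict, which is what the stream['choices'][0]['message']['content'] lookup above unpacks, despite the variable being named stream. A short sketch of both modes, assuming the same messages list:

# Non-streaming: one complete OpenAI-style response dict.
result = model.create_chat_completion(messages=messages, max_tokens=1000, stop=["</s>"], stream=False)
print(result["choices"][0]["message"]["content"])

# Streaming: stream=True yields chunks whose "delta" carries incremental text.
for chunk in model.create_chat_completion(messages=messages, max_tokens=1000, stop=["</s>"], stream=True):
    piece = chunk["choices"][0]["delta"].get("content", "")
    print(piece, end="", flush=True)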
 