TheBobBob committed on
Commit
ee51c96
·
verified ·
1 Parent(s): 8f54dba

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -8
app.py CHANGED
@@ -146,8 +146,15 @@ def create_vector_db(final_items):
146
  documents = []
147
  import torch
148
  from llama_cpp import Llama
149
- CONTEXT_SIZE = 1024
150
- llm = Llama(model="HuggingFaceH4/zephyr-7b-beta", n_ctx = CONTEXT_SIZE)
 
 
 
 
 
 
 
151
 
152
  for item in final_items:
153
  prompt = f"""
@@ -159,15 +166,16 @@ def create_vector_db(final_items):
159
 
160
  Here is the antimony segment to summarize: {item}
161
  """
162
-
163
- model_output = llm(
164
  prompt,
165
- max_tokens = None,
166
- temperature = 0.3,
167
- top_p = 0.1
 
168
  )
169
 
170
- final_result = model_output["choices"][0]["text"].strip()
171
  documents.append(final_result)
172
 
173
  if documents:
 
146
  documents = []
147
  import torch
148
  from llama_cpp import Llama
149
+
150
+ llm = Llama(
151
+ model_path = hf_hub_download(
152
+ repo_id = os.environ.get("REPO_ID", "TheBloke/Llama-2-7b-Chat-GGUF"),
153
+ filename = os.environ.get("MODEL_FILE", "llama-2-7b-chat.Q5_0.gguf"),
154
+ ),
155
+ n_ctx = 2048,
156
+ n_gpu_layers = 10,
157
+ )
158
 
159
  for item in final_items:
160
  prompt = f"""
 
166
 
167
  Here is the antimony segment to summarize: {item}
168
  """
169
+
170
+ output = llm(
171
  prompt,
172
+ temperature = 0.1,
173
+ top_p = 0.9,
174
+ top_k = 20,
175
+ stream=False,
176
  )
177
 
178
+ final_result = output["choices"][0]["text"]
179
  documents.append(final_result)
180
 
181
  if documents: