ehristoforu committed
Commit 0e57db6 · verified · 1 Parent(s): 1b76772

Update webui.py

Files changed (1)
  1. webui.py +17 -16
webui.py CHANGED
@@ -24,6 +24,7 @@ from huggingface_hub import hf_hub_download
 
 dir = os.getcwd()
 
+@spaces.GPU
 def load_model(path, n_ctx, n_gpu_layers, n_threads, verbose, f16_kv, logits_all, vocab_only, use_mmap, use_mlock, n_batch, last_n_tokens_size, low_vram, rope_freq_base, rope_freq_scale):
     try:
         global llm
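
For context: `@spaces.GPU` is the decorator from Hugging Face's `spaces` package, which requests a ZeroGPU slot for the duration of each call to the decorated function. A minimal sketch of the pattern (the function name and body here are illustrative, not from webui.py):

    import spaces

    @spaces.GPU  # allocates a GPU for each call on a ZeroGPU Space
    def heavy_inference(prompt):
        # model calls that need the GPU go inside the decorated function
        ...
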
@@ -57,15 +58,15 @@ def list_models(name):
 def render_md(text):
     return f"{text}"
 
-def download_model(repo_id, filename):
-    hf_hub_download(
-        repo_id=repo_id,
-        filename=filename,
-        local_dir="models",
-        force_download=True, resume_download=False,
-        cache_dir=".cache",
-    )
-    return f"Downloaded!"
+
+hf_hub_download(
+    repo_id="ehristoforu/LLMs",
+    filename="llama-2-7b-chat.ggmlv3.q2_K.bin",
+    local_dir="models",
+    force_download=True, resume_download=False,
+    cache_dir=".cache",
+)
+
 
 history = []
 
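The removed `download_model` helper becomes a module-level `hf_hub_download` call, so the default checkpoint is fetched once at import time. `hf_hub_download` returns the local path of the downloaded file; a minimal sketch (binding the result to `model_path` is an assumption, the diff discards the return value):

    from huggingface_hub import hf_hub_download

    # fetches the file into ./models and returns its local path
    model_path = hf_hub_download(
        repo_id="ehristoforu/LLMs",
        filename="llama-2-7b-chat.ggmlv3.q2_K.bin",
        local_dir="models",
        cache_dir=".cache",
    )
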
@@ -76,7 +77,7 @@ If a question does not make any sense, or is not factually coherent, explain why
 """
 '''
 
-
+@spaces.GPU
 def generate_text(message, history, system_prompt, preset, temperature, max_tokens, top_p, top_k, repeat_penalty):
     temp = ""
     input_prompt = f"[INST] <<SYS>>\nYou are {preset}. {system_prompt}.\n<</SYS>>\n\n "
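
The `[INST]`/`<<SYS>>` markers in `input_prompt` follow the standard Llama-2-chat template. A sketch of the multi-turn layout that format comes from (`build_prompt` is an illustrative helper, not part of webui.py):

    # canonical Llama-2-chat prompt layout
    def build_prompt(system_prompt, turns, message):
        prompt = f"[INST] <<SYS>>\n{system_prompt}\n<</SYS>>\n\n"
        for user, assistant in turns:  # prior (user, assistant) pairs
            prompt += f"{user} [/INST] {assistant} </s><s>[INST] "
        return prompt + f"{message} [/INST]"
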
@@ -149,11 +150,11 @@ with gr.Blocks(theme="theme-repo/STONE_Theme", title="TensorLM", css="style.css"
 
 
     with gr.Tab("💽"):
-        gr.Markdown("## Download model from 🤗 HuggingFace.co")
+        gr.Markdown("""## Download model from 🤗 HuggingFace.co (DOESN'T WORK ON HF.space)""")
         with gr.Row():
             repo_id = gr.Textbox(label="REPO_ID", value="ehristoforu/LLMs", lines=1, max_lines=1, interactive=False)
-            filename = gr.Dropdown(label="FILENAME", interactive=True, choices=["llama-2-7b-chat.ggmlv3.q2_K.bin", "llama-2-13b-chat.ggmlv3.q2_K.bin", "codellama-7b-instruct.ggmlv3.Q2_K.bin", "codellama-13b-instruct.ggmlv3.Q2_K.bin", "saiga-13b.ggmlv3.Q4_1.bin", "saiga-30b.ggmlv3.Q3_K.bin"], value="", allow_custom_value=False)
-            download_btn = gr.Button(value="Download")
+            filename = gr.Dropdown(label="FILENAME", interactive=False, choices=["llama-2-7b-chat.ggmlv3.q2_K.bin", "llama-2-13b-chat.ggmlv3.q2_K.bin", "codellama-7b-instruct.ggmlv3.Q2_K.bin", "codellama-13b-instruct.ggmlv3.Q2_K.bin", "saiga-13b.ggmlv3.Q4_1.bin", "saiga-30b.ggmlv3.Q3_K.bin"], value="", allow_custom_value=False)
+            download_btn = gr.Button(value="Download", interactive=False)
         logs = gr.Markdown()
     with gr.Tab("📒"):
         with gr.Tab("Notebook"):
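
Setting `interactive=False` is how Gradio greys out a component without removing it from the layout, which is the mechanism used here to disable the download tab on the Space. A minimal sketch of the pattern (choices are placeholders):

    import gradio as gr

    with gr.Blocks() as demo:
        # rendered greyed out; the user cannot change or click them
        filename = gr.Dropdown(label="FILENAME", choices=["a.bin", "b.bin"], interactive=False)
        download_btn = gr.Button("Download", interactive=False)
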
@@ -165,6 +166,7 @@ with gr.Blocks(theme="theme-repo/STONE_Theme", title="TensorLM", css="style.css"
         markdown = gr.Markdown()
 
     with gr.Tab("⚙️"):
+        gr.Markdown("## Settings DON'T WORK ON HF.space.")
         with gr.Row():
             with gr.Column():
                 #with gr.Row():
@@ -172,7 +174,7 @@ with gr.Blocks(theme="theme-repo/STONE_Theme", title="TensorLM", css="style.css"
                 # chat_style = gr.Dropdown(label="Style of chat", choices=["bubble", "panel"], value="bubble", interactive=True, allow_custom_value=False)
                 with gr.Row():
                     gr.Markdown("### Engine")
-                    reload_model = gr.Button("Apply settings to model", interactive=True)
+                    reload_model = gr.Button("Apply settings to model", interactive=False)
                     n_ctx = gr.Slider(label="Number of CTX", minimum=1024, maximum=4056, value=2048, step=8, interactive=True)
                     n_gpu_layers = gr.Slider(label="Number of GPU layers", minimum=0, maximum=36, value=0, step=1, interactive=True)
                     n_threads = gr.Slider(label="Number of Threads", minimum=2, maximum=36, value=4, step=1, interactive=True)
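
The Engine sliders mirror parameters of llama-cpp-python's `Llama` constructor, which `load_model` presumably forwards them to. A sketch using the slider defaults (the model path here is an assumption):

    from llama_cpp import Llama

    llm = Llama(
        model_path="models/llama-2-7b-chat.ggmlv3.q2_K.bin",  # assumed path
        n_ctx=2048,      # "Number of CTX" slider default
        n_gpu_layers=0,  # "Number of GPU layers" slider default
        n_threads=4,     # "Number of Threads" slider default
    )
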
@@ -220,8 +222,7 @@ with gr.Blocks(theme="theme-repo/STONE_Theme", title="TensorLM", css="style.css"
 
 
 demo.launch(
-    inbrowser=True,
-    server_port=5555,
+    inbrowser=False,
     debug=False,
     quiet=True,
     favicon_path="assets/favicon.png",
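
On a Space, Gradio picks up its host and port from the environment, so the hard-coded `server_port` goes away and `inbrowser` is turned off. A sketch of a local-development variant that restores the removed behaviour, assuming webui.py runs on your own machine:

    # local run: open a browser tab and pin the port
    demo.launch(
        inbrowser=True,
        server_port=5555,
        debug=False,
        quiet=True,
        favicon_path="assets/favicon.png",
    )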