Update webui.py
webui.py CHANGED
@@ -24,6 +24,7 @@ from huggingface_hub import hf_hub_download
 
 dir = os.getcwd()
 
+@spaces.GPU
 def load_model(path, n_ctx, n_gpu_layers, n_threads, verbose, f16_kv, logits_all, vocab_only, use_mmap, use_mlock, n_batch, last_n_tokens_size, low_vram, rope_freq_base, rope_freq_scale):
     try:
         global llm
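
The `@spaces.GPU` decorator (and the matching one added to `generate_text` below) is the ZeroGPU mechanism: hardware is attached only while the decorated call runs. A minimal sketch of the pattern, assuming the standard `spaces` package available inside Hugging Face Spaces; the decorated function is a hypothetical stand-in, not code from webui.py:

```python
# ZeroGPU sketch: assumes the Hugging Face `spaces` package, which is only
# meaningful when the script runs inside a Space.
import spaces

@spaces.GPU  # a GPU is allocated for the duration of each call, then released
def run_inference(prompt: str) -> str:
    # hypothetical body; webui.py does its llama.cpp work at this point
    return prompt.upper()
```
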
@@ -57,15 +58,15 @@ def list_models(name):
 def render_md(text):
     return f"{text}"
 
-
-
-
-
-
-
-
-
-
+
+hf_hub_download(
+    repo_id="ehristoforu/LLMs",
+    filename="llama-2-7b-chat.ggmlv3.q2_K.bin",
+    local_dir="models",
+    force_download=True, resume_download=False,
+    cache_dir=".cache",
+)
+
 
 history = []
 
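
Because the new `hf_hub_download` call sits at module level, the model is fetched on every boot, and `force_download=True` discards any copy already on disk. A variant that skips the transfer when the file is already present (a sketch of an alternative, not what the commit does):

```python
import os
from huggingface_hub import hf_hub_download

MODEL_FILE = "llama-2-7b-chat.ggmlv3.q2_K.bin"  # the commit's default model

# Download only when the file is missing from models/, so a restart of an
# already-provisioned machine skips the large transfer.
if not os.path.exists(os.path.join("models", MODEL_FILE)):
    hf_hub_download(
        repo_id="ehristoforu/LLMs",
        filename=MODEL_FILE,
        local_dir="models",
        cache_dir=".cache",
    )
```
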
@@ -76,7 +77,7 @@ If a question does not make any sense, or is not factually coherent, explain why
 """
 '''
 
-
+@spaces.GPU
 def generate_text(message, history, system_prompt, preset, temperature, max_tokens, top_p, top_k, repeat_penalty):
     temp = ""
     input_prompt = f"[INST] <<SYS>>\nYou are {preset}. {system_prompt}.\n<</SYS>>\n\n "
@@ -149,11 +150,11 @@ with gr.Blocks(theme="theme-repo/STONE_Theme", title="TensorLM", css="style.css"
 
 
     with gr.Tab("🔽"):
-        gr.Markdown("## Download model from 🤗 HuggingFace.co")
+        gr.Markdown("""## Download model from 🤗 HuggingFace.co (DON'T WORK ON HF.space)""")
         with gr.Row():
             repo_id = gr.Textbox(label="REPO_ID", value="ehristoforu/LLMs", lines=1, max_lines=1, interactive=False)
-            filename = gr.Dropdown(label="FILENAME", interactive=
-            download_btn = gr.Button(value="Download")
+            filename = gr.Dropdown(label="FILENAME", interactive=False, choices=["llama-2-7b-chat.ggmlv3.q2_K.bin", "llama-2-13b-chat.ggmlv3.q2_K.bin", "codellama-7b-instruct.ggmlv3.Q2_K.bin", "codellama-13b-instruct.ggmlv3.Q2_K.bin", "saiga-13b.ggmlv3.Q4_1.bin", "saiga-30b.ggmlv3.Q3_K.bin"], value="", allow_custom_value=False)
+            download_btn = gr.Button(value="Download", interactive=False)
         logs=gr.Markdown()
     with gr.Tab("📖"):
         with gr.Tab("Notebook"):
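
The download widgets are disabled (`interactive=False`) rather than removed, since the model is now baked in at startup. For reference, the wiring such a button needs in Gradio looks roughly like this; it is a hypothetical, self-contained sketch, and the click handler appears nowhere in this commit:

```python
import gradio as gr
from huggingface_hub import hf_hub_download

# Hypothetical handler for a download tab like the one in the diff; the
# component names mirror the diff, but the function is an assumption.
def download_model(repo, file):
    hf_hub_download(repo_id=repo, filename=file,
                    local_dir="models", cache_dir=".cache")
    return f"Downloaded {file} to models/"

with gr.Blocks() as sketch:
    repo_id = gr.Textbox(label="REPO_ID", value="ehristoforu/LLMs")
    filename = gr.Textbox(label="FILENAME")
    download_btn = gr.Button("Download")
    logs = gr.Markdown()
    download_btn.click(download_model, inputs=[repo_id, filename], outputs=logs)
```
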
@@ -165,6 +166,7 @@ with gr.Blocks(theme="theme-repo/STONE_Theme", title="TensorLM", css="style.css"
             markdown = gr.Markdown()
 
     with gr.Tab("⚙️"):
+        gr.Markdown("## Settings is DON'T WORK ON HF.space.")
         with gr.Row():
             with gr.Column():
                 #with gr.Row():
@@ -172,7 +174,7 @@ with gr.Blocks(theme="theme-repo/STONE_Theme", title="TensorLM", css="style.css"
                 # chat_style = gr.Dropdown(label="Style of chat", choices=["bubble", "panel"], value="bubble", interactive=True, allow_custom_value=False)
                 with gr.Row():
                     gr.Markdown("### Engine")
-                    reload_model = gr.Button("Apply settings to model", interactive=
+                    reload_model = gr.Button("Apply settings to model", interactive=False)
                     n_ctx = gr.Slider(label="Number of CTX", minimum=1024, maximum=4056, value=2048, step=8, interactive=True)
                     n_gpu_layers = gr.Slider(label="Number of GPU layers", minimum=0, maximum=36, value=0, step=1, interactive=True)
                     n_threads = gr.Slider(label="Number of Threads", minimum=2, maximum=36, value=4, step=1, interactive=True)
@@ -220,8 +222,7 @@ with gr.Blocks(theme="theme-repo/STONE_Theme", title="TensorLM", css="style.css"
 
 
 demo.launch(
-    inbrowser=
-    server_port=5555,
+    inbrowser=False,
     debug=False,
     quiet=True,
     favicon_path="assets/favicon.png",
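
A closing note on the launch change, an inference rather than something the commit states: with `server_port=5555` gone, Gradio falls back to its default port, 7860, which is the port a Gradio Space serves on, and `inbrowser` has no effect on a hosted machine anyway. The default is easy to confirm locally:

```python
import gradio as gr

demo = gr.Interface(fn=lambda s: s, inputs="text", outputs="text")
# With server_port omitted, Gradio binds its default port, 7860 (the
# GRADIO_SERVER_PORT environment variable can still override it).
demo.launch(prevent_thread_lock=True)
print(demo.server_port)  # 7860, unless another process holds it
demo.close()
```
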