Spaces:
Sleeping
Sleeping
Tobias Bergmann
committed on
Commit
·
9329033
1
Parent(s):
ef2124d
gguf pipe
Browse files
- app.py +26 -13
- requirements.txt +2 -1
app.py
CHANGED
@@ -1,31 +1,44 @@
|
|
1 |
-
import
|
|
|
2 |
import gradio as gr
|
3 |
from typing import Tuple, List
|
4 |
|
5 |
deepsparse.cpu.print_hardware_capability()
|
6 |
|
7 |
-
MODEL_ID = "hf:mgoin/Meta-Llama-3-8B-Instruct-pruned50-quant-ds"
|
8 |
-
|
9 |
DESCRIPTION = f"""
|
10 |
-
# Chat with
|
11 |
-
|
12 |
-
Model ID: {MODEL_ID[len("hf:"):]}
|
13 |
"""
|
14 |
|
15 |
MAX_MAX_NEW_TOKENS = 1024
|
16 |
DEFAULT_MAX_NEW_TOKENS = 200
|
17 |
|
18 |
-
#
|
19 |
-
|
20 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
task="text-generation",
|
22 |
-
|
23 |
-
|
24 |
-
prompt_sequence_length=8,
|
25 |
-
num_cores=8,
|
26 |
)
|
27 |
|
28 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
def clear_and_save_textbox(message: str) -> Tuple[str, str]:
|
30 |
return "", message
|
31 |
|
|
|
1 |
+
from llama_cpp import Llama
|
2 |
+
from huggingface_hub import hf_hub_download
|
3 |
import gradio as gr
|
4 |
from typing import Tuple, List
|
5 |
|
6 |
deepsparse.cpu.print_hardware_capability()
|
7 |
|
|
|
|
|
8 |
DESCRIPTION = f"""
|
9 |
+
# Chat with Arco 500M as GGUF on CPU
|
|
|
|
|
10 |
"""
|
11 |
|
12 |
MAX_MAX_NEW_TOKENS = 1024
|
13 |
DEFAULT_MAX_NEW_TOKENS = 200
|
14 |
|
15 |
+
# Download the GGUF file
|
16 |
+
model_path = hf_hub_download(
|
17 |
+
repo_id="ijohn07/arco-plus-Q8_0-GGUF",
|
18 |
+
filename="arco-plus-q8_0.gguf",
|
19 |
+
repo_type="model"
|
20 |
+
)
|
21 |
+
# Load the GGUF model
|
22 |
+
llm = Llama(model_path=model_path)
|
23 |
+
|
24 |
+
# Setup the pipeline
|
25 |
+
pipe = pipeline(
|
26 |
task="text-generation",
|
27 |
+
model=llm, # Passes the loaded Llama model as the model
|
28 |
+
max_new_tokens=MAX_MAX_NEW_TOKENS, # Sets the maximum number of tokens the model generates
|
|
|
|
|
29 |
)
|
30 |
|
31 |
|
32 |
+
# Setup the engine
|
33 |
+
#pipe = Pipeline.create(
|
34 |
+
# task="text-generation",
|
35 |
+
# model_path=MODEL_ID,
|
36 |
+
# sequence_length=MAX_MAX_NEW_TOKENS,
|
37 |
+
# prompt_sequence_length=8,
|
38 |
+
# num_cores=8,
|
39 |
+
#)
|
40 |
+
|
41 |
+
|
42 |
def clear_and_save_textbox(message: str) -> Tuple[str, str]:
|
43 |
return "", message
|
44 |
|
requirements.txt
CHANGED
@@ -1,3 +1,4 @@
|
|
1 |
-
deepsparse-nightly==1.8.0.20240502
|
2 |
transformers
|
|
|
|
|
3 |
gradio
|
|
|
|
|
1 |
transformers
|
2 |
+
llama_cpp
|
3 |
+
huggingface_hub
|
4 |
gradio
|