Tobias Bergmann committed on
Commit
9329033
·
1 Parent(s): ef2124d
Files changed (2) hide show
  1. app.py +26 -13
  2. requirements.txt +2 -1
app.py CHANGED
@@ -1,31 +1,44 @@
1
- import deepsparse
 
2
  import gradio as gr
3
  from typing import Tuple, List
4
 
5
  deepsparse.cpu.print_hardware_capability()
6
 
7
- MODEL_ID = "hf:mgoin/Meta-Llama-3-8B-Instruct-pruned50-quant-ds"
8
-
9
  DESCRIPTION = f"""
10
- # Chat with an Efficient Llama-3-8B-Instruct Model on CPU with DeepSparse
11
-
12
- Model ID: {MODEL_ID[len("hf:"):]}
13
  """
14
 
15
  MAX_MAX_NEW_TOKENS = 1024
16
  DEFAULT_MAX_NEW_TOKENS = 200
17
 
18
- # Setup the engine
19
- from deepsparse.legacy import Pipeline
20
- pipe = Pipeline.create(
 
 
 
 
 
 
 
 
21
  task="text-generation",
22
- model_path=MODEL_ID,
23
- sequence_length=MAX_MAX_NEW_TOKENS,
24
- prompt_sequence_length=8,
25
- num_cores=8,
26
  )
27
 
28
 
 
 
 
 
 
 
 
 
 
 
29
  def clear_and_save_textbox(message: str) -> Tuple[str, str]:
30
  return "", message
31
 
 
1
+ from llama_cpp import Llama
2
+ from huggingface_hub import hf_hub_download
3
  import gradio as gr
4
  from typing import Tuple, List
5
 
6
  deepsparse.cpu.print_hardware_capability()
7
 
 
 
8
  DESCRIPTION = f"""
9
+ # Chat with Arco 500M as GGUF on CPU
 
 
10
  """
11
 
12
  MAX_MAX_NEW_TOKENS = 1024
13
  DEFAULT_MAX_NEW_TOKENS = 200
14
 
15
+ # Download the GGUF file
16
+ model_path = hf_hub_download(
17
+ repo_id="ijohn07/arco-plus-Q8_0-GGUF",
18
+ filename="arco-plus-q8_0.gguf",
19
+ repo_type="model"
20
+ )
21
+ # Load the GGUF model
22
+ llm = Llama(model_path=model_path)
23
+
24
+ # Setup the pipeline
25
+ pipe = pipeline(
26
  task="text-generation",
27
+ model=llm, # Passes the loaded Llama model as the model
28
+ max_new_tokens=MAX_MAX_NEW_TOKENS, # Sets the maximum number of tokens the model generates
 
 
29
  )
30
 
31
 
32
+ # Setup the engine
33
+ #pipe = Pipeline.create(
34
+ # task="text-generation",
35
+ # model_path=MODEL_ID,
36
+ # sequence_length=MAX_MAX_NEW_TOKENS,
37
+ # prompt_sequence_length=8,
38
+ # num_cores=8,
39
+ #)
40
+
41
+
42
  def clear_and_save_textbox(message: str) -> Tuple[str, str]:
43
  return "", message
44
 
requirements.txt CHANGED
@@ -1,3 +1,4 @@
1
- deepsparse-nightly==1.8.0.20240502
2
  transformers
 
 
3
  gradio
 
 
1
  transformers
2
+ llama_cpp
3
+ huggingface_hub
4
  gradio