Tobias Bergmann committed on
Commit
9329033
·
1 Parent(s): ef2124d
Files changed (2) hide show
  1. app.py +26 -13
  2. requirements.txt +2 -1
app.py CHANGED
@@ -1,31 +1,44 @@
1
- import deepsparse
 
2
  import gradio as gr
3
  from typing import Tuple, List
4
 
5
  deepsparse.cpu.print_hardware_capability()
6
 
7
- MODEL_ID = "hf:mgoin/Meta-Llama-3-8B-Instruct-pruned50-quant-ds"
8
-
9
  DESCRIPTION = f"""
10
- # Chat with an Efficient Llama-3-8B-Instruct Model on CPU with DeepSparse
11
-
12
- Model ID: {MODEL_ID[len("hf:"):]}
13
  """
14
 
15
  MAX_MAX_NEW_TOKENS = 1024
16
  DEFAULT_MAX_NEW_TOKENS = 200
17
 
18
- # Setup the engine
19
- from deepsparse.legacy import Pipeline
20
- pipe = Pipeline.create(
 
 
 
 
 
 
 
 
21
  task="text-generation",
22
- model_path=MODEL_ID,
23
- sequence_length=MAX_MAX_NEW_TOKENS,
24
- prompt_sequence_length=8,
25
- num_cores=8,
26
  )
27
 
28
 
 
 
 
 
 
 
 
 
 
 
29
  def clear_and_save_textbox(message: str) -> Tuple[str, str]:
30
  return "", message
31
 
 
1
+ from llama_cpp import Llama
2
+ from huggingface_hub import hf_hub_download
3
  import gradio as gr
4
  from typing import Tuple, List
5
 
6
  deepsparse.cpu.print_hardware_capability()
7
 
 
 
8
  DESCRIPTION = f"""
9
+ # Chat with Arco 500M as GGUF on CPU
 
 
10
  """
11
 
12
  MAX_MAX_NEW_TOKENS = 1024
13
  DEFAULT_MAX_NEW_TOKENS = 200
14
 
15
+ # Download the GGUF file
16
+ model_path = hf_hub_download(
17
+ repo_id="ijohn07/arco-plus-Q8_0-GGUF",
18
+ filename="arco-plus-q8_0.gguf",
19
+ repo_type="model"
20
+ )
21
+ # Load the GGUF model
22
+ llm = Llama(model_path=model_path)
23
+
24
+ # Setup the pipeline
25
+ pipe = pipeline(
26
  task="text-generation",
27
+ model=llm, # Passes the loaded Llama model as the model
28
+ max_new_tokens=MAX_MAX_NEW_TOKENS, # Sets the maximum number of tokens the model generates
 
 
29
  )
30
 
31
 
32
+ # Setup the engine
33
+ #pipe = Pipeline.create(
34
+ # task="text-generation",
35
+ # model_path=MODEL_ID,
36
+ # sequence_length=MAX_MAX_NEW_TOKENS,
37
+ # prompt_sequence_length=8,
38
+ # num_cores=8,
39
+ #)
40
+
41
+
42
  def clear_and_save_textbox(message: str) -> Tuple[str, str]:
43
  return "", message
44
 
requirements.txt CHANGED
@@ -1,3 +1,4 @@
1
- deepsparse-nightly==1.8.0.20240502
2
  transformers
 
 
3
  gradio
 
 
1
  transformers
2
+ llama_cpp
3
+ huggingface_hub
4
  gradio