Spaces:

lpetrl
/

demo-llm

Sleeping

Petro committed on Feb 28, 2024

Commit

461052c

1 Parent(s): 2d3c757

First model version

Files changed (2) hide show

main.py CHANGED Viewed

@@ -1,21 +1,18 @@
-from ctransformers import AutoModelForCausalLM
 from fastapi import FastAPI
 from pydantic import BaseModel
-file_name = "zephyr-7b-beta.Q4_K_S.gguf"
-llm = AutoModelForCausalLM.from_pretrained(file_name,
-    model_type='mistral',
-    max_new_tokens = 1096,
-    threads = 3,
-)
-#Pydantic object
 class validation(BaseModel):
     prompt: str
-#Fast API
 app = FastAPI()
 @app.post("/llm_on_cpu")
 async def stream(item: validation):
     system_prompt = 'Below is an instruction that describes a task. Write a response that appropriately completes the request.'
@@ -23,4 +20,4 @@ async def stream(item: validation):
     user, assistant = "<|user|>", "<|assistant|>"
     prompt = f"{system_prompt}{E_INST}\n{user}\n{item.prompt}{E_INST}\n{assistant}\n"
-    return llm(prompt)

+from llama_cpp import Llama
 from fastapi import FastAPI
 from pydantic import BaseModel
+model_file = "zephyr-7b-beta.Q4_K_S.gguf"
+llm = Llama(model_path=model_file, n_ctx=512, n_batch=126)
 class validation(BaseModel):
     prompt: str
 app = FastAPI()
 @app.post("/llm_on_cpu")
 async def stream(item: validation):
     system_prompt = 'Below is an instruction that describes a task. Write a response that appropriately completes the request.'
     user, assistant = "<|user|>", "<|assistant|>"
     prompt = f"{system_prompt}{E_INST}\n{user}\n{item.prompt}{E_INST}\n{assistant}\n"
+    return llm("What is an LLM?", max_tokens=100)

requirements.txt CHANGED Viewed

@@ -5,4 +5,4 @@ uvicorn
 requests
 python-dotenv
 ctransformers
-huggingface-hub

 requests
 python-dotenv
 ctransformers
+llama-cpp-python