Spaces:

Do0rMaMu
/

LLaMa3-assistant

Build error

Do0rMaMu committed on May 24, 2024

Commit

75bdfde

verified ·

1 Parent(s): 106bbb2

Update main.py

Files changed (1) hide show

main.py CHANGED Viewed

@@ -8,14 +8,18 @@ from llama_cpp import Llama
 llm = Llama(
     model_path="Meta-Llama-3-8B-Instruct.Q4_K_M.gguf",  # Update the path as necessary
     n_ctx=4096,       # Maximum number of tokens for context (input + output)
-    n_threads=4,      # Number of CPU cores used
 )
 # Pydantic object for validation
 class Validation(BaseModel):
     user_prompt: str  # User's input prompt
     system_prompt: str  # System's guiding prompt
-    max_tokens: int
 # FastAPI application initialization
 app = FastAPI()
@@ -29,7 +33,7 @@ async def generate_response(item: Validation):
 { item.user_prompt }<|eot_id|> \n <|start_header_id|>assistant<|end_header_id|>"""
     # Call the Llama model to generate a response
-    output = llm(prompt, max_tokens = item.max_tokens,echo=True)  # Update parameters as needed
     # Extract and return the text from the response
     return output['choices'][0]['text']

 llm = Llama(
     model_path="Meta-Llama-3-8B-Instruct.Q4_K_M.gguf",  # Update the path as necessary
     n_ctx=4096,       # Maximum number of tokens for context (input + output)
+    n_threads=2,      # Number of CPU cores used
 )
 # Pydantic object for validation
 class Validation(BaseModel):
+    """Input model for generate_response: the two prompts plus optional sampling settings."""
     user_prompt: str  # User's input prompt
     system_prompt: str  # System's guiding prompt
+    # No trailing commas after the defaults: "= 1024," would make the default the tuple (1024,).
+    max_tokens: int = 1024  # Forwarded to llm(...) as max_tokens
+    temperature: float = 0.001  # Fractional default, so float rather than int
+    top_p: float = 0.9  # Fractional default, so float rather than int
+    repeat_penalty: float = 1.1  # Fractional default, so float rather than int
+    top_k: int = 40  # Forwarded to llm(...) as top_k
 # FastAPI application initialization
 app = FastAPI()
 { item.user_prompt }<|eot_id|> \n <|start_header_id|>assistant<|end_header_id|>"""
     # Call the Llama model to generate a response
+    output = llm(prompt, max_tokens = item.max_tokens,temperature = item.temperature , top_p = item.top_p , repeat_penalty = item.repeat_penalty, top_k = item.top_k ,echo=True)  # Update parameters as needed
     # Extract and return the text from the response
     return output['choices'][0]['text']