Do0rMaMu committed
Commit 75bdfde · verified · 1 Parent(s): 106bbb2

Update main.py

Files changed (1):
  main.py +7 -3
main.py CHANGED
@@ -8,14 +8,18 @@ from llama_cpp import Llama
 llm = Llama(
     model_path="Meta-Llama-3-8B-Instruct.Q4_K_M.gguf",  # Update the path as necessary
     n_ctx=4096,     # Maximum number of tokens for context (input + output)
-    n_threads=4,    # Number of CPU cores used
+    n_threads=2,    # Number of CPU cores used
 )
 
 # Pydantic object for validation
 class Validation(BaseModel):
     user_prompt: str    # User's input prompt
     system_prompt: str  # System's guiding prompt
-    max_tokens: int
+    max_tokens: int = 1024
+    temperature: float = 0.001
+    top_p: float = 0.9
+    repeat_penalty: float = 1.1
+    top_k: int = 40
 
 # FastAPI application initialization
 app = FastAPI()
@@ -29,7 +33,7 @@ async def generate_response(item: Validation):
     { item.user_prompt }<|eot_id|> \n <|start_header_id|>assistant<|end_header_id|>"""
 
     # Call the Llama model to generate a response
-    output = llm(prompt, max_tokens=item.max_tokens, echo=True)  # Update parameters as needed
+    output = llm(prompt, max_tokens=item.max_tokens, temperature=item.temperature, top_p=item.top_p, repeat_penalty=item.repeat_penalty, top_k=item.top_k, echo=True)  # Update parameters as needed
 
     # Extract and return the text from the response
     return output['choices'][0]['text']
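
With these defaults in place, every sampling field on Validation is optional, so API clients only need to send the values they want to override. Below is a minimal client sketch; the route path and host/port are assumptions, since the @app.post decorator line sits outside this diff's context:

import requests  # any HTTP client would do

# Assumed endpoint: the diff only shows the generate_response handler,
# not its route decorator, so POST http://localhost:8000/generate is a guess.
URL = "http://localhost:8000/generate"

payload = {
    "user_prompt": "Explain what a GGUF quantized model is in two sentences.",
    "system_prompt": "You are a concise technical assistant.",
    # Omitted fields fall back to the defaults added in this commit:
    # max_tokens=1024, temperature=0.001, top_p=0.9,
    # repeat_penalty=1.1, top_k=40.
    "temperature": 0.7,  # override just the sampling temperature
    "max_tokens": 256,   # and the completion budget
}

resp = requests.post(URL, json=payload, timeout=300)
resp.raise_for_status()
print(resp.text)

Note that because the handler calls llm(..., echo=True), the returned text includes the full prompt as well as the completion, so a client may want to strip everything up to the final <|start_header_id|>assistant<|end_header_id|> marker.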