Do0rMaMu committed on
Commit
d97cf54
·
verified ·
1 Parent(s): 5cffeec

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +7 -11
main.py CHANGED
@@ -1,14 +1,12 @@
1
  from fastapi import FastAPI
2
  from pydantic import BaseModel
3
-
4
- # Assuming Llama class has been correctly imported and set up
5
  from llama_cpp import Llama
6
 
7
  # Model loading with specified path and configuration
8
  llm = Llama(
9
- model_path="phi-3-mini-4k-instruct-text-to-sql.Q4_K.gguf", # Update the path as necessary
10
- n_ctx=4096, # Maximum number of tokens for context (input + output)
11
- n_threads=2, # Number of CPU cores used
12
  )
13
 
14
  # Pydantic object for validation
@@ -24,13 +22,11 @@ app = FastAPI()
24
  # Endpoint for generating responses
25
  @app.post("/generate_response")
26
  async def generate_response(item: Validation):
27
- # Construct the complete prompt using the given system and user prompts
28
- prompt = f"""\nSystem\n
29
- { item.system_prompt } \nQuestion\n
30
- { item.user_prompt }"""
31
-
32
  # Call the Llama model to generate a response
33
- output = llm(prompt, max_tokens = item.max_tokens,temperature = item.temperature, echo=True)
34
 
35
  # Extract and return the text from the response
36
  return output['choices'][0]['text']
 
1
from fastapi import FastAPI
from pydantic import BaseModel

from llama_cpp import Llama

# Load the quantized Phi-3 model once at import time so every request
# reuses the same in-memory instance.
llm = Llama(
    model_path="phi-3-mini-4k-instruct.Q4_K.gguf",  # Update the path as necessary
    n_ctx=4096,   # context window: prompt + completion tokens
    n_threads=2,  # CPU threads used for inference
)

# Pydantic object for validation
 
22
# Endpoint for generating responses
@app.post("/generate_response")
async def generate_response(item: Validation):
    """Generate a completion for the request using the Phi-3 chat format.

    `item` carries `system_prompt`, `user_prompt`, `max_tokens`, and
    `temperature` (validated by the `Validation` model defined above).
    Returns the raw generated text of the model's first choice.
    """
    # Build the prompt in the Phi-3 chat template. The previous version
    # wrapped the system prompt in a <|user|> tag, producing two user turns;
    # use <|system|> so the model treats it as an instruction.
    prompt = (
        f"<|system|>\n{item.system_prompt}\n<|end|>\n"
        f"<|user|>\n{item.user_prompt}\n<|end|>\n"
        f"<|assistant|>"
    )

    # Call the Llama model to generate a response.
    # NOTE(review): echo=True makes the model return the prompt followed by
    # the completion — confirm callers expect the prompt echoed back.
    output = llm(prompt, max_tokens=item.max_tokens, temperature=item.temperature, echo=True)

    # Extract and return the text from the first (only) choice.
    return output['choices'][0]['text']