Update main.py
Browse files
main.py
CHANGED
@@ -1,14 +1,12 @@
|
|
1 |
from fastapi import FastAPI
|
2 |
from pydantic import BaseModel
|
3 |
-
|
4 |
-
# Assuming Llama class has been correctly imported and set up
|
5 |
from llama_cpp import Llama
|
6 |
|
7 |
# Model loading with specified path and configuration
|
8 |
llm = Llama(
|
9 |
-
model_path="phi-3-mini-4k-instruct
|
10 |
-
n_ctx=4096,
|
11 |
-
n_threads=2,
|
12 |
)
|
13 |
|
14 |
# Pydantic object for validation
|
@@ -24,13 +22,11 @@ app = FastAPI()
|
|
24 |
# Endpoint for generating responses
|
25 |
@app.post("/generate_response")
|
26 |
async def generate_response(item: Validation):
|
27 |
-
# Construct the complete prompt using the given system and user prompts
|
28 |
-
prompt = f"
|
29 |
-
|
30 |
-
{ item.user_prompt }"""
|
31 |
-
|
32 |
# Call the Llama model to generate a response
|
33 |
-
output = llm(prompt, max_tokens
|
34 |
|
35 |
# Extract and return the text from the response
|
36 |
return output['choices'][0]['text']
|
|
|
1 |
from fastapi import FastAPI
|
2 |
from pydantic import BaseModel
|
|
|
|
|
3 |
from llama_cpp import Llama
|
4 |
|
5 |
# Load the model once, at import time, so the endpoint below can reuse the
# same `llm` object instead of reloading the weights on every request.
llm = Llama(
    # Relative path to the GGUF weights file; presumably resolved against the
    # process working directory. Update the path as necessary.
    model_path="phi-3-mini-4k-instruct.Q4_K.gguf",
    # NOTE(review): presumably the context size in tokens, chosen to match the
    # "4k" in the model file name -- confirm against the llama-cpp-python docs.
    n_ctx=4096,
    # NOTE(review): presumably the number of CPU threads used for inference;
    # confirm this matches the cores available on the deployment host.
    n_threads=2,
)
|
11 |
|
12 |
# Pydantic object for validation
|
|
|
22 |
# Endpoint for generating responses
|
23 |
@app.post("/generate_response")
async def generate_response(item: Validation):
    # Purpose: build a single prompt string from the request's system and user
    # prompts, run it through the module-level `llm`, and return the text of
    # the first choice in the model output.
    #
    # Reads from `item`: system_prompt, user_prompt, max_tokens, temperature.

    # Assemble the prompt in the chat-tag layout the model is expected to see.
    # The adjacent literals below are concatenated into one string.
    # NOTE(review): the system prompt is wrapped in a <|user|> turn rather than
    # a dedicated system turn -- confirm this matches the model's chat template.
    full_prompt = (
        f"<|user|>\n{item.system_prompt}\n<|end|>\n"
        f"<|user|>\n{item.user_prompt}\n<|end|>\n"
        "<|assistant|>"
    )

    # Run the completion with the caller-supplied sampling settings.
    # NOTE(review): this call is made without `await` inside an `async def`;
    # presumably it blocks the event loop for the duration of inference --
    # confirm whether that is acceptable for the expected load.
    # NOTE(review): echo=True presumably makes the returned text include the
    # prompt as well as the completion -- confirm that clients expect this.
    completion = llm(
        full_prompt,
        max_tokens=item.max_tokens,
        temperature=item.temperature,
        echo=True,
    )

    # Only the first choice's text is returned to the client.
    first_choice = completion["choices"][0]
    return first_choice["text"]
|