Do0rMaMu committed on
Commit
d97cf54
·
verified ·
1 Parent(s): 5cffeec

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +7 -11
main.py CHANGED
@@ -1,14 +1,12 @@
1
  from fastapi import FastAPI
2
  from pydantic import BaseModel
3
-
4
- # Assuming Llama class has been correctly imported and set up
5
  from llama_cpp import Llama
6
 
7
  # Model loading with specified path and configuration
8
  llm = Llama(
9
- model_path="phi-3-mini-4k-instruct-text-to-sql.Q4_K.gguf", # Update the path as necessary
10
- n_ctx=4096, # Maximum number of tokens for context (input + output)
11
- n_threads=2, # Number of CPU cores used
12
  )
13
 
14
  # Pydantic object for validation
@@ -24,13 +22,11 @@ app = FastAPI()
24
  # Endpoint for generating responses
25
  @app.post("/generate_response")
26
  async def generate_response(item: Validation):
27
- # Construct the complete prompt using the given system and user prompts
28
- prompt = f"""\nSystem\n
29
- { item.system_prompt } \nQuestion\n
30
- { item.user_prompt }"""
31
-
32
  # Call the Llama model to generate a response
33
- output = llm(prompt, max_tokens = item.max_tokens,temperature = item.temperature, echo=True)
34
 
35
  # Extract and return the text from the response
36
  return output['choices'][0]['text']
 
1
from fastapi import FastAPI
from pydantic import BaseModel

from llama_cpp import Llama

# Load the quantized Phi-3 model once at import time so every request
# reuses the same in-memory instance.
llm = Llama(
    model_path="phi-3-mini-4k-instruct.Q4_K.gguf",  # Update the path as necessary
    n_ctx=4096,   # context window: prompt + completion tokens
    n_threads=2,  # CPU threads used for inference
)

# Pydantic object for validation
 
22
# Endpoint for generating responses
@app.post("/generate_response")
async def generate_response(item: Validation):
    """Generate a completion for the request using the Phi-3 chat format.

    `item` carries `system_prompt`, `user_prompt`, `max_tokens`, and
    `temperature` (validated by the `Validation` model defined above).
    Returns the raw generated text of the model's first choice.
    """
    # Build the prompt in the Phi-3 chat template. The previous version
    # wrapped the system prompt in a <|user|> tag, producing two user turns;
    # use <|system|> so the model treats it as an instruction.
    prompt = (
        f"<|system|>\n{item.system_prompt}\n<|end|>\n"
        f"<|user|>\n{item.user_prompt}\n<|end|>\n"
        f"<|assistant|>"
    )

    # Call the Llama model to generate a response.
    # NOTE(review): echo=True makes the model return the prompt followed by
    # the completion — confirm callers expect the prompt echoed back.
    output = llm(prompt, max_tokens=item.max_tokens, temperature=item.temperature, echo=True)

    # Extract and return the text from the first (only) choice.
    return output['choices'][0]['text']