Do0rMaMu committed (verified)
Commit f3c7e66 · 1 Parent(s): a46aed5

Update main.py

Files changed (1)
  1. main.py +27 -17
main.py CHANGED
@@ -1,28 +1,38 @@
 from fastapi import FastAPI
-from pydantic import BaseModel
+from pydantic import BaseModel, Field
+from typing import List, Dict
 from llama_cpp import Llama
 
-# Model loading with specified path and configuration
-llm = Llama(
-    model_path="Llama-3.2-3B-Instruct-Q8_0.gguf",  # Update the path as necessary
-    n_ctx=4096,
-    n_threads=2,
+# Load the Llama model with the specified path and configuration
+llm = Llama.from_pretrained(
+    repo_id="bartowski/Llama-3.2-3B-Instruct-GGUF",  # Replace with the actual model repository ID
+    filename="Llama-3.2-3B-Instruct-Q8_0.gguf",  # Replace with your actual model filename if necessary
+    n_ctx=4096,
+    n_threads=2,
 )
 
-# Pydantic object for validation
+# Define a Pydantic model for request validation
+class Message(BaseModel):
+    role: str  # "user" or "assistant"
+    content: str  # The actual message content
+
 class Validation(BaseModel):
-    user_prompt: str  # This will be the direct SQL query request or relevant prompt
-    max_tokens: int = 1024
-    temperature: float = 0.01
+    messages: List[Message] = Field(default_factory=list)  # List of previous messages in the conversation
+    max_tokens: int = 1024  # Maximum tokens for the response
+    temperature: float = 0.01  # Model response temperature for creativity
 
-# FastAPI application initialization
+# Initialize the FastAPI application
 app = FastAPI()
 
-# Endpoint for generating responses
+# Define the endpoint for generating responses
 @app.post("/generate_response")
 async def generate_response(item: Validation):
-    # Call the Llama model to generate a response directly based on the user's prompt
-    output = llm(item.user_prompt, max_tokens=item.max_tokens, temperature=item.temperature, echo=False)
-
-    # Extract and return the text from the response
-    return output['choices'][0]['text']
+    # Generate a response using the Llama model with the chat history
+    response = llm.create_chat_completion(
+        messages=[{"role": msg.role, "content": msg.content} for msg in item.messages],
+        max_tokens=item.max_tokens,
+        temperature=item.temperature
+    )
+
+    # Extract and return the response text
+    return {"response": response['choices'][0]['message']['content']}
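After this change the endpoint no longer accepts a single user_prompt string; it expects a JSON body with a messages list (role/content pairs), and it returns the generated text wrapped in a "response" key. Below is a minimal client sketch of the new request shape. The host, port, and example prompt are assumptions (they are not part of the commit); it assumes the app is being served locally, e.g. with uvicorn main:app --port 8000.

# Hypothetical client call against the updated /generate_response endpoint.
# Host, port, and prompt text are assumptions for illustration only.
import requests

payload = {
    "messages": [
        {"role": "user", "content": "List all tables in the sales database."}
    ],
    "max_tokens": 256,
    "temperature": 0.01,
}

resp = requests.post("http://localhost:8000/generate_response", json=payload)
print(resp.json()["response"])  # The new handler returns {"response": "..."} instead of a bare string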