# NOTE: page header captured with this file read "Spaces: Runtime error"
# (apparently the Hugging Face Space status at capture time).
import os

import torch
import uvicorn
from fastapi import FastAPI
from pydantic import BaseModel
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
# Define a Pydantic model for request validation
class Query(BaseModel):
    # Request body schema: a single required string field.
    # `text` is the user's question; it is interpolated into the
    # "Question: ...\nAnswer: " prompt by the generate handler.
    text: str
# Initialize FastAPI app (the title is shown in the auto-generated API docs)
app = FastAPI(title="Financial Chatbot API")
# Load your fine-tuned model and tokenizer using the updated model name
model_name = "Phoenix21/meta-llama-Llama-3.2-3B-2025-03-13-checkpoints"

# NOTE(review): trust_remote_code=True allows Python code shipped in the model
# repository to execute locally. Confirm this checkpoint actually requires it.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",  # let the library place weights on the available device(s)
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token
# Create a text-generation pipeline
chat_pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=256,  # cap on tokens generated per request
    # temperature / top_p are sampling parameters: they are ignored unless
    # sampling is enabled, so enable it explicitly instead of relying on the
    # checkpoint's generation config.
    do_sample=True,
    temperature=0.7,
    top_p=0.95,
)
# Define an endpoint for generating responses
# The route decorator is what registers this function on `app`; without it the
# handler is never reachable over HTTP.
@app.post("/generate")
def generate(query: Query):
    """Generate a model answer for the submitted question.

    Args:
        query: Validated request body; ``query.text`` is the user's question.

    Returns:
        A dict ``{"response": <generated text>}``. The value is the
        ``generated_text`` field of the first pipeline result.
        NOTE(review): with the pipeline's default settings this text
        presumably includes the prompt followed by the completion — confirm
        whether callers expect the prompt to be echoed.
    """
    prompt = f"Question: {query.text}\nAnswer: "
    # The pipeline returns a list of results; take the first one.
    response = chat_pipe(prompt)[0]["generated_text"]
    return {"response": response}
# Run the app using uvicorn; default port is 7860 (as expected by Hugging Face Spaces)
if __name__ == "__main__":
    # PORT from the environment overrides the default when set.
    port = int(os.environ.get("PORT", 7860))
    # Bind to all interfaces so the server is reachable from outside the container.
    uvicorn.run(app, host="0.0.0.0", port=port)