"""HTTP API that serves a fine-tuned causal language model for finance Q&A.

The module builds a FastAPI application with a single POST endpoint,
``/generate``, which wraps a Hugging Face text-generation pipeline.  The model
and tokenizer are loaded once, at import time, so every request reuses them.
"""

import os

import torch  # not referenced in this file's visible code; kept as-is
import uvicorn
from fastapi import FastAPI
from pydantic import BaseModel
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline


class Query(BaseModel):
    """Request body for the ``/generate`` endpoint."""

    # The user's question, inserted verbatim into the prompt template.
    text: str


# Initialize the FastAPI app.
app = FastAPI(title="Financial Chatbot API")

# Load the fine-tuned model and tokenizer once at import time.
# NOTE(review): trust_remote_code=True permits custom code shipped with the
# model repository to be executed locally -- confirm the repository is trusted.
model_name = "Phoenix21/llama-3-2-3b-finetuned-finance"
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token

# Text-generation pipeline shared by all requests.
# NOTE(review): temperature/top_p presumably only take effect when sampling is
# enabled in the model's generation config -- verify against the model repo.
chat_pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=256,
    temperature=0.7,
    top_p=0.95,
)


@app.post("/generate")
def generate(query: Query):
    """Generate an answer for the submitted question.

    The question is wrapped in a ``Question: ... / Answer:`` prompt, passed to
    the shared pipeline, and the first result's ``generated_text`` is returned.

    Args:
        query: Validated request body carrying the question text.

    Returns:
        A dict with a single key, ``"response"``, holding the generated text.
    """
    prompt = f"Question: {query.text}\nAnswer: "
    # The pipeline returns a list of results; only the first one is used.
    # NOTE(review): with the pipeline's default settings the generated text is
    # expected to include the prompt itself -- confirm whether callers want it
    # stripped before changing this.
    response = chat_pipe(prompt)[0]["generated_text"]
    return {"response": response}


# Run the app using uvicorn. Hugging Face Spaces sets the PORT environment
# variable; fall back to 8000 when it is absent.
if __name__ == "__main__":
    port = int(os.environ.get("PORT", 8000))
    uvicorn.run(app, host="0.0.0.0", port=port)