"""FastAPI service that proxies chat messages to a Hugging Face chat model."""

from fastapi import FastAPI, Request
from pydantic import BaseModel
from huggingface_hub import InferenceClient

# Initialize FastAPI app
app = FastAPI()

# Hugging Face Inference client bound to the Zephyr-7B chat model.
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")


class InputData(BaseModel):
    """Expected JSON body for POST /api."""

    message: str  # User message sent from the frontend


@app.post("/api")
def get_ai_response(data: InputData):
    """Forward the user's message to the model and return the full reply.

    Declared as a plain ``def`` (not ``async def``) so FastAPI runs it in a
    worker thread: ``client.chat_completion`` streams *synchronously*, and
    inside an ``async def`` it would block the event loop for the entire
    generation.

    Returns:
        ``{"response": <text>}`` on success, or ``{"error": <reason>}`` if
        the inference call fails (kept as a 200 JSON body to preserve the
        original frontend contract).
    """
    try:
        # Prepare messages for the model
        messages = [
            {"role": "system", "content": "You are a friendly Chatbot."},
            {"role": "user", "content": data.message},
        ]

        # Accumulate streamed tokens into one reply string.
        response = ""
        for chunk in client.chat_completion(
            messages,
            max_tokens=512,
            stream=True,
            temperature=0.7,
            top_p=0.95,
        ):
            token = chunk.choices[0].delta.content
            # The final streamed chunk may carry ``delta.content = None``;
            # guard against it so we never evaluate ``str + None``.
            if token:
                response += token

        # Return the AI response as JSON
        return {"response": response.strip()}
    except Exception as e:
        # Handle errors gracefully: surface the failure to the frontend
        # instead of letting FastAPI emit a bare 500.
        return {"error": str(e)}