from fastapi import FastAPI
from pydantic import BaseModel
from langchain_community.llms import Ollama
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

app = FastAPI()

# Name of the model served by the local Ollama daemon.
MODEL_NAME = 'tinyllama'


def get_llm():
    """Build an Ollama LLM client that streams generated tokens to stdout.

    Returns:
        Ollama: a fresh client bound to ``MODEL_NAME`` with a
        stdout-streaming callback attached.
    """
    callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
    return Ollama(model=MODEL_NAME, callback_manager=callback_manager)


class Question(BaseModel):
    """Request body for the /ask endpoint."""

    # The user's prompt text to forward to the LLM.
    text: str


@app.get("/")
def read_root():
    """Welcome/health endpoint."""
    return {"message": f"Welcome to {MODEL_NAME} FastAPI"}


@app.post("/ask")
def ask_question(question: Question):
    """Send the question text to the LLM and return its completion.

    Args:
        question: validated request body containing the prompt text.

    Returns:
        dict: ``{"response": <model completion string>}``.
    """
    llm = get_llm()
    # ``llm(text)`` (BaseLLM.__call__) is deprecated in LangChain 0.1+
    # and removed in 1.0; .invoke() is the supported Runnable interface.
    response = llm.invoke(question.text)
    return {"response": response}