from fastapi import FastAPI
from llama_cpp import Llama

# Create the FastAPI application instance.
app = FastAPI()

# Load the local GGUF model once at module import time; loading it
# per-request would be prohibitively slow.
llm = Llama(
    model_path="Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf",
)


@app.get("/")
def home():
    """Health-check / landing endpoint."""
    return {"message": "Hello World"}


@app.get("/generate")
def generate(text: str):
    """Generate a completion for the given prompt.

    Args:
        text: The prompt to complete (passed as a query parameter).

    Returns:
        A JSON response with the generated text under the "output" key.
    """
    # BUG FIX: the original wrote `output== llm(...)` — an equality
    # comparison whose result was discarded, leaving `output` undefined
    # and raising NameError on the next line. Use assignment instead.
    output = llm(text, max_tokens=1000)
    # llama-cpp-python returns an OpenAI-style completion dict; the
    # generated text lives at choices[0]["text"].
    return {"output": output["choices"][0]["text"]}