from fastapi import FastAPI
from llama_cpp import Llama
import streamlit as st

# Load the local GGUF model with llama-cpp-python
llm = Llama(
    model_path="Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf",
)

# Streamlit chat UI: read the user's prompt and echo it back
prompt = st.chat_input("Say something")
if prompt:
    st.write(f"User has sent the following prompt: {prompt}")

## Create a new FastAPI app instance
# app = FastAPI()

# Initialize the text generation pipeline
# pipe = pipeline("text2text-generation", model="lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF", token=os.getenv('HF_KEY'))

# @app.get("/")
# def home():
#     print("hello here")
#     output = llm("What is the difference between RAG and fine-tuning?", max_tokens=1000)
#     print(output["choices"][0]["text"])
#     # Return the generated text in a JSON response
#     return {"output": output["choices"][0]["text"]}

# # Define a function to handle the GET request at `/generate`
# @app.get("/generate")
# def generate(text: str):
#     # Use the model to generate text from the given input text
#     print("Received prompt: " + str(text))
#     output = llm(text, max_tokens=1000)
#     print(output["choices"][0]["text"])
#     # Return the generated text in a JSON response
#     return {"output": output["choices"][0]["text"]}
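
# --- Sketch: route the chat prompt through the loaded model ---
# This block is not in the original script; it is an assumption about the intended
# behaviour, reusing the llm(..., max_tokens=...) call pattern and the
# output["choices"][0]["text"] access from the commented-out FastAPI handlers above.
if prompt:
    output = llm(prompt, max_tokens=1000)  # generate a completion for the user's prompt
    st.write(output["choices"][0]["text"])  # display the generated text in the chat UI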