import streamlit as st
from langchain_community.document_loaders.pdf import PyPDFDirectoryLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.embeddings import HuggingFaceInstructEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain_community.llms import HuggingFaceHub


def make_vectorstore(embeddings):
    # Load every PDF in the "data" directory, split it into chunks, and index it with FAISS
    loader = PyPDFDirectoryLoader("data")
    documents = loader.load()
    text_splitter = CharacterTextSplitter(chunk_size=400, chunk_overlap=0)
    texts = text_splitter.split_documents(documents)
    docsearch = FAISS.from_documents(texts, embeddings)
    return docsearch


def get_conversation(vectorstore, model):
    # Build a RetrievalQA chain that answers queries against the vectorstore retriever
    conversation_chain = RetrievalQA.from_llm(
        llm=model,
        # chain_type="stuff",
        retriever=vectorstore.as_retriever(),
    )
    return conversation_chain


def get_response(conversation_chain, query):
    # Run the chain on a single query and return the raw response dict
    response = conversation_chain.invoke(query)
    return response


def response_formatter(resp_list):
    # Split each raw result on the '\nQuestion: ' and '\nHelpful Answer: ' markers
    # to recover parallel lists of questions and answers
    queries = []
    responses = []
    for resp in resp_list:
        content = resp["result"]
        # text between '\nQuestion: ' and '\nHelpful Answer: ' is the question
        question = content.split('\nQuestion: ')[1].split('\nHelpful Answer: ')[0]
        queries.append(question)
        # text after '\nHelpful Answer: ' is the answer
        answer = content.split('\nHelpful Answer: ')[1]
        responses.append(answer)
    return queries, responses


def main():
    st.title("BetterZila RAG Enabled LLM")
    st.sidebar.title("About")
    st.sidebar.info("This app is an assignment for BetterZila: a RAG-enabled LLM that answers questions about a pre-defined book, 48 Laws of Power by Robert Greene.")
    st.sidebar.write(
        """
        This Space uses the Zephyr 7B Beta LLM from HuggingFace to answer questions about the book
        48 Laws of Power by Robert Greene, using RAG with a vectorstore database.
        Embeddings for the vectorstore come from the Instructor Base model on HuggingFace.
        The generated responses are not perfect and are for demonstration purposes only,
        since the model is a quantized model used in inference mode.
        This Space was created by Pratik Dwivedi.
        GitHub - Dekode1859
        """
    )
    response_list = []

    print("Downloading Embeddings Model")
    with st.spinner('Downloading Embeddings Model...'):
        embeddings = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-base", model_kwargs={'device': 'cpu'})

    print("Loading LLM from HuggingFace")
    with st.spinner('Loading LLM from HuggingFace...'):
        llm = HuggingFaceHub(
            repo_id="HuggingFaceH4/zephyr-7b-beta",
            model_kwargs={"temperature": 0.7, "max_new_tokens": 512, "top_p": 0.95, "top_k": 50},
        )

    print("Creating Vector Database of PDF file content")
    with st.spinner('Creating Vector Database of PDF file content...'):
        vectorstore = make_vectorstore(embeddings)

    print("Initializing LLM for inference with source material")
    with st.spinner('Initializing LLM for inference with source material...'):
        conversation_chain = get_conversation(vectorstore, llm)

    queries = ["Can you give me an example from history where the enemy was crushed totally from the book?",
               "What's the point of making myself less accessible?",
               "Can you tell me the story of Queen Elizabeth I from this 48 laws of power book?"]

    for query in queries:
        response = get_response(conversation_chain, query)
        response_list.append(response)

    queries, responses = response_formatter(response_list)
    for i in range(len(queries)):
        st.write("Query: ", queries[i])
        st.write("Response: ", responses[i])
        st.write("--------------------------------------------------")


if __name__ == "__main__":
    main()
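
# Note: a minimal sketch of how to run this app locally, assuming the dependencies
# (streamlit, langchain, langchain-community, faiss-cpu, pypdf, InstructorEmbedding,
# sentence-transformers) are installed, the source PDF lives in ./data, and a
# HUGGINGFACEHUB_API_TOKEN environment variable is set for HuggingFaceHub access:
#
#   streamlit run app.py
#
# The file name "app.py" is illustrative; substitute whatever this script is saved as.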