import streamlit as st
from langchain_community.document_loaders.pdf import PyPDFDirectoryLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.embeddings import HuggingFaceInstructEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain_community.llms import HuggingFaceHub
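
# Pipeline overview: PDFs in the local "data" folder are loaded and split into
# chunks, embedded with the Instructor Base model, and indexed in a FAISS
# vectorstore; a RetrievalQA chain then answers queries with the Zephyr 7B Beta
# LLM served via the Hugging Face Hub.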

def make_vectorstore(embeddings):
    # Load every PDF in the local "data" directory, split it into chunks,
    # and index the chunks in a FAISS vectorstore.
    loader = PyPDFDirectoryLoader("data")
    documents = loader.load()
    text_splitter = CharacterTextSplitter(chunk_size=400, chunk_overlap=0)
    texts = text_splitter.split_documents(documents)
    docsearch = FAISS.from_documents(texts, embeddings)
    return docsearch

def get_conversation(vectorstore, model):
    # Wire the LLM and the FAISS retriever into a RetrievalQA chain.
    conversation_chain = RetrievalQA.from_llm(
        llm=model,
        retriever=vectorstore.as_retriever())
    return conversation_chain

def get_response(conversation_chain, query):
    # Run the chain on a single query and return the raw response dict.
    response = conversation_chain.invoke(query)
    return response

def response_formatter(resp_list):
    # Each raw result string is expected to contain the RetrievalQA prompt
    # markers: the original question follows '\nQuestion: ' and the model's
    # completion follows '\nHelpful Answer: '.
    queries = []
    responses = []
    for resp in resp_list:
        content = resp["result"]
        # Text between '\nQuestion: ' and '\nHelpful Answer: ' is the query.
        question = content.split('\nQuestion: ')[1].split('\nHelpful Answer: ')[0]
        queries.append(question)
        # Text after '\nHelpful Answer: ' is the generated answer.
        answer = content.split('\nHelpful Answer: ')[1]
        responses.append(answer)
    return queries, responses

def main():
    st.title("BetterZila RAG Enabled LLM")
    st.sidebar.title("About")
    st.sidebar.info("This app was built for a BetterZila assignment: a RAG-enabled LLM that answers questions about a pre-defined book, The 48 Laws of Power by Robert Greene.")
    st.sidebar.write(
        """
        This Space uses the Zephyr 7B Beta LLM from HuggingFace to answer questions about the book The 48 Laws of Power by Robert Greene, using RAG with a vectorstore database.
        The embeddings used for the vectorstore come from the Instructor Base model from HuggingFace.
        The generated responses are not perfect and are for demonstration purposes only, since the model is a quantized model used in inference mode.
        This Space was created by Pratik Dwivedi.
        GitHub - Dekode1859
        """
    )
    response_list = []
    print("Downloading Embeddings Model")
    with st.spinner('Downloading Embeddings Model...'):
        embeddings = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-base", model_kwargs={'device': 'cpu'})
    print("Loading LLM from HuggingFace")
    with st.spinner('Loading LLM from HuggingFace...'):
        llm = HuggingFaceHub(repo_id="HuggingFaceH4/zephyr-7b-beta", model_kwargs={"temperature": 0.7, "max_new_tokens": 512, "top_p": 0.95, "top_k": 50})
    print("Creating Vector Database of PDF file content")
    with st.spinner('Creating Vector Database of PDF file content...'):
        vectorstore = make_vectorstore(embeddings)
    print("Initializing LLM for inference with source material")
    with st.spinner('Initializing LLM for inference with source material...'):
        conversation_chain = get_conversation(vectorstore, llm)
queries = ["Can you give me an example from history where the enemy was crushed totally from the book?", | |
"What's the point of making myself less accessible?", | |
"Can you tell me the story of Queen Elizabeth I from this 48 laws of power book?"] | |
for query in queries: | |
response = get_response(conversation_chain, query) | |
response_list.append(response) | |
queries, responses = response_formatter(response_list) | |
for i in range(len(queries)): | |
st.write("Query: ", queries[i]) | |
st.write("Response: ", responses[i]) | |
st.write("--------------------------------------------------") | |

if __name__ == "__main__":
    main()
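
# To run locally (assuming the script is saved as app.py; the actual filename
# is not specified in the source), launch it with Streamlit. The HuggingFaceHub
# LLM typically also needs a HUGGINGFACEHUB_API_TOKEN environment variable set.
#   streamlit run app.py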