import streamlit as st
from langchain_community.document_loaders.pdf import PyPDFDirectoryLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.embeddings import HuggingFaceInstructEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain_community.llms import HuggingFaceHub
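
# Streamlit RAG demo: index the PDFs in ./data with Instructor embeddings,
# store the chunks in a FAISS vectorstore, and answer a fixed set of
# questions about "48 Laws of Power" with Zephyr 7B Beta via a RetrievalQA chain.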
def make_vectorstore(embeddings):
    # Load every PDF in the "data" directory, chunk the text, and index the
    # chunks in an in-memory FAISS vectorstore.
    loader = PyPDFDirectoryLoader("data")
    documents = loader.load()
    text_splitter = CharacterTextSplitter(chunk_size=400, chunk_overlap=0)
    texts = text_splitter.split_documents(documents)
    docsearch = FAISS.from_documents(texts, embeddings)
    return docsearch

def get_conversation(vectorstore, model):
    # Wire the LLM and the vectorstore retriever into a RetrievalQA chain.
    conversation_chain = RetrievalQA.from_llm(
        llm=model,
        # chain_type="stuff",
        retriever=vectorstore.as_retriever())
    return conversation_chain

def get_response(conversation_chain, query):
    # Run the query through the RetrievalQA chain.
    response = conversation_chain.invoke(query)
    return response
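
# Each response from RetrievalQA.invoke is a dict whose generated text sits
# under the "result" key; the model echoes the prompt back inside it, which
# is why the formatter below splits on the prompt markers.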
def response_formatter(resp_list):
    queries = []
    responses = []
    for resp in resp_list:
        content = resp["result"]
        # The question is the text between '\nQuestion: ' and '\nHelpful Answer: '.
        question = content.split('\nQuestion: ')[1].split('\nHelpful Answer: ')[0]
        queries.append(question)
        # The answer is everything after '\nHelpful Answer: '.
        answer = content.split('\nHelpful Answer: ')[1]
        responses.append(answer)
    return queries, responses

def main():
    st.title("BetterZila RAG Enabled LLM")
    st.sidebar.title("About")
    st.sidebar.info("This app is an assignment submission for BetterZila: a RAG-enabled LLM that answers questions about a pre-defined book, 48 Laws of Power by Robert Greene.")
    st.sidebar.write(
        """
        This Space uses the Zephyr 7B Beta LLM from HuggingFace to answer questions from the book 48 Laws of Power by Robert Greene, using RAG with a vectorstore database.
        The embeddings used for the vectorstore come from the Instructor Base model from HuggingFace.
        The generated responses are not perfect and are for demonstration purposes only, since the model is a quantized model used in inference mode.
        This Space was created by Pratik Dwivedi.
        GitHub - Dekode1859
        """
    )
    response_list = []
    print("Downloading Embeddings Model")
    with st.spinner('Downloading Embeddings Model...'):
        embeddings = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-base", model_kwargs={'device': 'cpu'})
    print("Loading LLM from HuggingFace")
    with st.spinner('Loading LLM from HuggingFace...'):
        # HuggingFaceHub calls the hosted Inference API; it authenticates via
        # the HUGGINGFACEHUB_API_TOKEN environment variable (a Space secret here).
        llm = HuggingFaceHub(repo_id="HuggingFaceH4/zephyr-7b-beta", model_kwargs={"temperature": 0.7, "max_new_tokens": 512, "top_p": 0.95, "top_k": 50})
    print("Creating Vector Database of PDF file content")
    with st.spinner('Creating Vector Database of PDF file content...'):
        vectorstore = make_vectorstore(embeddings)
    print("Initializing LLM for inference with source material")
    with st.spinner('Initializing LLM for inference with source material...'):
        conversation_chain = get_conversation(vectorstore, llm)
    queries = ["Can you give me an example from history where the enemy was crushed totally from the book?",
               "What's the point of making myself less accessible?",
               "Can you tell me the story of Queen Elizabeth I from this 48 laws of power book?"]
    # Answer each pre-defined query and render the formatted results.
    for query in queries:
        response = get_response(conversation_chain, query)
        response_list.append(response)
    queries, responses = response_formatter(response_list)
    for i in range(len(queries)):
        st.write("Query: ", queries[i])
        st.write("Response: ", responses[i])
        st.write("--------------------------------------------------")

if __name__ == "__main__":
    main()