Spaces:
Sleeping
Sleeping
File size: 2,543 Bytes
a2cccdb 3cf26ac e4b7b4c f3ffdfc e4b7b4c a2cccdb e4b7b4c a2cccdb e4b7b4c 9bf72c1 e4b7b4c f3ffdfc e4b7b4c 9bf72c1 e4b7b4c 5b0f27d e4b7b4c 5b0f27d e4b7b4c f3ffdfc e4b7b4c 5b0f27d e4b7b4c 5b0f27d e4b7b4c a2cccdb e4b7b4c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 |
import streamlit as st
from langchain_community.document_loaders.pdf import PyPDFDirectoryLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.embeddings import HuggingFaceInstructEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.chains import ConversationalRetrievalChain
from langchain_community.llms import HuggingFaceHub
from langchain.memory import ConversationBufferMemory
def make_vectorstore(embeddings, data_dir="data", chunk_size=1400, chunk_overlap=0):
    """Build a FAISS vector store from the PDF files found in *data_dir*.

    Args:
        embeddings: Embedding model handed to ``FAISS.from_documents``.
        data_dir: Directory passed to ``PyPDFDirectoryLoader``. Defaults to
            ``"data"`` (resolved against the current working directory).
        chunk_size: ``chunk_size`` given to ``CharacterTextSplitter``.
        chunk_overlap: ``chunk_overlap`` given to ``CharacterTextSplitter``.

    Returns:
        The FAISS vector store built from the split documents.

    Raises:
        ValueError: If loading and splitting produced no chunks at all
            (for example, the directory is missing or contains no PDFs).
    """
    documents = PyPDFDirectoryLoader(data_dir).load()

    text_splitter = CharacterTextSplitter(
        chunk_size=chunk_size, chunk_overlap=chunk_overlap
    )
    texts = text_splitter.split_documents(documents)

    # Fail early with an actionable message instead of letting the index
    # construction blow up on an empty list of chunks.
    if not texts:
        raise ValueError(
            f"No text could be loaded from PDF files in {data_dir!r}; "
            "cannot build a vector store."
        )

    return FAISS.from_documents(texts, embeddings)
def get_conversation(vectorstore):
    """Create a conversational retrieval chain backed by *vectorstore*.

    Args:
        vectorstore: Object exposing ``as_retriever()``; its retriever supplies
            the context documents for each question.

    Returns:
        A ``ConversationalRetrievalChain`` that uses the hosted
        ``google/flan-t5-xxl`` model and keeps the chat history in a
        ``ConversationBufferMemory``.

    Raises:
        KeyError: If ``hf_token`` is not present in ``st.secrets``.
    """
    # Memory object that stores the conversation history under "chat_history".
    memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

    llm = HuggingFaceHub(
        repo_id="google/flan-t5-xxl",
        model_kwargs={"temperature": 0.5, "max_length": 512},
        huggingfacehub_api_token=st.secrets["hf_token"],
    )

    # ConversationalRetrievalChain is constructed with ``from_llm``;
    # ``from_chain_type`` is a RetrievalQA constructor and does not exist here.
    return ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=vectorstore.as_retriever(),
        chain_type="stuff",
        memory=memory,
    )
def get_response(conversation_chain, query):
    """Run *query* through *conversation_chain* and return what ``run`` yields."""
    return conversation_chain.run(query)
def main():
    """Render the Streamlit page: build the RAG pipeline and answer fixed queries.

    Builds the embeddings, the vector store and the conversation chain, then
    runs each predefined query and writes the query and its answer to the page.
    """
    st.title("BetterZila RAG Enabled LLM")

    embeddings = HuggingFaceInstructEmbeddings(
        model_name="google/t5-v1_1-xl", model_kwargs={"device": "cpu"}
    )
    vectorstore = make_vectorstore(embeddings)
    conversation_chain = get_conversation(vectorstore)

    queries = [
        "Can you give me an example from history where the enemy was crushed totally from the book?",
        "What's the point of making myself less accessible?",
        "Can you tell me the story of Queen Elizabeth I from this 48 laws of power book?",
    ]
    for query in queries:
        st.subheader(f"Query: {query}")
        response = get_response(conversation_chain, query)
        st.write(query)
        # get_response returns the result of ``conversation_chain.run``, which
        # is the answer text itself, not a dict; indexing it with a string key
        # would raise TypeError.
        st.write(response)

    st.success("Responses generated!")
# Script entry point: only run the app when executed directly.
if __name__ == "__main__":
    main()