Update app.py
app.py
CHANGED
@@ -7,44 +7,81 @@ from langchain.chains import RetrievalQA
 from langchain_community.llms import HuggingFaceHub
 
 def make_vectorstore(embeddings):
-    …
-    loader = PyPDFDirectoryLoader("data")
+    loader = PyPDFDirectoryLoader("/content/data")
     documents = loader.load()
-    text_splitter = CharacterTextSplitter(chunk_size=…
+    text_splitter = CharacterTextSplitter(chunk_size=400, chunk_overlap=0)
     texts = text_splitter.split_documents(documents)
     docsearch = FAISS.from_documents(texts, embeddings)
 
     return docsearch
 
-def …
+def get_conversation(vectorstore, model):
 
-    …
-        llm=…
-        chain_type="stuff",
+    conversation_chain = RetrievalQA.from_llm(
+        llm=model,
+        # chain_type="stuff",
         retriever=vectorstore.as_retriever())
 
-    return …
+    return conversation_chain
 
-def get_response(…
-    …
-    response = …
-    …
+def get_response(conversation_chain, query):
+    # get the response
+    response = conversation_chain.invoke(query)
     return response
 
+def response_formatter(resp_list):
+    queries = []
+    responses = []
+    for resp in resp_list:
+        # pull the question and the helpful answer out of the raw result text
+        content = resp["result"]
+        # the question sits between '\nQuestion: ' and '\nHelpful Answer: '
+        question = content.split('\nQuestion: ')[1].split('\nHelpful Answer: ')[0]
+        queries.append(question)
+        # the answer follows '\nHelpful Answer: '
+        answer = content.split('\nHelpful Answer: ')[1]
+        responses.append(answer)
+    return queries, responses
+
 def main():
-    …
     st.title("BetterZila RAG Enabled LLM")
-    …
+
+    # a sidebar that shows a disclaimer and a description of the app
+    st.sidebar.title("About")
+    st.sidebar.info("This app is a demo of BetterZila RAG Enabled LLM")
+    # write a paragraph in the sidebar
+    st.sidebar.write("This app is a demo of BetterZila RAG Enabled LLM")
+
+    response_list = []
+
+    print("Downloading Embeddings Model")
+    with st.spinner('Downloading Embeddings Model...'):
+        embeddings = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-base", model_kwargs={'device': 'cpu'})
+
+    print("Loading LLM from HuggingFace")
+    with st.spinner('Loading LLM from HuggingFace...'):
+        llm = HuggingFaceHub(repo_id="HuggingFaceH4/zephyr-7b-beta", model_kwargs={"temperature": 0.7, "max_new_tokens": 512, "top_p": 0.95, "top_k": 50})
+
+    print("Creating Vector Database of PDF file content")
+    with st.spinner('Creating Vector Database of PDF file content...'):
+        vectorstore = make_vectorstore(embeddings)
+
+    print("Initializing LLM for inference with source material")
+    with st.spinner('Initializing LLM for inference with source material...'):
+        conversation_chain = get_conversation(vectorstore, llm)
+
+    queries = ["Can you give me an example from history where the enemy was crushed totally from the book?",
+               "What's the point of making myself less accessible?",
+               "Can you tell me the story of Queen Elizabeth I from this 48 laws of power book?"]
+
     for query in queries:
-        …
-        response …
-        …
+        response = get_response(conversation_chain, query)
+        response_list.append(response)
+    queries, responses = response_formatter(response_list)
+    for i in range(len(queries)):
+        st.write("Query: ", queries[i])
+        st.write("Response: ", responses[i])
+        st.write("--------------------------------------------------")
 
 if __name__ == "__main__":
     main()
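Note on the parsing in response_formatter: it assumes each RetrievalQA result dict exposes the full prompt text under its "result" key, with the default prompt's "Question:" / "Helpful Answer:" markers still present. A minimal sketch of that split, using an invented payload (the sample dict below is illustrative, not actual model output):

# Hypothetical entry of response_list; the "result" text is invented here
# purely to illustrate the format response_formatter expects.
sample = {
    "query": "What's the point of making myself less accessible?",
    "result": ("Use the following pieces of context to answer the question.\n"
               "Question: What's the point of making myself less accessible?\n"
               "Helpful Answer: Scarcity raises the perceived value of your presence."),
}

content = sample["result"]
# Same splits as response_formatter: the question sits between the two
# markers, the answer follows the second marker.
question = content.split('\nQuestion: ')[1].split('\nHelpful Answer: ')[0]
answer = content.split('\nHelpful Answer: ')[1]
print(question)  # What's the point of making myself less accessible?
print(answer)    # Scarcity raises the perceived value of your presence.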