"""Streamlit app: upload a PDF and ask questions about its contents.

The PDF text is split into overlapping chunks, embedded through the
Hugging Face Hub, indexed with FAISS, and the chunks most similar to the
user's question are passed to a hosted LLM through a "stuff" QA chain.
"""
import os

import streamlit as st
from PyPDF2 import PdfReader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains.question_answering import load_qa_chain
from langchain.callbacks import get_openai_callback
from langchain import HuggingFaceHub, LLMChain
from langchain.embeddings import HuggingFaceHubEmbeddings, HuggingFaceInferenceAPIEmbeddings

# Hugging Face API token. A missing HF_TOKEN raises KeyError at import time.
token = os.environ['HF_TOKEN']

# Embedding model used to vectorise the PDF chunks.
repo_id = "sentence-transformers/all-mpnet-base-v2"
hf = HuggingFaceHubEmbeddings(
    repo_id=repo_id,
    task="feature-extraction",
    huggingfacehub_api_token=token,
)

# Alternative embedding backends that were tried, kept for reference:
# embeddings = HuggingFaceInferenceAPIEmbeddings(
#     api_key=token, model_name="sentence-transformers/all-MiniLM-l6-v2"
# )
# embeddings = OpenAIEmbeddings()


def _extract_text(pdf) -> str:
    """Return the concatenated text of every page of the uploaded PDF."""
    pdf_reader = PdfReader(pdf)
    # `or ""` keeps the join safe should a page yield no text at all.
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)


def _build_knowledge_base(text: str):
    """Split *text* into chunks and index them; return None if there are none."""
    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    chunks = text_splitter.split_text(text)
    if not chunks:
        # Nothing to embed; the caller reports this instead of letting the
        # vector store be built from an empty list.
        return None
    return FAISS.from_texts(chunks, hf)


def _answer_question(knowledge_base, user_question: str):
    """Run the QA chain over the chunks most similar to *user_question*."""
    docs = knowledge_base.similarity_search(user_question)

    llm = HuggingFaceHub(
        repo_id='HuggingFaceH4/zephyr-7b-beta',
        model_kwargs={'temperature': 0.01, "max_length": 2048},
        huggingfacehub_api_token=token,
    )
    chain = load_qa_chain(llm, chain_type="stuff")
    # Kept from the OpenAI-based version of this app so that switching the
    # LLM back does not require restructuring; the callback is not inspected.
    with get_openai_callback():
        return chain.run(input_documents=docs, question=user_question)


def main():
    """Render the page: PDF upload, question box and the model's answer."""
    st.set_page_config(page_title="Ask your PDF")
    st.header("Ask your PDF 💬")

    # upload file
    pdf = st.file_uploader("Upload your PDF", type="pdf")
    if pdf is None:
        return

    # extract the text and index it
    knowledge_base = _build_knowledge_base(_extract_text(pdf))
    if knowledge_base is None:
        st.warning("No extractable text was found in this PDF.")
        return

    # show user input
    user_question = st.text_input("Ask a question about your PDF:")
    if user_question:
        st.write(_answer_question(knowledge_base, user_question))


if __name__ == '__main__':
    main()