import streamlit as st
from langchain.text_splitter import CharacterTextSplitter
from langchain.docstore.document import Document
from langchain.chains.summarize import load_summarize_chain
# from langchain_community.llms import CTransformers
# from langchain_community.llms import HuggingFaceHub
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from pypdf import PdfReader
from langchain_community.llms import LlamaCpp

# Only required if the commented-out HuggingFaceHub loader below is used
HUGGINGFACEHUB_API_TOKEN = st.secrets["HUGGINGFACEHUB_API_TOKEN"]

# Page title
st.set_page_config(page_title='🦜🔗 Text Summarization App')
st.title('🦜🔗 Text Summarization App')


# Function to read all PDF files and return their combined text
def get_pdf_text(pdf_docs):
    text = ""
    for pdf in pdf_docs:
        pdf_reader = PdfReader(pdf)
        for page in pdf_reader.pages:
            text += page.extract_text() or ""  # extract_text() can return None for image-only pages
    return text


# Function to split the text into smaller chunks and convert them into Document format
def chunks_and_document(txt):
    text_splitter = CharacterTextSplitter()
    texts = text_splitter.split_text(txt)
    docs = [Document(page_content=t) for t in texts]
    return docs


# Loading Llama 2 as the LLM
def load_llm():
    # We instantiate the callback manager with a streaming stdout handler
    callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

    # Alternative loaders, kept for reference:
    # llm = CTransformers(
    #     model="llama-2-7b-chat.ggmlv3.q2_K.bin",
    #     model_type="llama",
    #     config={'max_new_tokens': 600,
    #             'temperature': 0.5,
    #             'context_length': 700}
    # )
    # llm = HuggingFaceHub(
    #     repo_id="meta-llama/Llama-2-7b-chat-hf",  # Official Llama-2 model
    #     model_kwargs={"temperature": 0.5, "max_length": 500}
    # )

    # Loading the LLM model
    llm = LlamaCpp(
        model_path="TheBloke/Llama-2-7B-GGUF/llama-2-7b.Q8_0.gguf",  # Local path to your downloaded GGUF model
        temperature=0.5,
        max_tokens=500,
        n_ctx=1024,
        callback_manager=callback_manager,  # stream tokens to stdout while generating
    )
    return llm


# Function to apply the LLM to our documents and return the summary text
def chains_and_response(docs):
    llm = load_llm()
    chain = load_summarize_chain(llm, chain_type='map_reduce')
    # invoke() returns a dict; the summary itself is stored under the 'output_text' key
    return chain.invoke(docs)["output_text"]


def main():
    # Initialize messages if not already present
    if "messages" not in st.session_state:
        st.session_state.messages = []

    # Sidebar for uploading PDF files
    with st.sidebar:
        st.title("Menu:")
        pdf_docs = st.file_uploader(
            "Upload your PDF Files and Click on the Submit & Process Button",
            accept_multiple_files=True
        )
        if st.button("Submit & Process"):
            with st.spinner("Processing..."):
                txt_input = get_pdf_text(pdf_docs)
                docs = chunks_and_document(txt_input)
                response = chains_and_response(docs)
                st.title('📝✅ Summarization Result')
                st.info(response)


main()
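# Usage note (assumptions: the script is saved as app.py and the GGUF model file
# referenced in load_llm() has been downloaded locally, e.g. from the
# TheBloke/Llama-2-7B-GGUF repository):
#   streamlit run app.py
#
# Packages imported above (install as needed): streamlit, langchain,
# langchain-community, pypdf, llama-cpp-python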