"""Streamlit app that summarizes uploaded PDF files with a local Llama 2 model."""

import streamlit as st
from langchain.text_splitter import CharacterTextSplitter
from langchain.docstore.document import Document
from langchain.chains.summarize import load_summarize_chain
from langchain_community.llms import CTransformers
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from pypdf import PdfReader

# Page title (set_page_config is issued before any other Streamlit call).
st.set_page_config(page_title='🦜🔗 Text Summarization App')
st.title('🦜🔗 Text Summarization App')


def get_pdf_text(pdf_docs) -> str:
    """Return the text of every page of every given PDF, concatenated.

    Args:
        pdf_docs: Iterable of PDF sources that ``PdfReader`` accepts; in this
            app, the files returned by ``st.file_uploader``. ``None`` or an
            empty iterable yields an empty string.

    Returns:
        The page texts joined in order with no separator between pages.
    """
    page_texts = []
    for pdf in pdf_docs or []:
        # ``or ""`` keeps the join safe if a page yields no text at all.
        page_texts.extend(
            page.extract_text() or "" for page in PdfReader(pdf).pages
        )
    return "".join(page_texts)


def chunks_and_document(txt: str) -> list:
    """Split ``txt`` into chunks and wrap each chunk in a ``Document``.

    Args:
        txt: The raw text to split.

    Returns:
        A list of ``Document`` objects, one per chunk produced by a
        default-configured ``CharacterTextSplitter``.
    """
    # NOTE(review): the splitter runs with its defaults while the model below
    # is configured with 'context_length': 700 -- confirm chunks fit the model.
    text_splitter = CharacterTextSplitter()
    return [Document(page_content=chunk) for chunk in text_splitter.split_text(txt)]


def load_llm():
    """Instantiate the Llama 2 model through ``CTransformers``.

    Returns:
        A ``CTransformers`` LLM built from the local
        ``llama-2-7b-chat.ggmlv3.q2_K.bin`` file.
    """
    # The original built a CallbackManager with a StreamingStdOutCallbackHandler
    # here but never passed it to the model, so it had no effect; the dead
    # local was dropped (the imports are kept for anyone who wants to attach it).
    # NOTE(review): a fresh model object is created on every call.
    return CTransformers(
        model="llama-2-7b-chat.ggmlv3.q2_K.bin",
        model_type="llama",
        config={'max_new_tokens': 600, 'temperature': 0.5, 'context_length': 700},
    )


def chains_and_response(docs):
    """Run a map-reduce summarization chain over ``docs``.

    Args:
        docs: The ``Document`` objects to summarize.

    Returns:
        Whatever ``chain.invoke(docs)`` returns; ``main`` reads the summary
        from its ``"output_text"`` entry.
    """
    llm = load_llm()
    chain = load_summarize_chain(llm, chain_type='map_reduce')
    return chain.invoke(docs)


def main() -> None:
    """Render the upload sidebar and, on submit, show the summary."""
    # Initialize messages if not already present (not read elsewhere in this file).
    if "messages" not in st.session_state:
        st.session_state.messages = []

    # Sidebar holds only the inputs; results are rendered in the main area.
    with st.sidebar:
        st.title("Menu:")
        pdf_docs = st.file_uploader(
            "Upload your PDF Files and Click on the Submit & Process Button",
            accept_multiple_files=True,
        )
        submitted = st.button("Submit & Process")

    if not submitted:
        return

    if not pdf_docs:
        st.warning("Please upload at least one PDF file before processing.")
        return

    with st.spinner("Processing..."):
        txt_input = get_pdf_text(pdf_docs)
        docs = chunks_and_document(txt_input)
        # Skip the (slow) model call when there is nothing to summarize.
        response = chains_and_response(docs) if docs else None

    if response is None:
        st.warning("No extractable text was found in the uploaded PDF files.")
        return

    st.title('📝✅ Summarization Result')
    # The chain result is a mapping; iterating over it would only display its
    # key names, so show the summary text itself.
    st.info(response["output_text"])


if __name__ == "__main__":
    main()