DEV-chat-with-pdf-openai

Sleeping

App Files Files Community

wholewhale committed on Oct 18, 2023

Commit

731dcdf

1 Parent(s): f340ee6

stuffing

Browse files

Files changed (1) hide show

app.py +40 -19

app.py CHANGED Viewed

@@ -8,6 +8,12 @@ from langchain.llms import OpenAI
 from langchain.embeddings import OpenAIEmbeddings
 from langchain.vectorstores import Chroma
 from langchain.chains import ConversationalRetrievalChain
 os.environ['OPENAI_API_KEY'] = os.getenv("Your_API_Key")
@@ -25,49 +31,64 @@ def summary(self):
 # PDF summary and query using stuffing
 def pdf_changes(pdf_doc):
-     try:
         loader = OnlinePDFLoader(pdf_doc.name)
         documents = loader.load()
         text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
         texts = text_splitter.split_documents(documents)
         # Initialize summary variable
         full_summary = ""
-        # Divide the text into smaller chunks, for example, 2 pages per chunk
         for i in range(0, len(texts), 2):
-            chunk = " ".join([doc.page_content for doc in texts[i:i+2]])  # Replace '.content' with the correct attribute
-            # Load the summarization chain with stuffing method
-            stuff_chain = load_summarize_chain(vertex_llm_text, chain_type="stuff", prompt=prompt)
-            # Generate summary for the chunk
-            chunk_summary = stuff_chain.run(chunk)
-            # Add the chunk summary to the full summary
             full_summary += f"Summary of pages {i+1}-{i+3}:\n{chunk_summary}\n"
         embeddings = OpenAIEmbeddings()
         global db
         db = Chroma.from_documents(texts, embeddings)
         retriever = db.as_retriever()
-        global qa
         qa = ConversationalRetrievalChain.from_llm(
             llm=OpenAI(temperature=0.2, model_name="gpt-3.5-turbo", max_tokens=-1, n=2),
             retriever=retriever,
             return_source_documents=False
         )
         return f"Ready. Full Summary:\n{full_summary}"
     except Exception as e:
         return f"Error processing PDF: {str(e)}"
 def clear_data():
     global qa, db
     qa = None

 from langchain.embeddings import OpenAIEmbeddings
 from langchain.vectorstores import Chroma
 from langchain.chains import ConversationalRetrievalChain
+from langchain.chat_models import ChatOpenAI
+from langchain.document_loaders import WebBaseLoader
+from langchain.chains.summarize import load_summarize_chain
+from langchain.chains.llm import LLMChain
+from langchain.prompts import PromptTemplate
+from langchain.chains.combine_documents.stuff import StuffDocumentsChain
 os.environ['OPENAI_API_KEY'] = os.getenv("Your_API_Key")
 # PDF summary and query using stuffing
 def pdf_changes(pdf_doc):
     """Load a PDF, summarize it two chunks at a time, and build the QA chain.

     Side effects: rebinds the module-level ``db`` (a Chroma store built from
     the split chunks) and ``qa`` (a ConversationalRetrievalChain over that
     store's retriever).

     Args:
         pdf_doc: Object whose ``name`` attribute is handed to
             ``OnlinePDFLoader`` (presumably an uploaded-file handle from the
             UI layer -- confirm against the caller).

     Returns:
         A string starting with "Ready. Full Summary:" on success, or
         "Error processing PDF: <message>" if any step raises.
     """
     global db, qa
     try:
         # Load the PDF and split it into ~1000-character chunks.
         loader = OnlinePDFLoader(pdf_doc.name)
         documents = loader.load()
         text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
         texts = text_splitter.split_documents(documents)

         # Summarization prompt; the literal (including its embedded
         # indentation) is kept exactly as the model has been seeing it.
         prompt_template = (
             "Write a concise summary of the following:\n"
             '        "{text}"\n'
             "        CONCISE SUMMARY:"
         )
         prompt = PromptTemplate.from_template(prompt_template)
         llm = ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo-16k")
         llm_chain = LLMChain(llm=llm, prompt=prompt)
         # The chain stuffs every input Document into the "{text}" slot.
         # A single-space separator reproduces the previous " ".join(...).
         stuff_chain = StuffDocumentsChain(
             llm_chain=llm_chain,
             document_variable_name="text",
             document_separator=" ",
         )

         # Summarize two chunks per request. The chain needs Document objects
         # (it reads page_content), so pass the slice itself, not raw strings.
         summary_parts = []
         for start in range(0, len(texts), 2):
             pair = texts[start:start + 2]
             chunk_summary = stuff_chain.run(pair)
             # 1-based inclusive range of the chunks actually summarized;
             # the final group may hold a single chunk.
             first = start + 1
             last = start + len(pair)
             summary_parts.append(
                 f"Summary of pages {first}-{last}:\n{chunk_summary}\n"
             )
         full_summary = "".join(summary_parts)

         # Index the same chunks for conversational retrieval.
         embeddings = OpenAIEmbeddings()
         db = Chroma.from_documents(texts, embeddings)
         retriever = db.as_retriever()
         # NOTE(review): "gpt-3.5-turbo" is a chat model wrapped here in the
         # completion-style OpenAI class with max_tokens=-1 and n=2; left
         # unchanged -- confirm this is intended rather than ChatOpenAI.
         qa = ConversationalRetrievalChain.from_llm(
             llm=OpenAI(temperature=0.2, model_name="gpt-3.5-turbo", max_tokens=-1, n=2),
             retriever=retriever,
             return_source_documents=False
         )
         return f"Ready. Full Summary:\n{full_summary}"
     except Exception as e:
         # UI boundary: report the failure as text instead of raising.
         return f"Error processing PDF: {str(e)}"
 def clear_data():
     global qa, db
     qa = None