Commit · 2b90e18
Parent(s): 6360179
revert
app.py CHANGED
@@ -35,49 +35,26 @@ summary_state = gr.State(initial_value="pending")
 # PDF summary and query using stuffing
 def pdf_changes(pdf_doc):
     try:
-
+        if pdf_doc is None:
+            return "No PDF uploaded."
+
         loader = OnlinePDFLoader(pdf_doc.name)
         documents = loader.load()
-
-
-        prompt_template = """Write a concise summary of the following:
-        "{text}"
-        CONCISE SUMMARY:"""
-        prompt = PromptTemplate.from_template(prompt_template)
-
-        # Define the LLM chain with the specified prompt
-        llm = ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo-16k")
-        llm_chain = LLMChain(llm=llm, prompt=prompt)
-
-        # Initialize StuffDocumentsChain
-        stuff_chain = StuffDocumentsChain(
-            llm_chain=llm_chain, document_variable_name="text"
-        )
-
-        # Generate summary using StuffDocumentsChain
-        global full_summary
-        full_summary = stuff_chain.run(documents)
-        # Update the state variable
-        return {summary_state: full_summary}
-
-        # Other existing logic for Chroma, embeddings, and retrieval
+        text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
+        texts = text_splitter.split_documents(documents)
         embeddings = OpenAIEmbeddings()
         global db
-        db = Chroma.from_documents(
-
+        db = Chroma.from_documents(texts, embeddings)
         retriever = db.as_retriever()
         global qa
         qa = ConversationalRetrievalChain.from_llm(
-            llm=OpenAI(temperature=0.2, model_name="gpt-3.5-turbo
+            llm=OpenAI(temperature=0.2, model_name="gpt-3.5-turbo", max_tokens=-1, n=2),
             retriever=retriever,
             return_source_documents=False
         )
-
-        return f"Ready. Full Summary loaded."
-
+        return "Ready"
     except Exception as e:
-        return f"Error
-
+        return f"Error loading PDF: {e}"
 
 
 def clear_data():
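
For reference, below is a minimal sketch of how pdf_changes reads after this revert, assembled from the added and unchanged lines in the hunk above. The import paths are assumptions (app.py's import block is not part of this diff) and follow the classic pre-0.1 langchain package layout; the loader, splitter, and chain parameters are copied verbatim from the new side of the diff.

# Assumed imports; not shown in this diff.
from langchain.document_loaders import OnlinePDFLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.chains import ConversationalRetrievalChain
from langchain.llms import OpenAI

# PDF summary and query using stuffing
def pdf_changes(pdf_doc):
    try:
        if pdf_doc is None:
            return "No PDF uploaded."

        # Load the uploaded PDF and split it into overlapping chunks.
        loader = OnlinePDFLoader(pdf_doc.name)
        documents = loader.load()
        text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
        texts = text_splitter.split_documents(documents)

        # Embed the chunks and build a Chroma vector store for retrieval.
        embeddings = OpenAIEmbeddings()
        global db
        db = Chroma.from_documents(texts, embeddings)
        retriever = db.as_retriever()

        # Conversational retrieval chain used by the query handler elsewhere
        # in app.py; LLM parameters are taken verbatim from the diff.
        global qa
        qa = ConversationalRetrievalChain.from_llm(
            llm=OpenAI(temperature=0.2, model_name="gpt-3.5-turbo", max_tokens=-1, n=2),
            retriever=retriever,
            return_source_documents=False
        )
        return "Ready"
    except Exception as e:
        return f"Error loading PDF: {e}"

In short, the revert drops the StuffDocumentsChain summarization path (prompt template, LLMChain, and the full_summary state update) and returns to the chunk-embed-retrieve flow: split the PDF into 1000-character chunks with 100-character overlap, embed them into a Chroma store, and answer queries through ConversationalRetrievalChain.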