DEV-chat-with-pdf-openai

Sleeping

File size: 6,093 Bytes

2f0e211
 
 
d05ba12
2f0e211
3e93b01
2f0e211
3e93b01
2f0e211
 
731dcdf
 
 
 
 
 
2f0e211
e455307
 
895d964
 
d05ba12
2f0e211
41297e0
2f0e211
8db718c
 
 
 
 
 
9c04c52
e455307
731dcdf
 
8db718c
 
731dcdf
 
8db718c
 
731dcdf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9c04c52
 
731dcdf
 
f340ee6
731dcdf
 
 
 
9c04c52
731dcdf
9c04c52
731dcdf
 
8db718c
 
 
731dcdf
8db718c
731dcdf
8db718c
9c04c52
 
 
 
731dcdf
9c04c52
731dcdf
8db718c
 
 
2f0e211
9c04c52
731dcdf
a08bac4
369d9fb
a08bac4
369d9fb
a08bac4
 
2f0e211
d05ba12
 
2f0e211
 
 
 
 
271a194
 
895d964
 
00e09c1
2f0e211
2545c31
211d0af
 
 
 
895d964
211d0af
 
 
 
2545c31
 
2f0e211
d05ba12
2545c31
8db718c
d05ba12
2545c31
d05ba12
 
 
 
8db718c
d05ba12
 
 
e455307
2f0e211
 
 
 
 
b91cab8
2f0e211
895d964
 
2f0e211
 
 
 
 
 
 
 
 
 
 
b505ef9
a08bac4
2f0e211
bbb69a1
e455307
2f0e211
a08bac4
e455307
 
a08bac4
2f0e211
 
 
 
e455307
 
2f0e211
41297e0

import gradio as gr
import os
import time
import threading
from langchain.document_loaders import OnlinePDFLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.llms import OpenAI
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import WebBaseLoader
from langchain.chains.summarize import load_summarize_chain
from langchain.chains.llm import LLMChain
from langchain.prompts import PromptTemplate
from langchain.chains.combine_documents.stuff import StuffDocumentsChain

# Copy the key stored under the "Your_API_Key" environment variable into
# OPENAI_API_KEY. os.getenv returns None when the variable is unset, and
# assigning None into os.environ raises TypeError, so only copy a real value
# and warn otherwise instead of crashing at import time.
_configured_api_key = os.getenv("Your_API_Key")
if _configured_api_key:
    os.environ['OPENAI_API_KEY'] = _configured_api_key
else:
    print('Warning: environment variable "Your_API_Key" is not set; '
          'OPENAI_API_KEY was left unchanged.')

# Global variable for tracking last interaction time
last_interaction_time = 0

def loading_pdf():
    """Return the placeholder status text displayed when a PDF load is started."""
    status_message = "Working on the upload. Also, pondering the usefulness of sporks..."
    return status_message

# Inside Chroma mod
def summary(self):
    """Describe the document collection held by ``self``.

    Reads ``self.documents`` and reports how many entries it holds and their
    mean length as measured by ``len()``.

    Returns:
        A string of the form
        ``"Number of documents: N, Average document length: X"``. An empty
        collection reports an average of ``0.0`` instead of raising
        ``ZeroDivisionError``.
    """
    num_documents = len(self.documents)
    if num_documents == 0:
        # Nothing to average; avoid dividing by zero.
        avg_doc_length = 0.0
    else:
        avg_doc_length = sum(len(doc) for doc in self.documents) / num_documents
    return f"Number of documents: {num_documents}, Average document length: {avg_doc_length}"

# PDF summary and query using stuffing
def pdf_changes(pdf_doc):
    """Summarize an uploaded PDF and build the retrieval chain used for chat.

    The PDF is loaded, split into chunks (size 1000, overlap 100), and
    summarized two chunks at a time with a "stuff" summarization chain. The
    same chunks are then embedded into a Chroma store, and a
    ConversationalRetrievalChain over that store is published through the
    module-level globals ``db`` and ``qa``.

    Args:
        pdf_doc: Uploaded file object; only its ``name`` attribute is used
            (it is handed to ``OnlinePDFLoader``). ``None`` means nothing was
            uploaded.

    Returns:
        A status string: ``"Ready. Full Summary:\\n..."`` on success, or an
        ``"Error processing PDF: ..."`` message if anything fails.
    """
    global db, qa

    # Nothing uploaded yet: report it plainly instead of surfacing an
    # AttributeError from ``pdf_doc.name``.
    if pdf_doc is None:
        return "Error processing PDF: no file was uploaded."

    try:
        # Initialize loader and load documents
        loader = OnlinePDFLoader(pdf_doc.name)
        documents = loader.load()

        # Split loaded documents into chunks
        text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
        texts = text_splitter.split_documents(documents)

        # Define the prompt for summarization
        prompt_template = """Write a concise summary of the following:
        "{text}"
        CONCISE SUMMARY:"""
        prompt = PromptTemplate.from_template(prompt_template)

        # Define the LLM chain with the specified prompt
        llm = ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo-16k")
        llm_chain = LLMChain(llm=llm, prompt=prompt)

        # Initialize StuffDocumentsChain
        stuff_chain = StuffDocumentsChain(
            llm_chain=llm_chain, document_variable_name="text"
        )

        # Summarize the chunks two at a time.
        summary_parts = []
        for start in range(0, len(texts), 2):
            batch = texts[start:start + 2]

            # The stuff chain formats Document objects itself, so it must be
            # given the Documents, not a pre-joined plain string.
            chunk_summary = stuff_chain.run(batch)

            # 1-based, inclusive label; the final batch may hold one chunk.
            first_chunk = start + 1
            last_chunk = start + len(batch)
            summary_parts.append(
                f"Summary of chunks {first_chunk}-{last_chunk}:\n{chunk_summary}\n"
            )
        full_summary = "".join(summary_parts)

        # Embed the chunks and expose the store and QA chain to the chat handlers
        embeddings = OpenAIEmbeddings()
        db = Chroma.from_documents(texts, embeddings)

        retriever = db.as_retriever()
        # NOTE(review): a chat model name is passed to the completion-style
        # OpenAI wrapper together with max_tokens=-1 and n=2 — confirm this is
        # intended for the installed langchain version.
        qa = ConversationalRetrievalChain.from_llm(
            llm=OpenAI(temperature=0.2, model_name="gpt-3.5-turbo", max_tokens=-1, n=2),
            retriever=retriever,
            return_source_documents=False
        )

        return f"Ready. Full Summary:\n{full_summary}"

    except Exception as e:
        # UI boundary: report the failure in the status box instead of raising.
        return f"Error processing PDF: {str(e)}"




def clear_data():
    """Reset the global QA chain and vector store to None and confirm it.

    Returns:
        The status text ``"Data cleared"``.
    """
    global db, qa
    db = qa = None
    return "Data cleared"

def add_text(history, text):
    """Append a new user message to the chat and record the interaction time.

    Args:
        history: Existing chat history (a list of message pairs).
        text: The message the user just submitted.

    Returns:
        A two-item tuple: a new history list ending with ``(text, None)``
        (the reply slot is still empty), and an empty string.
    """
    global last_interaction_time
    # Stamp the activity so the idle check knows the session is in use.
    last_interaction_time = time.time()
    pending_turn = (text, None)
    return history + [pending_turn], ""

def bot(history):
    """Answer the newest user message and store the reply in the history.

    Args:
        history: Chat history as a list of ``(user_message, bot_reply)``
            pairs; the last pair holds the question still awaiting a reply.

    Returns:
        The same list, with the last pair replaced by
        ``[user_message, formatted_reply]``.
    """
    user_message = history[-1][0]
    response = infer(user_message, history)
    # Put each sentence on its own line using a Markdown hard break ("  \n").
    # Replacing the separator (rather than split/join) keeps the period that
    # ends each sentence.
    sentences = response.replace('. ', '.  \n')
    formatted_response = f"**Bot:**\n\n{sentences}"
    # Replace the whole pair instead of assigning into it, so a tuple pair
    # (as produced by add_text) works as well as a list pair.
    history[-1] = [user_message, formatted_response]
    return history

def infer(question, history):
    """Ask the loaded QA chain a question, using earlier turns as context.

    Args:
        question: The user's current question.
        history: Full chat history as ``(human, ai)`` pairs; the last pair is
            the pending turn and is left out of the context sent to the chain.

    Returns:
        The chain's ``"answer"`` string, or an ``"Error querying chatbot: ..."``
        message when no PDF is loaded or the query fails.
    """
    # Take one snapshot of the global chain: it may be undefined (no PDF loaded
    # yet) or None (data cleared), and reading it once keeps the check and the
    # call consistent.
    qa_chain = globals().get("qa")
    if qa_chain is None:
        return "Error querying chatbot: no PDF is loaded. Please load a PDF first."

    try:
        # Every turn except the pending one becomes conversational context.
        chat_history = [(human, ai) for human, ai in history[:-1]]
        result = qa_chain({"question": question, "chat_history": chat_history, "system": "This is a world-class summarizing AI, be helpful."})
        return result["answer"]
    except Exception as e:
        # UI boundary: report the failure as the bot's reply instead of raising.
        return f"Error querying chatbot: {str(e)}"

def auto_clear_data(idle_timeout=1000):
    """Release the QA chain and vector store after a period of inactivity.

    Compares the current time with the global ``last_interaction_time`` and,
    if more than ``idle_timeout`` seconds have passed, sets the globals ``qa``
    and ``db`` to ``None``.

    Args:
        idle_timeout: Idle time in seconds after which the data is cleared.
    """
    # Both names must be declared global; otherwise the assignment below only
    # creates a local variable and the module-level object is never released.
    global qa, db, last_interaction_time
    if time.time() - last_interaction_time > idle_timeout:
        qa = None
        db = None

def periodic_clear(interval=1000):
    """Run the idle-data check forever, pausing between checks.

    Calls ``auto_clear_data()`` and then sleeps ``interval`` seconds, in an
    endless loop. This function never returns; run it on a background thread.

    Args:
        interval: Seconds to sleep between consecutive checks.
    """
    while True:
        auto_clear_data()
        time.sleep(interval)


# Run the cleanup loop as a daemon thread: the loop never ends, so a
# non-daemon thread would keep the interpreter alive after the app stops.
threading.Thread(target=periodic_clear, daemon=True).start()

# Page-level CSS handed to gr.Blocks: caps the element with id
# "col-container" at 700px wide and centers it horizontally.
css = """
#col-container {max-width: 700px; margin-left: auto; margin-right: auto;}
"""

# HTML header rendered at the top of the page through gr.HTML: the app name
# followed by short usage instructions.
title = """
<div style="text-align: center;max-width: 700px;">
    <h1>CauseWriter Chat with PDF • OpenAI</h1>
    <p style="text-align: center;">Upload a .PDF from your computer, click the "Load PDF to LangChain" button, <br />
    when everything is ready, you can start asking questions about the pdf. <br />
    This version is set to store chat history and uses OpenAI as LLM.</p>
</div>
"""

# Assemble the Gradio page: header, upload controls, chat window, then events.
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.HTML(title)

        # Upload area: file picker above a row with the status box and buttons.
        with gr.Column():
            pdf_doc = gr.File(label="Load a pdf", file_types=['.pdf'], type="file")
            with gr.Row():
                langchain_status = gr.Textbox(
                    label="Status", placeholder="", interactive=False
                )
                load_pdf = gr.Button("Convert PDF to Magic AI language")
                clear_btn = gr.Button("Clear Data")

        # Chat area: conversation window, question box and send button.
        chatbot = gr.Chatbot(value=[], elem_id="chatbot").style(height=450)
        question = gr.Textbox(
            label="Question", placeholder="Type your question and hit Enter"
        )
        submit_btn = gr.Button("Send Message")

    # One click registers two handlers: the placeholder status text and the
    # actual PDF processing, both writing to the status box.
    load_pdf.click(fn=loading_pdf, inputs=None, outputs=langchain_status, queue=False)
    load_pdf.click(fn=pdf_changes, inputs=[pdf_doc], outputs=[langchain_status], queue=False)
    clear_btn.click(fn=clear_data, outputs=[langchain_status], queue=False)

    # Pressing Enter in the question box and clicking the send button share the
    # same two-step flow: record the user's message, then produce the reply.
    for trigger in (question.submit, submit_btn.click):
        trigger(
            fn=add_text, inputs=[chatbot, question], outputs=[chatbot, question]
        ).then(fn=bot, inputs=chatbot, outputs=chatbot)

demo.launch()