File size: 5,884 Bytes
2f0e211
 
 
d05ba12
2f0e211
3e93b01
2f0e211
3e93b01
2f0e211
 
4dcf9b3
 
 
 
 
 
2f0e211
e455307
 
895d964
 
d05ba12
2f0e211
41297e0
2f0e211
4dcf9b3
 
 
 
 
 
a7aa0eb
 
 
4dcf9b3
e455307
d8804c0
2b90e18
 
 
d8804c0
 
2b90e18
 
d8804c0
 
2b90e18
d8804c0
 
 
2b90e18
4dcf9b3
d8804c0
 
2b90e18
d8804c0
2b90e18
d8804c0
9c04c52
a08bac4
4dcf9b3
a08bac4
4dcf9b3
a08bac4
 
2f0e211
d05ba12
 
2f0e211
 
 
 
4dcf9b3
 
 
 
 
 
 
d8804c0
 
895d964
 
00e09c1
4dcf9b3
 
 
 
 
 
 
 
 
 
 
 
 
 
2f0e211
d64fc58
a7aa0eb
 
d64fc58
 
 
a7aa0eb
d64fc58
a7aa0eb
6360179
 
a7aa0eb
d05ba12
a7aa0eb
e455307
2f0e211
 
a7aa0eb
2f0e211
 
b91cab8
2f0e211
4dcf9b3
 
2f0e211
 
a7aa0eb
2f0e211
a7aa0eb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2f0e211
a7aa0eb
 
 
 
 
 
 
 
df75cae
2f0e211
6360179
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
import gradio as gr
import os
import time
import threading
from langchain.document_loaders import OnlinePDFLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.llms import OpenAI
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import WebBaseLoader
from langchain.chains.summarize import load_summarize_chain
from langchain.chains.llm import LLMChain
from langchain.prompts import PromptTemplate
from langchain.chains.combine_documents.stuff import StuffDocumentsChain

# Copy the key stored under the "Your_API_Key" environment variable into
# OPENAI_API_KEY. os.environ only accepts strings, so assigning the None that
# os.getenv returns for an unset variable would raise TypeError at import time;
# in that case the environment is left untouched.
_api_key = os.getenv("Your_API_Key")
if _api_key is not None:
    os.environ['OPENAI_API_KEY'] = _api_key

# Global variable for tracking last interaction time (time.time() seconds;
# 0 means no interaction has been recorded yet)
last_interaction_time = 0

def loading_pdf():
    """Return a fixed, light-hearted "upload in progress" status message."""
    status_message = "Working on the upload. Also, pondering the usefulness of sporks..."
    return status_message

# Inside Chroma mod
def summary(self):
    """Describe how many documents ``self`` holds and their average length.

    Reads ``self.documents`` and measures each entry with ``len()``.

    Returns:
        A string of the form
        ``"Number of documents: N, Average document length: X"``. An empty
        collection reports an average of ``0.0`` instead of dividing by zero.
    """
    documents = self.documents
    num_documents = len(documents)
    if num_documents == 0:
        # The mean of nothing is undefined; report 0.0 rather than raising
        # ZeroDivisionError.
        avg_doc_length = 0.0
    else:
        avg_doc_length = sum(len(doc) for doc in documents) / num_documents
    return f"Number of documents: {num_documents}, Average document length: {avg_doc_length}"

# Gradio state
# gr.State receives its starting value through the ``value`` keyword; it has
# no ``initial_value`` option, so that spelling never applied "pending".
summary_state = gr.State(value="pending")

# PDF summary and query using stuffing
# Sentinel meaning "no state argument was passed"; None cannot serve because it
# is a legitimate state value.
_NO_STATE = object()


def pdf_changes(pdf_doc, summary_state=_NO_STATE):
    """Index an uploaded PDF and build the conversational QA chain.

    The PDF is loaded, split into 1000-character chunks (100 overlap),
    embedded into a Chroma store, and wrapped in a
    ConversationalRetrievalChain. On success the module-level globals ``db``
    and ``qa`` are replaced together, so a failure part-way through never
    leaves a new store paired with an old chain.

    Args:
        pdf_doc: Uploaded file object exposing a ``.name`` path, or ``None``
            when nothing was uploaded.
        summary_state: Optional pass-through value for callers (such as a
            Gradio event that lists a State as both input and output) that
            supply a second argument and expect it back. It is returned
            unchanged.

    Returns:
        The status string (``"Ready"``, ``"No PDF uploaded."`` or
        ``"Error loading PDF: ..."``) when called with one argument, or a
        ``(status, summary_state)`` tuple when ``summary_state`` was supplied.
    """
    global db, qa

    def _reply(status):
        # Shape the result to match the number of outputs the caller expects.
        if summary_state is _NO_STATE:
            return status
        return status, summary_state

    if pdf_doc is None:
        return _reply("No PDF uploaded.")

    try:
        loader = OnlinePDFLoader(pdf_doc.name)
        documents = loader.load()
        text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
        texts = text_splitter.split_documents(documents)
        embeddings = OpenAIEmbeddings()
        new_db = Chroma.from_documents(texts, embeddings)
        retriever = new_db.as_retriever()
        new_qa = ConversationalRetrievalChain.from_llm(
            llm=OpenAI(temperature=0.2, model_name="gpt-3.5-turbo", max_tokens=-1, n=2),
            retriever=retriever,
            return_source_documents=False
        )
    except Exception as e:
        # UI boundary: surface the failure as status text instead of crashing
        # the event handler.
        return _reply(f"Error loading PDF: {e}")

    db, qa = new_db, new_qa
    return _reply("Ready")


def clear_data():
    """Forget the loaded QA chain and vector store.

    Rebinds the module-level ``qa`` and ``db`` globals to ``None``.

    Returns:
        The status string ``"Data cleared"``.
    """
    global qa, db
    qa = db = None
    return "Data cleared"

def add_text(history, text):
    """Queue the user's message as a new chat turn and stamp the interaction time.

    Args:
        history: Existing chat history (a list of turns); it is not mutated.
        text: The message the user just submitted.

    Returns:
        A ``(new_history, "")`` pair: the history extended with
        ``(text, None)`` and an empty string (the second output of the event).
    """
    global last_interaction_time
    # Record activity first so the idle timer restarts with every message.
    last_interaction_time = time.time()
    pending_turn = (text, None)  # reply slot stays empty until the bot answers
    extended = history + [pending_turn]
    return extended, ""

def bot(history):
    """Fill in the bot's reply for the most recent chat turn.

    When the latest question mentions "summary" and a non-empty module-level
    ``full_summary`` exists, that text is used as the reply; otherwise the
    question is answered through ``infer``. Either way the reply is formatted
    and written into the last turn, and the history is returned.

    Args:
        history: Chat history as a list of ``(user_message, bot_message)``
            pairs (lists or tuples).

    Returns:
        The same ``history`` list with the last turn replaced by
        ``[user_message, formatted_reply]``. An empty history is returned
        unchanged.
    """
    if not history:
        return history

    question = history[-1][0]
    # full_summary is not assigned anywhere in the visible code, so look it up
    # defensively instead of risking a NameError.
    stored_summary = globals().get("full_summary")
    if 'summary' in question.lower() and stored_summary:
        response = stored_summary
    else:
        response = infer(question, history)

    # Markdown hard line break ("two spaces + newline") after each sentence.
    sentences = '  \n'.join(response.split('. '))
    formatted_response = f"**Bot:**\n\n{sentences}"
    # Replace the whole turn rather than assigning into it, so tuple turns
    # (as produced by add_text) work as well as list turns.
    history[-1] = [question, formatted_response]
    return history


def infer(question, history):
    """Ask the loaded QA chain a question, supplying the earlier conversation.

    Args:
        question: The user's current question.
        history: Full chat history as ``(human, ai)`` pairs; the last entry
            (the turn being answered) is excluded from the context.

    Returns:
        The chain's ``"answer"`` text, or a string beginning with
        ``"Error querying chatbot: "`` when no chain is loaded or the query
        fails.
    """
    # qa is only created once a PDF has been loaded and is reset to None when
    # data is cleared; report that plainly instead of a cryptic
    # "'NoneType' object is not callable".
    chain = globals().get("qa")
    if chain is None:
        return "Error querying chatbot: no PDF has been loaded yet."

    try:
        chat_history = [(human, ai) for human, ai in history[:-1]]
        result = chain({"question": question, "chat_history": chat_history, "system": "This is a world-class summarizing AI, be helpful."})
        return result["answer"]
    except Exception as e:
        # UI boundary: turn any failure into reply text.
        return f"Error querying chatbot: {str(e)}"

def auto_clear_data():
    """Drop the loaded chain and store once the app has been idle too long.

    Compares the current time with the module-level ``last_interaction_time``;
    when more than 1000 seconds have passed, ``qa`` and ``db`` are rebound to
    ``None`` and a confirmation line is printed. Otherwise nothing happens.
    """
    global qa, db, last_interaction_time
    idle_seconds = time.time() - last_interaction_time
    if idle_seconds <= 1000:
        return
    qa = db = None
    print("Data cleared successfully.")  # Logging
        
def periodic_clear(interval=1000):
    """Run ``auto_clear_data`` forever, sleeping between runs.

    This function never returns, so it must run on its own thread.

    Args:
        interval: Seconds to sleep between checks. Defaults to 1000, the value
            that was previously hard-coded.
    """
    while True:
        auto_clear_data()
        time.sleep(interval)
        
# Run the cleanup loop in the background. It is a daemon thread because the
# loop never returns: a non-daemon thread would keep the interpreter alive
# after the main thread has finished.
threading.Thread(target=periodic_clear, daemon=True).start()
        
# Page-level CSS passed to gr.Blocks: caps the #col-container element at 700px
# wide and centres it horizontally.
css = """
#col-container {max-width: 700px; margin-left: auto; margin-right: auto;}
"""
        
# HTML header rendered at the top of the page. The button name quoted in the
# instructions must match the label of the load button defined in the UI.
title = """
<div style="text-align: center;max-width: 700px;">
    <h1>CauseWriter Chat with PDF • OpenAI</h1>
    <p style="text-align: center;">Upload a .PDF from your computer, click the "Convert PDF to Magic AI language" button, <br />
    when everything is ready, you can start asking questions about the pdf. Limit ~11k words. <br />
    This version is set to erase chat history automatically after page timeout and uses OpenAI.</p>
</div>
"""
        
# Build the page: header, upload/status controls, summary box, chat area, and
# finally the event wiring. Component creation order determines the layout.
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.HTML(title)

        with gr.Column():
            pdf_doc = gr.File(label="Load a pdf", file_types=['.pdf'], type="file")
            with gr.Row():
                langchain_status = gr.Textbox(label="Status", placeholder="", interactive=False)
                load_pdf = gr.Button("Convert PDF to Magic AI language")
                clear_btn = gr.Button("Clear Data")

            # Textbox to display summary. The height option for gr.Textbox is
            # ``lines``; ``rows`` is not a Textbox parameter.
            # NOTE(review): nothing visible in this file writes to this box yet.
            summary_box = gr.Textbox(
                label="Document Summary",
                placeholder="Summary will appear here.",
                interactive=False,
                lines=5,
                elem_id="summary_box",
            )

        chatbot = gr.Chatbot([], elem_id="chatbot").style(height=450)
        question = gr.Textbox(label="Question", placeholder="Type your question and hit Enter")
        submit_btn = gr.Button("Send Message")

        # pdf_changes is called with the uploaded file, and its status text is
        # shown in the status box. Gradio passes one positional argument per
        # input and expects one return value per output, so the lists must
        # match what the handler accepts and returns.
        load_pdf.click(pdf_changes, inputs=[pdf_doc], outputs=[langchain_status])
        clear_btn.click(clear_data, outputs=[langchain_status])

        # Both Enter and the button first append the user's message (clearing
        # the input box), then let the bot fill in the reply.
        question.submit(add_text, [chatbot, question], [chatbot, question]).then(
            bot, chatbot, chatbot
        )
        submit_btn.click(add_text, [chatbot, question], [chatbot, question]).then(
            bot, chatbot, chatbot
        )

demo.launch()