File size: 6,653 Bytes
2f0e211 d05ba12 2f0e211 3e93b01 2f0e211 3e93b01 2f0e211 4dcf9b3 2f0e211 e455307 895d964 d05ba12 2f0e211 41297e0 2f0e211 4dcf9b3 a7aa0eb 4dcf9b3 e455307 d8804c0 4dcf9b3 d8804c0 4dcf9b3 a7aa0eb 4dcf9b3 d8804c0 4dcf9b3 d8804c0 4dcf9b3 d8804c0 d4eee96 4dcf9b3 d8804c0 4dcf9b3 d8804c0 9c04c52 a08bac4 4dcf9b3 a08bac4 4dcf9b3 a08bac4 2f0e211 d05ba12 2f0e211 4dcf9b3 d8804c0 895d964 00e09c1 4dcf9b3 2f0e211 d64fc58 a7aa0eb d64fc58 a7aa0eb d64fc58 a7aa0eb 6360179 a7aa0eb d05ba12 a7aa0eb e455307 2f0e211 a7aa0eb 2f0e211 b91cab8 2f0e211 4dcf9b3 2f0e211 a7aa0eb 2f0e211 a7aa0eb 2f0e211 a7aa0eb df75cae 2f0e211 6360179 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 |
import gradio as gr
import os
import time
import threading
from langchain.document_loaders import OnlinePDFLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.llms import OpenAI
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import WebBaseLoader
from langchain.chains.summarize import load_summarize_chain
from langchain.chains.llm import LLMChain
from langchain.prompts import PromptTemplate
from langchain.chains.combine_documents.stuff import StuffDocumentsChain
# Copy the key from the "Your_API_Key" variable into the name the OpenAI client
# reads. os.environ only accepts strings, so assigning the None returned by
# os.getenv for a missing variable would raise TypeError at import time;
# skip the copy in that case and leave any existing OPENAI_API_KEY untouched.
_configured_api_key = os.getenv("Your_API_Key")
if _configured_api_key is not None:
    os.environ['OPENAI_API_KEY'] = _configured_api_key

# Global variable for tracking last interaction time (seconds since the epoch;
# 0 means no interaction has been recorded yet)
last_interaction_time = 0
def loading_pdf():
    """Return a fixed status message saying the upload is being worked on."""
    status_message = "Working on the upload. Also, pondering the usefulness of sporks..."
    return status_message
# Inside Chroma mod
def summary(self):
    """Describe the size of ``self.documents`` in one line.

    Args:
        self: Any object exposing a ``documents`` sequence whose items
            support ``len()``.

    Returns:
        A string with the number of documents and their average length.
        The average is reported as 0.0 when there are no documents.
    """
    docs = self.documents
    num_documents = len(docs)
    if num_documents:
        avg_doc_length = sum(len(doc) for doc in docs) / num_documents
    else:
        # Avoid dividing by zero for an empty collection.
        avg_doc_length = 0.0
    return f"Number of documents: {num_documents}, Average document length: {avg_doc_length}"
# Gradio state holding the latest document summary. gr.State takes its
# starting value through the `value` parameter.
summary_state = gr.State(value="pending")
# PDF summary and query using stuffing
def pdf_changes(pdf_doc, summary_value=None):
    """Summarise an uploaded PDF and build the retrieval chain used by the chat.

    On success the module-level ``full_summary``, ``db`` and ``qa`` globals
    are replaced together; on failure they are left as they were.

    Args:
        pdf_doc: Uploaded file object; only its ``name`` attribute is read
            and handed to the PDF loader.
        summary_value: Current value of the summary state as passed by the
            click handler. It is returned unchanged when processing fails.

    Returns:
        A ``(status_message, summary)`` tuple, one value for each of the two
        outputs (status textbox, summary state) the click handler declares.
    """
    global full_summary, db, qa

    if pdf_doc is None:
        return "Error processing PDF: no file was uploaded.", summary_value

    try:
        # Initialize loader and load documents
        loader = OnlinePDFLoader(pdf_doc.name)
        documents = loader.load()

        # Define the prompt for summarization
        prompt_template = (
            'Write a concise summary of the following:\n'
            '"{text}"\n'
            'CONCISE SUMMARY:'
        )
        prompt = PromptTemplate.from_template(prompt_template)

        # Define the LLM chain with the specified prompt
        llm = ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo-16k")
        llm_chain = LLMChain(llm=llm, prompt=prompt)

        # Stuff every loaded document into the single "text" prompt variable
        stuff_chain = StuffDocumentsChain(
            llm_chain=llm_chain, document_variable_name="text"
        )
        summary_text = stuff_chain.run(documents)

        # Vector store and conversational retrieval chain for follow-up questions
        embeddings = OpenAIEmbeddings()
        vector_store = Chroma.from_documents(documents, embeddings)
        retriever = vector_store.as_retriever()
        chain = ConversationalRetrievalChain.from_llm(
            llm=OpenAI(temperature=0.2, model_name="gpt-3.5-turbo-16k", max_tokens=-1, n=2),
            retriever=retriever,
            return_source_documents=False
        )
    except Exception as e:
        return f"Error processing PDF: {str(e)}", summary_value

    # Publish the new state only once every step has succeeded, so a failure
    # part-way through never leaves a half-initialised session behind.
    full_summary = summary_text
    db = vector_store
    qa = chain
    return "Ready. Full Summary loaded.", summary_text
def clear_data():
    """Drop all per-document state held in module globals.

    Resets the retrieval chain (``qa``), the vector store (``db``) and the
    stored document summary (``full_summary``) to ``None`` so nothing derived
    from the previously loaded PDF remains reachable.

    Returns:
        The status message ``"Data cleared"``.
    """
    global qa, db, full_summary
    qa = None
    db = None
    # The summary is document content too; clear it along with the chain.
    full_summary = None
    return "Data cleared"
def add_text(history, text):
    """Append the user's message to the chat history as an unanswered turn.

    Also stamps ``last_interaction_time`` with the current time.

    Args:
        history: Existing chat history list; it is not modified in place.
        text: The message the user just submitted.

    Returns:
        A tuple of the extended history and an empty string.
    """
    global last_interaction_time
    pending_turn = (text, None)
    last_interaction_time = time.time()
    return history + [pending_turn], ""
def bot(history):
    """Fill in the answer for the most recent user message.

    If the last question mentions "summary", the stored document summary is
    used as the answer; otherwise the question is sent to ``infer``. In both
    cases the answer is formatted and written into the last history entry.

    Args:
        history: Chat history as a list of ``[user_message, bot_message]``
            pairs (lists or tuples); the last pair holds the open question.

    Returns:
        The history list with the last entry replaced by
        ``[question, formatted_answer]``. An empty history is returned as is.
    """
    if not history:
        return history

    question = history[-1][0]
    if 'summary' in question.lower():  # Check if the last question asks for a summary
        # The summary global only exists after a PDF was processed, so look
        # it up defensively instead of raising NameError.
        response = globals().get("full_summary") or "No summary is available yet; load a PDF first."
    else:
        response = infer(question, history)

    sentences = ' \n'.join(response.split('. '))
    formatted_response = f"**Bot:**\n\n{sentences}"
    # Replace the whole entry so tuple entries (as created by add_text) work too.
    history[-1] = [question, formatted_response]
    return history
def infer(question, history):
    """Ask the retrieval chain a question, passing earlier turns as context.

    Args:
        question: The user's current question.
        history: Chat history of ``(human, ai)`` pairs; the last entry (the
            still-unanswered question) is excluded from the context.

    Returns:
        The chain's ``"answer"`` string, or a message starting with
        ``"Error querying chatbot: "`` when no chain is available or the
        query fails.
    """
    # The chain global is absent before the first PDF is loaded and None after
    # data has been cleared; report that plainly instead of a NameError text.
    chain = globals().get("qa")
    if chain is None:
        return "Error querying chatbot: no PDF has been loaded yet."

    try:
        chat_history = [(human, ai) for human, ai in history[:-1]]
        result = chain({"question": question, "chat_history": chat_history, "system": "This is a world-class summarizing AI, be helpful."})
        return result["answer"]
    except Exception as e:
        return f"Error querying chatbot: {str(e)}"
def auto_clear_data():
    """Clear per-document globals when the app has been idle for too long.

    If more than 1000 seconds have passed since ``last_interaction_time``,
    the retrieval chain (``qa``), the vector store (``db``) and the stored
    document summary (``full_summary``) are all reset to ``None`` and a log
    line is printed. Otherwise nothing changes.
    """
    global qa, db, full_summary, last_interaction_time
    if time.time() - last_interaction_time > 1000:
        qa = None
        db = None
        # The summary is document content too; clear it along with the chain.
        full_summary = None
        print("Data cleared successfully.")  # Logging
def periodic_clear():
    """Run ``auto_clear_data`` forever, pausing 1000 seconds between runs."""
    pause_seconds = 1000
    while True:
        auto_clear_data()
        time.sleep(pause_seconds)
# Run the idle-cleanup loop in the background. It never returns, so the thread
# is marked as a daemon; otherwise it would keep the interpreter alive after
# the main thread has finished.
threading.Thread(target=periodic_clear, daemon=True).start()
# Page-level CSS: centre the main column and cap its width.
css = """
#col-container {max-width: 700px; margin-left: auto; margin-right: auto;}
"""
# HTML header shown at the top of the page. The bullet is written as an HTML
# entity so it cannot be garbled by encoding problems, and the instructions
# quote the label of the load button as it appears in the UI.
title = """
<div style="text-align: center;max-width: 700px;">
<h1>CauseWriter Chat with PDF &bull; OpenAI</h1>
<p style="text-align: center;">Upload a .PDF from your computer, click the "Convert PDF to Magic AI language" button, <br />
when everything is ready, you can start asking questions about the pdf. Limit ~11k words. <br />
This version is set to erase chat history automatically after page timeout and uses OpenAI.</p>
</div>
"""
# Build the page: upload controls, status line, summary box and chat area,
# then wire the event handlers and start the app.
with gr.Blocks(css=css) as demo:
    # The State object is created at module level, outside any Blocks context,
    # so it must be rendered here to become part of this app before it can be
    # used as an event input or output.
    summary_state.render()

    with gr.Column(elem_id="col-container"):
        gr.HTML(title)

        with gr.Column():
            pdf_doc = gr.File(label="Load a pdf", file_types=['.pdf'], type="file")
            with gr.Row():
                langchain_status = gr.Textbox(label="Status", placeholder="", interactive=False)
                load_pdf = gr.Button("Convert PDF to Magic AI language")
                clear_btn = gr.Button("Clear Data")

        # Read-only textbox that displays the document summary
        summary_box = gr.Textbox(
            label="Document Summary",
            placeholder="Summary will appear here.",
            interactive=False,
            lines=5,  # Textbox height is set with `lines`
            elem_id="summary_box"  # HTML id of the element
        )

        chatbot = gr.Chatbot([], elem_id="chatbot").style(height=450)
        question = gr.Textbox(label="Question", placeholder="Type your question and hit Enter")
        submit_btn = gr.Button("Send Message")

    # Step 2 and 3: Put the State object as an input and output, then copy the
    # state value into the summary textbox so the user can actually see it.
    load_pdf.click(
        pdf_changes,
        inputs=[pdf_doc, summary_state],
        outputs=[langchain_status, summary_state]
    ).then(
        lambda current_summary: current_summary, summary_state, summary_box
    )
    clear_btn.click(clear_data, outputs=[langchain_status])

    # Both Enter in the question box and the send button first record the
    # question in the chat, then let the bot answer it.
    question.submit(add_text, [chatbot, question], [chatbot, question]).then(
        bot, chatbot, chatbot
    )
    submit_btn.click(add_text, [chatbot, question], [chatbot, question]).then(
        bot, chatbot, chatbot
    )

demo.launch()
|