"""Gradio app for chatting with an uploaded PDF through LangChain and OpenAI.

Loading a PDF produces a one-shot "stuffed" summary and a Chroma-backed
conversational retrieval chain. Both are kept in module-level globals and
are dropped again on request or after a period without user interaction.
"""

import os
import threading
import time

import gradio as gr
from langchain.chains import ConversationalRetrievalChain
from langchain.chains.combine_documents.stuff import StuffDocumentsChain
from langchain.chains.llm import LLMChain
from langchain.chains.summarize import load_summarize_chain
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import OnlinePDFLoader
from langchain.document_loaders import WebBaseLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma

# The key is read from the "Your_API_Key" environment variable. Assigning
# None to os.environ raises TypeError, so only copy it over when it is set.
_api_key = os.getenv("Your_API_Key")
if _api_key:
    os.environ['OPENAI_API_KEY'] = _api_key

# Seconds without interaction after which the loaded PDF state is dropped;
# also the polling interval of the background cleaner.
IDLE_TIMEOUT_SECONDS = 1000

# Global variable for tracking last interaction time
last_interaction_time = 0

# State produced by pdf_changes(). Initialised here so that the handlers can
# test for "nothing loaded" instead of hitting a NameError.
full_summary = None
db = None
qa = None


def loading_pdf():
    """Return the placeholder status text shown while a PDF is processed."""
    return "Working on the upload. Also, pondering the usefulness of sporks..."


# Inside Chroma mod
def summary(self):
    """Describe the document collection held by ``self``.

    Written as a method body (it is not called anywhere in this file).
    ``self`` must expose a ``documents`` sequence whose items support
    ``len()``.

    Returns:
        A string with the number of documents and their average length.
        An empty collection reports an average of 0 instead of dividing
        by zero.
    """
    num_documents = len(self.documents)
    if num_documents == 0:
        return "Number of documents: 0, Average document length: 0"
    avg_doc_length = sum(len(doc) for doc in self.documents) / num_documents
    return f"Number of documents: {num_documents}, Average document length: {avg_doc_length}"


# PDF summary and query using stuffing
def pdf_changes(pdf_doc):
    """Summarise the uploaded PDF and build the retrieval chain for it.

    Args:
        pdf_doc: The value of the ``gr.File`` component; its ``name``
            attribute is passed to the loader. May be ``None`` when
            nothing has been uploaded.

    Returns:
        A status string for the UI: a ready message on success, otherwise
        a message starting with "Error processing PDF:".
    """
    global full_summary, db, qa, last_interaction_time

    if pdf_doc is None:
        return "Error processing PDF: no file was uploaded."

    try:
        # Initialize loader and load documents
        documents = OnlinePDFLoader(pdf_doc.name).load()

        # Define the prompt for summarization
        prompt_template = """Write a concise summary of the following: "{text}" CONCISE SUMMARY:"""
        prompt = PromptTemplate.from_template(prompt_template)

        # Define the LLM chain with the specified prompt
        llm = ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo-16k")
        llm_chain = LLMChain(llm=llm, prompt=prompt)

        # Stuff every document into the single prompt variable "text"
        stuff_chain = StuffDocumentsChain(
            llm_chain=llm_chain,
            document_variable_name="text",
        )
        new_summary = stuff_chain.run(documents)

        # Vector store and conversational retrieval chain for Q&A
        new_db = Chroma.from_documents(documents, OpenAIEmbeddings())
        # NOTE(review): a chat model name is passed to the completion-style
        # OpenAI wrapper together with max_tokens=-1 and n=2 -- confirm this
        # is the intended configuration.
        new_qa = ConversationalRetrievalChain.from_llm(
            llm=OpenAI(temperature=0.2, model_name="gpt-3.5-turbo", max_tokens=-1, n=2),
            retriever=new_db.as_retriever(),
            return_source_documents=False,
        )
    except Exception as e:
        # UI boundary: report the failure in the status box instead of raising.
        return f"Error processing PDF: {str(e)}"

    # Publish only after every step succeeded, so a failure part-way through
    # never leaves the new summary paired with an old (or missing) chain.
    full_summary, db, qa = new_summary, new_db, new_qa
    # Loading counts as an interaction; otherwise the idle cleaner could
    # discard the state right after it was built.
    last_interaction_time = time.time()
    return "Ready. Full Summary loaded."


def clear_data():
    """Drop the loaded PDF state (chain, vector store and summary).

    Returns:
        The status string "Data cleared".
    """
    global qa, db, full_summary
    qa = None
    db = None
    # Also forget the summary so a cleared session cannot serve stale text.
    full_summary = None
    return "Data cleared"


def add_text(history, text):
    """Append the user's message to the chat history.

    Args:
        history: Current chatbot history, a list of (user, bot) pairs.
        text: The message typed by the user.

    Returns:
        The extended history and an empty string that clears the textbox.
    """
    global last_interaction_time
    last_interaction_time = time.time()
    history = history + [(text, None)]
    return history, ""


def bot(history):
    """Fill in the bot's reply for the most recent user message.

    Messages containing the word "summary" (case-insensitive) are answered
    with the stored document summary; everything else goes through
    ``infer``.

    Args:
        history: Chatbot history whose last entry holds the pending question.

    Returns:
        The history with the last entry's reply set.
    """
    user_message = history[-1][0]
    if 'summary' in user_message.lower():  # Check if the last question asks for a summary
        if full_summary is None:
            response = "No summary is available yet. Please load a PDF first."
        else:
            response = full_summary
    else:
        response = infer(user_message, history)

    # One sentence per line. Unlike splitting on '. ' and re-joining, this
    # keeps the period that ends each sentence.
    sentences = response.replace('. ', '. \n')
    formatted_response = f"**Bot:**\n\n{sentences}"
    # Replace the whole pair rather than assigning by index, which would
    # fail if the entry is a tuple.
    history[-1] = [user_message, formatted_response]
    return history


def infer(question, history):
    """Ask the retrieval chain a question, passing along prior turns.

    Args:
        question: The user's current question.
        history: Chatbot history; every entry except the last is sent as
            chat history.

    Returns:
        The chain's answer, or a message starting with
        "Error querying chatbot:" when no PDF is loaded or the call fails.
    """
    # Snapshot the global: the background cleaner may reset it at any time.
    chain = qa
    if chain is None:
        return "Error querying chatbot: no PDF is loaded. Please load a PDF first."
    try:
        chat_history = [(human, ai) for human, ai in history[:-1]]
        result = chain({
            "question": question,
            "chat_history": chat_history,
            "system": "This is a world-class summarizing AI, be helpful.",
        })
        return result["answer"]
    except Exception as e:
        return f"Error querying chatbot: {str(e)}"


def auto_clear_data():
    """Drop the loaded PDF state once the idle timeout has elapsed."""
    if time.time() - last_interaction_time > IDLE_TIMEOUT_SECONDS:
        clear_data()


def periodic_clear():
    """Run ``auto_clear_data`` forever, once per idle-timeout interval."""
    while True:
        auto_clear_data()
        time.sleep(IDLE_TIMEOUT_SECONDS)


# Daemon thread: the endless cleaner loop must not keep the process alive
# after the main thread has finished.
threading.Thread(target=periodic_clear, daemon=True).start()

css = """ #col-container {max-width: 700px; margin-left: auto; margin-right: auto;} """

# NOTE(review): the text below refers to a "Load PDF to LangChain" button,
# while the button created in the UI is labelled differently.
title = """

CauseWriter Chat with PDF • OpenAI

Upload a .PDF from your computer, click the "Load PDF to LangChain" button,
when everything is ready, you can start asking questions about the pdf.
This version is set to store chat history and uses OpenAI as LLM.

"""

with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.HTML(title)

        with gr.Column():
            pdf_doc = gr.File(label="Load a pdf", file_types=['.pdf'], type="file")
            with gr.Row():
                langchain_status = gr.Textbox(label="Status", placeholder="", interactive=False)
                load_pdf = gr.Button("Convert PDF to Magic AI language")
                clear_btn = gr.Button("Clear Data")

        chatbot = gr.Chatbot([], elem_id="chatbot").style(height=450)
        question = gr.Textbox(label="Question", placeholder="Type your question and hit Enter")
        submit_btn = gr.Button("Send Message")

    # Show the placeholder first, then the real result. Chaining fixes the
    # order; two independent click handlers would race for the status box.
    load_pdf.click(loading_pdf, None, langchain_status, queue=False).then(
        pdf_changes, inputs=[pdf_doc], outputs=[langchain_status], queue=False
    )
    clear_btn.click(clear_data, outputs=[langchain_status], queue=False)
    question.submit(add_text, [chatbot, question], [chatbot, question]).then(
        bot, chatbot, chatbot
    )
    submit_btn.click(add_text, [chatbot, question], [chatbot, question]).then(
        bot, chatbot, chatbot
    )

demo.launch()