Update app.py
Browse files
app.py
CHANGED
@@ -1,140 +1,107 @@
|
|
1 |
import os
|
2 |
import gradio as gr
|
3 |
-
import logging
|
4 |
from langchain.document_loaders import PyPDFLoader
|
5 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
6 |
from langchain.embeddings import OpenAIEmbeddings
|
7 |
from langchain.vectorstores import FAISS
|
|
|
8 |
from langchain.chat_models import ChatOpenAI
|
9 |
-
from langchain.chains import LLMChain
|
10 |
from langchain.memory import ConversationBufferMemory
|
11 |
-
from langchain.prompts import PromptTemplate
|
12 |
-
from PyPDF2 import PdfReader
|
13 |
-
import spaces
|
14 |
|
|
|
15 |
|
16 |
-
class ContextAwareResponseGenerator:
|
17 |
-
def __init__(self, llm):
|
18 |
-
self.llm = llm
|
19 |
-
self.response_prompt = PromptTemplate(
|
20 |
-
input_variables=['context', 'query', 'chat_history'],
|
21 |
-
template="""Given the context, query, and chat history, generate the best response that is clear and helpful. Use structured responses in various formats such as Paragraphs, Headlines and subtexts, bullet points, Sections.
|
22 |
|
23 |
-
Context: {context}
|
24 |
-
Query: {query}
|
25 |
-
Chat History: {chat_history}
|
26 |
|
27 |
-
|
28 |
-
)
|
29 |
-
self.response_chain = LLMChain(llm=self.llm, prompt=self.response_prompt)
|
30 |
-
@spaces.GPU
|
31 |
-
def generate_response(self, context, query, chat_history=''):
|
32 |
-
try:
|
33 |
-
# Generate the response content with structure handled by the LLM itself
|
34 |
-
response = self.response_chain.run({
|
35 |
-
'context': context,
|
36 |
-
'query': query,
|
37 |
-
'chat_history': chat_history or "No previous context"
|
38 |
-
})
|
39 |
-
return response.strip() # LLM decides on the structure
|
40 |
-
except Exception as e:
|
41 |
-
logging.error(f"Response generation error: {e}")
|
42 |
-
return self._default_response(query)
|
43 |
-
|
44 |
-
def _default_response(self, query):
|
45 |
-
return f"I couldn't generate a response for: {query}"
|
46 |
|
47 |
class AdvancedPdfChatbot:
|
48 |
-
@spaces.GPU
|
49 |
def __init__(self, openai_api_key):
|
50 |
os.environ["OPENAI_API_KEY"] = openai_api_key
|
51 |
-
self.llm = ChatOpenAI(temperature=0.2, model_name='gpt-4o')
|
52 |
-
|
53 |
self.embeddings = OpenAIEmbeddings()
|
54 |
self.text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
|
|
|
55 |
|
56 |
self.memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
|
57 |
-
self.
|
58 |
-
|
59 |
-
self.
|
60 |
-
|
61 |
-
|
62 |
-
|
|
|
|
|
|
|
63 |
def load_and_process_pdf(self, pdf_path):
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
self.
|
76 |
-
|
77 |
-
|
78 |
-
return True
|
79 |
-
except Exception as e:
|
80 |
-
logging.error(f"PDF processing error: {e}")
|
81 |
-
return False
|
82 |
-
|
83 |
-
@spaces.GPU
|
84 |
-
def chat(self, query, is_new_question=False):
|
85 |
-
if not self.db:
|
86 |
-
return "Please upload a PDF first."
|
87 |
-
|
88 |
-
# Retrieve chat history
|
89 |
-
chat_history = self.memory.load_memory_variables({}).get('chat_history', [])
|
90 |
-
|
91 |
-
# Reset chat history for new questions
|
92 |
-
if is_new_question:
|
93 |
-
chat_history = [] # For new questions, reset the chat history
|
94 |
-
|
95 |
-
# Generate context-aware response
|
96 |
-
response = self.response_generator.generate_response(
|
97 |
-
context=self.document_context,
|
98 |
-
query=query,
|
99 |
-
chat_history=str(chat_history)
|
100 |
)
|
101 |
-
|
102 |
-
# Store conversation in memory
|
103 |
-
self.memory.save_context({"input": query}, {"output": response})
|
104 |
-
|
105 |
-
return response
|
106 |
|
107 |
-
|
108 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
109 |
|
110 |
def upload_pdf(pdf_file):
|
111 |
-
if
|
112 |
-
return "
|
113 |
-
file_path = pdf_file.name
|
114 |
-
|
|
|
115 |
|
116 |
def respond(message, history):
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
|
|
|
|
|
|
|
|
126 |
with gr.Blocks() as demo:
|
127 |
-
gr.Markdown("#
|
|
|
128 |
with gr.Row():
|
129 |
pdf_upload = gr.File(label="Upload PDF", file_types=[".pdf"])
|
130 |
upload_button = gr.Button("Process PDF")
|
131 |
|
132 |
upload_status = gr.Textbox(label="Upload Status")
|
133 |
upload_button.click(upload_pdf, inputs=[pdf_upload], outputs=[upload_status])
|
134 |
-
|
|
|
135 |
chatbot_interface = gr.Chatbot()
|
136 |
-
msg = gr.Textbox(
|
|
|
|
|
137 |
msg.submit(respond, inputs=[msg, chatbot_interface], outputs=[msg, chatbot_interface])
|
|
|
|
|
138 |
|
139 |
if __name__ == "__main__":
|
140 |
demo.launch()
|
|
|
1 |
import os
|
2 |
import gradio as gr
|
|
|
3 |
from langchain.document_loaders import PyPDFLoader
|
4 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
5 |
from langchain.embeddings import OpenAIEmbeddings
|
6 |
from langchain.vectorstores import FAISS
|
7 |
+
from langchain.chains import ConversationalRetrievalChain
|
8 |
from langchain.chat_models import ChatOpenAI
|
|
|
9 |
from langchain.memory import ConversationBufferMemory
|
|
|
|
|
|
|
10 |
|
11 |
+
from langchain.prompts import PromptTemplate
|
12 |
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
|
|
|
|
|
|
|
14 |
|
15 |
+
# Read the OpenAI API key from the environment; this is None when the variable is unset.
openai_api_key = os.environ.get("OPENAI_API_KEY")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
|
17 |
class AdvancedPdfChatbot:
    """Conversational question answering over a single uploaded PDF.

    The PDF is split into overlapping chunks, embedded with OpenAI embeddings
    and indexed in a FAISS store. Questions are answered by a
    ``ConversationalRetrievalChain`` that shares one conversation memory for
    the lifetime of the instance.
    """

    # Prompt for the answer-generation step. ``{context}`` and ``{question}``
    # are the two variables declared in ``input_variables`` below; they must
    # appear in the text, otherwise the retrieved passages and the user's
    # question are never shown to the model.
    PROMPT_TEMPLATE = """
Imagine you are a chat assistant for knowledge retrieval, specializing in providing detailed information with a deep understanding of context.
Your goal is to generate responses in a structured format that is both informative and engaging.

Context:
{context}

Question: {question}
"""

    def __init__(self, openai_api_key):
        """Create the embedding model, splitter, LLM, memory and prompt.

        Args:
            openai_api_key: OpenAI API key. When falsy (e.g. ``None``) the
                process environment is left untouched, so a key that is
                already exported is still picked up by the OpenAI clients.
        """
        # os.environ only accepts strings; assigning None raises TypeError.
        if openai_api_key:
            os.environ["OPENAI_API_KEY"] = openai_api_key

        self.embeddings = OpenAIEmbeddings()
        self.text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
        self.llm = ChatOpenAI(temperature=0, model_name='gpt-4o-mini')

        self.memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
        # Nothing is indexed until load_and_process_pdf() succeeds.
        self.db = None
        self.qa_chain = None
        self.pdf_path = None
        self.template = self.PROMPT_TEMPLATE
        self.prompt = PromptTemplate(template=self.template, input_variables=["context", "question"])

    def load_and_process_pdf(self, pdf_path):
        """Index the PDF at ``pdf_path`` and (re)build the QA chain.

        Args:
            pdf_path: Filesystem path of the PDF to load.

        Raises:
            ValueError: If no text chunks could be extracted from the PDF.
        """
        documents = PyPDFLoader(pdf_path).load()
        texts = self.text_splitter.split_documents(documents)
        if not texts:
            # An empty chunk list cannot be turned into a vector index.
            raise ValueError(f"No extractable text found in PDF: {pdf_path}")

        self.db = FAISS.from_documents(texts, self.embeddings)
        self.pdf_path = pdf_path
        self.setup_conversation_chain()

    def setup_conversation_chain(self):
        """Build the retrieval chain on top of the current vector store.

        Raises:
            RuntimeError: If no PDF has been indexed yet.
        """
        if self.db is None:
            raise RuntimeError("No PDF has been processed yet; call load_and_process_pdf() first.")

        self.qa_chain = ConversationalRetrievalChain.from_llm(
            self.llm,
            retriever=self.db.as_retriever(),
            memory=self.memory,
            combine_docs_chain_kwargs={"prompt": self.prompt},
        )

    def chat(self, query):
        """Answer ``query`` against the indexed PDF.

        Returns:
            The chain's ``'answer'`` value, or a prompt to upload a PDF when
            no chain has been built yet.
        """
        if self.qa_chain is None:
            return "Please upload a PDF first."
        result = self.qa_chain({"question": query})
        return result['answer']

    def get_pdf_path(self):
        """Return the path of the indexed PDF, or a placeholder message if none."""
        if self.pdf_path:
            return self.pdf_path
        return "No PDF uploaded yet."
|
62 |
+
|
63 |
+
# Initialize the chatbot
|
64 |
+
# Single shared instance used by the Gradio callbacks below; it is constructed at import time.
pdf_chatbot = AdvancedPdfChatbot(openai_api_key)
|
65 |
|
66 |
def upload_pdf(pdf_file):
    """Index the uploaded PDF and return a status string for the UI.

    Args:
        pdf_file: Value delivered by the ``gr.File`` component: ``None`` when
            nothing was selected, a path string, or an object exposing the
            path as ``.name``.

    Returns:
        The processed file's path on success, otherwise a human-readable
        message explaining what went wrong.
    """
    if pdf_file is None:
        return "Please upload a PDF file."

    # Accept both a plain path string and a tempfile-like wrapper.
    file_path = pdf_file if isinstance(pdf_file, str) else pdf_file.name

    try:
        pdf_chatbot.load_and_process_pdf(file_path)
    except Exception as exc:
        # UI boundary: surface the failure in the status box instead of
        # letting the callback crash with a generic Gradio error.
        return f"Failed to process PDF: {exc}"
    return file_path
|
72 |
|
73 |
def respond(message, history):
    """Handle a submitted chat message.

    Args:
        message: Text typed by the user.
        history: Chat transcript as a list of ``(user, bot)`` pairs; ``None``
            is treated as an empty transcript.

    Returns:
        A ``("", history)`` pair: the empty string clears the input box and
        ``history`` is the updated transcript.
    """
    if history is None:
        history = []

    # Do not spend an LLM call (or pollute the memory) on blank input.
    if not message or not message.strip():
        return "", history

    bot_message = pdf_chatbot.chat(message)
    history.append((message, bot_message))
    return "", history
|
77 |
+
|
78 |
+
def clear_chatbot():
    """Wipe the shared conversation memory and return an empty transcript."""
    conversation_memory = pdf_chatbot.memory
    conversation_memory.clear()
    # The empty list is what the Chatbot component is reset to.
    return []
|
81 |
+
|
82 |
+
def get_pdf_path():
    """Return whatever the shared chatbot reports as its current PDF path."""
    current_path = pdf_chatbot.get_pdf_path()
    return current_path
|
85 |
+
|
86 |
+
# Create the Gradio interface
|
87 |
with gr.Blocks() as demo:
    # --- Components (declaration order is the on-page order) ---
    gr.Markdown("# PDF Chatbot")

    # NOTE(review): indentation was lost in this view; the row is assumed to
    # hold the file picker and its button, as in the usual layout - confirm.
    with gr.Row():
        pdf_upload = gr.File(label="Upload PDF", file_types=[".pdf"])
        upload_button = gr.Button("Process PDF")

    upload_status = gr.Textbox(label="Upload Status")
    path_button = gr.Button("Get PDF Path")
    pdf_path_display = gr.Textbox(label="Current PDF Path")
    chatbot_interface = gr.Chatbot()
    msg = gr.Textbox()
    clear = gr.Button("Clear")

    # --- Event wiring ---
    upload_button.click(fn=upload_pdf, inputs=[pdf_upload], outputs=[upload_status])
    msg.submit(fn=respond, inputs=[msg, chatbot_interface], outputs=[msg, chatbot_interface])
    clear.click(fn=clear_chatbot, outputs=[chatbot_interface])
    path_button.click(fn=get_pdf_path, outputs=[pdf_path_display])

# Start the web server only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()
|