Update app.py
Browse files
app.py
CHANGED
@@ -5,10 +5,44 @@ from langchain.text_splitter import RecursiveCharacterTextSplitter
|
|
5 |
from langchain.embeddings import OpenAIEmbeddings
|
6 |
from langchain.vectorstores import FAISS
|
7 |
from langchain.chat_models import ChatOpenAI
|
8 |
-
from langchain.chains import ConversationalRetrievalChain
|
9 |
from langchain.memory import ConversationBufferMemory
|
10 |
from langchain.prompts import PromptTemplate
|
11 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
class AdvancedPdfChatbot:
|
13 |
def __init__(self, openai_api_key):
|
14 |
os.environ["OPENAI_API_KEY"] = openai_api_key
|
@@ -17,21 +51,18 @@ class AdvancedPdfChatbot:
|
|
17 |
self.llm = ChatOpenAI(temperature=0, model_name='gpt-4')
|
18 |
|
19 |
self.memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
|
|
|
20 |
self.db = None
|
21 |
self.chain = None
|
22 |
|
23 |
-
self.template = """
|
24 |
-
You are a study partner assistant helping students analyze PDF documents.
|
25 |
-
|
26 |
-
Answer the question based only on the most recent provided resources.
|
27 |
-
Provide the most relevant and concise answer possible. Give a structured response in parts when needed according to complexity of the question and details needed, have headlines or bullet points only when necessary
|
28 |
-
|
29 |
-
Context: {context}
|
30 |
-
Question: {question}
|
31 |
-
Answer:
|
32 |
-
"""
|
33 |
self.qa_prompt = PromptTemplate(
|
34 |
-
template=
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
input_variables=["context", "question"]
|
36 |
)
|
37 |
|
@@ -43,7 +74,7 @@ class AdvancedPdfChatbot:
|
|
43 |
|
44 |
self.chain = ConversationalRetrievalChain.from_llm(
|
45 |
llm=self.llm,
|
46 |
-
retriever=self.db.as_retriever(),
|
47 |
memory=self.memory,
|
48 |
combine_docs_chain_kwargs={"prompt": self.qa_prompt}
|
49 |
)
|
@@ -52,13 +83,26 @@ class AdvancedPdfChatbot:
|
|
52 |
if not self.chain:
|
53 |
return "Please upload a PDF first."
|
54 |
|
55 |
-
|
|
|
|
|
|
|
56 |
return result['answer']
|
57 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
def clear_memory(self):
|
59 |
self.memory.clear()
|
60 |
|
61 |
-
# Gradio
|
62 |
pdf_chatbot = AdvancedPdfChatbot(os.environ.get("OPENAI_API_KEY"))
|
63 |
|
64 |
def upload_pdf(pdf_file):
|
@@ -85,9 +129,9 @@ def clear_chatbot():
|
|
85 |
pdf_chatbot.clear_memory()
|
86 |
return []
|
87 |
|
88 |
-
# Gradio
|
89 |
with gr.Blocks() as demo:
|
90 |
-
gr.Markdown("# PDF Chatbot")
|
91 |
|
92 |
with gr.Row():
|
93 |
pdf_upload = gr.File(label="Upload PDF", file_types=[".pdf"])
|
@@ -95,9 +139,13 @@ with gr.Blocks() as demo:
|
|
95 |
|
96 |
upload_status = gr.Textbox(label="Upload Status")
|
97 |
upload_button.click(upload_pdf, inputs=[pdf_upload], outputs=[upload_status])
|
|
|
98 |
chatbot_interface = gr.Chatbot()
|
99 |
-
msg = gr.Textbox()
|
100 |
msg.submit(respond, inputs=[msg, chatbot_interface], outputs=[msg, chatbot_interface])
|
|
|
|
|
|
|
101 |
|
102 |
if __name__ == "__main__":
|
103 |
demo.launch()
|
|
|
5 |
from langchain.embeddings import OpenAIEmbeddings
|
6 |
from langchain.vectorstores import FAISS
|
7 |
from langchain.chat_models import ChatOpenAI
|
8 |
+
from langchain.chains import ConversationalRetrievalChain, LLMChain
|
9 |
from langchain.memory import ConversationBufferMemory
|
10 |
from langchain.prompts import PromptTemplate
|
11 |
|
12 |
+
class QueryRefiner:
    """Rewrites a user's raw question into a clearer, more specific query.

    Used as a pre-processing step before retrieval: a cheaper chat model
    expands/clarifies the question so the downstream retrieval chain gets a
    more precise prompt. Refinement is best-effort — on any failure the
    original query is returned unchanged.
    """

    def __init__(self):
        # Cheaper, faster model than the main QA model; small non-zero
        # temperature gives it some rephrasing flexibility.
        self.refinement_llm = ChatOpenAI(temperature=0.2, model_name='gpt-3.5-turbo')
        # Prompt instructing the model to clarify/restructure the user query,
        # optionally informed by a short document-context hint.
        self.refinement_prompt = PromptTemplate(
            input_variables=['query', 'context'],
            template="""Refine and enhance the following query for maximum clarity and precision:

Original Query: {query}
Document Context: {context}

Enhanced Query Requirements:
- Clarify any ambiguous terms
- Add specific context-driven details
- Ensure precise information retrieval
- Restructure for optimal comprehension

Refined Query:"""
        )
        # Wire prompt + model into a runnable chain.
        self.refinement_chain = LLMChain(
            llm=self.refinement_llm,
            prompt=self.refinement_prompt
        )

    def refine_query(self, original_query, context_hints=''):
        """Return a refined version of *original_query*.

        :param original_query: the user's raw question text.
        :param context_hints: optional short description of the document;
            falls back to a generic hint when empty.
        :returns: the model's refined query (stripped), or *original_query*
            unchanged if the refinement call raises for any reason.
        """
        try:
            refined_query = self.refinement_chain.run({
                'query': original_query,
                'context': context_hints or "General academic document"
            })
            return refined_query.strip()
        except Exception as e:
            # Best-effort: refinement is an enhancement, never a hard
            # dependency — log and fall back to the user's own wording.
            print(f"Query refinement error: {e}")
            return original_query
|
45 |
+
|
46 |
class AdvancedPdfChatbot:
|
47 |
def __init__(self, openai_api_key):
|
48 |
os.environ["OPENAI_API_KEY"] = openai_api_key
|
|
|
51 |
self.llm = ChatOpenAI(temperature=0, model_name='gpt-4')
|
52 |
|
53 |
self.memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
|
54 |
+
self.query_refiner = QueryRefiner()
|
55 |
self.db = None
|
56 |
self.chain = None
|
57 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
self.qa_prompt = PromptTemplate(
|
59 |
+
template="""You are an expert academic assistant analyzing a document.
|
60 |
+
|
61 |
+
Context: {context}
|
62 |
+
Question: {question}
|
63 |
+
|
64 |
+
Provide a comprehensive, precise answer based strictly on the document's content.
|
65 |
+
If the answer isn't directly available, explain why.""",
|
66 |
input_variables=["context", "question"]
|
67 |
)
|
68 |
|
|
|
74 |
|
75 |
self.chain = ConversationalRetrievalChain.from_llm(
|
76 |
llm=self.llm,
|
77 |
+
retriever=self.db.as_retriever(search_kwargs={"k": 3}),
|
78 |
memory=self.memory,
|
79 |
combine_docs_chain_kwargs={"prompt": self.qa_prompt}
|
80 |
)
|
|
|
83 |
if not self.chain:
|
84 |
return "Please upload a PDF first."
|
85 |
|
86 |
+
context_hints = self._extract_document_type()
|
87 |
+
refined_query = self.query_refiner.refine_query(query, context_hints)
|
88 |
+
|
89 |
+
result = self.chain({"question": refined_query})
|
90 |
return result['answer']
|
91 |
|
92 |
+
def _extract_document_type(self):
|
93 |
+
"""Extract basic document characteristics"""
|
94 |
+
if not self.db:
|
95 |
+
return ""
|
96 |
+
try:
|
97 |
+
first_doc = list(self.db.docstore._dict.values())[0].page_content[:500]
|
98 |
+
return f"Document appears to cover: {first_doc[:100]}..."
|
99 |
+
except:
|
100 |
+
return "Academic/technical document"
|
101 |
+
|
102 |
def clear_memory(self):
    """Reset the conversation history held in the chatbot's memory buffer.

    Delegates to ConversationBufferMemory.clear(); the vector store and
    retrieval chain are left intact, so the loaded PDF remains queryable.
    """
    self.memory.clear()
|
104 |
|
105 |
+
# Gradio Interface
|
106 |
pdf_chatbot = AdvancedPdfChatbot(os.environ.get("OPENAI_API_KEY"))
|
107 |
|
108 |
def upload_pdf(pdf_file):
|
|
|
129 |
pdf_chatbot.clear_memory()
|
130 |
return []
|
131 |
|
132 |
+
# Gradio UI
|
133 |
with gr.Blocks() as demo:
|
134 |
+
gr.Markdown("# Advanced PDF Chatbot")
|
135 |
|
136 |
with gr.Row():
|
137 |
pdf_upload = gr.File(label="Upload PDF", file_types=[".pdf"])
|
|
|
139 |
|
140 |
upload_status = gr.Textbox(label="Upload Status")
|
141 |
upload_button.click(upload_pdf, inputs=[pdf_upload], outputs=[upload_status])
|
142 |
+
|
143 |
chatbot_interface = gr.Chatbot()
|
144 |
+
msg = gr.Textbox(placeholder="Enter your query...")
|
145 |
msg.submit(respond, inputs=[msg, chatbot_interface], outputs=[msg, chatbot_interface])
|
146 |
+
|
147 |
+
clear_button = gr.Button("Clear Conversation")
|
148 |
+
clear_button.click(clear_chatbot, outputs=[chatbot_interface])
|
149 |
|
150 |
if __name__ == "__main__":
|
151 |
demo.launch()
|