Pavan178 committed on
Commit
6a6fbcd
·
verified ·
1 Parent(s): 886bfa8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +66 -18
app.py CHANGED
@@ -5,10 +5,44 @@ from langchain.text_splitter import RecursiveCharacterTextSplitter
5
  from langchain.embeddings import OpenAIEmbeddings
6
  from langchain.vectorstores import FAISS
7
  from langchain.chat_models import ChatOpenAI
8
- from langchain.chains import ConversationalRetrievalChain
9
  from langchain.memory import ConversationBufferMemory
10
  from langchain.prompts import PromptTemplate
11
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  class AdvancedPdfChatbot:
13
  def __init__(self, openai_api_key):
14
  os.environ["OPENAI_API_KEY"] = openai_api_key
@@ -17,21 +51,18 @@ class AdvancedPdfChatbot:
17
  self.llm = ChatOpenAI(temperature=0, model_name='gpt-4')
18
 
19
  self.memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
 
20
  self.db = None
21
  self.chain = None
22
 
23
- self.template = """
24
- You are a study partner assistant helping students analyze PDF documents.
25
-
26
- Answer the question based only on the most recent provided resources.
27
- Provide the most relevant and concise answer possible. Give a structured response in parts when needed according to complexity of the question and details needed, have headlines or bullet points only when necessary
28
-
29
- Context: {context}
30
- Question: {question}
31
- Answer:
32
- """
33
  self.qa_prompt = PromptTemplate(
34
- template=self.template,
 
 
 
 
 
 
35
  input_variables=["context", "question"]
36
  )
37
 
@@ -43,7 +74,7 @@ class AdvancedPdfChatbot:
43
 
44
  self.chain = ConversationalRetrievalChain.from_llm(
45
  llm=self.llm,
46
- retriever=self.db.as_retriever(),
47
  memory=self.memory,
48
  combine_docs_chain_kwargs={"prompt": self.qa_prompt}
49
  )
@@ -52,13 +83,26 @@ class AdvancedPdfChatbot:
52
  if not self.chain:
53
  return "Please upload a PDF first."
54
 
55
- result = self.chain({"question": query})
 
 
 
56
  return result['answer']
57
 
 
 
 
 
 
 
 
 
 
 
58
  def clear_memory(self):
59
  self.memory.clear()
60
 
61
- # Gradio interface setup remains mostly the same
62
  pdf_chatbot = AdvancedPdfChatbot(os.environ.get("OPENAI_API_KEY"))
63
 
64
  def upload_pdf(pdf_file):
@@ -85,9 +129,9 @@ def clear_chatbot():
85
  pdf_chatbot.clear_memory()
86
  return []
87
 
88
- # Gradio interface
89
  with gr.Blocks() as demo:
90
- gr.Markdown("# PDF Chatbot")
91
 
92
  with gr.Row():
93
  pdf_upload = gr.File(label="Upload PDF", file_types=[".pdf"])
@@ -95,9 +139,13 @@ with gr.Blocks() as demo:
95
 
96
  upload_status = gr.Textbox(label="Upload Status")
97
  upload_button.click(upload_pdf, inputs=[pdf_upload], outputs=[upload_status])
 
98
  chatbot_interface = gr.Chatbot()
99
- msg = gr.Textbox()
100
  msg.submit(respond, inputs=[msg, chatbot_interface], outputs=[msg, chatbot_interface])
 
 
 
101
 
102
  if __name__ == "__main__":
103
  demo.launch()
 
5
  from langchain.embeddings import OpenAIEmbeddings
6
  from langchain.vectorstores import FAISS
7
  from langchain.chat_models import ChatOpenAI
8
+ from langchain.chains import ConversationalRetrievalChain, LLMChain
9
  from langchain.memory import ConversationBufferMemory
10
  from langchain.prompts import PromptTemplate
11
 
12
class QueryRefiner:
    """Rewrites a user query with a lightweight LLM pass to improve retrieval.

    Refinement is strictly best-effort: any failure falls back to the
    original query, so callers never need to handle refinement errors.
    """

    def __init__(self):
        # Cheaper, slightly creative model dedicated to the rewrite step
        # (the main QA chain uses a stronger model elsewhere).
        llm = ChatOpenAI(temperature=0.2, model_name='gpt-3.5-turbo')
        prompt = PromptTemplate(
            input_variables=['query', 'context'],
            template="""Refine and enhance the following query for maximum clarity and precision:

Original Query: {query}
Document Context: {context}

Enhanced Query Requirements:
- Clarify any ambiguous terms
- Add specific context-driven details
- Ensure precise information retrieval
- Restructure for optimal comprehension

Refined Query:"""
        )
        self.refinement_llm = llm
        self.refinement_prompt = prompt
        self.refinement_chain = LLMChain(llm=llm, prompt=prompt)

    def refine_query(self, original_query, context_hints=''):
        """Return a refined version of ``original_query``.

        ``context_hints`` gives the model a short description of the document;
        when empty, a generic academic-document hint is substituted. On any
        error the original query is returned unchanged.
        """
        try:
            response = self.refinement_chain.run({
                'query': original_query,
                'context': context_hints or "General academic document"
            })
        except Exception as e:
            # Deliberate broad catch: refinement is an enhancement, never a
            # hard dependency of the QA flow.
            print(f"Query refinement error: {e}")
            return original_query
        return response.strip()
+
46
  class AdvancedPdfChatbot:
47
  def __init__(self, openai_api_key):
48
  os.environ["OPENAI_API_KEY"] = openai_api_key
 
51
  self.llm = ChatOpenAI(temperature=0, model_name='gpt-4')
52
 
53
  self.memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
54
+ self.query_refiner = QueryRefiner()
55
  self.db = None
56
  self.chain = None
57
 
 
 
 
 
 
 
 
 
 
 
58
  self.qa_prompt = PromptTemplate(
59
+ template="""You are an expert academic assistant analyzing a document.
60
+
61
+ Context: {context}
62
+ Question: {question}
63
+
64
+ Provide a comprehensive, precise answer based strictly on the document's content.
65
+ If the answer isn't directly available, explain why.""",
66
  input_variables=["context", "question"]
67
  )
68
 
 
74
 
75
  self.chain = ConversationalRetrievalChain.from_llm(
76
  llm=self.llm,
77
+ retriever=self.db.as_retriever(search_kwargs={"k": 3}),
78
  memory=self.memory,
79
  combine_docs_chain_kwargs={"prompt": self.qa_prompt}
80
  )
 
83
  if not self.chain:
84
  return "Please upload a PDF first."
85
 
86
+ context_hints = self._extract_document_type()
87
+ refined_query = self.query_refiner.refine_query(query, context_hints)
88
+
89
+ result = self.chain({"question": refined_query})
90
  return result['answer']
91
 
92
+ def _extract_document_type(self):
93
+ """Extract basic document characteristics"""
94
+ if not self.db:
95
+ return ""
96
+ try:
97
+ first_doc = list(self.db.docstore._dict.values())[0].page_content[:500]
98
+ return f"Document appears to cover: {first_doc[:100]}..."
99
+ except:
100
+ return "Academic/technical document"
101
+
102
def clear_memory(self):
    """Forget all prior chat turns by clearing the conversation buffer."""
    self.memory.clear()
104
 
105
+ # Gradio Interface
106
  pdf_chatbot = AdvancedPdfChatbot(os.environ.get("OPENAI_API_KEY"))
107
 
108
  def upload_pdf(pdf_file):
 
129
  pdf_chatbot.clear_memory()
130
  return []
131
 
132
+ # Gradio UI
133
  with gr.Blocks() as demo:
134
+ gr.Markdown("# Advanced PDF Chatbot")
135
 
136
  with gr.Row():
137
  pdf_upload = gr.File(label="Upload PDF", file_types=[".pdf"])
 
139
 
140
  upload_status = gr.Textbox(label="Upload Status")
141
  upload_button.click(upload_pdf, inputs=[pdf_upload], outputs=[upload_status])
142
+
143
  chatbot_interface = gr.Chatbot()
144
+ msg = gr.Textbox(placeholder="Enter your query...")
145
  msg.submit(respond, inputs=[msg, chatbot_interface], outputs=[msg, chatbot_interface])
146
+
147
+ clear_button = gr.Button("Clear Conversation")
148
+ clear_button.click(clear_chatbot, outputs=[chatbot_interface])
149
 
150
  if __name__ == "__main__":
151
  demo.launch()