Pavan178 committed on
Commit
58bf31d
·
verified ·
1 Parent(s): 75fd4bb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +68 -133
app.py CHANGED
@@ -11,147 +11,113 @@ from langchain.memory import ConversationBufferMemory
11
  from langchain.prompts import PromptTemplate
12
  from PyPDF2 import PdfReader
13
 
14
- logging.basicConfig(level=logging.INFO)
15
- logger = logging.getLogger(__name__)
16
-
17
- class ResponseStructureSelector:
18
  def __init__(self, llm):
19
  self.llm = llm
20
- self.structure_prompt = PromptTemplate(
21
- input_variables=['context', 'query'],
22
- template="""Analyze the context and query to determine the most appropriate response structure:
 
23
  Context: {context}
24
  Query: {query}
 
25
 
26
- Select the optimal response format:
27
- 1. Markdown with bullet points and headlines
28
- 2. Concise paragraph with key insights
29
- 3. Numbered list with detailed explanations
30
- 4. Technical breakdown with subheadings
31
- 5. Quick summary with critical points
32
 
33
- Choose the number (1-5) of the most suitable format:"""
34
  )
35
- self.structure_chain = LLMChain(llm=self.llm, prompt=self.structure_prompt)
36
 
37
- def select_structure(self, context, query):
38
  try:
39
- structure_choice = self.structure_chain.run({'context': context, 'query': query})
40
- return int(structure_choice.strip())
41
- except:
42
- return 1 # Default to Markdown structure
43
-
44
- class QueryRefiner:
45
- def __init__(self, llm):
46
- self.refinement_llm = llm
47
- self.refinement_prompt = PromptTemplate(
48
- input_variables=['query', 'context'],
49
- template="""Refine query for clarity and precision:
50
- Original Query: {query}
51
- Document Context: {context}
52
- Refined, Focused Query:"""
53
- )
54
- self.refinement_chain = LLMChain(llm=self.refinement_llm, prompt=self.refinement_prompt)
55
-
56
- def refine_query(self, original_query, context_hints=''):
57
- try:
58
- return self.refinement_chain.run({
59
- 'query': original_query,
60
- 'context': context_hints or "General document"
61
- }).strip()
62
  except Exception as e:
63
- logger.error(f"Query refinement error: {e}")
64
- return original_query
 
 
 
 
 
 
 
 
 
 
 
 
 
65
 
66
  class AdvancedPdfChatbot:
67
  def __init__(self, openai_api_key):
68
  os.environ["OPENAI_API_KEY"] = openai_api_key
69
- self.llm = ChatOpenAI(temperature=0, model_name='gpt-4o', max_tokens=1000)
70
 
71
  self.embeddings = OpenAIEmbeddings()
72
- self.text_splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=100)
73
 
74
  self.memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
75
- self.query_refiner = QueryRefiner(self.llm)
76
- self.response_selector = ResponseStructureSelector(self.llm)
77
 
78
  self.db = None
79
- self.chain = None
80
- self.document_metadata = {}
81
-
82
- def _create_response_prompt(self, structure_choice):
83
- structure_templates = {
84
- 1: """Markdown Response with Structured Insights:
85
- ## {title}
86
- ### Key Highlights
87
- {content}
88
- ### Conclusion
89
- {conclusion}""",
90
- 2: """{title}: {content}. Key Takeaway: {conclusion}""",
91
- 3: """Structured Breakdown:
92
- 1. {title}
93
- - Main Point: {content}
94
- 2. Implications
95
- - {conclusion}""",
96
- 4: """Technical Analysis
97
- ## {title}
98
- ### Core Concept
99
- {content}
100
- ### Technical Implications
101
- {conclusion}""",
102
- 5: """Concise Summary: {title}. Key Points: {content}. Conclusion: {conclusion}."""
103
- }
104
- return PromptTemplate(
105
- template=structure_templates.get(structure_choice, structure_templates[1]),
106
- input_variables=["title", "content", "conclusion"]
107
- )
108
 
109
  def load_and_process_pdf(self, pdf_path):
110
  try:
111
- # Extract PDF metadata
112
  reader = PdfReader(pdf_path)
113
- self.document_metadata = {
114
- "title": reader.metadata.get("/Title", "Untitled Document"),
115
  "author": reader.metadata.get("/Author", "Unknown")
116
  }
117
-
118
- # Load and process PDF
119
  loader = PyPDFLoader(pdf_path)
120
  documents = loader.load()
121
  texts = self.text_splitter.split_documents(documents)
122
 
123
- # Create vector store with fewer documents to improve performance
124
- self.db = FAISS.from_documents(texts[:30], self.embeddings)
125
-
126
- # Setup conversational chain
127
- self.chain = ConversationalRetrievalChain.from_llm(
128
- llm=self.llm,
129
- retriever=self.db.as_retriever(search_kwargs={"k": 3}),
130
- memory=self.memory
131
- )
132
 
133
  return True
134
  except Exception as e:
135
- logger.error(f"PDF processing error: {e}")
136
  return False
137
 
138
  def chat(self, query):
139
- if not self.chain:
140
- return "Upload a PDF first."
141
 
142
- # Refine query
143
- context = f"Document: {self.document_metadata.get('title', 'Unknown')}"
144
- refined_query = self.query_refiner.refine_query(query, context)
145
 
146
- # Select response structure
147
- structure_choice = self.response_selector.select_structure(context, refined_query)
 
 
 
 
148
 
149
- # Perform retrieval and answer generation
150
- result = self.chain({"question": refined_query})
151
 
152
- return result['answer']
153
 
154
- # Gradio Interface (remains mostly the same)
155
  pdf_chatbot = AdvancedPdfChatbot(os.environ.get("OPENAI_API_KEY"))
156
 
157
  def upload_pdf(pdf_file):
@@ -168,36 +134,6 @@ def respond(message, history):
168
  except Exception as e:
169
  return f"Error: {e}", history
170
 
171
-
172
- # Gradio Interface
173
- pdf_chatbot = AdvancedPdfChatbot(os.environ.get("OPENAI_API_KEY"))
174
-
175
- def upload_pdf(pdf_file):
176
- if pdf_file is None:
177
- return "Please upload a PDF file."
178
- file_path = pdf_file.name if hasattr(pdf_file, 'name') else pdf_file
179
- try:
180
- pdf_chatbot.load_and_process_pdf(file_path)
181
- return f"PDF processed successfully: {file_path}"
182
- except Exception as e:
183
- logger.error(f"PDF processing error: {e}")
184
- return f"Error processing PDF: {str(e)}"
185
-
186
- def respond(message, history):
187
- if not message:
188
- return "", history
189
- try:
190
- bot_message = pdf_chatbot.chat(message)
191
- history.append((message, bot_message))
192
- return "", history
193
- except Exception as e:
194
- logger.error(f"Chat response error: {e}")
195
- return f"Error: {str(e)}", history
196
-
197
- def clear_chatbot():
198
- pdf_chatbot.clear_memory()
199
- return []
200
-
201
  # Gradio UI
202
  with gr.Blocks() as demo:
203
  gr.Markdown("# Advanced PDF Chatbot")
@@ -207,11 +143,10 @@ with gr.Blocks() as demo:
207
 
208
  upload_status = gr.Textbox(label="Upload Status")
209
  upload_button.click(upload_pdf, inputs=[pdf_upload], outputs=[upload_status])
 
210
  chatbot_interface = gr.Chatbot()
211
  msg = gr.Textbox(placeholder="Enter your query...")
212
  msg.submit(respond, inputs=[msg, chatbot_interface], outputs=[msg, chatbot_interface])
213
- clear_button = gr.Button("Clear Conversation")
214
- clear_button.click(clear_chatbot, outputs=[chatbot_interface])
215
 
216
  if __name__ == "__main__":
217
- demo.launch()
 
11
  from langchain.prompts import PromptTemplate
12
  from PyPDF2 import PdfReader
13
 
14
class ContextAwareResponseGenerator:
    """Ask the LLM to pick a response structure (1-5) and write the answer.

    The model reply is expected to start with the number of the structure it
    chose; that number selects the heading that is prepended to the answer.
    """

    def __init__(self, llm):
        """Build the prompt and the LLM chain.

        Args:
            llm: The language model object handed to ``LLMChain``.
        """
        self.llm = llm
        self.response_prompt = PromptTemplate(
            input_variables=['context', 'query', 'chat_history'],
            template="""Analyze the context, query, and chat history to generate an optimal response:

Context: {context}
Query: {query}
Chat History: {chat_history}

Response Structure Selection Criteria:
1. Technical academic breakdown
2. Concise summary with key points
3. Markdown with hierarchical insights
4. Narrative explanation
5. Comparative analysis

Choose the most appropriate response structure (1-5) and generate the response accordingly.
Start your reply with the chosen structure number on its own line, followed by the response:"""
        )
        self.response_chain = LLMChain(llm=self.llm, prompt=self.response_prompt)

    def generate_response(self, context, query, chat_history=''):
        """Run the chain and return the answer wrapped in its structure heading.

        Args:
            context: Text describing the document (inserted into the prompt).
            query: The user's question.
            chat_history: Prior conversation as text; an empty value is
                replaced by ``"No previous context"``.

        Returns:
            The formatted answer, or the fallback from ``_default_response``
            if the chain call or parsing raises.
        """
        try:
            response = self.response_chain.run({
                'context': context,
                'query': query,
                'chat_history': chat_history or "No previous context"
            })
            structure_choice, response_content = self._parse_structure(response)
            return self._format_response(structure_choice, response_content)
        except Exception as e:
            # Boundary handler: the caller always gets a string back.
            logging.exception("Response generation error: %s", e)
            return self._default_response(query)

    @staticmethod
    def _parse_structure(response):
        """Split a model reply into ``(structure_choice, content)``.

        A leading digit 1-5 counts as the structure choice only when it is
        followed by a separator ("3. ", "3) ", "3: ", "3- "), a newline, or
        the end of the text. Otherwise the whole reply is kept as content and
        structure 1 is used, so ordinary text such as "3.5 million" or
        "The answer" is never truncated.
        """
        import re  # local import: keeps this block independent of file-level imports

        text = (response or "").strip()
        match = re.match(r"([1-5])(?:[.):\-]\s+|\s*\n|\s*$)", text)
        if match is None:
            return 1, text
        return int(match.group(1)), text[match.end():].strip()

    def _format_response(self, structure_choice, content):
        """Prefix ``content`` with the heading for ``structure_choice``.

        Unknown choices fall back to the structure-1 heading.
        """
        structures = {
            1: f"## Technical Breakdown\n{content}",
            2: f"📍 Key Insights:\n{content}",
            3: f"### Structured Insights\n{content}",
            4: f"🔍 Narrative Explanation:\n{content}",
            5: f"🔬 Comparative Analysis:\n{content}"
        }
        return structures.get(structure_choice, structures[1])

    def _default_response(self, query):
        """Return the message used when response generation fails."""
        return f"I couldn't generate a structured response for: {query}"
66
 
67
class AdvancedPdfChatbot:
    """Chat over a single PDF: index its text in FAISS and answer questions.

    ``load_and_process_pdf`` builds the vector index; ``chat`` retrieves the
    passages most similar to the question, passes them with the conversation
    history to the response generator, and records the exchange in memory.
    """

    # Only the first N chunks of a document are embedded, to bound the
    # embedding work per upload. Text beyond that is not searchable.
    MAX_INDEXED_CHUNKS = 50
    # Number of passages retrieved from the index for each question.
    RETRIEVAL_K = 3

    def __init__(self, openai_api_key):
        """Create the LLM, embeddings, splitter, memory and generator.

        Args:
            openai_api_key: OpenAI API key. When falsy, the environment is
                left untouched (assigning ``None`` into ``os.environ`` raises
                ``TypeError``), so a key already exported in the environment
                is still used.
        """
        if openai_api_key:
            os.environ["OPENAI_API_KEY"] = openai_api_key
        self.llm = ChatOpenAI(temperature=0.2, model_name='gpt-4o')

        self.embeddings = OpenAIEmbeddings()
        self.text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)

        self.memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
        self.response_generator = ContextAwareResponseGenerator(self.llm)

        self.db = None
        self.document_context = ""

    @staticmethod
    def _describe_document(metadata):
        """Return a one-line description from PDF metadata.

        ``metadata`` may be ``None`` (the PDF has no info dictionary) and the
        individual fields may be missing or empty; both fall back to
        placeholders.
        """
        info = metadata or {}
        title = info.get("/Title") or "Untitled"
        author = info.get("/Author") or "Unknown"
        return f"Document: {title} by {author}"

    @staticmethod
    def _format_history(messages):
        """Render memory messages as ``"<type>: <content>"`` lines.

        Returns an empty string for an empty history so the response
        generator can substitute its own "no history" placeholder.
        """
        return "\n".join(
            f"{getattr(message, 'type', 'message')}: {getattr(message, 'content', message)}"
            for message in messages
        )

    def load_and_process_pdf(self, pdf_path):
        """Read the PDF, split it into chunks and build the FAISS index.

        Args:
            pdf_path: Path of the PDF file to load.

        Returns:
            True when the index was built; False when the file has no
            extractable text or any step raised (the error is logged).
        """
        try:
            reader = PdfReader(pdf_path)
            document_context = self._describe_document(reader.metadata)

            loader = PyPDFLoader(pdf_path)
            documents = loader.load()
            texts = self.text_splitter.split_documents(documents)
            if not texts:
                logging.error("PDF processing error: no extractable text in %s", pdf_path)
                return False

            self.db = FAISS.from_documents(texts[:self.MAX_INDEXED_CHUNKS], self.embeddings)
            # Set only after the index exists so both attributes describe the same document.
            self.document_context = document_context

            return True
        except Exception as e:
            logging.exception("PDF processing error: %s", e)
            return False

    def chat(self, query):
        """Answer ``query`` using passages retrieved from the loaded PDF.

        Args:
            query: The user's question.

        Returns:
            The generated answer, or a prompt to upload a PDF when no
            document has been indexed yet.
        """
        if self.db is None:
            return "Please upload a PDF first."

        # Retrieve chat history
        chat_history = self.memory.load_memory_variables({}).get('chat_history', [])

        # Ground the answer in the document: fetch the most similar passages.
        passages = self.db.similarity_search(query, k=self.RETRIEVAL_K)
        excerpts = "\n\n".join(passage.page_content for passage in passages)
        context = self.document_context
        if excerpts:
            context = f"{context}\n\nRelevant excerpts:\n{excerpts}"

        # Generate context-aware response
        response = self.response_generator.generate_response(
            context=context,
            query=query,
            chat_history=self._format_history(chat_history)
        )

        # Store conversation in memory
        self.memory.save_context({"input": query}, {"output": response})

        return response
119
 
120
+ # Gradio Interface
121
  pdf_chatbot = AdvancedPdfChatbot(os.environ.get("OPENAI_API_KEY"))
122
 
123
  def upload_pdf(pdf_file):
 
134
  except Exception as e:
135
  return f"Error: {e}", history
136
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
137
  # Gradio UI
138
  with gr.Blocks() as demo:
139
  gr.Markdown("# Advanced PDF Chatbot")
 
143
 
144
  upload_status = gr.Textbox(label="Upload Status")
145
  upload_button.click(upload_pdf, inputs=[pdf_upload], outputs=[upload_status])
146
+
147
  chatbot_interface = gr.Chatbot()
148
  msg = gr.Textbox(placeholder="Enter your query...")
149
  msg.submit(respond, inputs=[msg, chatbot_interface], outputs=[msg, chatbot_interface])
 
 
150
 
151
  if __name__ == "__main__":
152
+ demo.launch()