Pavan178 committed on
Commit
5099842
·
verified ·
1 Parent(s): 58027e2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +68 -101
app.py CHANGED
@@ -1,140 +1,107 @@
1
  import os
2
  import gradio as gr
3
- import logging
4
  from langchain.document_loaders import PyPDFLoader
5
  from langchain.text_splitter import RecursiveCharacterTextSplitter
6
  from langchain.embeddings import OpenAIEmbeddings
7
  from langchain.vectorstores import FAISS
 
8
  from langchain.chat_models import ChatOpenAI
9
- from langchain.chains import LLMChain
10
  from langchain.memory import ConversationBufferMemory
11
- from langchain.prompts import PromptTemplate
12
- from PyPDF2 import PdfReader
13
- import spaces
14
 
 
15
 
16
- class ContextAwareResponseGenerator:
17
- def __init__(self, llm):
18
- self.llm = llm
19
- self.response_prompt = PromptTemplate(
20
- input_variables=['context', 'query', 'chat_history'],
21
- template="""Given the context, query, and chat history, generate the best response that is clear and helpful. Use structured responses in various formats such as Paragraphs, Headlines and subtexts, bullet points, Sections.
22
 
23
- Context: {context}
24
- Query: {query}
25
- Chat History: {chat_history}
26
 
27
- Choose the most appropriate response structure and generate the response directly, without explicit guidance on which format to use. Your response should be based on the query and context provided."""
28
- )
29
- self.response_chain = LLMChain(llm=self.llm, prompt=self.response_prompt)
30
- @spaces.GPU
31
- def generate_response(self, context, query, chat_history=''):
32
- try:
33
- # Generate the response content with structure handled by the LLM itself
34
- response = self.response_chain.run({
35
- 'context': context,
36
- 'query': query,
37
- 'chat_history': chat_history or "No previous context"
38
- })
39
- return response.strip() # LLM decides on the structure
40
- except Exception as e:
41
- logging.error(f"Response generation error: {e}")
42
- return self._default_response(query)
43
-
44
- def _default_response(self, query):
45
- return f"I couldn't generate a response for: {query}"
46
 
47
  class AdvancedPdfChatbot:
48
- @spaces.GPU
49
  def __init__(self, openai_api_key):
50
  os.environ["OPENAI_API_KEY"] = openai_api_key
51
- self.llm = ChatOpenAI(temperature=0.2, model_name='gpt-4o')
52
-
53
  self.embeddings = OpenAIEmbeddings()
54
  self.text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
 
55
 
56
  self.memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
57
- self.response_generator = ContextAwareResponseGenerator(self.llm)
58
-
59
- self.db = None
60
- self.document_context = ""
61
-
62
- @spaces.GPU
 
 
 
63
  def load_and_process_pdf(self, pdf_path):
64
- try:
65
- reader = PdfReader(pdf_path)
66
- metadata = {
67
- "title": reader.metadata.get("/Title", "Untitled"),
68
- "author": reader.metadata.get("/Author", "Unknown")
69
- }
70
-
71
- loader = PyPDFLoader(pdf_path)
72
- documents = loader.load()
73
- texts = self.text_splitter.split_documents(documents)
74
-
75
- self.db = FAISS.from_documents(texts[:50], self.embeddings)
76
- self.document_context = f"Document: {metadata['title']} by {metadata['author']}"
77
-
78
- return True
79
- except Exception as e:
80
- logging.error(f"PDF processing error: {e}")
81
- return False
82
-
83
- @spaces.GPU
84
- def chat(self, query, is_new_question=False):
85
- if not self.db:
86
- return "Please upload a PDF first."
87
-
88
- # Retrieve chat history
89
- chat_history = self.memory.load_memory_variables({}).get('chat_history', [])
90
-
91
- # Reset chat history for new questions
92
- if is_new_question:
93
- chat_history = [] # For new questions, reset the chat history
94
-
95
- # Generate context-aware response
96
- response = self.response_generator.generate_response(
97
- context=self.document_context,
98
- query=query,
99
- chat_history=str(chat_history)
100
  )
101
-
102
- # Store conversation in memory
103
- self.memory.save_context({"input": query}, {"output": response})
104
-
105
- return response
106
 
107
- # Gradio Interface
108
- pdf_chatbot = AdvancedPdfChatbot(os.environ.get("OPENAI_API_KEY"))
 
 
 
 
 
 
 
 
 
 
 
 
 
109
 
110
  def upload_pdf(pdf_file):
111
- if not pdf_file:
112
- return "Upload a PDF file."
113
- file_path = pdf_file.name if hasattr(pdf_file, 'name') else pdf_file
114
- return "PDF processed successfully" if pdf_chatbot.load_and_process_pdf(file_path) else "Processing failed"
 
115
 
116
  def respond(message, history):
117
- try:
118
- is_new_question = len(history) == 0 # If history is empty, it's a new question
119
- bot_message = pdf_chatbot.chat(message, is_new_question)
120
- history.append((message, bot_message))
121
- return "", history
122
- except Exception as e:
123
- return f"Error: {e}", history
124
-
125
- # Gradio UI
 
 
 
 
126
  with gr.Blocks() as demo:
127
- gr.Markdown("# Advanced PDF Chatbot")
 
128
  with gr.Row():
129
  pdf_upload = gr.File(label="Upload PDF", file_types=[".pdf"])
130
  upload_button = gr.Button("Process PDF")
131
 
132
  upload_status = gr.Textbox(label="Upload Status")
133
  upload_button.click(upload_pdf, inputs=[pdf_upload], outputs=[upload_status])
134
-
 
135
  chatbot_interface = gr.Chatbot()
136
- msg = gr.Textbox(placeholder="Enter your query...")
 
 
137
  msg.submit(respond, inputs=[msg, chatbot_interface], outputs=[msg, chatbot_interface])
 
 
138
 
139
  if __name__ == "__main__":
140
  demo.launch()
 
1
  import os
2
  import gradio as gr
 
3
  from langchain.document_loaders import PyPDFLoader
4
  from langchain.text_splitter import RecursiveCharacterTextSplitter
5
  from langchain.embeddings import OpenAIEmbeddings
6
  from langchain.vectorstores import FAISS
7
+ from langchain.chains import ConversationalRetrievalChain
8
  from langchain.chat_models import ChatOpenAI
 
9
  from langchain.memory import ConversationBufferMemory
 
 
 
10
 
11
+ from langchain.prompts import PromptTemplate
12
 
 
 
 
 
 
 
13
 
 
 
 
14
 
15
+ openai_api_key = os.environ.get("OPENAI_API_KEY")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
  class AdvancedPdfChatbot:
 
18
  def __init__(self, openai_api_key):
19
  os.environ["OPENAI_API_KEY"] = openai_api_key
 
 
20
  self.embeddings = OpenAIEmbeddings()
21
  self.text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
22
+ self.llm = ChatOpenAI(temperature=0,model_name='gpt-4o-mini')
23
 
24
  self.memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
25
+ self.qa_chain = None
26
+ self.pdf_path = None
27
+ self.template = """
28
+ Imagine you are a chat assistant for knowledge retrieval, specializing in providing detailed information with a deep understanding of context.
29
+ Your goal is to generate responses in a structured format that is both informative and engaging.
30
+
31
+ """
32
+ self.prompt = PromptTemplate(template=self.template, input_variables=["context", "question"])
33
+
34
  def load_and_process_pdf(self, pdf_path):
35
+ loader = PyPDFLoader(pdf_path)
36
+ documents = loader.load()
37
+ texts = self.text_splitter.split_documents(documents)
38
+ self.db = FAISS.from_documents(texts, self.embeddings)
39
+ self.pdf_path = pdf_path
40
+ self.setup_conversation_chain()
41
+
42
+ def setup_conversation_chain(self):
43
+ self.qa_chain = ConversationalRetrievalChain.from_llm(
44
+ self.llm,
45
+ retriever=self.db.as_retriever(),
46
+ memory=self.memory,
47
+ combine_docs_chain_kwargs={"prompt": self.prompt}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  )
 
 
 
 
 
49
 
50
+ def chat(self, query):
51
+ if not self.qa_chain:
52
+ return "Please upload a PDF first."
53
+ result = self.qa_chain({"question": query})
54
+ return result['answer']
55
+
56
+ def get_pdf_path(self):
57
+ # Return the stored PDF path
58
+ if self.pdf_path:
59
+ return self.pdf_path
60
+ else:
61
+ return "No PDF uploaded yet."
62
+
63
+ # Initialize the chatbot
64
+ pdf_chatbot = AdvancedPdfChatbot(openai_api_key)
65
 
66
  def upload_pdf(pdf_file):
67
+ if pdf_file is None:
68
+ return "Please upload a PDF file."
69
+ file_path = pdf_file.name
70
+ pdf_chatbot.load_and_process_pdf(file_path)
71
+ return file_path
72
 
73
  def respond(message, history):
74
+ bot_message = pdf_chatbot.chat(message)
75
+ history.append((message, bot_message))
76
+ return "", history
77
+
78
+ def clear_chatbot():
79
+ pdf_chatbot.memory.clear()
80
+ return []
81
+
82
+ def get_pdf_path():
83
+ # Call the method to return the current PDF path
84
+ return pdf_chatbot.get_pdf_path()
85
+
86
+ # Create the Gradio interface
87
  with gr.Blocks() as demo:
88
+ gr.Markdown("# PDF Chatbot")
89
+
90
  with gr.Row():
91
  pdf_upload = gr.File(label="Upload PDF", file_types=[".pdf"])
92
  upload_button = gr.Button("Process PDF")
93
 
94
  upload_status = gr.Textbox(label="Upload Status")
95
  upload_button.click(upload_pdf, inputs=[pdf_upload], outputs=[upload_status])
96
+ path_button = gr.Button("Get PDF Path")
97
+ pdf_path_display = gr.Textbox(label="Current PDF Path")
98
  chatbot_interface = gr.Chatbot()
99
+ msg = gr.Textbox()
100
+ clear = gr.Button("Clear")
101
+
102
  msg.submit(respond, inputs=[msg, chatbot_interface], outputs=[msg, chatbot_interface])
103
+ clear.click(clear_chatbot, outputs=[chatbot_interface])
104
+ path_button.click(get_pdf_path, outputs=[pdf_path_display])
105
 
106
  if __name__ == "__main__":
107
  demo.launch()