Pijush2023 committed on
Commit 92b0167 · verified · 1 Parent(s): f9fb482

Update app.py

Files changed (1)
  1. app.py +117 -10
app.py CHANGED
@@ -12,6 +12,18 @@ from pinecone.grpc import PineconeGRPC as Pinecone
 from pinecone import ServerlessSpec
 from langchain_pinecone import PineconeVectorStore
 from datetime import datetime
+import os
+from langchain.document_loaders import PyPDFLoader
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.embeddings.openai import OpenAIEmbeddings
+from langchain.vectorstores import Pinecone
+from typing import TypedDict, List
+from langgraph.graph import StateGraph
+from langgraph.prebuilt import ToolNode
+from langchain.schema import Document
+from langchain.prompts import PromptTemplate
+from langchain.tools import Tool
+from langchain.llms import OpenAI
 
 # OpenAI API key
 openai_api_key = os.getenv("OPENAI_API_KEY")
@@ -26,6 +38,104 @@ index_name = "italy-kg"
 vectorstore = PineconeVectorStore(index_name=index_name, embedding=embeddings)
 
 
+# Tool functions
+def search_vector_db(query: str, k: int = 3) -> List[Document]:
+    docs = vectorstore.similarity_search(query, k=k)
+    return docs
+
+def expand_query(query: str) -> str:
+    return query
+
+def summarize_context(context: str) -> str:
+    prompt = PromptTemplate(template="""Summarize the following Context to provide a concise overview: {context}""")
+    summary = llm(prompt.format(context=context))
+    return summary.strip()
+
+def generate_response(context: str, query: str) -> str:
+    prompt = PromptTemplate(template="""Question: {question}\nContext: {context}\nAnswer:""")
+    formatted_prompt = prompt.format(context=context, question=query)
+    response = llm(formatted_prompt)
+    return response.strip()
+
+# Tool objects
+expand_tool = Tool(
+    name="Expand Query",
+    func=expand_query,
+    description="Enhance the query with additional terms or context"
+)
+
+summarize_tool = Tool(
+    name="Summarize Context",
+    func=summarize_context,
+    description="Summarize the context to provide a concise overview"
+)
+
+search_tool = Tool(
+    name="Search Vector Database",
+    func=search_vector_db,
+    description="Search the vector database for relevant information"
+)
+
+generate_tool = Tool(
+    name="Generate Response",
+    func=generate_response,
+    description="Generate a response based on the context and query"
+)
+
+# State for the graph
+class State(TypedDict):
+    question: str
+    context: List[Document]
+    response: str
+    expanded_query: str
+    summarized_context: str
+
+# Workflow node definitions
+def expand(state: State) -> State:
+    state["expanded_query"] = expand_tool.func(state["question"])  # Expand the query
+    return state
+
+def search(state: State) -> State:
+    results = search_tool.func(state["expanded_query"])  # Search using the expanded query
+    state["context"] = results
+    print(f"Retrieved Documents: {[doc.page_content[:100] for doc in results]}")
+    return state
+
+def summarize(state: State) -> State:
+    context = " ".join(doc.page_content for doc in state["context"]) if state["context"] else ""
+    state["summarized_context"] = summarize_tool.func(context)
+    print(f"Summarized Context: {state['summarized_context']}")
+    return state
+
+def generate(state: State) -> State:
+    response = generate_tool.func(state["summarized_context"], state["question"])
+    state["response"] = response
+    print(f"Generated Response: {state['response']}")
+    return state
+
+# Workflow graph
+workflow = StateGraph(State)
+
+workflow.add_node("expand", expand)
+workflow.add_node("search", search)
+workflow.add_node("summarize", summarize)
+workflow.add_node("generate", generate)
+
+workflow.set_entry_point("expand")
+workflow.add_edge("expand", "search")
+workflow.add_edge("search", "summarize")
+workflow.add_edge("summarize", "generate")
+workflow.set_finish_point("generate")
+
+graph = workflow.compile()
+
+# Function to run the graph
+def run_graph(question: str):
+    result = graph.invoke({"question": question})
+    return result["response"]
+
+
+
 
 
 
@@ -72,22 +182,19 @@ def process_pdf(pdf_file, uploaded_documents):
 
 # Gradio Interface
 with gr.Blocks() as demo:
-    gr.Markdown("# PDF Uploader to Pinecone with Logs")
+    gr.Markdown("# PDF Uploader to Pinecone with Query Response")
 
-    # File upload component
     with gr.Column():
         file_input = gr.File(label="Upload PDF", file_types=[".pdf"])
-        # Button to trigger processing
         process_button = gr.Button("Process PDF and Upload")
-
-        # Dataframe to display uploaded document records
         document_table = gr.Dataframe(headers=["Document Name", "Upload Time", "Chunks", "Pinecone Index"], interactive=False)
-
-        # Output textbox for results
        output_textbox = gr.Textbox(label="Result")
-
-        # Define button click action
+
+        query_input = gr.Textbox(label="Enter your query:")
+        query_button = gr.Button("Get Response")
+        response_output = gr.Textbox(label="Response:")
+
     process_button.click(fn=process_pdf, inputs=[file_input, gr.State([])], outputs=[document_table, output_textbox])
+    query_button.click(fn=run_graph, inputs=query_input, outputs=response_output)
 
-    demo.queue()
 demo.launch(show_error=True)
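Note: the new summarize_context and generate_response helpers call an llm object that is not defined in any of the hunks above, so it is presumably created elsewhere in app.py. Below is a minimal sketch of one way such an object could be set up, consistent with the from langchain.llms import OpenAI line this commit adds; the temperature value is an assumption, not taken from the commit.

# Hypothetical sketch, not part of this commit: an LLM instance the new helpers could call.
# Assumes OPENAI_API_KEY is already set in the environment, as app.py expects.
from langchain.llms import OpenAI

llm = OpenAI(temperature=0)  # illustrative configuration; app.py may configure its llm differently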
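For a quick check of the new query path outside the Gradio UI, the compiled graph can be exercised directly through run_graph. A small smoke-test sketch follows; the question string is only an example and does not come from the commit.

# Hypothetical smoke test, not part of this commit.
if __name__ == "__main__":
    sample_question = "What are the main attractions in Florence?"  # example query only
    answer = run_graph(sample_question)  # runs expand -> search -> summarize -> generate
    print(answer)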