Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -12,6 +12,18 @@ from pinecone.grpc import PineconeGRPC as Pinecone
|
|
12 |
from pinecone import ServerlessSpec
|
13 |
from langchain_pinecone import PineconeVectorStore
|
14 |
from datetime import datetime
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
|
16 |
# OpenAI API key
|
17 |
openai_api_key = os.getenv("OPENAI_API_KEY")
|
@@ -26,6 +38,104 @@ index_name = "italy-kg"
|
|
26 |
vectorstore = PineconeVectorStore(index_name=index_name, embedding=embeddings)
|
27 |
|
28 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
|
30 |
|
31 |
|
@@ -72,22 +182,19 @@ def process_pdf(pdf_file, uploaded_documents):
|
|
72 |
|
73 |
# Gradio Interface
|
74 |
with gr.Blocks() as demo:
|
75 |
-
gr.Markdown("# PDF Uploader to Pinecone with
|
76 |
|
77 |
-
# File upload component
|
78 |
with gr.Column():
|
79 |
file_input = gr.File(label="Upload PDF", file_types=[".pdf"])
|
80 |
-
# Button to trigger processing
|
81 |
process_button = gr.Button("Process PDF and Upload")
|
82 |
-
|
83 |
-
# Dataframe to display uploaded document records
|
84 |
document_table = gr.Dataframe(headers=["Document Name", "Upload Time", "Chunks", "Pinecone Index"], interactive=False)
|
85 |
-
|
86 |
-
# Output textbox for results
|
87 |
output_textbox = gr.Textbox(label="Result")
|
88 |
-
|
89 |
-
|
|
|
|
|
|
|
90 |
process_button.click(fn=process_pdf, inputs=[file_input, gr.State([])], outputs=[document_table, output_textbox])
|
|
|
91 |
|
92 |
-
demo.queue()
|
93 |
demo.launch(show_error=True)
|
|
|
12 |
from pinecone import ServerlessSpec
|
13 |
from langchain_pinecone import PineconeVectorStore
|
14 |
from datetime import datetime
|
15 |
+
import os
|
16 |
+
from langchain.document_loaders import PyPDFLoader
|
17 |
+
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
18 |
+
from langchain.embeddings.openai import OpenAIEmbeddings
|
19 |
+
from langchain.vectorstores import Pinecone
|
20 |
+
from typing import TypedDict,List
|
21 |
+
from langgraph.graph import StateGraph
|
22 |
+
from langgraph.prebuilt import ToolNode
|
23 |
+
from langchain.schema import Document
|
24 |
+
from langchain.prompts import PromptTemplate
|
25 |
+
from langchain.tools import Tool
|
26 |
+
from langchain.llms import OpenAI
|
27 |
|
28 |
# OpenAI API key
|
29 |
openai_api_key = os.getenv("OPENAI_API_KEY")
|
|
|
38 |
vectorstore = PineconeVectorStore(index_name=index_name, embedding=embeddings)
|
39 |
|
40 |
|
41 |
+
# Tool functions
|
42 |
+
def search_vector_db(query: str, k: int = 3) -> List[Document]:
    """Return the ``k`` documents most similar to ``query``.

    The lookup is delegated to ``similarity_search`` on the module-level
    ``vectorstore``.

    Args:
        query: Text to search for.
        k: Number of documents to request (defaults to 3).

    Returns:
        Whatever ``vectorstore.similarity_search`` returns for the query.
    """
    return vectorstore.similarity_search(query, k=k)
|
45 |
+
|
46 |
+
def expand_query(query: str) -> str:
    """Return ``query`` as-is.

    Currently a pass-through: no extra terms or context are added to the
    query before it is handed back to the caller.
    """
    expanded = query
    return expanded
|
48 |
+
|
49 |
+
def summarize_context(context: str) -> str:
    """Summarize ``context`` using the module-level ``llm``.

    Args:
        context: Text to condense.

    Returns:
        The LLM output with surrounding whitespace stripped, or an empty
        string when ``context`` is empty or whitespace-only.
    """
    # Nothing to summarize: skip the LLM call instead of prompting it with
    # an empty context.
    if not context or not context.strip():
        return ""

    # from_template derives the input variables from the template text, so
    # construction does not rely on the constructor inferring them.
    prompt = PromptTemplate.from_template(
        """Summarize the following Context to provide a concise overview: {context}"""
    )
    summary = llm(prompt.format(context=context))
    return summary.strip()
|
53 |
+
|
54 |
+
def generate_response(context: str, query: str) -> str:
    """Answer ``query`` from ``context`` using the module-level ``llm``.

    Args:
        context: Supporting text placed in the prompt's ``Context`` slot.
        query: The question placed in the prompt's ``Question`` slot.

    Returns:
        The LLM output with surrounding whitespace stripped.
    """
    # from_template derives the input variables from the template text, so
    # construction does not rely on the constructor inferring them.
    prompt = PromptTemplate.from_template(
        """Question: {question}\nContext: {context}\nAnswer:"""
    )
    formatted_prompt = prompt.format(context=context, question=query)
    answer = llm(formatted_prompt)
    return answer.strip()
|
59 |
+
|
60 |
+
# Tool objects
|
61 |
+
# Each Tool wraps one of the plain functions above; the workflow nodes call
# the wrapped callable through the tool's ``.func`` attribute.
expand_tool = Tool(
    name="Expand Query",
    func=expand_query,
    description="Enhance the query with additional terms or context",
)

summarize_tool = Tool(
    name="Summarize Context",
    func=summarize_context,
    description="Summarize the context to provide a concise overview",
)

search_tool = Tool(
    name="Search Vector Database",
    func=search_vector_db,
    description="Search the vector database for relevant information",
)

generate_tool = Tool(
    name="Generate Response",
    func=generate_response,
    description="Generate a response based on the context and query",
)
|
84 |
+
|
85 |
+
# State for the graph
|
86 |
+
class State(TypedDict):
    """Dictionary shape passed between the workflow nodes."""

    # The user's question; used for query expansion and for the final answer.
    question: str
    # Documents stored by the search step.
    context: List[Document]
    # Final answer text stored by the generate step.
    response: str
    # Output of the expand step; used as the search query.
    expanded_query: str
    # Output of the summarize step; used as context for the answer.
    summarized_context: str
|
92 |
+
|
93 |
+
# Workflow node definitions
|
94 |
+
def expand(state: State) -> State:
    """Workflow node: store the expanded question under ``expanded_query``.

    The state dictionary is updated in place and returned.
    """
    question = state["question"]
    state["expanded_query"] = expand_tool.func(question)
    return state
|
97 |
+
|
98 |
+
def search(state: State) -> State:
    """Workflow node: search with ``expanded_query`` and store the hits.

    The results are written to ``state["context"]``; the first 100
    characters of each hit are printed for debugging. The state dictionary
    is updated in place and returned.
    """
    documents = search_tool.func(state["expanded_query"])
    state["context"] = documents
    previews = [doc.page_content[:100] for doc in documents]
    print(f"Retrieved Documents: {previews}")
    return state
|
103 |
+
|
104 |
+
def summarize(state: State) -> State:
    """Workflow node: summarize the retrieved documents.

    The page contents of ``state["context"]`` are joined with single spaces
    (an empty string when there are no documents), summarized, and stored
    under ``summarized_context``. The state dictionary is updated in place
    and returned.
    """
    documents = state["context"]
    if documents:
        joined = " ".join(doc.page_content for doc in documents)
    else:
        joined = ""
    state["summarized_context"] = summarize_tool.func(joined)
    print(f"Summarized Context: {state['summarized_context']}")
    return state
|
109 |
+
|
110 |
+
def generate(state: State) -> State:
    """Workflow node: produce the final answer.

    Calls the generate tool with the summarized context and the original
    question, stores the result under ``response``, and prints it. The
    state dictionary is updated in place and returned.
    """
    answer = generate_tool.func(state["summarized_context"], state["question"])
    state["response"] = answer
    print(f"Generated Response: {state['response']}")
    return state
|
115 |
+
|
116 |
+
# Workflow graph
|
117 |
+
# Linear pipeline: expand -> search -> summarize -> generate.
workflow = StateGraph(State)

# Register one node per processing step.
workflow.add_node("expand", expand)
workflow.add_node("search", search)
workflow.add_node("summarize", summarize)
workflow.add_node("generate", generate)

# Wire the steps in order, from entry point to finish point.
workflow.set_entry_point("expand")
workflow.add_edge("expand", "search")
workflow.add_edge("search", "summarize")
workflow.add_edge("summarize", "generate")
workflow.set_finish_point("generate")

# Compiled graph invoked by run_graph.
graph = workflow.compile()
|
131 |
+
|
132 |
+
# Function to run the graph
|
133 |
+
def run_graph(question: str):
    """Run the compiled workflow for ``question`` and return its answer.

    Args:
        question: The user's query text.

    Returns:
        The ``response`` entry of the state returned by ``graph.invoke``,
        or a short prompt message when ``question`` is empty or
        whitespace-only.
    """
    # A blank query has nothing to search for; answer immediately instead
    # of running the whole pipeline on it.
    if not question or not question.strip():
        return "Please enter a question."

    final_state = graph.invoke({"question": question})
    return final_state["response"]
|
136 |
+
|
137 |
+
|
138 |
+
|
139 |
|
140 |
|
141 |
|
|
|
182 |
|
183 |
# Gradio Interface
|
184 |
with gr.Blocks() as demo:
|
185 |
+
gr.Markdown("# PDF Uploader to Pinecone with Query Response")
|
186 |
|
|
|
187 |
with gr.Column():
|
188 |
file_input = gr.File(label="Upload PDF", file_types=[".pdf"])
|
|
|
189 |
process_button = gr.Button("Process PDF and Upload")
|
|
|
|
|
190 |
document_table = gr.Dataframe(headers=["Document Name", "Upload Time", "Chunks", "Pinecone Index"], interactive=False)
|
|
|
|
|
191 |
output_textbox = gr.Textbox(label="Result")
|
192 |
+
|
193 |
+
query_input = gr.Textbox(label="Enter your query:")
|
194 |
+
query_button = gr.Button("Get Response")
|
195 |
+
response_output = gr.Textbox(label="Response:")
|
196 |
+
|
197 |
process_button.click(fn=process_pdf, inputs=[file_input, gr.State([])], outputs=[document_table, output_textbox])
|
198 |
+
query_button.click(fn=run_graph, inputs=query_input, outputs=response_output)
|
199 |
|
|
|
200 |
demo.launch(show_error=True)
|