josondev committed on
Commit
fafef14
·
verified ·
1 Parent(s): 87340ea

Upload veryfinal.py

Browse files
Files changed (1) hide show
  1. veryfinal.py +242 -0
veryfinal.py ADDED
@@ -0,0 +1,242 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os, json
2
+ from dotenv import load_dotenv
3
+
4
+ # Load environment variables
5
+ load_dotenv()
6
+
7
+ # Imports
8
+ from langchain_nvidia_ai_endpoints import ChatNVIDIA, NVIDIAEmbeddings
9
+ from langchain_google_genai import ChatGoogleGenerativeAI
10
+ from langchain_community.tools.tavily_search import TavilySearchResults
11
+ from langchain_community.document_loaders import WikipediaLoader
12
+ from langchain_community.document_loaders import ArxivLoader
13
+ from langchain_community.vectorstores import FAISS
14
+ from langchain_core.messages import SystemMessage, HumanMessage
15
+ from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
16
+ from langchain_core.tools import tool
17
+ from langchain.tools.retriever import create_retriever_tool
18
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
19
+ from langchain_community.document_loaders import JSONLoader
20
+ from langgraph.prebuilt import create_react_agent
21
+ from langgraph.checkpoint.memory import MemorySaver
22
+
23
+ # Define all tools
24
@tool
def multiply(a: int | float, b: int | float) -> int | float:
    """Multiply two numbers.
    Args:
        a: first int | float
        b: second int | float
    """
    # NOTE: @tool is expected to expose the docstring above as the tool
    # description, so its wording is deliberately left untouched.
    product = a * b
    return product
32
+
33
@tool
def add(a: int | float, b: int | float) -> int | float:
    """Add two numbers.

    Args:
        a: first int | float
        b: second int | float
    """
    # NOTE: @tool is expected to expose the docstring above as the tool
    # description, so its wording is deliberately left untouched.
    total = a + b
    return total
42
+
43
@tool
def subtract(a: int | float, b: int | float) -> int | float:
    """Subtract two numbers.

    Args:
        a: first int | float
        b: second int | float
    """
    # Operand order matters: the result is a minus b.
    difference = a - b
    return difference
52
+
53
@tool
def divide(a: int | float, b: int | float) -> int | float:
    """Divide two numbers.

    Args:
        a: first int | float
        b: second int | float
    """
    # Happy path first: any non-zero divisor goes straight to true division.
    if b != 0:
        return a / b
    # A zero divisor is reported as ValueError instead of ZeroDivisionError.
    raise ValueError("Cannot divide by zero.")
64
+
65
@tool
def modulus(a: int | float, b: int | float) -> int | float:
    """Get the modulus of two numbers.

    Args:
        a: first int | float
        b: second int | float

    Raises:
        ValueError: if b is zero.
    """
    # Mirror divide(): reject a zero divisor with a readable ValueError rather
    # than letting `%` raise a bare ZeroDivisionError.
    if b == 0:
        raise ValueError("Cannot take modulus by zero.")
    return a % b
74
+
75
@tool
def wiki_search(query: str) -> str:
    """Search Wikipedia for a query and return the text of the top matching page.

    Args:
        query: the query to search for
    """
    # load_max_docs=1 limits the lookup to a single page. The returned text is
    # the loader's page_content for that page, not just its first paragraph.
    # NOTE(review): long pages are presumably capped by the loader's
    # doc_content_chars_max default - confirm against the loader docs.
    pages = WikipediaLoader(query=query, load_max_docs=1).load()
    # Same separator/padding format as the other search tools in this file;
    # an empty result list yields an empty string.
    return "\n\n---\n\n".join(f'\n{page.page_content}\n' for page in pages)
89
+
90
@tool
def web_search(query: str) -> str:
    """Search Tavily for a query and return maximum 3 results.

    Args:
        query: The search query.
    """
    # invoke() takes the tool input as its first positional argument. Passing
    # only `query=...` as a keyword leaves that argument unfilled and raises
    # TypeError, so the input is supplied as a dict keyed by the argument name.
    search_docs = TavilySearchResults(max_results=3).invoke({"query": query})

    # NOTE(review): on an API failure the Tavily tool presumably returns an
    # error string instead of a list of result dicts - confirm. Hand that text
    # back to the agent rather than crashing on `.get` below.
    if isinstance(search_docs, str):
        return search_docs

    # Each hit is treated as a mapping; a missing "content" key contributes an
    # empty snippet.
    return "\n\n---\n\n".join(
        f'\n{doc.get("content", "")}\n' for doc in search_docs
    )
104
+
105
@tool
def arxiv_search(query: str) -> str:
    """Search Arxiv for a query and return maximum 3 result.

    Args:
        query: The search query.
    """
    separator = "\n\n---\n\n"
    papers = ArxivLoader(query=query, load_max_docs=3).load()

    # Only the first 1000 characters of each document are kept.
    excerpts = []
    for paper in papers:
        excerpts.append(f'\n{paper.page_content[:1000]}\n')
    return separator.join(excerpts)
119
+
120
# --- Knowledge base ingestion ------------------------------------------------
# Everything below executes at import time and reads metadata.jsonl from the
# current working directory.

# jq program applied to each JSONL record: the question goes under
# page_content, and the answer plus annotator fields go under metadata.
jq_schema = """
{
page_content: .Question,
metadata: {
task_id: .task_id,
Level: .Level,
Final_answer: ."Final answer",
file_name: .file_name,
Steps: .["Annotator Metadata"].Steps,
Number_of_steps: .["Annotator Metadata"]["Number of steps"],
How_long: .["Annotator Metadata"]["How long did this take?"],
Tools: .["Annotator Metadata"].Tools,
Number_of_tools: .["Annotator Metadata"]["Number of tools"]
}
}
"""

# NOTE(review): text_content=False presumably lets the loader accept the
# non-string object produced by jq_schema - confirm what ends up in each
# document's page_content.
json_loader = JSONLoader(
    file_path="metadata.jsonl",
    jq_schema=jq_schema,
    json_lines=True,
    text_content=False,
)
json_docs = json_loader.load()

# Break the loaded documents into 512-character chunks overlapping by 200.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=512,
    chunk_overlap=200,
)
json_chunks = text_splitter.split_documents(json_docs)

# Embed every chunk with the default NVIDIAEmbeddings configuration and index
# the vectors in FAISS.
database = FAISS.from_documents(
    documents=json_chunks,
    embedding=NVIDIAEmbeddings(),
)
148
+
149
# --- Agent wiring --------------------------------------------------------------
# Chat model that drives the agent (temperature pinned to 0).
llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", temperature=0)

# Similarity retriever over the FAISS index, returning the 3 closest chunks,
# wrapped as a tool named "question_search".
retriever = database.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 3},
)
retriever_tool = create_retriever_tool(
    retriever=retriever,
    name="question_search",
    description="Search for similar questions and their solutions from the knowledge base.",
)

# Full toolbox handed to the agent: arithmetic, external search, then the
# knowledge-base retriever.
tools = [
    multiply, add, subtract, divide, modulus,
    wiki_search, web_search, arxiv_search,
    retriever_tool,
]

# In-memory checkpointer; run state is keyed by the thread_id supplied in each
# call's config.
memory = MemorySaver()

# Prebuilt ReAct-style agent combining the model, the tools and the checkpointer.
agent_executor = create_react_agent(model=llm, tools=tools, checkpointer=memory)
183
+
184
+ # Function to run the agent
185
def run_agent(query, thread_id="conversation_1"):
    """Stream one agent run to stdout, pretty-printing the newest message of each step.

    Args:
        query: Question text; it is wrapped in a HumanMessage.
        thread_id: Value placed under config["configurable"]["thread_id"].

    Returns:
        None. All output is printed.
    """
    # NOTE(review): agent_executor is built with a MemorySaver checkpointer, so
    # calls that share a thread_id presumably accumulate history (including a
    # repeated system message) - confirm this is intended.
    run_config = {"configurable": {"thread_id": thread_id}}

    prompt_text = '''You are a helpful assistant tasked with answering questions using a set of tools.
Now, I will ask you a question. Report your thoughts, and finish your answer with the following template:
FINAL ANSWER: [YOUR FINAL ANSWER].
YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
Your answer should only start with "FINAL ANSWER: ", then follows with the answer.'''
    conversation = [SystemMessage(content=prompt_text), HumanMessage(content=query)]

    print(f"User: {query}")
    print("\nAgent:")

    # stream_mode="values" yields a state per step; show only its last message.
    snapshots = agent_executor.stream(
        {"messages": conversation},
        run_config,
        stream_mode="values",
    )
    for snapshot in snapshots:
        snapshot["messages"][-1].pretty_print()
206
+
207
+ # Function to run agent with error handling
208
def robust_agent_run(query, thread_id="robust_conversation"):
    """Run the agent once and return the content of its last message.

    Any exception raised while building the messages or streaming the run is
    caught and reported as text instead of propagating.

    Args:
        query: Question text; it is wrapped in a HumanMessage.
        thread_id: Value placed under config["configurable"]["thread_id"].

    Returns:
        The last message's content, "No response generated" when the stream
        produced no messages, or "Error occurred: <message>" on failure.
    """
    run_config = {"configurable": {"thread_id": thread_id}}

    try:
        prompt_text = '''You are a helpful assistant tasked with answering questions using a set of tools.
Now, I will ask you a question. Report your thoughts, and finish your answer with the following template:
FINAL ANSWER: [YOUR FINAL ANSWER].
YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
Your answer should only start with "FINAL ANSWER: ", then follows with the answer.'''
        conversation = [SystemMessage(content=prompt_text), HumanMessage(content=query)]

        # Keep only the message list from the most recent streamed state.
        latest_messages = []
        for snapshot in agent_executor.stream(
            {"messages": conversation},
            run_config,
            stream_mode="values",
        ):
            latest_messages = snapshot["messages"]

        if not latest_messages:
            return "No response generated"
        return latest_messages[-1].content

    except Exception as exc:
        # Deliberate best-effort boundary: the failure is returned as a string.
        return f"Error occurred: {exc!s}"
233
+
234
+ # Main function
235
def main(query: str) -> str:
    """Answer a single query by delegating to robust_agent_run with its default thread."""
    answer = robust_agent_run(query)
    return answer
238
+
239
+
240
+
241
+ # Or use the interactive version
242
+ # run_agent("What is 25 * 4 + 10?")