josondev committed on
Commit
1fa6961
·
verified ·
1 Parent(s): 16ce8eb

Update veryfinal.py

Browse files
Files changed (1) hide show
  1. veryfinal.py +242 -242
veryfinal.py CHANGED
@@ -1,242 +1,242 @@
1
- import os, json
2
- from dotenv import load_dotenv
3
-
4
- # Load environment variables
5
- load_dotenv()
6
-
7
- # Imports
8
- from langchain_nvidia_ai_endpoints import ChatNVIDIA, NVIDIAEmbeddings
9
- from langchain_google_genai import ChatGoogleGenerativeAI
10
- from langchain_community.tools.tavily_search import TavilySearchResults
11
- from langchain_community.document_loaders import WikipediaLoader
12
- from langchain_community.document_loaders import ArxivLoader
13
- from langchain_community.vectorstores import FAISS
14
- from langchain_core.messages import SystemMessage, HumanMessage
15
- from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
16
- from langchain_core.tools import tool
17
- from langchain.tools.retriever import create_retriever_tool
18
- from langchain_text_splitters import RecursiveCharacterTextSplitter
19
- from langchain_community.document_loaders import JSONLoader
20
- from langgraph.prebuilt import create_react_agent
21
- from langgraph.checkpoint.memory import MemorySaver
22
-
23
- # Define all tools
24
- @tool
25
- def multiply(a: int | float, b: int | float) -> int | float:
26
- """Multiply two numbers.
27
- Args:
28
- a: first int | float
29
- b: second int | float
30
- """
31
- return a * b
32
-
33
- @tool
34
- def add(a: int | float, b: int | float) -> int | float:
35
- """Add two numbers.
36
-
37
- Args:
38
- a: first int | float
39
- b: second int | float
40
- """
41
- return a + b
42
-
43
- @tool
44
- def subtract(a: int | float , b: int | float) -> int | float:
45
- """Subtract two numbers.
46
-
47
- Args:
48
- a: first int | float
49
- b: second int | float
50
- """
51
- return a - b
52
-
53
- @tool
54
- def divide(a: int | float, b: int | float) -> int | float:
55
- """Divide two numbers.
56
-
57
- Args:
58
- a: first int | float
59
- b: second int | float
60
- """
61
- if b == 0:
62
- raise ValueError("Cannot divide by zero.")
63
- return a / b
64
-
65
- @tool
66
- def modulus(a: int | float, b: int | float) -> int | float:
67
- """Get the modulus of two numbers.
68
-
69
- Args:
70
- a: first int | float
71
- b: second int | float
72
- """
73
- return a % b
74
-
75
- @tool
76
- def wiki_search(query: str) -> str:
77
- """Search the wikipedia for a query and return the first paragraph
78
- args:
79
- query: the query to search for
80
- """
81
- loader = WikipediaLoader(query=query, load_max_docs=1)
82
- data = loader.load()
83
- formatted_search_docs = "\n\n---\n\n".join(
84
- [
85
- f'\n{doc.page_content}\n'
86
- for doc in data
87
- ])
88
- return formatted_search_docs
89
-
90
- @tool
91
- def web_search(query: str) -> str:
92
- """Search Tavily for a query and return maximum 3 results.
93
-
94
- Args:
95
- query: The search query.
96
- """
97
- search_docs = TavilySearchResults(max_results=3).invoke(query=query)
98
- formatted_search_docs = "\n\n---\n\n".join(
99
- [
100
- f'\n{doc.get("content", "")}\n'
101
- for doc in search_docs
102
- ])
103
- return formatted_search_docs
104
-
105
- @tool
106
- def arxiv_search(query: str) -> str:
107
- """Search Arxiv for a query and return maximum 3 result.
108
-
109
- Args:
110
- query: The search query.
111
- """
112
- search_docs = ArxivLoader(query=query, load_max_docs=3).load()
113
- formatted_search_docs = "\n\n---\n\n".join(
114
- [
115
- f'\n{doc.page_content[:1000]}\n'
116
- for doc in search_docs
117
- ])
118
- return formatted_search_docs
119
-
120
- # Load and process your JSONL data
121
- jq_schema = """
122
- {
123
- page_content: .Question,
124
- metadata: {
125
- task_id: .task_id,
126
- Level: .Level,
127
- Final_answer: ."Final answer",
128
- file_name: .file_name,
129
- Steps: .["Annotator Metadata"].Steps,
130
- Number_of_steps: .["Annotator Metadata"]["Number of steps"],
131
- How_long: .["Annotator Metadata"]["How long did this take?"],
132
- Tools: .["Annotator Metadata"].Tools,
133
- Number_of_tools: .["Annotator Metadata"]["Number of tools"]
134
- }
135
- }
136
- """
137
-
138
- # Load documents and create vector database
139
- json_loader = JSONLoader(file_path="metadata.jsonl", jq_schema=jq_schema, json_lines=True, text_content=False)
140
- json_docs = json_loader.load()
141
-
142
- # Split documents
143
- text_splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=200)
144
- json_chunks = text_splitter.split_documents(json_docs)
145
-
146
- # Create vector database
147
- database = FAISS.from_documents(json_chunks, NVIDIAEmbeddings())
148
-
149
- # Initialize LLM
150
- llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", temperature=0)
151
-
152
- # Create retriever and retriever tool
153
- retriever = database.as_retriever(search_type="similarity", search_kwargs={"k": 3})
154
-
155
- retriever_tool = create_retriever_tool(
156
- retriever=retriever,
157
- name="question_search",
158
- description="Search for similar questions and their solutions from the knowledge base."
159
- )
160
-
161
- # Combine all tools
162
- tools = [
163
- multiply,
164
- add,
165
- subtract,
166
- divide,
167
- modulus,
168
- wiki_search,
169
- web_search,
170
- arxiv_search,
171
- retriever_tool
172
- ]
173
-
174
- # Create memory for conversation
175
- memory = MemorySaver()
176
-
177
- # Create the agent
178
- agent_executor = create_react_agent(
179
- model=llm,
180
- tools=tools,
181
- checkpointer=memory
182
- )
183
-
184
- # Function to run the agent
185
- def run_agent(query, thread_id="conversation_1"):
186
- """Run the agent with a query"""
187
- config = {"configurable": {"thread_id": thread_id}}
188
-
189
- system_msg = SystemMessage(content='''You are a helpful assistant tasked with answering questions using a set of tools.
190
- Now, I will ask you a question. Report your thoughts, and finish your answer with the following template:
191
- FINAL ANSWER: [YOUR FINAL ANSWER].
192
- YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
193
- Your answer should only start with "FINAL ANSWER: ", then follows with the answer.''')
194
-
195
- user_msg = HumanMessage(content=query)
196
-
197
- print(f"User: {query}")
198
- print("\nAgent:")
199
-
200
- for step in agent_executor.stream(
201
- {"messages": [system_msg, user_msg]},
202
- config,
203
- stream_mode="values"
204
- ):
205
- step["messages"][-1].pretty_print()
206
-
207
- # Function to run agent with error handling
208
- def robust_agent_run(query, thread_id="robust_conversation"):
209
- """Run agent with error handling"""
210
- config = {"configurable": {"thread_id": thread_id}}
211
-
212
- try:
213
- system_msg = SystemMessage(content='''You are a helpful assistant tasked with answering questions using a set of tools.
214
- Now, I will ask you a question. Report your thoughts, and finish your answer with the following template:
215
- FINAL ANSWER: [YOUR FINAL ANSWER].
216
- YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
217
- Your answer should only start with "FINAL ANSWER: ", then follows with the answer.''')
218
-
219
- user_msg = HumanMessage(content=query)
220
- result = []
221
-
222
- for step in agent_executor.stream(
223
- {"messages": [system_msg, user_msg]},
224
- config,
225
- stream_mode="values"
226
- ):
227
- result = step["messages"]
228
-
229
- return result[-1].content if result else "No response generated"
230
-
231
- except Exception as e:
232
- return f"Error occurred: {str(e)}"
233
-
234
- # Main function
235
- def main(query: str) -> str:
236
- """Main function to run the agent"""
237
- return(robust_agent_run(query))
238
-
239
-
240
-
241
- # Or use the interactive version
242
- # run_agent("What is 25 * 4 + 10?")
 
1
+ import os, json
2
+ from dotenv import load_dotenv
3
+
4
+ # Load environment variables
5
+ load_dotenv()
6
+
7
+ # Imports
8
+ from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings
9
+ from langchain_google_genai import ChatGoogleGenerativeAI
10
+ from langchain_community.tools.tavily_search import TavilySearchResults
11
+ from langchain_community.document_loaders import WikipediaLoader
12
+ from langchain_community.document_loaders import ArxivLoader
13
+ from langchain_community.vectorstores import FAISS
14
+ from langchain_core.messages import SystemMessage, HumanMessage
15
+ from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
16
+ from langchain_core.tools import tool
17
+ from langchain.tools.retriever import create_retriever_tool
18
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
19
+ from langchain_community.document_loaders import JSONLoader
20
+ from langgraph.prebuilt import create_react_agent
21
+ from langgraph.checkpoint.memory import MemorySaver
22
+
23
+ # Define all tools
24
@tool
def multiply(a: int | float, b: int | float) -> int | float:
    """Multiply two numbers.

    Args:
        a: first factor (int | float)
        b: second factor (int | float)

    Returns:
        The product ``a * b``.
    """
    product = a * b
    return product
32
+
33
@tool
def add(a: int | float, b: int | float) -> int | float:
    """Add two numbers.

    Args:
        a: first addend (int | float)
        b: second addend (int | float)

    Returns:
        The sum ``a + b``.
    """
    total = a + b
    return total
42
+
43
@tool
def subtract(a: int | float, b: int | float) -> int | float:
    """Subtract two numbers.

    Args:
        a: the number to subtract from (int | float)
        b: the number to subtract (int | float)

    Returns:
        The difference ``a - b``.
    """
    difference = a - b
    return difference
52
+
53
@tool
def divide(a: int | float, b: int | float) -> int | float:
    """Divide two numbers.

    Args:
        a: the dividend (int | float)
        b: the divisor (int | float)

    Returns:
        The quotient ``a / b``.

    Raises:
        ValueError: if ``b`` equals zero.
    """
    if b != 0:
        return a / b
    # Zero divisor: report it with an explicit message instead of dividing.
    raise ValueError("Cannot divide by zero.")
64
+
65
@tool
def modulus(a: int | float, b: int | float) -> int | float:
    """Get the modulus of two numbers.

    Args:
        a: the dividend (int | float)
        b: the divisor (int | float)

    Returns:
        The remainder ``a % b``.

    Raises:
        ValueError: if ``b`` equals zero.
    """
    # Mirror divide(): reject a zero divisor up front with an explicit
    # ValueError rather than letting ``%`` raise ZeroDivisionError.
    if b == 0:
        raise ValueError("Cannot take modulus by zero.")
    return a % b
74
+
75
@tool
def wiki_search(query: str) -> str:
    """Search Wikipedia for a query and return the text of the top result.

    Args:
        query: the query to search for

    Returns:
        The page content of each loaded document (at most one is requested),
        each wrapped in newlines and joined with "---" separator lines.
    """
    pages = WikipediaLoader(query=query, load_max_docs=1).load()
    return "\n\n---\n\n".join(f'\n{page.page_content}\n' for page in pages)
89
+
90
@tool
def web_search(query: str) -> str:
    """Search Tavily for a query and return at most 3 results.

    Args:
        query: The search query.

    Returns:
        The "content" field of each result, each wrapped in newlines and
        joined with "---" separator lines. If the search tool hands back a
        plain string instead of a list of results, that string is returned
        unchanged.
    """
    # invoke() takes the tool input as its first positional argument; the
    # earlier `invoke(query=query)` call never supplied it and therefore
    # raised TypeError before any search was made.
    search_docs = TavilySearchResults(max_results=3).invoke({"query": query})

    # NOTE(review): the Tavily tool appears to return an error string rather
    # than a result list when the request fails -- confirm against its docs.
    # Calling .get() on a string would raise, so pass such a value through.
    if isinstance(search_docs, str):
        return search_docs

    return "\n\n---\n\n".join(
        f'\n{doc.get("content", "")}\n' for doc in search_docs
    )
104
+
105
@tool
def arxiv_search(query: str) -> str:
    """Search Arxiv for a query and return at most 3 results.

    Args:
        query: The search query.

    Returns:
        The first 1000 characters of each loaded document, each wrapped in
        newlines and joined with "---" separator lines.
    """
    papers = ArxivLoader(query=query, load_max_docs=3).load()
    excerpts = [f'\n{paper.page_content[:1000]}\n' for paper in papers]
    return "\n\n---\n\n".join(excerpts)
119
+
120
# Load and process your JSONL data.
# jq expression applied to each record: the question becomes the document
# content and the remaining fields are collected under "metadata".
jq_schema = """
{
page_content: .Question,
metadata: {
task_id: .task_id,
Level: .Level,
Final_answer: ."Final answer",
file_name: .file_name,
Steps: .["Annotator Metadata"].Steps,
Number_of_steps: .["Annotator Metadata"]["Number of steps"],
How_long: .["Annotator Metadata"]["How long did this take?"],
Tools: .["Annotator Metadata"].Tools,
Number_of_tools: .["Annotator Metadata"]["Number of tools"]
}
}
"""

# Load documents and create vector database.
# NOTE: everything below runs at import time and reads metadata.jsonl
# relative to the current working directory.
json_loader = JSONLoader(
    file_path="metadata.jsonl",
    jq_schema=jq_schema,
    json_lines=True,
    text_content=False,
)
json_docs = json_loader.load()

# Split documents into overlapping chunks before embedding.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=512,
    chunk_overlap=200,
)
json_chunks = text_splitter.split_documents(json_docs)

# Create vector database from the chunks using NVIDIA embeddings.
database = FAISS.from_documents(
    documents=json_chunks,
    embedding=NVIDIAEmbeddings(),
)
148
+
149
# Initialize LLM (Gemini 2.0 Flash, temperature fixed at 0).
llm = ChatGoogleGenerativeAI(
    model="gemini-2.0-flash",
    temperature=0,
)

# Create retriever: similarity search over the vector database, top 3 hits.
retriever = database.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 3},
)

# Expose the retriever to the agent as a tool named "question_search".
retriever_tool = create_retriever_tool(
    retriever=retriever,
    name="question_search",
    description="Search for similar questions and their solutions from the knowledge base.",
)
160
+
161
# Combine all tools: arithmetic helpers first, then the search tools, then
# the knowledge-base retriever.
tools = [
    multiply, add, subtract, divide, modulus,
    wiki_search, web_search, arxiv_search,
    retriever_tool,
]

# Create memory for conversation (in-memory checkpointer).
memory = MemorySaver()

# Create the agent: a prebuilt ReAct agent wired to the LLM, the tools and
# the checkpointer above.
agent_executor = create_react_agent(
    model=llm,
    tools=tools,
    checkpointer=memory,
)
183
+
184
# System prompt shared by run_agent() and robust_agent_run(); it was
# previously duplicated verbatim inside both functions.
_SYSTEM_PROMPT = '''You are a helpful assistant tasked with answering questions using a set of tools.
Now, I will ask you a question. Report your thoughts, and finish your answer with the following template:
FINAL ANSWER: [YOUR FINAL ANSWER].
YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
Your answer should only start with "FINAL ANSWER: ", then follows with the answer.'''


def _stream_agent(query, thread_id):
    """Build the system/user messages and start streaming agent states.

    Args:
        query: text placed in the human message.
        thread_id: value stored under ``configurable.thread_id`` in the
            config passed to the agent.

    Returns:
        The iterator produced by ``agent_executor.stream`` with
        ``stream_mode="values"``.
    """
    # NOTE(review): agent_executor is created with a MemorySaver checkpointer,
    # so calls that reuse a thread_id presumably build on the earlier messages
    # of that thread (including another copy of the system message). Pass a
    # distinct thread_id for independent questions -- confirm against the
    # LangGraph checkpointer documentation.
    config = {"configurable": {"thread_id": thread_id}}
    messages = [
        SystemMessage(content=_SYSTEM_PROMPT),
        HumanMessage(content=query),
    ]
    return agent_executor.stream(
        {"messages": messages},
        config,
        stream_mode="values",
    )


# Function to run the agent
def run_agent(query, thread_id="conversation_1"):
    """Run the agent with a query and pretty-print each streamed step.

    Args:
        query: the question to ask the agent.
        thread_id: conversation thread identifier passed to the agent config.

    Returns:
        None. Output is written to stdout.
    """
    # Build the messages before printing, as the original implementation did.
    states = _stream_agent(query, thread_id)

    print(f"User: {query}")
    print("\nAgent:")

    for step in states:
        # Each streamed value carries the message list so far; show the newest.
        step["messages"][-1].pretty_print()


# Function to run agent with error handling
def robust_agent_run(query, thread_id="robust_conversation"):
    """Run the agent with error handling and return its final message text.

    Args:
        query: the question to ask the agent.
        thread_id: conversation thread identifier passed to the agent config.

    Returns:
        The ``content`` of the last message in the final streamed state,
        "No response generated" if the stream yielded no messages, or an
        "Error occurred: ..." string if any exception was raised.
    """
    try:
        messages = []
        for step in _stream_agent(query, thread_id):
            # Keep only the most recent state's message list.
            messages = step["messages"]

        return messages[-1].content if messages else "No response generated"

    except Exception as e:
        # Deliberate catch-all: failures are reported to the caller as a
        # string instead of propagating.
        return f"Error occurred: {e}"
233
+
234
+ # Main function
235
def main(query: str) -> str:
    """Answer ``query`` by delegating to robust_agent_run.

    Args:
        query: the question to ask the agent.

    Returns:
        Whatever robust_agent_run returns for this query (it is called with
        its default thread id).
    """
    answer = robust_agent_run(query)
    return answer
238
+
239
+
240
+
241
+ # Or use the interactive version
242
+ # run_agent("What is 25 * 4 + 10?")