itskavya commited on
Commit
fa9ac9e
·
1 Parent(s): 06a4d84

update prompt n tools

Browse files
Files changed (2) hide show
  1. app.py +113 -34
  2. requirements.txt +1 -0
app.py CHANGED
@@ -6,7 +6,6 @@ import pandas as pd
6
  from typing import TypedDict, Annotated
7
  from langchain_core.messages import AnyMessage
8
  from langgraph.graph.message import add_messages
9
- from langchain_hyperbrowser import HyperbrowserBrowserUseTool
10
  from langgraph.graph import START, StateGraph
11
  from langgraph.prebuilt import ToolNode, tools_condition
12
  from langchain_core.messages import HumanMessage, SystemMessage
@@ -22,7 +21,10 @@ import os
22
  import shutil
23
  import uuid
24
  from langchain_tavily import TavilySearch
25
-
 
 
 
26
 
27
  # set_debug(True)
28
 
@@ -82,6 +84,7 @@ def interpret_image(image_name: str, question: str):
82
  }
83
  ])]
84
  response = vision_llm.invoke(messages)
 
85
  return response.content
86
  except Exception as e:
87
  print(str(e))
@@ -94,6 +97,7 @@ def transcribe_audio(file_name: str):
94
 
95
  model = whisper.load_model("small")
96
  result = model.transcribe(file_name)
 
97
  return result["text"]
98
 
99
  def download_youtube_video(url: str):
@@ -105,7 +109,7 @@ def download_youtube_video(url: str):
105
  ydl_opts = {
106
  'format': 'bestvideo+bestaudio/best',
107
  'outtmpl': output_path,
108
- 'merge_output_format': 'mp4', # Use mp4 as the final output format
109
  'quiet': True,
110
  }
111
 
@@ -174,12 +178,49 @@ def watch_video(file_name: str):
174
  print(captions)
175
  return captions
176
 
177
-
178
- browser_tool = HyperbrowserBrowserUseTool()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
179
  # search_tool = DuckDuckGoSearchRun()
180
  search_tool = TavilySearch()
181
  code_executor_tool = ExecPython()
182
- tools = [search_tool, code_executor_tool, interpret_image, get_file, transcribe_audio, download_youtube_video, read_file, watch_video, read_excel]
183
  llm = ChatOpenAI(model="gpt-4o", temperature=0)
184
  llm_with_tools = llm.bind_tools(tools)
185
 
@@ -264,41 +305,75 @@ def assistant(state:AgentState):
264
  Returns:
265
  A list of captions for each frame.
266
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
267
 
268
  search_tool_description = search_tool.description
269
- code_executor_tool_description = code_executor_tool.description
270
 
271
  has_file = state["has_file"]
272
 
273
  system_message = SystemMessage(content=f"""
274
  You are a general AI assistant. I will ask you a question.
275
 
276
- Your response should be a number, OR as few words as possible, OR a comma-separated list of numbers and/or strings.
277
- You SHOULD NOT provide explanations in your response.
278
- If you are asked for a number, don't use a comma to write your number, neither use symbols such as $ or % unless specified otherwise.
279
- If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities).
280
- If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
281
- If you are including text from the question in your response, make sure to include the text exactly as it appears in the question (e.g. with adjective).
282
- Do NOT end your response with a period.
283
- Do NOT write numbers as text.
284
-
285
- You have access to the following tools, which you can use as needed to answer a question:
286
- - File downloading tool: {download_file_tool_description}
287
- - Image interpretation tool: {image_tool_description}
288
  - YouTube video downloader: {download_youtube_video_description}
289
- - Audio transcription tool: {audio_tool_description}
290
- - Text-based file reading tool: {read_file_tool_description}
291
- - Internet search tool: {search_tool_description}
292
- - Code execution tool: {code_executor_tool_description}
293
- - Watch video tool: {watch_video_tool_description}
294
- - Read Excel file tool: {excel_tool_description}
 
295
 
296
  You may download a file for a given task ONLY if it has a file by using its associated task ID.
297
  Always ensure you have downloaded a file before using a relevant tool.
298
- You MUST use the name of a particular downloaded file in your tool call. Do NOT use a file name mentioned in the question.
299
- When asked about a YouTube video, you can watch it and/or hear it.
300
- When writing code, avoid excess formatting and keep it clean.
301
- Do NOT make up answers, instead use a tool to answer the question.
 
 
 
 
 
 
 
 
 
 
302
 
303
  The current task ID is {task_id}.
304
  The current task has a file: {has_file}
@@ -335,6 +410,7 @@ class BasicAgent:
335
  answer = app.invoke(state)
336
  answer = answer["messages"][-1].content
337
  # print(f"Agent returning fixed answer: {fixed_answer}")
 
338
  print(f"Agent returning answer: {answer}")
339
  return answer
340
 
@@ -501,7 +577,7 @@ if __name__ == "__main__":
501
  # Check for SPACE_HOST and SPACE_ID at startup for information
502
  space_host_startup = os.getenv("SPACE_HOST")
503
  space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
504
-
505
  if space_host_startup:
506
  print(f"✅ SPACE_HOST found: {space_host_startup}")
507
  print(f" Runtime URL should be: https://{space_host_startup}.hf.space")
@@ -521,22 +597,25 @@ if __name__ == "__main__":
521
 
522
  print("-"*(60 + len(" App Starting ")) + "\n")
523
 
524
- # try:
525
  # random_url = f"{DEFAULT_API_URL}/random-question"
526
  # response = requests.get(random_url, timeout=20)
527
  # response.raise_for_status()
528
  # question = response.json()
529
  # print(question)
530
- # agent = BasicAgent()
531
  # print(question.get("question"))
532
  # print(question.get("task_id"))
533
  # has_file=False
534
  # if question.get("file_name"):
535
  # has_file=True
536
  # print(agent(question.get("question"), question.get("task_id"), has_file))
 
 
 
537
 
538
- # except Exception as e:
539
- # print(str(e))
540
 
541
  print("Launching Gradio Interface for Basic Agent Evaluation...")
542
  demo.launch(debug=True, share=False)
 
6
  from typing import TypedDict, Annotated
7
  from langchain_core.messages import AnyMessage
8
  from langgraph.graph.message import add_messages
 
9
  from langgraph.graph import START, StateGraph
10
  from langgraph.prebuilt import ToolNode, tools_condition
11
  from langchain_core.messages import HumanMessage, SystemMessage
 
21
  import shutil
22
  import uuid
23
  from langchain_tavily import TavilySearch
24
+ import numpy as np
25
+ from markdownify import markdownify
26
+ import re
27
+ from io import StringIO
28
 
29
  # set_debug(True)
30
 
 
84
  }
85
  ])]
86
  response = vision_llm.invoke(messages)
87
+ print(response.content)
88
  return response.content
89
  except Exception as e:
90
  print(str(e))
 
97
 
98
  model = whisper.load_model("small")
99
  result = model.transcribe(file_name)
100
+ print(result["text"])
101
  return result["text"]
102
 
103
  def download_youtube_video(url: str):
 
109
  ydl_opts = {
110
  'format': 'bestvideo+bestaudio/best',
111
  'outtmpl': output_path,
112
+ 'merge_output_format': 'mp4',
113
  'quiet': True,
114
  }
115
 
 
178
  print(captions)
179
  return captions
180
 
181
+
182
def add_tool(numbers: list):
    """
    Calculate sum of numbers.
    """
    # Convert once to an ndarray, then reduce with an explicit float
    # accumulator so integer input still yields a floating-point total.
    values = np.asarray(numbers)
    return values.sum(dtype=float)
188
+
189
def visit_web_page(url: str):
    """
    Visit a webpage.

    Args:
        url: The URL of the web page to fetch.

    Returns:
        The page content converted to markdown. Content longer than 20000
        characters is cut down to its first and last 10000 characters with a
        truncation notice in between.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    max_chars = 20000

    response = requests.get(url, timeout=20)
    response.raise_for_status()

    markdown_content = markdownify(response.text).strip()
    # Collapse runs of three or more newlines. The quantifier must not contain
    # whitespace: "{3, }" is matched as literal text by Python's re module.
    markdown_content = re.sub(r"\n{3,}", "\n\n", markdown_content)

    # Compare the length, not the string itself, against the limit.
    if len(markdown_content) <= max_chars:
        return markdown_content

    half = max_chars // 2
    truncated = (
        markdown_content[:half]
        + "\nThe content has been truncated to stay below 20000 characters.\n"
        + markdown_content[-half:]  # negative start counts from the end
    )
    print(truncated)
    return truncated
202
+
203
def final_answer(text: str):
    """
    Extract the final answer.

    Args:
        text: The raw model response, optionally containing one or more
            "FINAL ANSWER:" markers.

    Returns:
        The text following the last "FINAL ANSWER:" marker, or the whole
        input when no marker is present, with surrounding whitespace removed.
    """
    # rpartition yields ("", "", text) when the marker is absent, so the last
    # element is always the part we want.
    answer = text.rpartition("FINAL ANSWER:")[2]
    # Strip the space/newline that normally surrounds the answer so that
    # exact-match comparison of the result is not thrown off by whitespace.
    return answer.strip()
209
+
210
def _split_markdown_row(line: str) -> list:
    """Split one markdown table row into a list of whitespace-stripped cells."""
    line = line.strip()
    # The outer pipes are optional in markdown tables; remove at most one on
    # each side so that genuinely empty edge cells are not lost.
    if line.startswith("|"):
        line = line[1:]
    if line.endswith("|"):
        line = line[:-1]
    return [cell.strip() for cell in line.split("|")]


def markdown(content: str):
    """
    Interpret markdown representation of a table.

    Args:
        content: Markdown table as string.

    Returns:
        String representation of the extracted table, or an empty string when
        the input contains no rows.
    """
    rows = [
        _split_markdown_row(line)
        for line in content.strip().splitlines()
        if line.strip()
    ]
    if not rows:
        return ""

    # Drop the header delimiter row (e.g. |---|:---:|) only when the second
    # row really is one, so a table without it does not lose its first record.
    if len(rows) > 1 and all(cell and set(cell) <= set("-:") for cell in rows[1]):
        del rows[1]

    # Pad ragged rows so every row has the same number of cells.
    width = max(len(row) for row in rows)
    rows = [row + [""] * (width - len(row)) for row in rows]

    header, *body = rows
    df = pd.DataFrame(body, columns=header)
    table = df.to_string()
    print(table)
    return table
219
+
220
  # search_tool = DuckDuckGoSearchRun()
221
  search_tool = TavilySearch()
222
  code_executor_tool = ExecPython()
223
+ tools = [search_tool, interpret_image, get_file, transcribe_audio, download_youtube_video, read_file, read_excel, add_tool, visit_web_page, markdown]
224
  llm = ChatOpenAI(model="gpt-4o", temperature=0)
225
  llm_with_tools = llm.bind_tools(tools)
226
 
 
305
  Returns:
306
  A list of captions for each frame.
307
  """
308
+
309
+ add_tool_description = """
310
+ add_tool(numbers: list) -> float:
311
+ Calculate sum of numbers.
312
+
313
+ Args:
314
+ numbers: List of numbers to sum.
315
+
316
+ Returns:
317
+ The sum of the numbers.
318
+ """
319
+
320
+ visit_web_page_tool_description = """
321
+ visit_web_page(url: str) -> str:
322
+ Visit a web page.
323
+
324
+ Args:
325
+ url: The URL of the web page to visit as string.
326
+
327
+ Returns:
328
+ Markdown representation of the HTML content of the web page.
329
+ """
330
+
331
+ markdown_tool_description = """
332
+ markdown(content: str) -> str:
333
+ Interpret markdown representation of a table.
334
+
335
+ Args:
336
+ content: Markdown table as string.
337
+
338
+ Returns:
339
+ String representation of the extracted table.
340
+ """
341
 
342
  search_tool_description = search_tool.description
 
343
 
344
  has_file = state["has_file"]
345
 
346
  system_message = SystemMessage(content=f"""
347
  You are a general AI assistant. I will ask you a question.
348
 
349
+ You have access to the following tools, which you can use as needed to answer a question:
350
+ - File downloader: {download_file_tool_description}
351
+ - Image interpretation: {image_tool_description}
 
 
 
 
 
 
 
 
 
352
  - YouTube video downloader: {download_youtube_video_description}
353
+ - Audio transcription: {audio_tool_description}
354
+ - Read text-based file: {read_file_tool_description}
355
+ - Internet search: {search_tool_description}
356
+ - Read Excel file: {excel_tool_description}
357
+ - Math: {add_tool_description}
358
+ - Visit web page: {visit_web_page_tool_description}
359
+ - Markdown table interpretation: {markdown_tool_description}
360
 
361
  You may download a file for a given task ONLY if it has a file by using its associated task ID.
362
  Always ensure you have downloaded a file before using a relevant tool.
363
+ You MUST use the name of a particular downloaded file in your tool call. DO NOT use a file name mentioned in the question.
364
+ When asked about a YouTube video, you can hear it and/or check its description.
365
+ Use a tool only when needed and never re-do a tool call that you previously did with the exact same arguments.
366
+ If a tool call fails, try using another tool to reach an answer.
367
+ Avoid returning your response directly, instead verify your response with a tool when available.
368
+
369
+ Your response should be a number, OR as few words as possible, OR a comma-separated list of numbers and/or strings.
370
+ You SHOULD NOT provide explanations in your response.
371
+ If you are asked for a number, don't use a comma to write your number, nor use symbols such as $ or % unless specified otherwise.
372
+ If you are asked for a string, don't use articles, nor abbreviations (e.g., for cities).
373
+ If you are asked for a comma-separated list, apply the above rules depending on whether the element to be put in the list is a number or a string.
374
+ When including a phrase in your response from the input, always include the complete phrase with the adjective. For example, if the input contains the phrase "fresh lemon juice", your response should include "fresh lemon juice", not just "lemon juice".
375
+ DO NOT end your response with a period.
376
+ DO NOT write numbers as text.
377
 
378
  The current task ID is {task_id}.
379
  The current task has a file: {has_file}
 
410
  answer = app.invoke(state)
411
  answer = answer["messages"][-1].content
412
  # print(f"Agent returning fixed answer: {fixed_answer}")
413
+ answer = final_answer(answer)
414
  print(f"Agent returning answer: {answer}")
415
  return answer
416
 
 
577
  # Check for SPACE_HOST and SPACE_ID at startup for information
578
  space_host_startup = os.getenv("SPACE_HOST")
579
  space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
580
+
581
  if space_host_startup:
582
  print(f"✅ SPACE_HOST found: {space_host_startup}")
583
  print(f" Runtime URL should be: https://{space_host_startup}.hf.space")
 
597
 
598
  print("-"*(60 + len(" App Starting ")) + "\n")
599
 
600
+ try:
601
  # random_url = f"{DEFAULT_API_URL}/random-question"
602
  # response = requests.get(random_url, timeout=20)
603
  # response.raise_for_status()
604
  # question = response.json()
605
  # print(question)
606
+ agent = BasicAgent()
607
  # print(question.get("question"))
608
  # print(question.get("task_id"))
609
  # has_file=False
610
  # if question.get("file_name"):
611
  # has_file=True
612
  # print(agent(question.get("question"), question.get("task_id"), has_file))
613
+ x=(agent("How many at bats did the Yankee with the most walks in the 1977 regular season have that same season?", "3f57289b-8c60-48be-bd80-01f8099ca449", False))
614
+ print(x)
615
+ # print(final_answer(x))
616
 
617
+ except Exception as e:
618
+ print(str(e))
619
 
620
  print("Launching Gradio Interface for Basic Agent Evaluation...")
621
  demo.launch(debug=True, share=False)
requirements.txt CHANGED
@@ -9,3 +9,4 @@ yt-dlp
9
  rizaio
10
  langchain-openai
11
  langchain-tavily
 
 
9
  rizaio
10
  langchain-openai
11
  langchain-tavily
12
+ markdownify