m-ric HF staff committed on
Commit
a32e93b
·
1 Parent(s): c053f1e

Fix results

Browse files
Files changed (1) hide show
  1. app.py +89 -22
app.py CHANGED
@@ -25,7 +25,6 @@ from scripts.text_web_browser import (
25
  FindNextTool,
26
  PageDownTool,
27
  PageUpTool,
28
- SearchInformationTool,
29
  SimpleTextBrowser,
30
  VisitTool,
31
  )
@@ -42,7 +41,95 @@ from smolagents import (
42
  from smolagents.agent_types import AgentText, AgentImage, AgentAudio
43
  from smolagents.gradio_ui import pull_messages_from_step, handle_agent_output_types
44
 
 
45
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
  AUTHORIZED_IMPORTS = [
47
  "requests",
48
  "zipfile",
@@ -74,8 +161,6 @@ login(os.getenv("HF_TOKEN"))
74
 
75
  append_answer_lock = threading.Lock()
76
 
77
- SET = "validation"
78
-
79
  custom_role_conversions = {"tool-call": "assistant", "tool-response": "user"}
80
 
81
  user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36 Edg/119.0.0.0"
@@ -104,7 +189,7 @@ ti_tool = TextInspectorTool(model, text_limit)
104
  browser = SimpleTextBrowser(**BROWSER_CONFIG)
105
 
106
  WEB_TOOLS = [
107
- SearchInformationTool(browser),
108
  VisitTool(browser),
109
  PageUpTool(browser),
110
  PageDownTool(browser),
@@ -128,24 +213,6 @@ def create_agent():
128
 
129
  document_inspection_tool = TextInspectorTool(model, 20000)
130
 
131
-
132
- # augmented_question = """You have one question to answer. It is paramount that you provide a correct answer.
133
- # Give it all you can: I know for a fact that you have access to all the relevant tools to solve it and find the correct answer (the answer does exist). Failure or 'I cannot answer' or 'None found' will not be tolerated, success will be rewarded.
134
- # Run verification steps if that's needed, you must make sure you find the correct answer!
135
- # Here is the task:
136
- # """ + example["question"]
137
-
138
- # if example["file_name"]:
139
- # prompt_use_files = "\n\nTo solve the task above, you will have to use this attached file:"
140
- # prompt_use_files += get_single_file_description(
141
- # example["file_name"], example["question"], visual_inspection_tool, document_inspection_tool
142
- # )
143
- # augmented_question += prompt_use_files
144
-
145
-
146
- # final_result = agent.run(augmented_question)
147
-
148
-
149
  def stream_to_gradio(
150
  agent,
151
  task: str,
 
25
  FindNextTool,
26
  PageDownTool,
27
  PageUpTool,
 
28
  SimpleTextBrowser,
29
  VisitTool,
30
  )
 
41
  from smolagents.agent_types import AgentText, AgentImage, AgentAudio
42
  from smolagents.gradio_ui import pull_messages_from_step, handle_agent_output_types
43
 
44
+ from smolagents import Tool
45
 
46
+
47
class GoogleSearchTool(Tool):
    """Web-search tool that queries Google through the Serper endpoint.

    The API key is read from the ``SERPER_API_KEY`` environment variable when
    the tool is instantiated. A missing key is only reported when the tool is
    actually called, so the tool can still be constructed without it.
    """

    name = "web_search"
    description = """Performs a google web search for your query then returns a string of the top search results."""
    inputs = {
        "query": {"type": "string", "description": "The search query to perform."},
        "filter_year": {
            "type": "integer",
            "description": "Optionally restrict results to a certain year",
            "nullable": True,
        },
    }
    output_type = "string"

    def __init__(self):
        # The zero-argument super() call already binds the instance; passing
        # `self` again would hand it to the base class as a stray positional.
        super().__init__()
        import os

        # Attribute name kept as-is for backward compatibility, even though
        # the key belongs to Serper (google.serper.dev).
        self.serpapi_key = os.getenv("SERPER_API_KEY")

    def forward(self, query: str, filter_year: Optional[int] = None) -> str:
        """Run a Google search and return the organic results as markdown.

        Args:
            query: The search query to perform.
            filter_year: When given, restrict results to that calendar year.

        Returns:
            A "## Search Results" block with one numbered entry per organic
            result, or a "No results found" message when the result list is
            empty.

        Raises:
            ValueError: If the API key is missing or the endpoint answers with
                a non-200 status code.
            Exception: If the response contains no "organic" section at all.
        """
        # Imports stay local so the tool does not depend on module-level names.
        import json

        import requests

        if self.serpapi_key is None:
            raise ValueError("Missing Serper API key. Make sure you have 'SERPER_API_KEY' in your env variables.")

        # NOTE(review): "engine", "api_key" and "google_domain" look like
        # leftovers from a SerpAPI-style payload; they are kept unchanged so
        # the request body stays identical — confirm whether Serper needs them.
        params = {
            "engine": "google",
            "q": query,
            "api_key": self.serpapi_key,
            "google_domain": "google.com",
        }
        headers = {
            "X-API-KEY": self.serpapi_key,
            "Content-Type": "application/json",
        }

        if filter_year is not None:
            # Google custom date range covering the whole requested year.
            params["tbs"] = f"cdr:1,cd_min:01/01/{filter_year},cd_max:12/31/{filter_year}"

        # Without a timeout a stalled connection would block the agent forever.
        response = requests.post(
            "https://google.serper.dev/search",
            headers=headers,
            data=json.dumps(params),
            timeout=30,
        )

        if response.status_code != 200:
            # Report status and raw body: the error payload is not guaranteed
            # to be JSON, so decoding it here could mask the real failure.
            raise ValueError(f"Search request failed with status {response.status_code}: {response.text}")
        results = response.json()

        if "organic" not in results:
            if filter_year is not None:
                raise Exception(
                    f"No results found for query: '{query}' with filtering on year={filter_year}. Use a less restrictive query or do not filter on year."
                )
            raise Exception(f"No results found for query: '{query}'. Use a less restrictive query.")

        organic_results = results["organic"]
        if not organic_results:
            year_filter_message = f" with filter year={filter_year}" if filter_year is not None else ""
            return f"No results found for '{query}'{year_filter_message}. Try with a more general query, or remove the year filter."

        web_snippets = []
        for idx, page in enumerate(organic_results):
            # Optional fields are rendered only when the result provides them.
            date_published = "\nDate published: " + page["date"] if "date" in page else ""
            source = "\nSource: " + page["source"] if "source" in page else ""
            snippet = "\n" + page["snippet"] if "snippet" in page else ""

            redacted_version = f"{idx}. [{page['title']}]({page['link']}){date_published}{source}\n{snippet}"
            # Strip the video-player placeholder text that can appear in snippets.
            redacted_version = redacted_version.replace("Your browser can't play this video.", "")
            web_snippets.append(redacted_version)

        return "## Search Results\n" + "\n\n".join(web_snippets)
128
+
129
+ # web_search = GoogleSearchTool()
130
+
131
+ # print(web_search(query="Donald Trump news"))
132
+ # quit()
133
  AUTHORIZED_IMPORTS = [
134
  "requests",
135
  "zipfile",
 
161
 
162
  append_answer_lock = threading.Lock()
163
 
 
 
164
  custom_role_conversions = {"tool-call": "assistant", "tool-response": "user"}
165
 
166
  user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36 Edg/119.0.0.0"
 
189
  browser = SimpleTextBrowser(**BROWSER_CONFIG)
190
 
191
  WEB_TOOLS = [
192
+ GoogleSearchTool(),
193
  VisitTool(browser),
194
  PageUpTool(browser),
195
  PageDownTool(browser),
 
213
 
214
  document_inspection_tool = TextInspectorTool(model, 20000)
215
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
216
  def stream_to_gradio(
217
  agent,
218
  task: str,