File size: 14,892 Bytes
3943768
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
import os
import argparse
import json
from typing import Dict, Any
from serpapi import (
    SerpApiClient, GoogleSearch, BingSearch, BaiduSearch, YandexSearch,
    YahooSearch, EbaySearch, HomeDepotSearch, YoutubeSearch, GoogleScholarSearch,
    WalmartSearch, AppleAppStoreSearch, NaverSearch
)

# SerpApi key taken from the environment; None when the variable is not set.
SERPAPI_API_KEY = os.environ.get("SERPAPI_API_KEY")

# Maps the user-facing --type names to the value sent as the Google `tbm` request parameter.
# A regular web search is represented by the empty string.
GOOGLE_SERVICES = {
    "web": "",
    "image": "isch",
    "local": "lcl",
    "video": "vid",
    "news": "nws",
    "shopping": "shop",
    "patents": "pts",
}

# Language codes accepted for --hl; validate_language() rejects any code not listed here.
# Source: https://serpapi.com/google-languages
ALL_LANGUAGE_CODES = [
    "af", "ak", "sq", "ws", "am", "ar", "hy", "az", "eu", "be", "bem", "bn", "bh", "xx-bork", "bs", "br", "bg", "bt",
    "km", "ca", "chr", "ny", "zh-cn", "zh-tw", "co", "hr", "cs", "da", "nl", "xx-elmer", "en", "eo", "et", "ee", "fo",
    "tl", "fi", "fr", "fy", "gaa", "gl", "ka", "de", "el", "kl", "gn", "gu", "xx-hacker", "ht", "ha", "haw", "iw",
    "he", "hi", "hu", "is", "ig", "id", "ia", "ga", "it", "ja", "jw", "kn", "kk", "rw", "rn", "xx-klingon", "kg",
    "ko", "kri", "ku", "ckb", "ky", "lo", "la", "lv", "ln", "lt", "loz", "lg", "ach", "mk", "mg", "ms", "ml", "mt",
    "mv", "mi", "mr", "mfe", "mo", "mn", "sr-me", "my", "ne", "pcm", "nso", "no", "nn", "oc", "or", "om", "ps", "fa",
    "xx-pirate", "pl", "pt", "pt-br", "pt-pt", "pa", "qu", "ro", "rm", "nyn", "ru", "gd", "sr", "sh", "st", "tn",
    "crs", "sn", "sd", "si", "sk", "sl", "so", "es", "es-419", "su", "sw", "sv", "tg", "ta", "tt", "te", "th", "ti",
    "to", "lua", "tum", "tr", "tk", "tw", "ug", "uk", "ur", "uz", "vu", "vi", "cy", "wo", "xh", "yi", "yo", "zu"
]

# Ten commonly used (code, name) language pairs, listed in the --hl help text (adjust this list to suit your use case)
TOP_10_LANGUAGES = [
    ("en", "English"),
    ("es", "Spanish"),
    ("zh-cn", "Chinese (Simplified)"),
    ("ar", "Arabic"),
    ("pt", "Portuguese"),
    ("id", "Indonesian"),
    ("fr", "French"),
    ("ja", "Japanese"),
    ("ru", "Russian"),
    ("de", "German")
]

# Country codes accepted for --gl; validate_country() rejects any code not listed here.
# Source: https://serpapi.com/google-countries
ALL_COUNTRY_CODES = [
    "af", "al", "dz", "as", "ad", "ao", "ai", "aq", "ag", "ar", "am", "aw", "au", "at", "az", "bs", "bh", "bd", "bb",
    "by", "be", "bz", "bj", "bm", "bt", "bo", "ba", "bw", "bv", "br", "io", "bn", "bg", "bf", "bi", "kh", "cm", "ca",
    "cv", "ky", "cf", "td", "cl", "cn", "cx", "cc", "co", "km", "cg", "cd", "ck", "cr", "ci", "hr", "cu", "cy", "cz",
    "dk", "dj", "dm", "do", "ec", "eg", "sv", "gq", "er", "ee", "et", "fk", "fo", "fj", "fi", "fr", "gf", "pf", "tf",
    "ga", "gm", "ge", "de", "gh", "gi", "gr", "gl", "gd", "gp", "gu", "gt", "gn", "gw", "gy", "ht", "hm", "va", "hn",
    "hk", "hu", "is", "in", "id", "ir", "iq", "ie", "il", "it", "jm", "jp", "jo", "kz", "ke", "ki", "kp", "kr", "kw",
    "kg", "la", "lv", "lb", "ls", "lr", "ly", "li", "lt", "lu", "mo", "mk", "mg", "mw", "my", "mv", "ml", "mt", "mh",
    "mq", "mr", "mu", "yt", "mx", "fm", "md", "mc", "mn", "ms", "ma", "mz", "mm", "na", "nr", "np", "nl", "an", "nc",
    "nz", "ni", "ne", "ng", "nu", "nf", "mp", "no", "om", "pk", "pw", "ps", "pa", "pg", "py", "pe", "ph", "pn", "pl",
    "pt", "pr", "qa", "re", "ro", "ru", "rw", "sh", "kn", "lc", "pm", "vc", "ws", "sm", "st", "sa", "sn", "rs", "sc",
    "sl", "sg", "sk", "si", "sb", "so", "za", "gs", "es", "lk", "sd", "sr", "sj", "sz", "se", "ch", "sy", "tw", "tj",
    "tz", "th", "tl", "tg", "tk", "to", "tt", "tn", "tr", "tm", "tc", "tv", "ug", "ua", "ae", "uk", "gb", "us", "um",
    "uy", "uz", "vu", "ve", "vn", "vg", "vi", "wf", "eh", "ye", "zm", "zw"
]

# Ten commonly used (code, name) country pairs, listed in the --gl help text (adjust this list to suit your use case)
TOP_10_COUNTRIES = [
    ("us", "United States"),
    ("gb", "United Kingdom"),
    ("ca", "Canada"),
    ("au", "Australia"),
    ("de", "Germany"),
    ("fr", "France"),
    ("in", "India"),
    ("jp", "Japan"),
    ("br", "Brazil"),
    ("es", "Spain")
]


def setup_argparse(argv=None):
    """
    Build the command-line parser for the search utility and parse the arguments.

    Args:
        argv: Optional list of argument strings to parse. When None (the default),
            argparse reads the arguments from sys.argv[1:].

    Returns:
        The argparse.Namespace holding the parsed options.
    """
    # Several help strings below embed explicit newlines (option lists). RawTextHelpFormatter
    # keeps those line breaks; the default formatter would re-wrap them into one paragraph.
    parser = argparse.ArgumentParser(description="Multi-Engine Search Utility using SerpApi",
                                     formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument("-q", "--query", type=str, required=True, help="Search query")
    parser.add_argument("-e", "--engine",
                        choices=['google', 'bing', 'baidu', 'yandex', 'yahoo', 'ebay', 'homedepot', 'youtube',
                                 'scholar', 'walmart', 'appstore', 'naver'], default='google',
                        help="Search engine to use")
    parser.add_argument("-l", "--limit", type=int, default=5, help="Number of results to return")
    parser.add_argument("--google_domain", type=str, default="google.com", help="Google domain to use")
    parser.add_argument("--gl", type=str, default="us",
                        help="Country of the search (default: us). Top 10 common countries:\n" +
                             "\n".join(f"  {code}: {name}" for code, name in TOP_10_COUNTRIES) +
                             "\nFor a full list of supported countries, see the documentation.")
    parser.add_argument("--hl", type=str, default="en",
                        help="Language of the search (default: en). Top 10 common languages:\n" +
                             "\n".join(f"  {code}: {name}" for code, name in TOP_10_LANGUAGES) +
                             "\nFor a full list of supported languages, see the documentation.")
    parser.add_argument("--location", type=str, help="Location for the search (optional)")
    parser.add_argument("--type", type=str, default="web",
                        help="Type of Google search to perform. Options:\n"
                             "  web: Regular Google Search (default)\n"
                             "  image: Google Images\n"
                             "  local: Google Local\n"
                             "  video: Google Videos\n"
                             "  news: Google News\n"
                             "  shopping: Google Shopping\n"
                             "  patents: Google Patents\n")
    parser.add_argument("--tbs", type=str, help="Advanced search parameters")
    parser.add_argument("--safe", choices=['active', 'off'], default='off', help="Safe search setting")
    parser.add_argument("--start", type=int, default=0, help="Pagination offset")
    parser.add_argument("--device", choices=['desktop', 'tablet', 'mobile'], default='desktop',
                        help="Device to emulate")
    parser.add_argument("-j", "--json", action="store_true", help="Output results as JSON")
    parser.add_argument("--output", type=str, default='', help="Name of file to output JSON result to if set")
    parser.add_argument("--keys", nargs='+', help="Specific keys to display in the results")
    return parser.parse_args(argv)


def validate_language(hl: str) -> str:
    """
    Return ``hl`` unchanged when it appears in ALL_LANGUAGE_CODES.

    Raises:
        ValueError: if ``hl`` is not one of the listed language codes.
    """
    if hl in ALL_LANGUAGE_CODES:
        return hl
    raise ValueError(f"Invalid language code: {hl}. Please use a valid language code.")


def validate_country(gl: str) -> str:
    """
    Return ``gl`` unchanged when it appears in ALL_COUNTRY_CODES.

    Raises:
        ValueError: if ``gl`` is not one of the listed country codes.
    """
    if gl in ALL_COUNTRY_CODES:
        return gl
    raise ValueError(f"Invalid country code: {gl}. Please use a valid country code.")


def perform_search(args) -> Dict[str, Any]:
    """
    Build the request parameters for ``args.engine`` and return the search result dict.

    Every engine receives the query, API key, ``num`` (at least 2) and device.
    Google additionally receives domain, country, language, ``tbm``, ``tbs``,
    safe-search and start offset, plus ``location`` when one was given.
    Bing and Yahoo additionally receive ``cc`` and ``setlang``.
    Parameters whose value is None are dropped before the search object is built.

    Note: for a Google patents search, ``args.limit`` is clamped to the range
    10..100 in place and that value is also used for ``num``.

    Raises:
        ValueError: from validate_country / validate_language on an unknown code.
    """
    engine_classes = {
        "google": GoogleSearch,
        "bing": BingSearch,
        "baidu": BaiduSearch,
        "yandex": YandexSearch,
        "yahoo": YahooSearch,
        "ebay": EbaySearch,
        "homedepot": HomeDepotSearch,
        "youtube": YoutubeSearch,
        "scholar": GoogleScholarSearch,
        "walmart": WalmartSearch,
        "appstore": AppleAppStoreSearch,
        "naver": NaverSearch,
    }

    # Parameters shared by all engines.
    request = {
        "q": args.query,
        "api_key": SERPAPI_API_KEY,
        "num": max(2, args.limit),
        "device": args.device,
    }

    engine_name = args.engine
    if engine_name == "google":
        # Translate the user-facing service name into the tbm value; unknown names fall back to "".
        tbm_value = GOOGLE_SERVICES.get(args.type.lower(), "")
        if tbm_value == 'pts':
            clamped_limit = min(max(args.limit, 10), 100)
            request['num'] = clamped_limit
            args.limit = clamped_limit
        request["google_domain"] = args.google_domain
        request["gl"] = validate_country(args.gl)
        request["hl"] = validate_language(args.hl)
        request["tbm"] = tbm_value
        request["tbs"] = args.tbs
        request["safe"] = args.safe
        request["start"] = args.start
        if args.location:
            request["location"] = args.location
    elif engine_name in ("bing", "yahoo"):
        request["cc"] = validate_country(args.gl)
        request["setlang"] = validate_language(args.hl)
    # Add specific parameters for other engines as needed

    # Drop every parameter that was left unset (None).
    cleaned = {}
    for name, value in request.items():
        if value is not None:
            cleaned[name] = value

    search_cls = engine_classes[engine_name]
    return search_cls(cleaned).get_dict()


def save_results_to_file(results: Dict[str, Any], filename: str) -> None:
    """
    Write ``results`` to ``filename`` as indented JSON, then print a note naming the file.
    """
    with open(filename, 'w') as out_file:
        json.dump(results, fp=out_file, indent=2)

    # Message printed after the file has been written.
    notice = f"""\n# Search results for specific the keys are in this JSON file: {filename}
* One can write python code to extract certain keys from the JSON file, but this file does not contain specific or detailed information for the query, you use should pass specific URLs to ask_question_about_documents.py for specific or detailed information.
"""
    print(notice)


def print_results(results: Dict[str, Any], args):
    """
    Report the search results on stdout.

    When ``args.keys`` is set, each requested key is dumped as indented JSON, or
    reported as not found. Otherwise all top-level keys are listed, followed by
    up to ``args.limit`` entries from the first primary result key that holds a
    non-empty list.

    When ``args.json`` is set, the full result is also written to ``args.output``
    if that is non-empty, else printed. A closing reminder is always printed.
    """
    if not args.keys:
        print(f"""To extract specific keys, you can repeat the same command and chose the keys you want by using the CLI optional arg: [--keys KEYS [KEYS ...]]
Keys available in the search results for query '{args.query}' using {args.engine} ({args.type} service):
""")

        for name in results:
            print(f"- {name}")

        print("\nSample of primary results:")
        candidates = ("organic_results", "news_results", "jobs_results", "shopping_results", "images_results",
                      "video_results", "books_results", "finance_results", "local_results", "patents")

        # Only the first candidate key holding a non-empty list is sampled.
        chosen = next(
            (name for name in candidates
             if name in results and isinstance(results[name], list) and len(results[name]) > 0),
            None,
        )
        if chosen is not None:
            print(f"\n{chosen.replace('_', ' ').title()}:")
            sample = results[chosen][:args.limit]  # first args.limit entries
            for position, entry in enumerate(sample, 1):
                if 'title' in entry:
                    print(f"  {position}. {entry.get('title', '')}:")
                for field, label in (("link", "URL"), ("original", "original")):
                    if field in entry:
                        print(f"     {label}: {entry.get(field, '')}")
                if 'links' in entry and 'website' in entry['links']:
                    print(f"     Website: {entry['links']['website']}")
                for field, label in (("product_link", "Product Link"),
                                     ("snippet", "Snippet"),
                                     ("top_stories", "Top Stories")):
                    if field in entry:
                        print(f"     {label}: {entry[field]}")
    else:
        print(f"Requested keys for query '{args.query}' using {args.engine} ({args.type} service):")
        for wanted in args.keys:
            if wanted not in results:
                print(f"\n{wanted}: Not found in results")
                continue
            print(f"\n{wanted}:")
            print(json.dumps(results[wanted], indent=2))

    if args.json and args.output:
        with open(args.output, 'wt') as handle:
            json.dump(results, handle, indent=2, default=str)
        print(f"\nFull JSON output saved to: {args.output}")
    elif args.json:
        print("\nFull JSON output:")
        print(json.dumps(results, indent=2, default=str))

    print("""\n\nRemember web snippets are short and often non-specific.
For specific information, you must use ask_question_about_documents.py on URLs or documents,
ask_question_about_image.py for images,
or download_web_video.py for videos, etc.
If you have not found a good response to the user's original query, continue to write executable code to do so.
""")


def google_search():
    """
    CLI entry point: parse arguments, run the search, print it, and save it to a JSON file.

    The full result is always saved to ``<engine>_<type>_search_results.json``.

    Raises:
        ValueError: if SERPAPI_API_KEY is not set (checked after argument parsing).
    """
    cli_args = setup_argparse()

    key_missing = not SERPAPI_API_KEY
    if key_missing:
        raise ValueError("SERPAPI_API_KEY environment variable is not set.")

    search_results = perform_search(cli_args)

    # Show the results on stdout first ...
    print_results(search_results, cli_args)

    # ... then persist the complete result set.
    output_name = "{}_{}_search_results.json".format(cli_args.engine, cli_args.type)
    save_results_to_file(search_results, output_name)

# Run the search CLI only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    google_search()

"""
# Test different search engines
python openai_server/agent_tools/google_search.py -q "artificial intelligence" -e google
python openai_server/agent_tools/google_search.py -q "machine learning" -e bing
python openai_server/agent_tools/google_search.py -q "deep learning" -e baidu
python openai_server/agent_tools/google_search.py -q "neural networks" -e yandex
python openai_server/agent_tools/google_search.py -q "data science" -e yahoo
python openai_server/agent_tools/google_search.py -q "data science" -e scholar

# Test different Google services
python openai_server/agent_tools/google_search.py -q "AI images" -e google --type image
python openai_server/agent_tools/google_search.py -q "AI startups near me" -e google --type local
python openai_server/agent_tools/google_search.py -q "AI tutorials" -e google --type video
python openai_server/agent_tools/google_search.py -q "AI breakthroughs" -e google --type news
python openai_server/agent_tools/google_search.py -q "AI products" -e google --type shopping
python openai_server/agent_tools/google_search.py -q "AI patents" -e google --type patents

# Test with specific keys
python openai_server/agent_tools/google_search.py -q "Python programming" -e google --keys organic_results search_information

# Test with different languages and countries
python openai_server/agent_tools/google_search.py -q "プログラミング" -e google --hl ja --gl jp
python openai_server/agent_tools/google_search.py -q "programmation" -e google --hl fr --gl fr

# Test with JSON output
python openai_server/agent_tools/google_search.py -q "data analysis" -e google -j

# Test pagination
python openai_server/agent_tools/google_search.py -q "machine learning algorithms" -e google --start 10 -l 5

# Test safe search
python openai_server/agent_tools/google_search.py -q "art" -e google --safe active

# Test different devices
python openai_server/agent_tools/google_search.py -q "responsive design" -e google --device mobile
"""