|
import os |
|
import argparse |
|
import json |
|
from typing import Dict, Any |
|
from serpapi import ( |
|
SerpApiClient, GoogleSearch, BingSearch, BaiduSearch, YandexSearch, |
|
YahooSearch, EbaySearch, HomeDepotSearch, YoutubeSearch, GoogleScholarSearch, |
|
WalmartSearch, AppleAppStoreSearch, NaverSearch |
|
) |
|
|
|
# SerpApi credential, read once at import time; None when the variable is unset.
SERPAPI_API_KEY = os.getenv("SERPAPI_API_KEY")
|
|
|
|
|
# Maps the CLI ``--type`` value to the string sent to Google as the ``tbm``
# request parameter (see perform_search). The empty string selects plain
# web search.
GOOGLE_SERVICES = {
    "web": "",
    "image": "isch",
    "local": "lcl",
    "video": "vid",
    "news": "nws",
    "shopping": "shop",
    "patents": "pts",
}
|
|
|
|
|
|
|
# Every language code accepted by validate_language(); membership in this
# list is the only check performed on the ``--hl`` argument.
ALL_LANGUAGE_CODES = [
    "af", "ak", "sq", "ws", "am", "ar", "hy", "az", "eu", "be", "bem", "bn", "bh", "xx-bork", "bs", "br", "bg", "bt",
    "km", "ca", "chr", "ny", "zh-cn", "zh-tw", "co", "hr", "cs", "da", "nl", "xx-elmer", "en", "eo", "et", "ee", "fo",
    "tl", "fi", "fr", "fy", "gaa", "gl", "ka", "de", "el", "kl", "gn", "gu", "xx-hacker", "ht", "ha", "haw", "iw",
    "he", "hi", "hu", "is", "ig", "id", "ia", "ga", "it", "ja", "jw", "kn", "kk", "rw", "rn", "xx-klingon", "kg",
    "ko", "kri", "ku", "ckb", "ky", "lo", "la", "lv", "ln", "lt", "loz", "lg", "ach", "mk", "mg", "ms", "ml", "mt",
    "mv", "mi", "mr", "mfe", "mo", "mn", "sr-me", "my", "ne", "pcm", "nso", "no", "nn", "oc", "or", "om", "ps", "fa",
    "xx-pirate", "pl", "pt", "pt-br", "pt-pt", "pa", "qu", "ro", "rm", "nyn", "ru", "gd", "sr", "sh", "st", "tn",
    "crs", "sn", "sd", "si", "sk", "sl", "so", "es", "es-419", "su", "sw", "sv", "tg", "ta", "tt", "te", "th", "ti",
    "to", "lua", "tum", "tr", "tk", "tw", "ug", "uk", "ur", "uz", "vu", "vi", "cy", "wo", "xh", "yi", "yo", "zu",
]
|
|
|
|
|
# (code, display name) pairs listed in the ``--hl`` help text.
TOP_10_LANGUAGES = [
    ("en", "English"),
    ("es", "Spanish"),
    ("zh-cn", "Chinese (Simplified)"),
    ("ar", "Arabic"),
    ("pt", "Portuguese"),
    ("id", "Indonesian"),
    ("fr", "French"),
    ("ja", "Japanese"),
    ("ru", "Russian"),
    ("de", "German"),
]
|
|
|
|
|
|
|
# Every country code accepted by validate_country(); membership in this
# list is the only check performed on the ``--gl`` argument.
ALL_COUNTRY_CODES = [
    "af", "al", "dz", "as", "ad", "ao", "ai", "aq", "ag", "ar", "am", "aw", "au", "at", "az", "bs", "bh", "bd", "bb",
    "by", "be", "bz", "bj", "bm", "bt", "bo", "ba", "bw", "bv", "br", "io", "bn", "bg", "bf", "bi", "kh", "cm", "ca",
    "cv", "ky", "cf", "td", "cl", "cn", "cx", "cc", "co", "km", "cg", "cd", "ck", "cr", "ci", "hr", "cu", "cy", "cz",
    "dk", "dj", "dm", "do", "ec", "eg", "sv", "gq", "er", "ee", "et", "fk", "fo", "fj", "fi", "fr", "gf", "pf", "tf",
    "ga", "gm", "ge", "de", "gh", "gi", "gr", "gl", "gd", "gp", "gu", "gt", "gn", "gw", "gy", "ht", "hm", "va", "hn",
    "hk", "hu", "is", "in", "id", "ir", "iq", "ie", "il", "it", "jm", "jp", "jo", "kz", "ke", "ki", "kp", "kr", "kw",
    "kg", "la", "lv", "lb", "ls", "lr", "ly", "li", "lt", "lu", "mo", "mk", "mg", "mw", "my", "mv", "ml", "mt", "mh",
    "mq", "mr", "mu", "yt", "mx", "fm", "md", "mc", "mn", "ms", "ma", "mz", "mm", "na", "nr", "np", "nl", "an", "nc",
    "nz", "ni", "ne", "ng", "nu", "nf", "mp", "no", "om", "pk", "pw", "ps", "pa", "pg", "py", "pe", "ph", "pn", "pl",
    "pt", "pr", "qa", "re", "ro", "ru", "rw", "sh", "kn", "lc", "pm", "vc", "ws", "sm", "st", "sa", "sn", "rs", "sc",
    "sl", "sg", "sk", "si", "sb", "so", "za", "gs", "es", "lk", "sd", "sr", "sj", "sz", "se", "ch", "sy", "tw", "tj",
    "tz", "th", "tl", "tg", "tk", "to", "tt", "tn", "tr", "tm", "tc", "tv", "ug", "ua", "ae", "uk", "gb", "us", "um",
    "uy", "uz", "vu", "ve", "vn", "vg", "vi", "wf", "eh", "ye", "zm", "zw",
]
|
|
|
|
|
# (code, display name) pairs listed in the ``--gl`` help text.
TOP_10_COUNTRIES = [
    ("us", "United States"),
    ("gb", "United Kingdom"),
    ("ca", "Canada"),
    ("au", "Australia"),
    ("de", "Germany"),
    ("fr", "France"),
    ("in", "India"),
    ("jp", "Japan"),
    ("br", "Brazil"),
    ("es", "Spain"),
]
|
|
|
|
|
def setup_argparse(argv=None):
    """Define the command-line interface and parse the arguments.

    Args:
        argv: Optional list of argument strings. When None (the default)
            argparse reads ``sys.argv[1:]``, so existing zero-argument
            callers behave as before.

    Returns:
        The populated ``argparse.Namespace``.
    """
    parser = argparse.ArgumentParser(
        description="Multi-Engine Search Utility using SerpApi",
        # Several help strings below contain explicit newlines (code/name
        # listings). The default formatter re-wraps them into one paragraph;
        # RawTextHelpFormatter prints them as written.
        formatter_class=argparse.RawTextHelpFormatter,
    )
    parser.add_argument("-q", "--query", type=str, required=True, help="Search query")
    parser.add_argument("-e", "--engine",
                        choices=['google', 'bing', 'baidu', 'yandex', 'yahoo', 'ebay', 'homedepot', 'youtube',
                                 'scholar', 'walmart', 'appstore', 'naver'], default='google',
                        help="Search engine to use")
    parser.add_argument("-l", "--limit", type=int, default=5, help="Number of results to return")
    parser.add_argument("--google_domain", type=str, default="google.com", help="Google domain to use")
    parser.add_argument("--gl", type=str, default="us",
                        help="Country of the search (default: us). Top 10 common countries:\n" +
                             "\n".join(f" {code}: {name}" for code, name in TOP_10_COUNTRIES) +
                             "\nFor a full list of supported countries, see the documentation.")
    parser.add_argument("--hl", type=str, default="en",
                        help="Language of the search (default: en). Top 10 common languages:\n" +
                             "\n".join(f" {code}: {name}" for code, name in TOP_10_LANGUAGES) +
                             "\nFor a full list of supported languages, see the documentation.")
    parser.add_argument("--location", type=str, help="Location for the search (optional)")
    parser.add_argument("--type", type=str, default="web",
                        help="Type of Google search to perform. Options:\n"
                             " web: Regular Google Search (default)\n"
                             " image: Google Images\n"
                             " local: Google Local\n"
                             " video: Google Videos\n"
                             " news: Google News\n"
                             " shopping: Google Shopping\n"
                             " patents: Google Patents\n")
    parser.add_argument("--tbs", type=str, help="Advanced search parameters")
    parser.add_argument("--safe", choices=['active', 'off'], default='off', help="Safe search setting")
    parser.add_argument("--start", type=int, default=0, help="Pagination offset")
    parser.add_argument("--device", choices=['desktop', 'tablet', 'mobile'], default='desktop',
                        help="Device to emulate")
    parser.add_argument("-j", "--json", action="store_true", help="Output results as JSON")
    parser.add_argument("--output", type=str, default='', help="Name of file to output JSON result to if set")
    parser.add_argument("--keys", nargs='+', help="Specific keys to display in the results")
    return parser.parse_args(argv)
|
|
|
|
|
def validate_language(hl: str) -> str:
    """Return ``hl`` unchanged if it is listed in ALL_LANGUAGE_CODES.

    Raises:
        ValueError: if ``hl`` is not in ALL_LANGUAGE_CODES (the comparison
            is an exact, case-sensitive match).
    """
    if hl in ALL_LANGUAGE_CODES:
        return hl
    raise ValueError(f"Invalid language code: {hl}. Please use a valid language code.")
|
|
|
|
|
def validate_country(gl: str) -> str:
    """Return ``gl`` unchanged if it is listed in ALL_COUNTRY_CODES.

    Raises:
        ValueError: if ``gl`` is not in ALL_COUNTRY_CODES (the comparison
            is an exact, case-sensitive match).
    """
    if gl in ALL_COUNTRY_CODES:
        return gl
    raise ValueError(f"Invalid country code: {gl}. Please use a valid country code.")
|
|
|
|
|
def perform_search(args) -> Dict[str, Any]:
    """Run one search with the engine selected in ``args`` and return the result.

    Args:
        args: Parsed CLI namespace. Reads ``query``, ``engine``, ``limit`` and
            ``device``; for Google also ``type``, ``google_domain``, ``gl``,
            ``hl``, ``tbs``, ``safe``, ``start`` and ``location``; for
            Bing/Yahoo also ``gl`` and ``hl``. For Google patents searches
            ``args.limit`` is overwritten with the clamped value.

    Returns:
        Whatever ``get_dict()`` of the chosen search client returns.

    Raises:
        ValueError: from validate_country / validate_language for codes not
            in the supported lists (Google, Bing and Yahoo only).
        KeyError: if ``args.engine`` is not one of the known engine names.
    """
    # SerpApi does not use the same query-parameter name for every engine;
    # engines missing from this table take the query as "q".
    query_param_by_engine = {
        "yahoo": "p",
        "yandex": "text",
        "ebay": "_nkw",
        "youtube": "search_query",
        "walmart": "query",
        "appstore": "term",
        "naver": "query",
    }

    params = {
        query_param_by_engine.get(args.engine, "q"): args.query,
        "api_key": SERPAPI_API_KEY,
        "num": max(2, args.limit),  # never request fewer than 2 results
        "device": args.device,
    }

    if args.engine == "google":
        # Unknown --type values fall back to "" (plain web search).
        tbm = GOOGLE_SERVICES.get(args.type.lower(), "")
        if tbm == 'pts':
            # Patents: clamp the count into 10..100 and write it back to
            # args.limit so later consumers of args see the same number.
            params['num'] = args.limit = min(max(args.limit, 10), 100)
        params.update({
            "google_domain": args.google_domain,
            "gl": validate_country(args.gl),
            "hl": validate_language(args.hl),
            "tbm": tbm,
            "tbs": args.tbs,
            "safe": args.safe,
            "start": args.start,
        })
        if args.location:
            params["location"] = args.location
    elif args.engine in ["bing", "yahoo"]:
        # NOTE(review): "cc"/"setlang" are sent for both engines exactly as
        # before; confirm that Yahoo honours these names.
        params.update({
            "cc": validate_country(args.gl),
            "setlang": validate_language(args.hl),
        })

    # Drop options that were not supplied (e.g. --tbs) so they are not sent.
    params = {k: v for k, v in params.items() if v is not None}

    engines = {
        "google": GoogleSearch,
        "bing": BingSearch,
        "baidu": BaiduSearch,
        "yandex": YandexSearch,
        "yahoo": YahooSearch,
        "ebay": EbaySearch,
        "homedepot": HomeDepotSearch,
        "youtube": YoutubeSearch,
        "scholar": GoogleScholarSearch,
        "walmart": WalmartSearch,
        "appstore": AppleAppStoreSearch,
        "naver": NaverSearch,
    }

    search = engines[args.engine](params)
    return search.get_dict()
|
|
|
|
|
def save_results_to_file(results: Dict[str, Any], filename: str) -> None:
    """Dump ``results`` to ``filename`` as 2-space-indented JSON, then print a notice.

    The file is opened in write mode, so an existing file of the same name
    is overwritten. The notice printed afterwards names the file.
    """
    with open(filename, 'w') as out_file:
        json.dump(results, out_file, indent=2)

    notice = f"""\n# Search results for specific the keys are in this JSON file: {filename}
* One can write python code to extract certain keys from the JSON file, but this file does not contain specific or detailed information for the query, you use should pass specific URLs to ask_question_about_documents.py for specific or detailed information.
"""
    print(notice)
|
|
|
|
|
def print_results(results: Dict[str, Any], args):
    """Print a human-readable view of ``results`` to stdout.

    * With ``args.keys``: each requested key is printed as indented JSON, or
      reported as not found.
    * Without ``args.keys``: every top-level key is listed, followed by up to
      ``args.limit`` sample entries from the first primary result list that
      is present and non-empty.
    * With ``args.json``: the full payload is additionally written to
      ``args.output`` (when set) or printed.

    A fixed reminder is always printed last.
    """
    if args.keys:
        print(f"Requested keys for query '{args.query}' using {args.engine} ({args.type} service):")
        for wanted in args.keys:
            if key_missing := wanted not in results:
                print(f"\n{wanted}: Not found in results")
            if not key_missing:
                print(f"\n{wanted}:")
                print(json.dumps(results[wanted], indent=2))
    else:
        print(f"""To extract specific keys, you can repeat the same command and chose the keys you want by using the CLI optional arg: [--keys KEYS [KEYS ...]]
Keys available in the search results for query '{args.query}' using {args.engine} ({args.type} service):
""")

        for available in results:
            print(f"- {available}")

        print("\nSample of primary results:")
        primary_keys = ["organic_results", "news_results", "jobs_results", "shopping_results", "images_results",
                        "video_results", "books_results", "finance_results", "local_results", "patents"]

        for candidate in primary_keys:
            entries = results.get(candidate)
            if not isinstance(entries, list) or len(entries) == 0:
                continue

            print(f"\n{candidate.replace('_', ' ').title()}:")
            for position, entry in enumerate(entries[:args.limit], 1):
                if 'title' in entry:
                    print(f" {position}. {entry.get('title', '')}:")
                if 'link' in entry:
                    print(f" URL: {entry.get('link', '')}")
                if 'original' in entry:
                    print(f" original: {entry.get('original', '')}")
                if 'links' in entry and 'website' in entry['links']:
                    print(f" Website: {entry['links']['website']}")
                if 'product_link' in entry:
                    print(f" Product Link: {entry['product_link']}")
                if 'snippet' in entry:
                    print(f" Snippet: {entry['snippet']}")
                if 'top_stories' in entry:
                    print(f" Top Stories: {entry['top_stories']}")
            # Only the first primary key that has entries is sampled.
            break

    if args.json and args.output:
        with open(args.output, 'wt') as json_file:
            json.dump(results, json_file, indent=2, default=str)
        print(f"\nFull JSON output saved to: {args.output}")
    elif args.json:
        print("\nFull JSON output:")
        print(json.dumps(results, indent=2, default=str))

    print("""\n\nRemember web snippets are short and often non-specific.
For specific information, you must use ask_question_about_documents.py on URLs or documents,
ask_question_about_image.py for images,
or download_web_video.py for videos, etc.
If you have not found a good response to the user's original query, continue to write executable code to do so.
""")
|
|
|
|
|
def google_search():
    """CLI entry point: parse arguments, search, print, and save the results.

    The raw results are always written to
    ``<engine>_<type>_search_results.json`` in the current directory.

    Raises:
        ValueError: if SERPAPI_API_KEY is unset or empty (checked after the
            arguments have been parsed).
    """
    cli_args = setup_argparse()

    if not SERPAPI_API_KEY:
        raise ValueError("SERPAPI_API_KEY environment variable is not set.")

    search_results = perform_search(cli_args)
    print_results(search_results, cli_args)

    results_path = f"{cli_args.engine}_{cli_args.type}_search_results.json"
    save_results_to_file(search_results, results_path)
|
|
|
|
|
# Run the CLI only when this file is executed directly, not when imported.
if __name__ == "__main__":
    google_search()
|
|
|
""" |
|
# Test different search engines |
|
python openai_server/agent_tools/google_search.py -q "artificial intelligence" -e google |
|
python openai_server/agent_tools/google_search.py -q "machine learning" -e bing |
|
python openai_server/agent_tools/google_search.py -q "deep learning" -e baidu |
|
python openai_server/agent_tools/google_search.py -q "neural networks" -e yandex |
|
python openai_server/agent_tools/google_search.py -q "data science" -e yahoo |
|
python openai_server/agent_tools/google_search.py -q "data science" -e scholar |
|
|
|
# Test different Google services |
|
python openai_server/agent_tools/google_search.py -q "AI images" -e google --type image |
|
python openai_server/agent_tools/google_search.py -q "AI startups near me" -e google --type local |
|
python openai_server/agent_tools/google_search.py -q "AI tutorials" -e google --type video |
|
python openai_server/agent_tools/google_search.py -q "AI breakthroughs" -e google --type news |
|
python openai_server/agent_tools/google_search.py -q "AI products" -e google --type shopping |
|
python openai_server/agent_tools/google_search.py -q "AI patents" -e google --type patents |
|
|
|
# Test with specific keys |
|
python openai_server/agent_tools/google_search.py -q "Python programming" -e google --keys organic_results search_information |
|
|
|
# Test with different languages and countries |
|
python openai_server/agent_tools/google_search.py -q "プログラミング" -e google --hl ja --gl jp |
|
python openai_server/agent_tools/google_search.py -q "programmation" -e google --hl fr --gl fr |
|
|
|
# Test with JSON output |
|
python openai_server/agent_tools/google_search.py -q "data analysis" -e google -j |
|
|
|
# Test pagination |
|
python openai_server/agent_tools/google_search.py -q "machine learning algorithms" -e google --start 10 -l 5 |
|
|
|
# Test safe search |
|
python openai_server/agent_tools/google_search.py -q "art" -e google --safe active |
|
|
|
# Test different devices |
|
python openai_server/agent_tools/google_search.py -q "responsive design" -e google --device mobile |
|
""" |
|
|