Spaces:
Build error
Build error
from flask import Flask, render_template, request, jsonify | |
from time import sleep | |
from bs4 import BeautifulSoup | |
from requests import get | |
import urllib | |
# Flask application object for this module; the ``__main__`` guard at the
# bottom of the file starts its development server.
app = Flask(__name__)
def _req(term, results, lang, start, proxies, timeout, safe, ssl_verify):
    """Fetch a single Google search result page.

    Args:
        term: Query text, sent as the ``q`` parameter.
        results: Number of results still wanted; two more are requested.
        lang: Value for the ``hl`` parameter.
        start: Value for the ``start`` (result offset) parameter.
        proxies: Proxy mapping forwarded to ``get`` (may be ``None``).
        timeout: Timeout forwarded to ``get``.
        safe: Value for the ``safe`` parameter.
        ssl_verify: Forwarded to ``get`` as ``verify``.

    Returns:
        The response object returned by ``get``.

    Raises:
        Whatever ``get`` raises, plus the error raised by
        ``raise_for_status()`` when the response status signals failure.
    """
    browser_headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36 Edg/111.0.1661.62"
    }
    query = {
        "q": term,
        # Over-ask by two so that one page is more likely to be enough
        # (prevents multiple requests).
        "num": results + 2,
        "hl": lang,
        "start": start,
        "safe": safe,
    }
    response = get(
        url="https://www.google.com/search",
        headers=browser_headers,
        params=query,
        proxies=proxies,
        timeout=timeout,
        verify=ssl_verify,
    )
    # Surface HTTP error statuses to the caller instead of returning them.
    response.raise_for_status()
    return response
class SearchResult:
    """A single search hit: its URL, title and description text."""

    def __init__(self, url, title, description):
        """Store the three fields exactly as given."""
        self.url, self.title, self.description = url, title, description

    def __repr__(self):
        """Return a one-line summary listing all three fields."""
        return f"SearchResult(url={self.url}, title={self.title}, description={self.description})"

    def to_dict(self):
        """Return the fields as a plain dict, e.g. for JSON serialisation."""
        return {
            field: getattr(self, field)
            for field in ("url", "title", "description")
        }
def search(term, num_results=10, lang="en", proxy=None, advanced=False, sleep_interval=0, timeout=5, safe="active", ssl_verify=None):
    """Search Google and return the collected results as a list.

    Result pages are fetched with ``_req`` and parsed with BeautifulSoup
    until ``num_results`` results have been gathered or a page yields
    nothing usable.

    Args:
        term: Raw query string. It is passed on unescaped because ``_req``
            sends it through the ``params`` argument of ``requests``,
            which already URL-encodes values; escaping it here as well
            would double-encode the query.
        num_results: Maximum number of results to return.
        lang: Forwarded to ``_req`` (sent as ``hl``).
        proxy: Optional proxy URL, used for both HTTP and HTTPS requests.
        advanced: When true, return ``SearchResult`` objects; otherwise
            return the result URLs as strings.
        sleep_interval: Seconds to sleep after each processed page.
        timeout: Forwarded to ``_req``.
        safe: Forwarded to ``_req`` (sent as ``safe``).
        ssl_verify: Forwarded to ``_req``.

    Returns:
        A list of at most ``num_results`` items (``SearchResult`` objects
        or URL strings, depending on ``advanced``).

    Raises:
        Propagates any exception raised by ``_req``.
    """
    # requests selects the proxy entry by the scheme of the *target* URL,
    # not of the proxy itself, so register the proxy for both schemes;
    # otherwise an "http://" proxy is never used for the HTTPS search URL.
    proxies = {"http": proxy, "https": proxy} if proxy else None

    results = []
    start = 0
    while start < num_results:
        resp = _req(term, num_results - start,
                    lang, start, proxies, timeout, safe, ssl_verify)

        soup = BeautifulSoup(resp.text, "html.parser")
        result_block = soup.find_all("div", attrs={"class": "g"})

        accepted = 0
        for result in result_block:
            link = result.find("a", href=True)
            title = result.find("h3")
            description_box = result.find(
                "div", {"style": "-webkit-line-clamp:2"})
            # Only keep hits that have a link, a title and a description.
            if not (link and title and description_box):
                continue
            description = description_box.text
            if not description:
                continue

            start += 1
            accepted += 1
            if advanced:
                results.append(SearchResult(link["href"], title.text, description))
            else:
                results.append(link["href"])

            # Each page is asked for a couple of extra results; stop as
            # soon as the requested amount has been reached.
            if start >= num_results:
                break

        if not result_block:
            # Empty page: move the offset forward and try again.
            start += 1
        elif accepted == 0:
            # The page had result blocks but none was usable. ``start``
            # did not advance, so another request would fetch the same
            # page again forever; give up with what has been collected.
            break

        sleep(sleep_interval)
    return results
def index():
    """Render the ``index.html`` template and return the result."""
    # NOTE(review): no route decorator is visible on this function in this
    # file; confirm it is registered with the Flask app somewhere.
    page = render_template("index.html")
    return page
def perform_search():
    """Run a search for the request's ``q`` parameter and return JSON.

    Query parameters:
        q: Search text (required).
        max_results: Optional integer result limit. Defaults to 10, also
            when the supplied value is not a valid integer.

    Returns:
        A JSON array of objects produced by ``SearchResult.to_dict()``, or
        a JSON error object with HTTP status 400 when ``q`` is missing or
        blank.
    """
    # NOTE(review): no route decorator is visible on this function in this
    # file; confirm it is registered with the Flask app somewhere.
    search_query = request.args.get("q")
    if not search_query or not search_query.strip():
        # Reject the request up front instead of failing inside search()
        # with an unhandled exception (HTTP 500).
        return jsonify({"error": "Missing required query parameter 'q'."}), 400

    # ``type=int`` makes the lookup fall back to the default when the
    # value cannot be converted, rather than raising ValueError.
    max_results = request.args.get("max_results", 10, type=int)

    search_results = search(term=search_query, num_results=max_results, advanced=True)

    # Convert the SearchResult objects to plain dicts for serialisation.
    results_json = [result.to_dict() for result in search_results]
    return jsonify(results_json)
if __name__ == "__main__":
    # Start Flask's built-in server when the file is executed directly.
    # NOTE(review): debug=True turns on Flask's debug mode (interactive
    # debugger and reloader); confirm this is never used for a publicly
    # reachable deployment.
    app.run(debug=True)