# Spaces: Sleeping
from flask import Flask, request, jsonify | |
import requests | |
from bs4 import BeautifulSoup | |
# Search page that gets scraped; point this at a different HTML results page if needed.
url = "https://html.duckduckgo.com/html/"

# The Flask application object for this module.
app = Flask(__name__)
# Fetch one page of search results (first page: page_num=1, second page: page_num=2, etc.).
def fetch_duckduckgo_results(search_query, page_num=1):
    """Scrape one page of results from the HTML search page at ``url``.

    Args:
        search_query: Text sent as the ``q`` query parameter.
        page_num: 1-based page number. It is converted into the ``s``
            offset parameter in steps of 50; values below 1 are treated
            as 1 so the offset is never negative.

    Returns:
        On success, a list of dicts with the keys ``title``,
        ``description``, ``icon_url`` (``None`` when the result has no
        icon) and ``url``. On failure (network error or a non-200
        response), a dict of the form ``{"error": "<message>"}``.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.102 Safari/537.36'
    }

    # 's' is the result offset: 0 for the first page, 50 for the second, ...
    offset = (max(page_num, 1) - 1) * 50
    params = {'q': search_query, 's': offset}

    # requests has no default timeout; without one a stalled connection
    # would block the calling worker indefinitely.
    try:
        response = requests.get(url, headers=headers, params=params, timeout=10)
    except requests.RequestException as exc:
        return {"error": f"Failed to retrieve data: {exc}"}

    if response.status_code != 200:
        return {"error": f"Failed to retrieve data: {response.status_code}"}

    soup = BeautifulSoup(response.text, 'html.parser')

    results = []
    for result in soup.find_all('div', class_='result__body'):
        title_tag = result.find('h2', class_='result__title')
        snippet_tag = result.find('a', class_='result__snippet')
        icon_tag = result.find('img', class_='result__icon__img')
        url_tag = result.find('a', class_='result__a')

        # Title, snippet and link are required; the icon is optional.
        if not (title_tag and snippet_tag and url_tag):
            continue

        href = url_tag.get('href')
        if not href:
            continue

        icon_src = icon_tag.get('src') if icon_tag else None
        if icon_src and icon_src.startswith('//'):
            # Only protocol-relative sources need a scheme prepended.
            icon_src = "https:" + icon_src

        results.append({
            'title': title_tag.text.strip(),
            'description': snippet_tag.text.strip(),
            'icon_url': icon_src or None,
            'url': href,
        })

    return results
# API endpoint that returns search results as JSON.
# NOTE(review): the original function had no route registration, so it was
# never reachable over HTTP. The '/search' path is an assumption -- confirm
# it against whatever clients of this service expect.
@app.route('/search', methods=['GET'])
def search():
    """Handle a search request and return the scraped results as JSON.

    Query parameters:
        query: Required search text. Missing, empty or whitespace-only
            values produce a 400 response.
        page: Optional 1-based page number. Defaults to 1; non-integer
            values fall back to 1 and values below 1 are raised to 1.

    Returns:
        A JSON list of results on success, a JSON error object with
        status 400 when the query is missing, or a JSON error object
        with status 502 when fetching the upstream page failed.
    """
    search_query = request.args.get('query', default='', type=str).strip()
    page_num = request.args.get('page', default=1, type=int)

    if not search_query:
        return jsonify({"error": "Search query is required"}), 400

    # Guard against zero/negative pages, which would yield a negative offset.
    page_num = max(page_num, 1)

    results = fetch_duckduckgo_results(search_query, page_num)

    # The fetch helper signals failure with an {"error": ...} dict; surface
    # that as a gateway error instead of a misleading 200.
    if isinstance(results, dict) and "error" in results:
        return jsonify(results), 502

    return jsonify(results)
# Run the Flask application when this file is executed directly.
if __name__ == "__main__":
    # Bind to every network interface on port 7860.
    app.run(port=7860, host="0.0.0.0")