# Spaces:
# Sleeping
# Sleeping
from flask import Flask, request, render_template_string, Response, jsonify | |
from openai import OpenAI | |
import os | |
import json | |
from urllib.parse import quote, urljoin | |
import html | |
import requests | |
from bs4 import BeautifulSoup | |
# Flask application object for this module.
app = Flask(__name__)

# OpenAI-compatible client. Both settings are mandatory and come from the
# environment, so a missing variable raises KeyError as soon as the module
# is imported.
client = OpenAI(
    api_key=os.environ["OPENAI_API_KEY"],
    base_url=os.environ["OPENAI_BASE_URL"],
)

# Pagination: the model is asked for TOTAL_RESULTS entries, which are then
# displayed RESULTS_PER_PAGE at a time.
RESULTS_PER_PAGE = 10
TOTAL_RESULTS = 30
def fetch_search_results(query):
    """Ask the LLM for search results for *query* in one non-streaming call.

    Args:
        query: Raw search text entered by the user.

    Returns:
        A ``(results, error)`` tuple. On success ``results`` is a list of
        dicts (the prompt asks for 'title', 'snippet' and 'url' keys) and
        ``error`` is ``None``. On failure ``results`` is ``None`` and
        ``error`` is a human-readable message.
    """
    if not query.strip():
        return None, "Please enter a search query."

    # The request below forces a JSON *object* response, so the prompt asks
    # for an object wrapping the array rather than a bare array.
    prompt = f"""
    You are a search engine that provides informative and relevant results. For the given query '{query}',
    generate {TOTAL_RESULTS} search results. Each result should include:
    - 'title': A concise, descriptive title of the result.
    - 'snippet': A short summary (2-3 sentences) of the content.
    - 'url': A plausible, clickable URL where the information might be found (e.g., a real or hypothetical website).
    Format the response as a JSON object with a single 'results' key whose value is an array of objects,
    where each object has 'title', 'snippet', and 'url' fields.
    Ensure the results are diverse, relevant to the query, and the URLs are realistic (e.g., https://example.com/page).
    """

    try:
        response = client.chat.completions.create(
            model="gemini-2.0-flash-lite",  # Adjust model name as needed
            messages=[
                {"role": "system", "content": "You are a helpful search engine."},
                {"role": "user", "content": prompt},
            ],
            response_format={"type": "json_object"},
        )
        content = response.choices[0].message.content
    except Exception as e:
        # Boundary handler: the SDK's specific exception classes are not
        # imported in this file, so failures are classified by message text.
        error_msg = str(e)
        if "404" in error_msg:
            return None, (
                "Error 404: Model or endpoint not found. Check OPENAI_BASE_URL "
                f"({os.environ.get('OPENAI_BASE_URL')}) and model name."
            )
        if "401" in error_msg:
            return None, "Error 401: Invalid API key. Check OPENAI_API_KEY."
        return None, f"Error: {error_msg}"

    # Parsing is handled separately from the API call so that a decode error
    # mentioning e.g. "column 401" is not misreported as an auth failure.
    try:
        parsed = json.loads(content)
    except (TypeError, ValueError) as e:
        return None, f"Error: {e}"

    # Accept either {"results": [...]} or a bare list.
    if isinstance(parsed, dict) and "results" in parsed:
        parsed = parsed["results"]
    if not isinstance(parsed, list):
        return None, "Error: Unexpected JSON structure."

    # Callers use dict access on every entry; drop anything else the model
    # may have emitted instead of letting it crash page rendering.
    results = [entry for entry in parsed if isinstance(entry, dict)]
    return results, None
def check_url():
    """Report whether the requested URL is reachable and locate its favicon.

    Reads the ``url`` query parameter of the current request. The URL counts
    as valid only when a HEAD request (following redirects) ends in HTTP 200;
    in that case the page is fetched and its ``<link rel="icon">`` /
    ``<link rel="shortcut icon">`` tag, if any, supplies the favicon.

    Returns:
        A JSON response ``{"broken": bool, "favicon": str | None}``. For a
        valid URL without a declared favicon, ``favicon`` falls back to
        ``/static/default-favicon.ico``.
    """
    # NOTE(review): no route decorator is visible in this view, yet the
    # results page script fetches '/check-url' — confirm this handler is
    # registered on the app.
    # NOTE(review): this issues server-side requests to a caller-supplied
    # URL; consider restricting targets (e.g. blocking private addresses).
    url = request.args.get('url', '')
    if not url:
        return jsonify({'broken': True, 'favicon': None})

    try:
        # Follow redirects and judge the final status.
        head_response = requests.head(url, allow_redirects=True, timeout=5)
        if head_response.status_code != 200:
            return jsonify({'broken': True, 'favicon': None})
        html_response = requests.get(url, timeout=5)
    except requests.RequestException:
        return jsonify({'broken': True, 'favicon': None})

    soup = BeautifulSoup(html_response.text, 'html.parser')
    favicon_tag = soup.find("link", rel=["icon", "shortcut icon"])
    href = favicon_tag.get('href') if favicon_tag else None

    # Resolve against the URL of the document actually fetched (after any
    # redirects). urljoin returns absolute hrefs unchanged, so no separate
    # "is it already absolute" test is needed.
    favicon_url = urljoin(html_response.url, href) if href else None
    return jsonify({'broken': False, 'favicon': favicon_url or '/static/default-favicon.ico'})
# Label of the submit button that jumps straight to the first result.
_LUCKY_BUTTON = "I'm Feeling Lucky"

# Stylesheet shared by every page: logo, search form and progress bar.
_BASE_CSS = """
body { font-family: Arial, sans-serif; margin: 0; padding: 20px; background-color: #fff; }
.header { text-align: center; margin-bottom: 20px; }
.logo { font-size: 36px; font-weight: bold; }
.logo span:nth-child(1) { color: #4285f4; }
.logo span:nth-child(2) { color: #ea4335; }
.logo span:nth-child(3) { color: #fbbc05; }
.logo span:nth-child(4) { color: #4285f4; }
.logo span:nth-child(5) { color: #34a853; }
.search-box { max-width: 584px; margin: 0 auto; }
.search-box input[type="text"] {
    width: 100%; padding: 12px 20px; font-size: 16px;
    border: 1px solid #dfe1e5; border-radius: 24px;
    box-shadow: 0 1px 6px rgba(32,33,36,0.28);
}
.search-box input[type="submit"] {
    background-color: #f8f9fa; border: 1px solid #f8f9fa;
    border-radius: 4px; color: #3c4043; font-size: 14px;
    padding: 10px 16px; margin: 11px 4px; cursor: pointer;
}
.search-box input[type="submit"]:hover {
    border: 1px solid #dadce0; box-shadow: 0 1px 2px rgba(0,0,0,0.1);
}
.search-buttons { text-align: center; }
.error { color: red; text-align: center; }
.progress-container { display: none; max-width: 584px; margin: 20px auto; border: 1px solid #dfe1e5; border-radius: 10px; }
.progress-bar {
    width: 0%; height: 20px; background-color: #4285f4;
    border-radius: 10px; animation: progress 30s ease-out forwards;
}
@keyframes progress {
    from { width: 0%; }
    to { width: 100%; }
}
"""

# Extra rules for the results listing; placed after _BASE_CSS so that the
# body/header/search-box overrides win by source order.
_RESULTS_CSS = """
body { padding: 0; color: #202124; }
.header { padding: 20px 0; margin-bottom: 0; }
.search-box { margin: 0 auto 20px; }
.search-box input[type="text"] { outline: none; }
.results { max-width: 652px; margin: 0 auto; }
.search-result { margin-bottom: 28px; }
.search-result a { color: #1a0dab; font-size: 20px; text-decoration: none; display: flex; align-items: center; }
.search-result a:hover { text-decoration: underline; }
.search-result a.broken { color: #d93025; }
.search-result .favicon { width: 16px; height: 16px; margin-right: 8px; vertical-align: middle; }
.search-result .url { color: #006621; font-size: 14px; line-height: 20px; }
.search-result p { color: #4d5156; font-size: 14px; line-height: 22px; margin: 0; }
.pagination { text-align: center; margin: 40px 0; }
.pagination a, .pagination span {
    color: #1a0dab; font-size: 14px; margin: 0 8px; text-decoration: none;
}
.pagination a:hover { text-decoration: underline; }
"""

# Progress-bar toggles used by the search form on every page.
_PROGRESS_JS = """
function showProgress() {
    document.getElementById('progress').style.display = 'block';
}
function hideProgress() {
    document.getElementById('progress').style.display = 'none';
}
"""

# Client-side link validation: asks /check-url about each result link.
_CHECK_LINKS_JS = """
async function checkLinks() {
    const links = document.querySelectorAll('.search-result a');
    const promises = Array.from(links).map(async (link) => {
        const url = encodeURIComponent(link.href);
        try {
            const response = await fetch('/check-url?url=' + url);
            const data = await response.json();
            if (data.broken) {
                link.textContent += ' [Broken Link]';
                link.classList.add('broken');
            } else if (data.favicon) {
                const img = document.createElement('img');
                img.src = data.favicon;
                img.className = 'favicon';
                img.alt = 'Favicon';
                link.insertBefore(img, link.firstChild);
            }
        } catch (error) {
            link.textContent += ' [Broken Link]';
            link.classList.add('broken');
        }
    });
    await Promise.all(promises);
}
"""


def _http_url(value):
    """Return *value* stripped if it is an http(s) URL string, else ``None``."""
    if not isinstance(value, str):
        return None
    candidate = value.strip()
    if candidate.lower().startswith(("http://", "https://")):
        return candidate
    return None


def _render_page(query, body_html, extra_css="", extra_js="", onload_js=""):
    """Wrap already-escaped *body_html* in the shared logo/search-form chrome."""
    page_html = f"""<html>
<head>
<title>LLM Search Engine</title>
<style>{_BASE_CSS}{extra_css}</style>
<script>{_PROGRESS_JS}{extra_js}
window.onload = function() {{ hideProgress(); {onload_js} }};
</script>
</head>
<body>
<div class="header">
<div class="logo">
<span>L</span><span>L</span><span>M</span><span> </span><span>Search</span>
</div>
</div>
<div class="search-box">
<form method="get" action="/" onsubmit="showProgress()">
<input type="text" name="query" value="{html.escape(query)}" placeholder="Search...">
<input type="hidden" name="page" value="1">
<div class="search-buttons">
<input type="submit" name="btn" value="LLM Search">
<input type="submit" name="btn" value="I'm Feeling Lucky">
</div>
</form>
</div>
<div class="progress-container" id="progress">
<div class="progress-bar"></div>
</div>
{body_html}
</body>
</html>
"""
    # Returned as a plain response, NOT through render_template_string: the
    # markup embeds user and model text, and Jinja would evaluate any
    # "{{ ... }}" inside it (server-side template injection).
    return Response(page_html, mimetype="text/html")


def _lucky_redirect(url):
    """Build the interstitial page that forwards the browser to *url*."""
    attr_url = html.escape(url)
    # JSON gives a valid JS string literal; "<" is additionally escaped so
    # the value cannot terminate the surrounding <script> element.
    js_url = json.dumps(url).replace("<", "\\u003c")
    return Response(f"""<html>
<head>
<meta http-equiv="refresh" content="0; url={attr_url}">
<style>
.progress-container {{ max-width: 584px; margin: 20px auto; border: 1px solid #dfe1e5; border-radius: 10px; }}
.progress-bar {{
    width: 0%; height: 20px; background-color: #4285f4;
    border-radius: 10px; animation: progress 30s ease-out forwards;
}}
@keyframes progress {{ from {{ width: 0%; }} to {{ width: 100%; }} }}
</style>
</head>
<body>
<div class="progress-container" id="progress">
<div class="progress-bar"></div>
</div>
<p style="text-align: center;">Redirecting to {attr_url}...</p>
<script>
setTimeout(function() {{ window.location.href = {js_url}; }}, 100);
</script>
</body>
</html>
""", mimetype="text/html")


def search_page():
    """Serve the search form, or run a search and render its outcome.

    Query-string parameters read from the current request:
        query: Search text. Blank or missing shows the bare search form.
        page:  1-based results page; non-numeric or values below 1 become 1.
        btn:   Submit button label. "I'm Feeling Lucky" forwards to the
               first result with an http(s) URL; anything else lists
               results.

    Returns:
        A ``text/html`` response: the start page, an error page, a redirect
        interstitial, a "no more results" page, or one page of results.
    """
    # NOTE(review): no route decorator is visible in this view, yet the forms
    # submit to '/' — confirm this handler is registered on the app.
    query = request.args.get('query', '')
    btn = request.args.get('btn', 'LLM Search')
    try:
        page = int(request.args.get('page', '1'))
    except ValueError:
        page = 1
    # Zero or negative pages would produce negative slice offsets below.
    page = max(page, 1)

    # Initial page (no query yet).
    if not query.strip():
        return _render_page("", "")

    results, error = fetch_search_results(query)
    if error:
        # The message may contain upstream exception text, so escape it.
        return _render_page(query, f'<p class="error">{html.escape(str(error))}</p>')

    # Every entry is accessed with dict methods below; ignore anything else.
    results = [entry for entry in (results or []) if isinstance(entry, dict)]

    if btn == _LUCKY_BUTTON:
        lucky_url = next(
            (u for u in (_http_url(entry.get("url")) for entry in results) if u),
            None,
        )
        if lucky_url:
            return _lucky_redirect(lucky_url)
        # No usable target: fall through and show the normal listing rather
        # than leaving the user on a "Redirecting to #" page.

    total = len(results)
    total_pages = (total + RESULTS_PER_PAGE - 1) // RESULTS_PER_PAGE
    start_idx = (page - 1) * RESULTS_PER_PAGE
    if start_idx >= total:
        return _render_page(
            query,
            '<p style="text-align: center;">No more results to display.</p>',
        )

    items = []
    for result in results[start_idx:start_idx + RESULTS_PER_PAGE]:
        title = html.escape(str(result.get("title") or "No title"))
        snippet = html.escape(str(result.get("snippet") or "No snippet"))
        raw_url = result.get("url") or "#"
        shown_url = html.escape(str(raw_url))
        # Only http(s) targets become clickable; this keeps model-supplied
        # "javascript:" and similar schemes out of the href.
        href = html.escape(_http_url(raw_url) or "#")
        items.append(f"""
<div class="search-result">
<a href="{href}" target="_blank">{title}</a>
<div class="url">{shown_url}</div>
<p>{snippet}</p>
</div>
""")

    encoded_query = quote(query)
    if page > 1:
        prev_link = (
            f'<a href="/?query={encoded_query}&amp;page={page - 1}&amp;btn=LLM+Search" '
            'onclick="showProgress()">Previous</a>'
        )
    else:
        prev_link = '<span>Previous</span>'
    if page < total_pages:
        next_link = (
            f'<a href="/?query={encoded_query}&amp;page={page + 1}&amp;btn=LLM+Search" '
            'onclick="showProgress()">Next</a>'
        )
    else:
        next_link = '<span>Next</span>'

    body_html = f"""<div class="results">
<h2 style="font-size: 18px; color: #70757a; margin-bottom: 20px;">Results for '{html.escape(query)}' (Page {page} of {total_pages})</h2>
{"".join(items)}
</div>
<div class="pagination">
{prev_link}
<span>Page {page} of {total_pages}</span>
{next_link}
</div>"""
    return _render_page(
        query,
        body_html,
        extra_css=_RESULTS_CSS,
        extra_js=_CHECK_LINKS_JS,
        onload_js="checkLinks();",
    )
if __name__ == '__main__':
    # The server listens on all interfaces, and Flask's debug mode enables an
    # interactive debugger that can execute arbitrary code. Debug is
    # therefore off unless explicitly requested with FLASK_DEBUG=1/true/yes.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in ("1", "true", "yes")
    app.run(debug=debug_enabled, host='0.0.0.0', port=int(os.environ.get("PORT", 5000)))