LLMSearchEngine / app.py
codelion's picture
Update app.py
f0de125 verified
raw
history blame
21.6 kB
from flask import Flask, request, render_template_string, Response, jsonify
from openai import OpenAI
import os
import json
from urllib.parse import quote, urljoin
import html
import requests
from bs4 import BeautifulSoup
# Flask application object that the route decorators in this file attach to.
app = Flask(__name__)

# OpenAI-compatible client. Both settings are read from the environment at
# import time, so a missing variable raises KeyError as soon as the module loads.
client = OpenAI(api_key=os.environ["OPENAI_API_KEY"], base_url=os.environ["OPENAI_BASE_URL"])

# Pagination settings
RESULTS_PER_PAGE = 10
TOTAL_RESULTS = 30  # Ask the model for 30 results so there are several pages to browse
def _extract_results(payload):
    """Return the list of result dicts found in the decoded model reply, or None."""
    if isinstance(payload, dict):
        if isinstance(payload.get("results"), list):
            payload = payload["results"]
        else:
            # JSON-object mode forces the model to wrap the array in an object,
            # and the wrapper key is not guaranteed to be "results".
            payload = next((value for value in payload.values() if isinstance(value, list)), None)
    if not isinstance(payload, list):
        return None
    # Callers use dict access on every item, so drop anything else.
    return [item for item in payload if isinstance(item, dict)]


def fetch_search_results(query):
    """Fetch search results from the LLM without streaming, minimizing hallucinations.

    Args:
        query: The user's search text. ``None``, empty and whitespace-only
            values are rejected without calling the model.

    Returns:
        A ``(results, error)`` tuple. On success ``results`` is a list of
        dicts (expected keys: 'title', 'snippet', 'url') and ``error`` is
        ``None``. On failure ``results`` is ``None`` and ``error`` is a
        human-readable message.
    """
    if not query or not query.strip():
        return None, "Please enter a search query."
    prompt = f"""
You are a highly accurate search engine designed to provide reliable and factual results. For the given query '{query}',
generate {TOTAL_RESULTS} search results. Each result must include:
- 'title': A concise, accurate title relevant to the query.
- 'snippet': A short, factual summary (2-3 sentences) based on real-world knowledge.
- 'url': A valid, existing URL from well-known websites or domains that likely exist based on your training data (e.g., wikipedia.org, python.org, nytimes.com). Do NOT generate fictional or random URLs; use only URLs you are confident are real and accessible.
Format the response as a JSON array of objects, where each object has 'title', 'snippet', and 'url' fields. Ensure the results are diverse, directly relevant to the query, and avoid any hallucinated or fabricated content. Double-check your URLs to match real entities or resources.
"""
    try:
        response = client.chat.completions.create(
            model="gemini-2.0-flash-lite",  # Adjust model name as needed
            messages=[
                {"role": "system", "content": "You are a helpful search engine."},
                {"role": "user", "content": prompt},
            ],
            response_format={"type": "json_object"},
        )
        content = response.choices[0].message.content
        if not content:
            return None, "Error: Empty response from the model."
        results = _extract_results(json.loads(content))
    except Exception as e:
        # Boundary handler: every failure (network, auth, bad JSON) is turned
        # into a message for the page instead of a 500.
        error_msg = str(e)
        if "404" in error_msg:
            # .get() so a missing variable cannot raise inside the handler.
            base_url = os.environ.get("OPENAI_BASE_URL")
            return None, f"Error 404: Model or endpoint not found. Check OPENAI_BASE_URL ({base_url}) and model name."
        if "401" in error_msg:
            return None, "Error 401: Invalid API key. Check OPENAI_API_KEY."
        return None, f"Error: {error_msg}"
    if results is None:
        return None, "Error: Unexpected JSON structure."
    return results, None
def _is_public_http_url(url):
    """Return True if *url* is http(s) and its host resolves only to public addresses.

    Guards the link checker against server-side request forgery: without it
    any visitor could make this server fetch loopback or private-network URLs.
    """
    import ipaddress
    import socket
    from urllib.parse import urlsplit

    try:
        parts = urlsplit(url)
        host, port = parts.hostname, parts.port
    except ValueError:
        return False
    if parts.scheme not in ("http", "https") or not host:
        return False
    try:
        resolved = socket.getaddrinfo(host, port)
    except (OSError, UnicodeError):
        return False
    for entry in resolved:
        # entry[4] is the sockaddr; its first field is the textual address.
        # Drop any IPv6 zone id ("%eth0") before parsing.
        address = entry[4][0].split("%", 1)[0]
        try:
            ip = ipaddress.ip_address(address)
        except ValueError:
            return False
        if not ip.is_global or ip.is_multicast:
            return False
    return bool(resolved)


@app.route('/check-url', methods=['GET'])
def check_url():
    """Check if a URL is valid (returns 200) or broken, and fetch favicon if valid.

    Query parameters:
        url: The address to probe.

    Returns:
        JSON ``{'broken': bool, 'favicon': str | None}``. ``favicon`` is the
        absolute icon URL declared by the page, a default icon path when the
        page declares none, or ``None`` when the link is broken.
    """
    broken = {'broken': True, 'favicon': None}
    url = request.args.get('url', '').strip()
    # NOTE(review): only the initial URL is validated; redirect targets
    # followed by requests are not re-checked.
    if not url or not _is_public_http_url(url):
        return jsonify(broken)
    try:
        # Follow redirects and check final status
        head = requests.head(url, allow_redirects=True, timeout=5)
        # 405/501 mean the server refuses HEAD, not that the page is missing;
        # in that case the GET below decides.
        if head.status_code not in (200, 405, 501):
            return jsonify(broken)
        page = requests.get(url, timeout=5)
        if head.status_code != 200 and page.status_code != 200:
            return jsonify(broken)
    except requests.RequestException:
        return jsonify(broken)
    soup = BeautifulSoup(page.text, 'html.parser')
    favicon_tag = soup.find("link", rel=["icon", "shortcut icon"])
    href = favicon_tag.get('href') if favicon_tag is not None else None
    # Resolve against the final (post-redirect) URL; urljoin leaves absolute
    # hrefs unchanged.
    favicon_url = urljoin(page.url, href) if href else None
    return jsonify({'broken': False, 'favicon': favicon_url or '/static/default-favicon.ico'})
# Styles shared by every page: logo colours, buttons, progress bar, error text.
_COMMON_CSS = """
.logo { font-size: 36px; font-weight: bold; }
.logo span:nth-child(1) { color: #4285f4; }
.logo span:nth-child(2) { color: #ea4335; }
.logo span:nth-child(3) { color: #fbbc05; }
.logo span:nth-child(4) { color: #4285f4; }
.logo span:nth-child(5) { color: #34a853; }
.search-box input[type="submit"] {
    background-color: #f8f9fa; border: 1px solid #f8f9fa;
    border-radius: 4px; color: #3c4043; font-size: 14px;
    padding: 10px 16px; margin: 11px 4px; cursor: pointer;
}
.search-box input[type="submit"]:hover {
    border: 1px solid #dadce0; box-shadow: 0 1px 2px rgba(0,0,0,0.1);
}
.search-buttons { text-align: center; }
.progress-container { display: none; max-width: 584px; margin: 20px auto; border: 1px solid #dfe1e5; border-radius: 10px; }
.progress-bar {
    width: 0%; height: 20px; background-color: #4285f4;
    border-radius: 10px; animation: progress 30s ease-out forwards;
}
@keyframes progress {
    from { width: 0%; }
    to { width: 100%; }
}
.error { color: red; text-align: center; }
"""

# Layout for the landing, error and "no more results" pages.
_SIMPLE_CSS = """
body { font-family: Arial, sans-serif; margin: 0; padding: 20px; background-color: #fff; }
.header { text-align: center; margin-bottom: 20px; }
.search-box { max-width: 584px; margin: 0 auto; }
.search-box input[type="text"] {
    width: 100%; padding: 12px 20px; font-size: 16px;
    border: 1px solid #dfe1e5; border-radius: 24px;
    box-shadow: 0 1px 6px rgba(0,0,0,0.28);
}
"""

# Layout for the results listing.
_RESULTS_CSS = """
body { font-family: Arial, sans-serif; margin: 0; padding: 0; color: #202124; background-color: #fff; }
.header { text-align: center; padding: 20px 0; }
.search-box { max-width: 584px; margin: 0 auto 20px; }
.search-box input[type="text"] {
    width: 100%; padding: 12px 20px; font-size: 16px;
    border: 1px solid #dfe1e5; border-radius: 24px;
    box-shadow: 0 1px 6px rgba(0,0,0,0.28); outline: none;
}
.results { max-width: 652px; margin: 0 auto; }
.search-result { margin-bottom: 28px; }
.search-result a { color: #1a0dab; font-size: 20px; text-decoration: none; display: flex; align-items: center; }
.search-result a:hover { text-decoration: underline; }
.search-result a.broken { color: #d93025; }
.search-result .favicon { width: 16px; height: 16px; margin-right: 8px; vertical-align: middle; }
.search-result .url { color: #006621; font-size: 14px; line-height: 20px; }
.search-result p { color: #4d5156; font-size: 14px; line-height: 22px; margin: 0; }
.pagination { text-align: center; margin: 40px 0; }
.pagination a, .pagination span {
    color: #1a0dab; font-size: 14px; margin: 0 8px; text-decoration: none;
}
.pagination a:hover { text-decoration: underline; }
"""

# Progress-bar helpers; null-safe so they cannot throw on any page.
_COMMON_JS = """
function showProgress() {
    var box = document.getElementById('progress');
    if (box) { box.style.display = 'block'; }
}
function hideProgress() {
    var box = document.getElementById('progress');
    if (box) { box.style.display = 'none'; }
}
"""

# Client-side link validation for the results page (calls /check-url).
_CHECK_LINKS_JS = """
async function checkLinks() {
    const links = document.querySelectorAll('.search-result a');
    const markBroken = (link) => {
        link.textContent += ' [Broken Link]';
        link.classList.add('broken');
    };
    const promises = Array.from(links).map(async (link) => {
        if (link.getAttribute('href') === '#') {
            markBroken(link);
            return;
        }
        try {
            const response = await fetch('/check-url?url=' + encodeURIComponent(link.href));
            const data = await response.json();
            if (data.broken) {
                markBroken(link);
            } else if (data.favicon) {
                const img = document.createElement('img');
                img.src = data.favicon;
                img.className = 'favicon';
                img.alt = 'Favicon';
                link.insertBefore(img, link.firstChild);
            }
        } catch (error) {
            markBroken(link);
        }
    });
    await Promise.all(promises);
}
window.addEventListener('load', checkLinks);
"""


def _safe_http_url(value):
    """Return *value* as a string if it is an http(s) URL, otherwise '#'."""
    url = str(value).strip() if value else ""
    return url if url.lower().startswith(("http://", "https://")) else "#"


def _render_page(query, content, layout_css=_SIMPLE_CSS, extra_js=""):
    """Wrap already-escaped *content* in the shared logo, search form and progress bar.

    The markup is returned as a plain Response rather than through
    render_template_string, so user or model text containing Jinja syntax
    ('{{ ... }}') is never evaluated as a template.
    """
    page = f"""<html>
<head>
<title>LLM Search Engine</title>
<style>{layout_css}{_COMMON_CSS}</style>
<script>{_COMMON_JS}{extra_js}</script>
</head>
<body onload="hideProgress()">
<div class="header">
<div class="logo">
<span>L</span><span>L</span><span>M</span><span> </span><span>Search</span>
</div>
</div>
<div class="search-box">
<form method="get" action="/" onsubmit="showProgress()">
<input type="text" name="query" placeholder="Search..." value="{html.escape(query)}">
<input type="hidden" name="page" value="1">
<div class="search-buttons">
<input type="submit" name="btn" value="LLM Search">
<input type="submit" name="btn" value="I'm Feeling Lucky">
</div>
</form>
</div>
<div class="progress-container" id="progress">
<div class="progress-bar"></div>
</div>
{content}
</body>
</html>
"""
    return Response(page, mimetype="text/html")


def _lucky_redirect(target):
    """Build the interstitial page that forwards the browser to *target*."""
    attr_url = html.escape(target)
    # json.dumps gives a valid JS string literal; escaping '<' keeps a
    # '</script>' sequence in the URL from closing the script element.
    js_url = json.dumps(target).replace("<", "\\u003c")
    return Response(f"""
<html>
<head>
<meta http-equiv="refresh" content="0; url={attr_url}">
<style>
.progress-container {{ max-width: 584px; margin: 20px auto; border: 1px solid #dfe1e5; border-radius: 10px; }}
.progress-bar {{
    width: 0%; height: 20px; background-color: #4285f4;
    border-radius: 10px; animation: progress 30s ease-out forwards;
}}
@keyframes progress {{ from {{ width: 0%; }} to {{ width: 100%; }} }}
</style>
</head>
<body>
<div class="progress-container" id="progress">
<div class="progress-bar"></div>
</div>
<p style="text-align: center;">Redirecting to {attr_url}...</p>
<script>
setTimeout(function() {{ window.location.href = {js_url}; }}, 100);
</script>
</body>
</html>
""", mimetype="text/html")


@app.route('/', methods=['GET'])
def search_page():
    """Serve the initial page or process search with a progress bar and URL validation.

    Query parameters:
        query: Search text; when blank the landing page is returned.
        page: 1-based results page; invalid or non-positive values become 1.
        btn: "LLM Search" (default) lists results, "I'm Feeling Lucky"
            forwards to the first result's URL.

    Returns:
        An HTML response: landing page, error page, redirect interstitial,
        "no more results" notice, or one page of results.
    """
    query = request.args.get('query', '')
    btn = request.args.get('btn', 'LLM Search')
    try:
        # Clamp to 1 so page=0 or a negative page cannot produce a negative slice.
        page = max(int(request.args.get('page', '1')), 1)
    except ValueError:
        page = 1

    # Initial page (no query yet)
    if not query.strip():
        return _render_page("", "")

    results, error = fetch_search_results(query)
    if not error and not isinstance(results, list):
        error = "Error: Unexpected JSON structure."
    if error:
        # The message can contain upstream exception text, so escape it.
        return _render_page(query, f'<p class="error">{html.escape(str(error))}</p>')
    # Model output is untrusted: keep only items that support dict access.
    results = [item for item in results if isinstance(item, dict)]

    # "I'm Feeling Lucky" redirects to the first URL
    if btn == "I'm Feeling Lucky":
        first_url = _safe_http_url(results[0].get("url")) if results else "#"
        if first_url == "#":
            return _render_page(query, '<p class="error">No result to redirect to.</p>')
        return _lucky_redirect(first_url)

    # Calculate pagination for "LLM Search"
    total_pages = (len(results) + RESULTS_PER_PAGE - 1) // RESULTS_PER_PAGE
    start_idx = (page - 1) * RESULTS_PER_PAGE
    if start_idx >= len(results):
        return _render_page(query, '<p style="text-align: center;">No more results to display.</p>')

    cards = []
    for result in results[start_idx:start_idx + RESULTS_PER_PAGE]:
        # str() guards against non-string values (numbers, nested objects).
        title = html.escape(str(result.get("title") or "No title"))
        snippet = html.escape(str(result.get("snippet") or "No snippet"))
        # Only http(s) links are rendered as clickable, which blocks
        # javascript: and similar schemes.
        url = html.escape(_safe_http_url(result.get("url")))
        cards.append(
            '<div class="search-result">\n'
            f'<a href="{url}" target="_blank" rel="noopener noreferrer">{title}</a>\n'
            f'<div class="url">{url}</div>\n'
            f'<p>{snippet}</p>\n'
            '</div>'
        )

    encoded_query = quote(query)
    prev_link = f'<a href="/?query={encoded_query}&page={page-1}&btn=LLM+Search" onclick="showProgress()">Previous</a>' if page > 1 else '<span>Previous</span>'
    next_link = f'<a href="/?query={encoded_query}&page={page+1}&btn=LLM+Search" onclick="showProgress()">Next</a>' if page < total_pages else '<span>Next</span>'

    sections = [
        '<div class="results">',
        f'<h2 style="font-size: 18px; color: #70757a; margin-bottom: 20px;">Results for \'{html.escape(query)}\' (Page {page} of {total_pages})</h2>',
        *cards,
        '</div>',
        '<div class="pagination">',
        prev_link,
        f'<span>Page {page} of {total_pages}</span>',
        next_link,
        '</div>',
    ]
    return _render_page(query, "\n".join(sections), layout_css=_RESULTS_CSS, extra_js=_CHECK_LINKS_JS)
if __name__ == '__main__':
    # The server listens on every interface, so the Werkzeug interactive
    # debugger (which permits arbitrary code execution) must not be on by
    # default. Set FLASK_DEBUG=1 to opt in for local development.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in ("1", "true", "yes", "on")
    app.run(debug=debug_enabled, host='0.0.0.0', port=int(os.environ.get("PORT", 5000)))