Bagoodex-Web-Search / helpers.py
abdibrokhim's picture
tons of changes
21ffffb
raw
history blame
5.43 kB
import html
import os
import re
import urllib.parse
from typing import List, Optional

import gradio as gr
from dotenv import load_dotenv
from pytube import YouTube

from r_types import (
    SearchVideosResponse,
    SearchImagesResponse,
    SearchLinksResponse,
    LocalMapResponse,
    KnowledgeBaseResponse,
)
def get_video_id(url: str) -> Optional[str]:
    """
    Look up the YouTube video id for ``url`` by way of pytube.

    A falsy ``url`` yields None without touching pytube. Any exception
    raised while building the ``YouTube`` object or reading its
    ``video_id`` is swallowed and also yields None.
    """
    if url:
        try:
            return YouTube(url).video_id
        except Exception:
            # Best-effort lookup: every failure is reported as "no id".
            pass
    return None
def embed_video(videos: List[SearchVideosResponse]) -> str:
    """
    Build embedded YouTube players for a list of video results.

    Each item is read as a mapping with a 'link' (YouTube URL) and an
    optional 'title'. Items whose link yields no video id via
    ``get_video_id`` are skipped.

    Returns:
        One <iframe> snippet per usable video, joined with newlines, or a
        short <p> message when the list is empty or no link is usable.
    """
    if not videos:
        return "<p>No videos found.</p>"

    iframes = []
    for video in videos:
        video_id = get_video_id(video.get("link", ""))
        if not video_id:
            # Skip invalid or non-parsable links
            continue

        # Backslash-escaping quotes has no meaning in HTML; a '"' in the
        # title would still terminate the attribute. Use real HTML escaping.
        # `or ""` also covers a title that is present but None.
        title = html.escape(video.get("title") or "", quote=True)
        safe_id = html.escape(video_id, quote=True)

        iframes.append(
            "\n".join(
                (
                    "",
                    "<iframe",
                    'width="560"',
                    'height="315"',
                    f'src="https://www.youtube.com/embed/{safe_id}"',
                    f'title="{title}"',
                    'frameborder="0"',
                    'allow="accelerometer; autoplay; clipboard-write; '
                    'encrypted-media; gyroscope; picture-in-picture"',
                    "allowfullscreen>",
                    "</iframe>",
                    "",
                )
            )
        )

    # Every link may have been rejected; say so instead of returning "".
    if not iframes:
        return "<p>No valid YouTube videos found.</p>"

    return "\n".join(iframes)
def embed_image(json_data: SearchImagesResponse) -> str:
    """
    Render a single image result as an HTML <img> tag.

    Reads 'original' (the image URL) and 'title' (used as alt text) from
    ``json_data``. Both values are HTML-escaped before being placed into
    attributes, so quotes or angle brackets in them cannot break the markup.

    Returns:
        The <img> snippet, or "<p>No image URL provided.</p>" when
        'original' is missing or empty.
    """
    original = json_data.get("original", "")
    if not original:
        return "<p>No image URL provided.</p>"

    # `or ""` covers a title that is present but None.
    alt_text = html.escape(json_data.get("title") or "", quote=True)
    src = html.escape(original, quote=True)
    return f'\n<img src="{src}" alt="{alt_text}" style="width:100%">\n'
def build_search_links_response(urls: List[str]) -> List[SearchLinksResponse]:
    """
    Convert raw URLs into a list of {'title': ..., 'link': ...} dicts.

    The title is a rough label derived from the last path segment of each
    URL (see ``_title_from_url``); the link is the URL unchanged. An empty
    or None ``urls`` yields an empty list.
    """
    return [{"title": _title_from_url(url), "link": url} for url in urls or []]


def _title_from_url(url: str) -> str:
    """Derive a rough display title from the last path segment of ``url``."""
    try:
        parts = urllib.parse.urlsplit(url)
    except ValueError:
        # urlsplit rejects some malformed URLs (e.g. unbalanced IPv6
        # brackets); fall back to treating the raw text as the path.
        path, host = url, ""
    else:
        path, host = parts.path, parts.netloc

    # Working on the parsed path keeps query strings and fragments out of
    # the title; a bare "https://host/" falls back to the host name.
    segment = path.rstrip("/").rsplit("/", 1)[-1] or host

    # Decode URL-encoded entities like %20, then turn separators into spaces.
    decoded = urllib.parse.unquote(segment)
    return decoded.replace("_", " ").replace("-", " ")
def format_links(links: List[SearchLinksResponse]) -> str:
    """
    Render link records as a Markdown bullet list under a "Links" heading.

    Every record must provide 'title' and 'link'; each becomes one
    ``- [title](link)`` bullet. An empty or missing list produces the
    plain message "No links found.".
    """
    if links:
        bullets = [f"- [{entry['title']}]({entry['link']})\n" for entry in links]
        return "### Links\n\n" + "".join(bullets)
    return "No links found."
def embed_google_map(map_url: str) -> str:
    """
    Build an embedded Google Maps <iframe> for the place named in ``map_url``.

    The place is taken from the path segment that follows '/maps/place/'.
    The API key is read from the GOOGLE_MAPS_API_KEY environment variable
    after calling ``load_dotenv()``.

    Returns:
        The iframe HTML, or a short message when the URL is empty, contains
        no '/maps/place/<name>' segment, or no API key is configured.
    """
    if not map_url:
        return "<p>Invalid Google Maps URL.</p>"

    # Capture e.g. "San+Francisco,+CA"; stop at '/', '?' or '#' so that
    # trailing path parts, query params and fragments are never included.
    match = re.search(r"/maps/place/([^/?#]+)", map_url)
    if not match:
        return "Invalid Google Maps URL. Could not extract location."

    # Only touch the environment once the URL is known to be usable.
    load_dotenv()
    api_key = os.getenv("GOOGLE_MAPS_API_KEY")
    if not api_key:
        # Without this check the iframe would be emitted with "key=None".
        return "<p>Google Maps API key is not configured.</p>"

    # The captured segment is already URL-encoded ('+' for spaces, %XX
    # escapes). Decode it first so that quoting below does not double-encode
    # it (e.g. '+' -> '%2B', '%C3' -> '%25C3').
    location_text = urllib.parse.unquote_plus(match.group(1))
    encoded_location = urllib.parse.quote(location_text, safe="")
    encoded_key = urllib.parse.quote(api_key, safe="")

    embed_src = (
        "https://www.google.com/maps/embed/v1/place"
        f"?key={encoded_key}&q={encoded_location}"
    )
    return "\n".join(
        (
            "",
            "<iframe",
            'width="600"',
            'height="450"',
            'style="border:0"',
            'loading="lazy"',
            "allowfullscreen",
            f'src="{embed_src}">',
            "</iframe>",
            "",
        )
    )
def format_knowledge(result: KnowledgeBaseResponse) -> str:
    """
    Summarize a knowledge-base record as a short Markdown snippet.

    Reads 'title' (default "Unknown"), 'type', 'born' and 'died' (each
    defaulting to an empty string) from ``result`` and returns them as a
    bold title line followed by one labelled line per field.
    """
    heading = result.get("title", "Unknown")
    kind = result.get("type", "")
    birth = result.get("born", "")
    death = result.get("died", "")

    # Leading/trailing empty entries give the snippet its surrounding newlines.
    rows = [
        "",
        f"**{heading}**",
        f"Type: {kind}",
        f"Born: {birth}",
        f"Died: {death}",
        "",
    ]
    return "\n".join(rows)
def format_followup_questions(questions: List[str]) -> str:
    """
    Render follow-up questions as a Markdown bullet list.

    Each question becomes one ``- question`` bullet beneath a
    "Follow-up Questions" heading. An empty or missing list produces the
    plain message "No follow-up questions provided.".
    """
    if questions:
        bullets = "".join(f"- {entry}\n" for entry in questions)
        return "### Follow-up Questions\n\n" + bullets
    return "No follow-up questions provided."