Bagoodex-Web-Search / helpers.py
abdibrokhim's picture
search for images, videos, links, and follow up questions working nice
5058660
import html
import json
import os
import re
import urllib.parse
from typing import List, Optional, Dict

import gradio as gr
from dotenv import load_dotenv
from pytube import YouTube

from r_types import (
    SearchVideosResponse,
    SearchImagesResponse,
    SearchLinksResponse,
    LocalMapResponse,
    KnowledgeBaseResponse
)
def get_video_id(url: str) -> Optional[str]:
    """
    Look up the YouTube video id for ``url`` via pytube.

    Returns:
        The ``video_id`` reported by pytube's ``YouTube`` object, or None
        when ``url`` is empty/None or pytube raises while handling it.
    """
    video_id = None
    if url:
        try:
            video_id = YouTube(url).video_id
        except Exception:
            # Best effort: any pytube failure is treated as "no id".
            video_id = None
    return video_id
def embed_video(videos: List[SearchVideosResponse]) -> str:
    """
    Build an HTML string of embedded YouTube players for the given videos.

    Args:
        videos: Video entries; each one is read with ``.get("link")`` and
            ``.get("title")``.

    Returns:
        One ``<iframe>`` per entry whose link yields a video id, joined by
        newlines. A ``<p>`` fallback message is returned when ``videos`` is
        empty or when no entry yields a video id.
    """
    if not videos:
        return "<p>No videos found.</p>"

    iframes = []
    for video in videos:
        video_id = get_video_id(video.get("link", ""))
        if not video_id:
            # Skip missing or non-parsable links.
            continue

        # The title is placed inside an HTML attribute, so it must be
        # HTML-escaped; a backslash does not escape a quote in HTML.
        # ``or ""`` also covers a title that is present but None.
        title = html.escape(str(video.get("title") or ""), quote=True)

        iframes.append(
            "\n"
            "<iframe\n"
            '    width="560"\n'
            '    height="315"\n'
            f'    src="https://www.youtube.com/embed/{video_id}"\n'
            f'    title="{title}"\n'
            '    frameborder="0"\n'
            '    allow="accelerometer; autoplay; clipboard-write; '
            'encrypted-media; gyroscope; picture-in-picture"\n'
            "    allowfullscreen>\n"
            "</iframe>\n"
        )

    # Every entry may have been skipped above.
    if not iframes:
        return "<p>No valid YouTube videos found.</p>"

    return "\n".join(iframes)
def get_video_thumbnail(videos: List[SearchVideosResponse]) -> str:
    """
    Placeholder with no implementation: ignores ``videos`` and returns None.

    NOTE(review): presumably meant to produce a thumbnail for the given
    videos -- confirm the intended contract before relying on it.
    """
    return None
def format_links(links) -> str:
    """
    Render a sequence of URL strings as a Markdown bullet list of links.

    Each entry is used as a string: its label is the text after the last
    "/" once trailing slashes have been stripped, and the entry itself is
    the link target.

    Returns:
        The Markdown list under a "**Links:**" heading, or
        "No links found." when ``links`` is empty or None.
    """
    if not links:
        return "No links found."

    bullets = []
    for link in links:
        label = link.rstrip('/').rsplit('/', 1)[-1]
        bullets.append(f"- [{label}]({link})\n")
    return "**Links:**\n" + "".join(bullets)
def embed_google_map(map_url: str) -> str:
    """
    Build an embedded Google Map iframe for the place named in a Maps URL.

    The place is taken from the path segment that follows ``/maps/place/``
    (e.g. ``San+Francisco,+CA``). The API key is read from the
    ``GOOGLE_MAPS_API_KEY`` environment variable after ``load_dotenv()``
    has been called.

    Args:
        map_url: A Google Maps URL containing ``/maps/place/<location>``.

    Returns:
        The iframe HTML, or a short message when ``map_url`` is empty, has
        no place segment, or no API key is configured.
    """
    if not map_url:
        return "<p>Invalid Google Maps URL.</p>"

    # Capture the place segment, stopping at the next "/", "?" or "#".
    match = re.search(r"/maps/place/([^/?#]+)", map_url)
    if not match:
        return "Invalid Google Maps URL. Could not extract location."

    # The segment is already URL-encoded ("+" for spaces, %XX escapes).
    # Decode it first so that quoting below does not double-encode it
    # (e.g. "+" turning into "%2B").
    location_text = urllib.parse.unquote_plus(match.group(1))
    encoded_location = urllib.parse.quote(location_text, safe="")

    # Only touch the environment once the input is known to be usable.
    load_dotenv()
    api_key = os.getenv("GOOGLE_MAPS_API_KEY")
    if not api_key:
        # Without a key the iframe would be requested with "key=None".
        return "<p>Google Maps API key is not configured.</p>"
    encoded_key = urllib.parse.quote(api_key, safe="")

    return (
        "\n"
        "<iframe\n"
        '    width="600"\n'
        '    height="450"\n'
        '    style="border:0"\n'
        '    loading="lazy"\n'
        "    allowfullscreen\n"
        '    src="https://www.google.com/maps/embed/v1/place'
        f'?key={encoded_key}&q={encoded_location}">\n'
        "</iframe>\n"
    )
def format_knowledge(raw_result: str) -> str:
    """
    Turn a raw JSON knowledge string into a short Markdown summary.

    The input is passed through ``cleanup_raw_json`` before parsing, so it
    may be wrapped in a Markdown code fence. The "title", "type", "born"
    and "died" keys are read from the parsed object; a missing key is
    shown as "...".

    Args:
        raw_result: JSON text describing a knowledge entry.

    Returns:
        The Markdown summary, "No knowledge data available." for empty
        input, or an error message when the text is not a JSON object.
    """
    if not raw_result:
        # Always return a string, as the signature promises.
        return "No knowledge data available."

    clean_json_str = cleanup_raw_json(raw_result)
    print('Knowledge Data: ', clean_json_str)

    try:
        result = json.loads(clean_json_str)
    except json.JSONDecodeError:
        return "Error: Failed to parse knowledge data."

    # Valid JSON can still be a list, string or number, which has no .get().
    if not isinstance(result, dict):
        return "Error: Failed to parse knowledge data."

    title = result.get("title", "...")
    type_ = result.get("type", "...")
    born = result.get("born", "...")
    died = result.get("died", "...")
    return (
        "\n"
        f"**{title}**\n"
        f"Type: {type_}\n"
        f"Born: {born}\n"
        f"Died: {died}\n"
    )
def format_followup_questions(raw_questions: str) -> str:
    """
    Extract follow-up questions from a raw JSON-like string as Markdown.

    The input may be wrapped in triple backticks (```json ... ```); these
    are removed with ``cleanup_raw_json`` before parsing.

    Expected input format:
    ```json
    {
        "followup_question": [
            "What materials are needed to make a slingshot?",
            "How to make a slingshot more powerful?"
        ]
    }
    ```

    Returns:
        A Markdown bullet list under a "Follow-up Questions" heading,
        "No follow-up questions available." when the input is empty or the
        "followup_question" key is missing, empty or not a list, or an
        error message when the text is not a JSON object.
    """
    if not raw_questions:
        return "No follow-up questions available."

    clean_json_str = cleanup_raw_json(raw_questions)

    try:
        questions_dict = json.loads(clean_json_str)
    except json.JSONDecodeError:
        return "Error: Failed to parse follow-up questions."

    # Valid JSON can still be a list, string or number, which has no .get().
    if not isinstance(questions_dict, dict):
        return "Error: Failed to parse follow-up questions."

    followup_list = questions_dict.get("followup_question", [])
    if not isinstance(followup_list, list) or not followup_list:
        return "No follow-up questions available."

    bullets = "".join(f"- {question}\n" for question in followup_list)
    return "### Follow-up Questions\n\n" + bullets
def cleanup_raw_json(raw_json: str) -> str:
    """
    Strip Markdown code-fence markers from a raw JSON string.

    Every occurrence of "```json" or "```" is removed, wherever it appears
    in the text, and surrounding whitespace is then trimmed.
    """
    fence_pattern = re.compile(r"```json|```")
    without_fences = fence_pattern.sub("", raw_json)
    return without_fences.strip()