# Spaces: AimlAPI / Bagoodex-Web-Search
# Runtime error
# File size: 5,994 Bytes

from dotenv import load_dotenv
import os
import gradio as gr
import urllib.parse
import re
from pytube import YouTube
from typing import List, Optional, Dict
from r_types import (
    SearchVideosResponse,
    SearchImagesResponse,
    SearchLinksResponse,
    LocalMapResponse,
    KnowledgeBaseResponse
)
import json


def get_video_id(url: str) -> Optional[str]:
    """
    Look up the YouTube video id for ``url`` via pytube.

    Returns:
        The ``video_id`` attribute of the ``YouTube`` object built from
        ``url``. None is returned when ``url`` is empty/falsy, or when
        building the object or reading the attribute raises any exception.
    """
    video_id = None
    if url:
        try:
            video_id = YouTube(url).video_id
        except Exception:
            # Best effort: any failure is reported to the caller as "no id".
            video_id = None
    return video_id


def embed_video(videos: List[SearchVideosResponse]) -> str:
    """
    Build embedded YouTube iframes for a list of video search results.

    Args:
        videos: Items read with ``.get("link")`` and ``.get("title")``.
            Entries for which ``get_video_id`` returns no id are skipped.

    Returns:
        An HTML string with one ``<iframe>`` per usable video, joined by
        newlines. A ``<p>`` fallback message is returned instead when
        ``videos`` is empty or when no entry yields a video id.
    """
    # Function-scope import keeps this block self-contained.
    import html

    if not videos:
        return "<p>No videos found.</p>"

    # Collect each iframe snippet
    iframes = []
    for video in videos:
        video_id = get_video_id(video.get("link", ""))
        if not video_id:
            # Skip invalid or non-parsable links
            continue

        # The title is interpolated into an HTML attribute. Backslash
        # escaping means nothing in HTML, so escape the markup characters
        # (&, <, >, quotes) properly; a missing/None title becomes "".
        title = html.escape(video.get("title") or "", quote=True)
        iframes.append(f"""
        <iframe 
            width="560" 
            height="315" 
            src="https://www.youtube.com/embed/{video_id}" 
            title="{title}" 
            frameborder="0" 
            allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" 
            allowfullscreen>
        </iframe>
        """)

    # If no valid videos after processing, return a fallback message
    if not iframes:
        return "<p>No valid YouTube videos found.</p>"

    # Join all iframes into one HTML string
    return "\n".join(iframes)

def get_video_thumbnail(videos: List[SearchVideosResponse]) -> str:
    """
    Placeholder: thumbnail extraction is not implemented yet.

    The argument is ignored and the function currently returns None,
    despite the ``str`` return annotation.
    """
    return None

def format_links(links) -> str:
    """
    Render links as a bulleted Markdown list of clickable links.

    Args:
        links: Iterable whose items are either URL strings or dicts with a
            ``'link'`` key and an optional ``'title'`` key.

    Returns:
        ``"No links found."`` when ``links`` is empty. Otherwise a
        ``**Links:**`` header followed by one ``- [title](url)`` line per
        item. When an item carries no title, the last path segment of its
        URL (ignoring trailing slashes) is used as the link text.
    """
    if not links:
        return "No links found."

    lines = ["**Links:**"]
    for entry in links:
        if isinstance(entry, dict):
            url = entry.get("link") or ""
            title = entry.get("title")
        else:
            url, title = entry, None
        if not title:
            # Fall back to the last path segment, e.g. ".../docs/intro/" -> "intro".
            title = url.rstrip('/').split('/')[-1]
        lines.append(f"- [{title}]({url})")

    # Every line, including the last, is newline-terminated.
    return "\n".join(lines) + "\n"


def embed_google_map(map_url: str) -> str:
    """
    Build an embedded Google Maps iframe for the place named in ``map_url``.

    The place is taken from the path segment that follows ``/maps/place/``.
    The API key is read from the ``GOOGLE_MAPS_API_KEY`` environment
    variable after ``load_dotenv()`` has been called; the URL is validated
    first, so the environment is only touched for usable input.

    Args:
        map_url: A Google Maps URL such as
            ``https://www.google.com/maps/place/San+Francisco,+CA``.

    Returns:
        The iframe HTML, or a short message when ``map_url`` is empty, has
        no ``/maps/place/<name>`` segment, or no API key is configured.
    """
    if not map_url:
        return "<p>Invalid Google Maps URL.</p>"

    # Capture the segment after /maps/place/, stopping at the next path
    # separator, query string or fragment.
    match = re.search(r"/maps/place/([^/?#]+)", map_url)
    if not match:
        return "Invalid Google Maps URL. Could not extract location."

    # The captured segment is still URL-encoded ("San+Francisco,+CA",
    # "%20", ...). Decode it first so that quote() below does not
    # double-encode it ("+" -> "%2B", "%20" -> "%2520").
    location_text = urllib.parse.unquote_plus(match.group(1))
    encoded_location = urllib.parse.quote(location_text, safe="")

    load_dotenv()
    api_key = os.getenv("GOOGLE_MAPS_API_KEY")
    if not api_key:
        # Without a key the iframe would be requested with "key=None".
        return "<p>Google Maps API key is not configured.</p>"

    embed_html = f"""
    <iframe
      width="600"
      height="450"
      style="border:0"
      loading="lazy"
      allowfullscreen
      src="https://www.google.com/maps/embed/v1/place?key={api_key}&q={encoded_location}">
    </iframe>
    """
    return embed_html


def format_knowledge(raw_result: str) -> str:
    """
    Summarize knowledge data (e.g., about a person) as a Markdown string.

    Args:
        raw_result: JSON text describing the entity. It is passed through
            ``cleanup_raw_json`` before parsing. The keys ``title``,
            ``type``, ``born`` and ``died`` are read; a missing key is
            rendered as ``"..."``.

    Returns:
        A Markdown summary with one field per line, or a short message when
        the input is empty, is not valid JSON, or is not a JSON object.
    """
    if not raw_result:
        # Always return a string; callers render the result as Markdown.
        return "No knowledge data available."

    # Clean up the raw JSON string
    clean_json_str = cleanup_raw_json(raw_result)
    print('Knowledge Data: ', clean_json_str)

    try:
        result = json.loads(clean_json_str)
    except json.JSONDecodeError:
        return "Error: Failed to parse knowledge data."

    if not isinstance(result, dict):
        # Valid JSON, but not an object: there are no fields to read.
        return "Error: Failed to parse knowledge data."

    title = result.get("title", "...")
    type_ = result.get("type", "...")
    born = result.get("born", "...")
    died = result.get("died", "...")

    # Lines start at column 0: four leading spaces would make Markdown
    # render the summary as a code block. The two trailing spaces on each
    # line are Markdown hard line breaks.
    return "\n".join(
        [
            f"**{title}**  ",
            f"Type: {type_}  ",
            f"Born: {born}  ",
            f"Died: {died}",
        ]
    )



def format_followup_questions(raw_questions: str) -> str:
    """
    Extracts and formats follow-up questions from a raw JSON-like string.

    The input string may contain triple backticks (```json ... ```); it is
    passed through ``cleanup_raw_json`` before parsing.

    Expected input format:
    ```json
    {
        "followup_question": [
            "What materials are needed to make a slingshot?",
            "How to make a slingshot more powerful?"
        ]
    }
    ```

    Returns a Markdown-formatted string with the follow-up questions, or a
    short message when the input is empty, is not valid JSON, or holds no
    non-empty ``followup_question`` list.
    """
    if not raw_questions:
        return "No follow-up questions available."

    # Clean up the raw JSON string
    clean_json_str = cleanup_raw_json(raw_questions)

    try:
        questions_dict = json.loads(clean_json_str)
    except json.JSONDecodeError:
        return "Error: Failed to parse follow-up questions."

    # A valid JSON payload that is not an object (e.g. a bare list) has no
    # "followup_question" key to look up.
    if not isinstance(questions_dict, dict):
        return "No follow-up questions available."

    followup_list = questions_dict.get("followup_question", [])
    if not isinstance(followup_list, list) or not followup_list:
        return "No follow-up questions available."

    # Format the questions into Markdown, one bullet per question
    bullets = "".join(f"- {question}\n" for question in followup_list)
    return "### Follow-up Questions\n\n" + bullets

def cleanup_raw_json(raw_json: str) -> str:
    """
    Strip Markdown code-fence markers from a raw JSON string.

    Every occurrence of ```json or ``` is removed, wherever it appears in
    the string, and surrounding whitespace is then trimmed from the result.
    """
    fence_pattern = re.compile(r"```json|```")
    without_fences = fence_pattern.sub("", raw_json)
    return without_fences.strip()