Spaces:

AimlAPI
/

Bagoodex-Web-Search

Runtime error

App Files Files Community

Bagoodex-Web-Search / helpers.py

abdibrokhim

search for images, videos, links, and follow up questions working nice

5058660 5 months ago

raw

history blame contribute delete

5.99 kB

	from dotenv import load_dotenv
	import os
	import gradio as gr
	import urllib.parse
	import re
	from pytube import YouTube
	from typing import List, Optional, Dict
	from r_types import (
	SearchVideosResponse,
	SearchImagesResponse,
	SearchLinksResponse,
	LocalMapResponse,
	KnowledgeBaseResponse
	)
	import json


	def get_video_id(url: str) -> Optional[str]:
	    """
	    Look up the YouTube video ID for ``url`` through pytube.

	    A falsy ``url`` yields ``None`` without touching pytube. Any exception
	    raised while building the ``YouTube`` object or reading its
	    ``video_id`` attribute is swallowed and also yields ``None``.
	    """
	    if not url:
	        return None

	    try:
	        video_id = YouTube(url).video_id
	    except Exception:
	        # Best-effort lookup: a bad link must not break the caller
	        video_id = None
	    return video_id


	def embed_video(videos: List[SearchVideosResponse]) -> str:
	    """
	    Build HTML that embeds each YouTube video in ``videos`` as an iframe.

	    Each item is read with ``.get("link")`` and ``.get("title")``. Items
	    whose link does not produce a video ID from ``get_video_id`` are
	    skipped. The title is HTML-escaped before it is placed in the iframe's
	    ``title`` attribute.

	    Returns the iframes joined by newlines, or a ``<p>`` fallback message
	    when ``videos`` is empty or no item produced a usable video ID.
	    """
	    import html  # local import: the module-level import block is left untouched

	    if not videos:
	        return "<p>No videos found.</p>"

	    iframes = []
	    for video in videos:
	        video_id = get_video_id(video.get("link", ""))
	        if not video_id:
	            # Skip invalid or non-parsable links
	            continue

	        # HTML attribute values are escaped with entities; a backslash
	        # before a quote does not stop the quote from closing the attribute.
	        title = html.escape(video.get("title") or "", quote=True)
	        iframe = f"""
	<iframe
	width="560"
	height="315"
	src="https://www.youtube.com/embed/{video_id}"
	title="{title}"
	frameborder="0"
	allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture"
	allowfullscreen>
	</iframe>
	"""
	        iframes.append(iframe)

	    # Every entry was skipped: report that instead of returning empty HTML
	    if not iframes:
	        return "<p>No valid YouTube videos found.</p>"

	    return "\n".join(iframes)

	def get_video_thumbnail(videos: List[SearchVideosResponse]) -> str:
	    """
	    Placeholder for thumbnail extraction; not implemented yet.

	    The body performs no work, so every call evaluates to ``None``
	    regardless of ``videos``.
	    """
	    return None

	def format_links(links) -> str:
	    """
	    Render an iterable of URL strings as a Markdown bullet list.

	    Every URL becomes ``- [label](url)``, where the label is the last
	    ``/``-separated segment of the URL once trailing slashes are removed.
	    The bullets are preceded by a ``Links:`` line. A falsy ``links``
	    value returns ``"No links found."``.
	    """
	    if not links:
	        return "No links found."

	    bullets = []
	    for link in links:
	        label = link.rstrip('/').rsplit('/', 1)[-1]
	        bullets.append(f"- [{label}]({link})\n")
	    return "Links:\n" + "".join(bullets)


	def embed_google_map(map_url: str) -> str:
	    """
	    Build an embedded Google Maps iframe for the place named in ``map_url``.

	    The place name is the path segment that follows ``/maps/place/``. The
	    API key is read from the ``GOOGLE_MAPS_API_KEY`` environment variable
	    after ``load_dotenv()`` has run.

	    Returns the iframe HTML, or a ``<p>`` message when the URL is empty,
	    no place segment can be found, or no API key is configured.
	    """
	    if not map_url:
	        return "<p>Invalid Google Maps URL.</p>"

	    # e.g. ".../maps/place/San+Francisco,+CA/@37.7,..." -> "San+Francisco,+CA"
	    match = re.search(r"/maps/place/([^/?#]+)", map_url)
	    if not match:
	        return "<p>Invalid Google Maps URL. Could not extract location.</p>"

	    # The captured segment is already URL-encoded ("+" or "%20" for spaces).
	    # Decode it first so quoting does not double-encode it ("+" -> "%2B").
	    location_text = urllib.parse.unquote_plus(match.group(1))
	    encoded_location = urllib.parse.quote(location_text, safe="")

	    load_dotenv()
	    api_key = os.getenv("GOOGLE_MAPS_API_KEY")
	    if not api_key:
	        # Without a key the URL would carry "key=None" and the map could not load
	        return "<p>Google Maps API key is not configured.</p>"

	    embed_html = f"""
	<iframe
	width="600"
	height="450"
	style="border:0"
	loading="lazy"
	allowfullscreen
	src="https://www.google.com/maps/embed/v1/place?key={api_key}&q={encoded_location}">
	</iframe>
	"""
	    return embed_html


	def format_knowledge(raw_result: str) -> str:
	    """
	    Summarize a raw JSON knowledge record as text.

	    ``raw_result`` is passed through ``cleanup_raw_json`` and parsed as
	    JSON. The ``title``, ``type``, ``born`` and ``died`` keys are read,
	    each falling back to ``"..."`` when absent.

	    Returns the formatted summary, ``"No knowledge data available."`` for
	    empty input, or an error message when the payload cannot be parsed
	    into a JSON object.
	    """
	    if not raw_result:
	        # Return text, not a number, so the result always honours `-> str`
	        return "No knowledge data available."

	    clean_json_str = cleanup_raw_json(raw_result)
	    print('Knowledge Data: ', clean_json_str)

	    try:
	        result = json.loads(clean_json_str)
	    except json.JSONDecodeError:
	        return "Error: Failed to parse knowledge data."

	    if not isinstance(result, dict):
	        # Valid JSON that is not an object (list, number, ...) has no .get()
	        return "Error: Failed to parse knowledge data."

	    title = result.get("title", "...")
	    type_ = result.get("type", "...")
	    born = result.get("born", "...")
	    died = result.get("died", "...")

	    content = f"""
	{title}
	Type: {type_}
	Born: {born}
	Died: {died}
	"""
	    return content



	def format_followup_questions(raw_questions: str) -> str:
	    """
	    Format follow-up questions from a raw JSON-like string as Markdown.

	    The input may be wrapped in a Markdown code fence (```json ... ```);
	    it is passed through ``cleanup_raw_json`` before being parsed.

	    Expected input format:
	    ```json
	    {
	      "followup_question": [
	        "What materials are needed to make a slingshot?",
	        "How to make a slingshot more powerful?"
	      ]
	    }
	    ```

	    Returns a Markdown heading followed by one bullet per question, a
	    "no questions" message when the input is empty or holds no non-empty
	    ``followup_question`` list, or an error message when the JSON cannot
	    be parsed.
	    """
	    no_questions = "No follow-up questions available."
	    if not raw_questions:
	        return no_questions

	    clean_json_str = cleanup_raw_json(raw_questions)

	    # Only the parse itself can raise JSONDecodeError, so keep the try narrow
	    try:
	        questions_dict = json.loads(clean_json_str)
	    except json.JSONDecodeError:
	        return "Error: Failed to parse follow-up questions."

	    # Valid JSON that is not an object (e.g. a bare list) has no .get()
	    if not isinstance(questions_dict, dict):
	        return no_questions

	    followup_list = questions_dict.get("followup_question", [])
	    if not isinstance(followup_list, list) or not followup_list:
	        return no_questions

	    bullets = "".join(f"- {question}\n" for question in followup_list)
	    return "### Follow-up Questions\n\n" + bullets

	def cleanup_raw_json(raw_json: str) -> str:
	    """
	    Strip Markdown code-fence markers from a raw JSON string.

	    Removes every triple-backtick fence, together with a ``json`` language
	    tag directly following it, then trims surrounding whitespace.
	    """
	    # The "|" must stay unescaped: r"\|" matches a literal pipe character,
	    # which would leave real fences in place and break JSON parsing.
	    return re.sub(r"```json|```", "", raw_json).strip()