Spaces:

AmmarFahmy
/

AutoRAG_llama3_groq

Runtime error

AutoRAG_llama3_groq / phi /tools /youtube_toolkit.py

AmmarFahmy

adding all files

105b369 about 1 year ago

4.64 kB

	import json
	from urllib.parse import urlparse, parse_qs, urlencode
	from urllib.request import urlopen
	from typing import Optional, List

	from phi.tools import Toolkit

	try:
	from youtube_transcript_api import YouTubeTranscriptApi
	except ImportError:
	raise ImportError(
	"`youtube_transcript_api` not installed. Please install using `pip install youtube_transcript_api`"
	)


	class YouTubeTools(Toolkit):
	    """Toolkit that looks up oEmbed metadata and captions for YouTube videos.

	    Each public tool method returns a string: either the requested data or a
	    human-readable error message. Failures are reported in the return value
	    rather than raised.

	    NOTE(review): the docstrings of the registered methods may be surfaced to
	    the model as tool descriptions by ``Toolkit.register`` - confirm before
	    rewording them.
	    """

	    # Hosts whose URLs carry the video ID in the query string or in the path.
	    _YOUTUBE_HOSTS = ("www.youtube.com", "youtube.com", "m.youtube.com", "music.youtube.com")
	    # Path prefixes of the form /<prefix>/<video_id>.
	    _PATH_ID_PREFIXES = ("/embed/", "/v/", "/shorts/", "/live/")
	    # Fields copied from the oEmbed response, in output order.
	    _OEMBED_FIELDS = (
	        "title",
	        "author_name",
	        "author_url",
	        "type",
	        "height",
	        "width",
	        "version",
	        "provider_name",
	        "provider_url",
	        "thumbnail_url",
	    )
	    # Upper bound on the oEmbed request so a stalled connection cannot hang a tool call.
	    _OEMBED_TIMEOUT_SECONDS = 10
	    _INVALID_URL_MESSAGE = "Error getting video ID from URL, please provide a valid YouTube url"

	    def __init__(
	        self,
	        get_video_captions: bool = True,
	        get_video_data: bool = True,
	        languages: Optional[List[str]] = None,
	    ):
	        """Create the toolkit and register the enabled tools.

	        Args:
	            get_video_captions: Register ``get_youtube_video_captions`` when true.
	            get_video_data: Register ``get_youtube_video_data`` when true.
	            languages: Optional list forwarded as the ``languages`` argument of
	                ``YouTubeTranscriptApi.get_transcript``; when empty or ``None``
	                the library default is used.
	        """
	        super().__init__(name="youtube_toolkit")

	        self.languages: Optional[List[str]] = languages
	        if get_video_captions:
	            self.register(self.get_youtube_video_captions)
	        if get_video_data:
	            self.register(self.get_youtube_video_data)

	    def get_youtube_video_id(self, url: str) -> Optional[str]:
	        """Extract the video ID from a YouTube URL.

	        Recognised forms are ``youtu.be/<id>``, ``/watch?v=<id>`` and
	        ``/embed/<id>``, ``/v/<id>``, ``/shorts/<id>``, ``/live/<id>`` on the
	        youtube.com hosts listed in ``_YOUTUBE_HOSTS``.

	        Args:
	            url: The URL of the YouTube video.

	        Returns:
	            The video ID, or ``None`` when the URL is not a recognised YouTube
	            URL or carries no ID.
	        """
	        parsed_url = urlparse(url)
	        hostname = parsed_url.hostname
	        path = parsed_url.path

	        if hostname == "youtu.be":
	            # Short links keep the ID in the first path segment; an empty
	            # segment means no ID was given.
	            return path.lstrip("/").split("/")[0] or None
	        if hostname in self._YOUTUBE_HOSTS:
	            if path == "/watch":
	                return parse_qs(parsed_url.query).get("v", [None])[0]
	            if path.startswith(self._PATH_ID_PREFIXES):
	                # path is "/<prefix>/<id>[/...]", so index 2 is the ID.
	                return path.split("/")[2] or None
	        return None

	    def get_youtube_video_data(self, url: str) -> str:
	        """Function to get video data from a YouTube URL.
	        Data returned includes {title, author_name, author_url, type, height, width, version, provider_name, provider_url, thumbnail_url}

	        Args:
	            url: The URL of the YouTube video.

	        Returns:
	            str: JSON data of the YouTube video.
	        """
	        if not url:
	            return "No URL provided"

	        try:
	            video_id = self.get_youtube_video_id(url)
	        except Exception:
	            return self._INVALID_URL_MESSAGE
	        # An unrecognised URL yields None rather than an exception; without
	        # this guard the request below would look up "watch?v=None".
	        if not video_id:
	            return self._INVALID_URL_MESSAGE

	        try:
	            params = {"format": "json", "url": f"https://www.youtube.com/watch?v={video_id}"}
	            oembed_url = "https://www.youtube.com/oembed" + "?" + urlencode(params)

	            with urlopen(oembed_url, timeout=self._OEMBED_TIMEOUT_SECONDS) as response:
	                video_data = json.loads(response.read().decode())
	            clean_data = {field: video_data.get(field) for field in self._OEMBED_FIELDS}
	            return json.dumps(clean_data, indent=4)
	        except Exception as e:
	            # Tool boundary: report network/parse failures as text instead of raising.
	            return f"Error getting video data: {e}"

	    def get_youtube_video_captions(self, url: str) -> str:
	        """Use this function to get captions from a YouTube video.

	        Args:
	            url: The URL of the YouTube video.

	        Returns:
	            str: The captions of the YouTube video.
	        """
	        if not url:
	            return "No URL provided"

	        try:
	            video_id = self.get_youtube_video_id(url)
	        except Exception:
	            return self._INVALID_URL_MESSAGE
	        # An unrecognised URL yields None rather than an exception; do not
	        # ask the transcript API for a video ID of None.
	        if not video_id:
	            return self._INVALID_URL_MESSAGE

	        try:
	            if self.languages:
	                captions = YouTubeTranscriptApi.get_transcript(video_id, languages=self.languages)
	            else:
	                captions = YouTubeTranscriptApi.get_transcript(video_id)
	            if captions:
	                # Each caption entry is indexed by "text"; join them into one string.
	                return " ".join(line["text"] for line in captions)
	            return "No captions found for video"
	        except Exception as e:
	            # Tool boundary: report transcript failures as text instead of raising.
	            return f"Error getting captions for video: {e}"