traversaal-ai's picture
Update app.py
f8712de verified
import gradio as gr
from youtube_transcript_api import YouTubeTranscriptApi
def get_youtube_transcript(video_id: str) -> str:
    """Fetch the transcript for a YouTube video and format it for display.

    Args:
        video_id: The YouTube video ID (e.g., 'dQw4w9WgXcQ'). Leading and
            trailing whitespace is ignored.

    Returns:
        A string containing the full transcript text followed by one line per
        segment (start time, duration, text); a prompt if no ID was supplied;
        or an error message if the transcript could not be retrieved.
    """
    # IDs pasted into the textbox often carry stray spaces or newlines. Strip
    # them so a blank-only entry is treated as empty instead of being sent to
    # the API, and so an otherwise valid ID is not rejected.
    video_id = (video_id or "").strip()
    if not video_id:
        return "Please enter a YouTube video ID."

    try:
        # NOTE(review): get_transcript is the static, legacy entry point of
        # youtube-transcript-api; newer releases appear to favour an
        # instance-based fetch() -- confirm against the pinned version.
        transcript_list = YouTubeTranscriptApi.get_transcript(video_id)

        # Plain running text of the whole transcript.
        full_transcript_text = " ".join(item['text'] for item in transcript_list)

        # One line per caption segment with its timing information.
        detailed_segments_str = "\n".join(
            f"Start: {segment['start']:.2f}s, Duration: {segment['duration']:.2f}s, Text: {segment['text']}"
            for segment in transcript_list
        )

        return (
            "Full Transcript:\n"
            f"{full_transcript_text}\n\n"
            "Detailed Transcript Segments:\n"
            f"{detailed_segments_str}"
        )
    except Exception as e:
        # This function is the UI boundary: report any failure as text in the
        # output box rather than letting the exception propagate.
        return (
            f"An error occurred: {e}\n"
            "Possible reasons: No transcript available for this video, "
            "invalid video ID, or network issues. "
            "Please ensure the video ID is correct and the video has captions enabled."
        )
# Build the UI pieces first: a single-line input for the video ID and a
# read-only, multi-line output for the formatted transcript.
_video_id_input = gr.Textbox(
    label="YouTube Video ID",
    placeholder="e.g., dQw4w9WgXcQ (from youtube.com/watch?v=dQw4w9WgXcQ)",
)
_transcript_output = gr.Textbox(
    label="Transcript Output",
    lines=20,  # tall box so long transcripts are easier to read
    interactive=False,  # output only; the user cannot edit it
)

# Wire the components to the transcript function.
iface = gr.Interface(
    fn=get_youtube_transcript,
    inputs=_video_id_input,
    outputs=_transcript_output,
    title="YouTube Transcript Fetcher",
    description="Enter a YouTube video ID to get its full transcript and detailed segments.",
)

# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()