File size: 2,526 Bytes
795a986
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f8712de
 
795a986
 
 
 
f8712de
 
 
795a986
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f8712de
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import gradio as gr
from youtube_transcript_api import YouTubeTranscriptApi

def _format_transcript(segments) -> str:
    """Render transcript segments as the text shown to the user.

    Args:
        segments: Iterable of mappings, each providing 'text', 'start' and
            'duration' keys ('start' and 'duration' are formatted as floats).

    Returns:
        str: The space-joined full transcript, a blank line, then one
             "Start/Duration/Text" line per segment.
    """
    segments = list(segments)  # iterated twice below; tolerate one-shot iterables
    full_text = " ".join(seg['text'] for seg in segments)
    detailed = "\n".join(
        f"Start: {seg['start']:.2f}s, Duration: {seg['duration']:.2f}s, Text: {seg['text']}"
        for seg in segments
    )
    return (
        "Full Transcript:\n"
        f"{full_text}\n\n"
        "Detailed Transcript Segments:\n"
        f"{detailed}"
    )


def get_youtube_transcript(video_id: str) -> str:
    """
    Fetches and formats the transcript for a given YouTube video ID.

    Leading/trailing whitespace in the ID is ignored, so an ID pasted with a
    stray space or newline still works, and whitespace-only input is treated
    the same as empty input.

    Args:
        video_id (str): The YouTube video ID (e.g., 'dQw4w9WgXcQ').

    Returns:
        str: A formatted string containing the full transcript and detailed segments,
             a prompt asking for an ID when the input is empty/blank,
             or an error message if the transcript cannot be retrieved.
             This function never raises for a fetch failure; the error is
             reported in the returned text so the UI can display it.
    """
    # Normalise first: None, "" and "   " must all hit the same guard instead
    # of being sent to the API as a (guaranteed invalid) video ID.
    video_id = str(video_id).strip() if video_id else ""
    if not video_id:
        return "Please enter a YouTube video ID."

    try:
        if hasattr(YouTubeTranscriptApi, "get_transcript"):
            # Legacy static API: returns a list of segment dicts.
            segments = YouTubeTranscriptApi.get_transcript(video_id)
        else:
            # Newer youtube-transcript-api releases drop the static method in
            # favour of an instance `fetch()`; `to_raw_data()` yields the same
            # list-of-dicts shape the formatter expects.
            segments = YouTubeTranscriptApi().fetch(video_id).to_raw_data()

        return _format_transcript(segments)

    except Exception as e:
        # UI boundary: surface any failure (no captions, bad ID, network)
        # as readable text rather than crashing the Gradio callback.
        return (
            f"An error occurred: {e}\n"
            "Possible reasons: No transcript available for this video, "
            "invalid video ID, or network issues. "
            "Please ensure the video ID is correct and the video has captions enabled."
        )

# Assemble the Gradio UI: one text input feeding get_youtube_transcript,
# and one tall, read-only text box that shows whatever it returns.
_video_id_input = gr.Textbox(
    label="YouTube Video ID",
    placeholder="e.g., dQw4w9WgXcQ (from youtube.com/watch?v=dQw4w9WgXcQ)",
)

_transcript_output = gr.Textbox(
    label="Transcript Output",
    # 20 visible rows so long transcripts are easier to read.
    lines=20,
    # Output only: the user should not be able to edit the result.
    interactive=False,
)

iface = gr.Interface(
    fn=get_youtube_transcript,
    inputs=_video_id_input,
    outputs=_transcript_output,
    title="YouTube Transcript Fetcher",
    description="Enter a YouTube video ID to get its full transcript and detailed segments.",
)

# Launch the Gradio app only when this file is run as a script;
# importing the module builds `iface` but does not start it.
if __name__ == "__main__":
    iface.launch()