traversaal-ai committed on
Commit
795a986
·
verified ·
1 Parent(s): 730ce43

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +69 -0
app.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from youtube_transcript_api import YouTubeTranscriptApi
3
+
4
def get_youtube_transcript(video_id: str) -> str:
    """
    Fetch the transcript of a YouTube video and format it for display.

    Args:
        video_id (str): The YouTube video ID (e.g., 'dQw4w9WgXcQ').
            Leading and trailing whitespace is ignored.

    Returns:
        str: The full transcript text followed by one line per timed segment,
            a prompt if no video ID was supplied, or an error message if the
            transcript cannot be retrieved or formatted.
    """
    # Treat None, empty, and whitespace-only input the same way; pasted IDs
    # often carry stray spaces or a trailing newline.
    video_id = str(video_id).strip() if video_id else ""
    if not video_id:
        return "Please enter a YouTube video ID."

    try:
        # Fetch the transcript as a list of {'text', 'start', 'duration'} dicts.
        if hasattr(YouTubeTranscriptApi, "get_transcript"):
            transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
        else:
            # Newer youtube-transcript-api releases replace the static helper
            # with an instance API; to_raw_data() yields the same dict shape.
            transcript_list = YouTubeTranscriptApi().fetch(video_id).to_raw_data()

        # Extract full transcript text
        full_transcript_text = " ".join(item['text'] for item in transcript_list)

        # Join the per-segment lines outside the f-string: a backslash inside
        # an f-string expression is a SyntaxError before Python 3.12, and the
        # escaped form would insert a literal "\n" instead of a line break.
        segment_lines = "\n".join(
            f"Start: {segment['start']:.2f}s, Duration: {segment['duration']:.2f}s, Text: {segment['text']}"
            for segment in transcript_list
        )

        # Combine results into a single string for Gradio output
        return (
            "Full Transcript:\n"
            f"{full_transcript_text}\n\n"
            "Detailed Transcript Segments:\n"
            f"{segment_lines}"
        )

    except Exception as e:
        # UI boundary: report any failure as text instead of crashing the app.
        return (
            f"An error occurred: {e}\n"
            "Possible reasons: No transcript available for this video, "
            "invalid video ID, or network issues. "
            "Please ensure the video ID is correct and the video has captions enabled."
        )
50
+
51
# Input widget: a single-line box for the video ID.
_video_id_box = gr.Textbox(
    label="YouTube Video ID",
    placeholder="e.g., dQw4w9WgXcQ (from youtube.com/watch?v=dQw4w9WgXcQ)",
)

# Output widget: a tall, read-only box so long transcripts stay readable.
_transcript_box = gr.Textbox(
    label="Transcript Output",
    lines=20,
    interactive=False,
)

# Wire the transcript function to the two widgets.
iface = gr.Interface(
    fn=get_youtube_transcript,
    inputs=_video_id_box,
    outputs=_transcript_box,
    title="YouTube Transcript Fetcher",
    description="Enter a YouTube video ID to get its full transcript and detailed segments.",
)

# Start the web app only when this file is run as a script.
if __name__ == "__main__":
    iface.launch()