# File size: 3,150 Bytes
# a2632d3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import os
import uuid
from typing import Generator, Optional

import gradio as gr
import pandas as pd
import yt_dlp as youtube_dl
from faster_whisper import WhisperModel

class YouTubeTranscriber:
    """Download the audio of a video URL and transcribe it with a Whisper model.

    The three steps are exposed separately so callers can chain them:
    ``download_audio`` -> ``transcribe_audio`` -> ``process_segments``.
    """

    # Column order of the table built by ``process_segments``. Kept explicit so
    # that a transcription with zero segments still yields a table (and CSV
    # header) with the same schema as a non-empty one.
    _SEGMENT_COLUMNS = ['chunk_id', 'chunk_length', 'text', 'start_time', 'end_time']

    def __init__(self, model_path: str):
        """Create the transcriber.

        Args:
            model_path: Passed unchanged to ``WhisperModel``.
        """
        self.model = WhisperModel(model_path)

    def download_audio(self, url: str, preferred_quality: str = "192") -> Optional[str]:
        """Download the audio track of ``url`` as an mp3 under ``/tmp``.

        Args:
            url: Address of the video to fetch.
            preferred_quality: Value for the ``preferredquality`` option of the
                ``FFmpegExtractAudio`` post-processor.

        Returns:
            The output path (``/tmp/<uuid4>.mp3``) on success, or ``None`` when
            the downloader raises ``DownloadError``.
        """
        file_name = f"{uuid.uuid4()}.mp3"
        output_path = os.path.join("/tmp", file_name)  # Use /tmp directory for temporary storage

        ydl_opts = {
            'format': 'bestaudio/best',
            'postprocessors': [{
                'key': 'FFmpegExtractAudio',
                'preferredcodec': 'mp3',
                'preferredquality': preferred_quality,
            }],
            'outtmpl': output_path,  # Specify the output path and file name
        }

        try:
            with youtube_dl.YoutubeDL(ydl_opts) as ydl:
                # Metadata-only pass: used solely to log the title before the
                # actual download starts.
                info_dict = ydl.extract_info(url, download=False)
                # Guard against a missing info dict so logging cannot crash
                # the download.
                video_title = (info_dict or {}).get('title', 'Unknown title')
                print(f"Downloading audio for: {video_title}")

                ydl.download([url])
                print(f"Audio file saved as: {output_path}")

            return output_path

        except youtube_dl.utils.DownloadError as e:
            # Failure is reported to the caller as None rather than raised.
            print(f"Error downloading audio: {e}")
            return None

    def transcribe_audio(self, path: str) -> Generator:
        """Run the Whisper model on the audio file at ``path``.

        Args:
            path: Location of the audio file to transcribe.

        Returns:
            The segments iterable returned by ``self.model.transcribe``; the
            second element of that call's result is discarded.
        """
        print(f"Reading {path}")
        segments, _ = self.model.transcribe(path)
        return segments

    def process_segments(self, segments: Generator) -> pd.DataFrame:
        """Collect transcription segments into a table, one row per segment.

        Args:
            segments: Iterable of objects exposing ``start``, ``end`` and
                ``text`` attributes.

        Returns:
            A DataFrame with columns ``chunk_id`` (``"chunk_<index>"``),
            ``chunk_length`` (``end - start``), ``text``, ``start_time`` and
            ``end_time``. The columns are present even when ``segments`` is
            empty.
        """
        result = [
            {
                'chunk_id': f"chunk_{i}",
                'chunk_length': segment.end - segment.start,
                'text': segment.text,
                'start_time': segment.start,
                'end_time': segment.end,
            }
            for i, segment in enumerate(segments)
        ]

        # Passing the columns explicitly keeps the schema stable for an empty
        # result; for non-empty input the order matches the dict key order.
        return pd.DataFrame(result, columns=self._SEGMENT_COLUMNS)

# Function to be called by the Gradio interface
def transcribe_youtube_video(url: str, model_path: str = "distil-large-v2") -> str:
    """Download a video's audio, transcribe it, and write the segments to a CSV.

    Args:
        url: Address of the video to transcribe.
        model_path: Model identifier/path given to ``YouTubeTranscriber``. A
            blank or missing value falls back to ``"distil-large-v2"``.

    Returns:
        Path of the CSV file (``/tmp/<uuid4>.csv``) holding one row per
        transcribed segment.

    Raises:
        gr.Error: If ``url`` is blank or the audio could not be downloaded.
            The output component is a file, so a plain message string cannot
            be returned in its place.
    """
    # A blank text field arrives as "" (not as "argument omitted"), so the
    # signature default has to be re-applied by hand.
    model_path = (model_path or "").strip() or "distil-large-v2"

    # Reject an empty URL before paying the cost of loading the model.
    if not url or not url.strip():
        raise gr.Error("Please enter a YouTube URL.")

    yt_transcriber = YouTubeTranscriber(model_path)
    audio_path = yt_transcriber.download_audio(url)

    if not audio_path:
        raise gr.Error("Failed to download audio.")

    try:
        segments = yt_transcriber.transcribe_audio(audio_path)
        # process_segments consumes the segments, so transcription is complete
        # once this returns.
        df = yt_transcriber.process_segments(segments)
    finally:
        # The downloaded audio is only an intermediate artifact; remove it so
        # repeated requests do not accumulate files in /tmp. Best effort only.
        try:
            os.remove(audio_path)
        except OSError as e:
            print(f"Could not remove temporary audio file {audio_path}: {e}")

    output_csv = os.path.join("/tmp", f"{uuid.uuid4()}.csv")
    df.to_csv(output_csv, index=False)
    return output_csv


import gradio as gr

# Web UI: the two text inputs are passed, in order, as the arguments of
# transcribe_youtube_video, and its return value feeds the file output.
interface = gr.Interface(
    fn=transcribe_youtube_video,
    inputs=[
        gr.Textbox(lines=1, placeholder="Enter YouTube URL here...", label="YouTube URL"),
        # Pre-fill the model field: the textbox content is always passed to the
        # callback explicitly, so without an initial value the callback would
        # receive an empty string instead of its own default.
        gr.Textbox(lines=1, value="distil-large-v2", label="Whisper Model Path"),
    ],
    outputs=gr.File(label="Transcribed Segments CSV"),  # Use gr.File directly
    title="YouTube Audio Transcriber",
    description="Enter a YouTube URL to download the audio and transcribe it using Whisper."
)

# Launch the interface
interface.launch()