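"""Gradio app that transcribes an uploaded audio file with Whisper and exports the text to a PDF."""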
from transformers import pipeline
from fpdf import FPDF
import librosa
import math
import gradio as gr

# Load the Whisper model once at startup instead of on every request.
transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-small")

def transcribe_and_generate_pdf(audio_file, chunk_duration=30):
    try:
        # Load the audio at its native sampling rate (mono float32).
        audio, sr = librosa.load(audio_file, sr=None)

        # Split the signal into chunks of at most `chunk_duration` seconds,
        # without producing an empty trailing chunk.
        samples_per_chunk = sr * chunk_duration
        num_chunks = math.ceil(len(audio) / samples_per_chunk)

        transcription = ""
        for i in range(num_chunks):
            start = i * samples_per_chunk
            end = min(start + samples_per_chunk, len(audio))
            chunk = audio[start:end]

            # Pass the sampling rate along with the raw samples so the pipeline
            # can resample to the 16 kHz that Whisper expects.
            result = transcriber(
                {"raw": chunk, "sampling_rate": sr},
                return_timestamps=True,
            )
            transcription += result["text"] + " "

        transcription = transcription.strip()

        # Write the transcription to a PDF. The built-in core fonts only support
        # Latin-1, so replace any characters they cannot encode.
        output_pdf = "transcription.pdf"
        pdf = FPDF()
        pdf.add_page()
        pdf.set_font("Arial", size=12)
        pdf.multi_cell(0, 10, transcription.encode("latin-1", "replace").decode("latin-1"))
        pdf.output(output_pdf)

        return transcription, output_pdf

    except Exception as e:
        return f"An error occurred: {e}", None

interface = gr.Interface(
    fn=transcribe_and_generate_pdf,
    inputs=gr.Audio(type="filepath"),
    outputs=[
        gr.Textbox(label="Transcription"),
        gr.File(label="Download PDF")
    ],
    title="Audio-to-Text and PDF Generator",
    description="Upload an audio file to get its transcription and download the PDF."
)

if __name__ == "__main__":
    interface.launch()