|
import os
import tempfile

import gradio as gr
import librosa
from fpdf import FPDF
from transformers import pipeline
|
|
|
# Lazily created speech-recognition pipeline, shared by every call so the
# Whisper weights are loaded once per process instead of once per request.
_transcriber = None


def _get_transcriber():
    """Return the shared Whisper ASR pipeline, creating it on first use."""
    global _transcriber
    if _transcriber is None:
        _transcriber = pipeline(
            "automatic-speech-recognition", model="openai/whisper-small"
        )
    return _transcriber


def _latin1_safe(text):
    """Replace characters that cannot be encoded as Latin-1 with ``?``."""
    return text.encode("latin-1", errors="replace").decode("latin-1")


def transcribe_and_generate_pdf(audio_file, chunk_duration=30):
    """Transcribe an audio file in fixed-length chunks and write a PDF.

    Args:
        audio_file: Path of the audio file to transcribe.
        chunk_duration: Length of each chunk handed to the model, in
            seconds. Must be positive.

    Returns:
        A ``(transcription, pdf_path)`` tuple on success. On any failure
        the tuple is ``("An error occurred: <reason>", None)`` so the UI
        can show the message instead of crashing.
    """
    # Broad catch is deliberate: this function is the UI boundary and must
    # always hand back something displayable.
    try:
        # Validate cheap preconditions before loading the model or audio.
        if not audio_file:
            raise ValueError("no audio file provided")
        if chunk_duration <= 0:
            raise ValueError("chunk_duration must be positive")

        transcriber = _get_transcriber()

        # A bare array passed to the pipeline is assumed to already be at
        # the model's sampling rate, so resample while loading rather than
        # keeping the file's native rate.
        target_sr = transcriber.feature_extractor.sampling_rate
        audio, sr = librosa.load(audio_file, sr=target_sr)

        # Step through the samples directly; unlike a precomputed chunk
        # count this never yields a trailing empty chunk.
        chunk_samples = max(1, int(sr * chunk_duration))
        pieces = []
        for start in range(0, len(audio), chunk_samples):
            chunk = audio[start:start + chunk_samples]
            text = transcriber(chunk, return_timestamps=True)["text"].strip()
            if text:
                pieces.append(text)
        transcription = " ".join(pieces)

        # Write into a fresh directory so concurrent requests cannot
        # overwrite each other's PDF while the download keeps a friendly
        # file name. The directory is left in place because the caller
        # still needs the file after this function returns.
        output_pdf = os.path.join(
            tempfile.mkdtemp(prefix="transcription_"), "transcription.pdf"
        )

        pdf = FPDF()
        pdf.add_page()
        pdf.set_font("Arial", size=12)
        # The built-in Arial font only covers Latin-1; substitute anything
        # else so PDF generation cannot fail on the transcript's text.
        pdf.multi_cell(0, 10, _latin1_safe(transcription))
        pdf.output(output_pdf)

        return transcription, output_pdf
    except Exception as e:
        return f"An error occurred: {e}", None
|
|
|
# Gradio front end: one audio upload in, the transcript text and the
# generated PDF file out.
interface = gr.Interface(
    title="Audio-to-Text and PDF Generator",
    description="Upload an audio file to get its transcription and download the PDF.",
    fn=transcribe_and_generate_pdf,
    # "filepath" makes Gradio hand the function a path on disk.
    inputs=gr.Audio(type="filepath"),
    outputs=[gr.Textbox(label="Transcription"), gr.File(label="Download PDF")],
)


# Start the web server only when run as a script, not on import.
if __name__ == "__main__":
    interface.launch()