# Idx / app.py
# gaur3009's picture
# Update app.py
# 831670f verified
import os
import tempfile

import gradio as gr
import librosa
from fpdf import FPDF
from transformers import pipeline
# Speech-recognition pipelines keyed by model name, so the model weights are
# loaded once per process instead of once per request.
_TRANSCRIBER_CACHE = {}


def _get_transcriber(model_name="openai/whisper-small"):
    """Return the cached speech-recognition pipeline for *model_name*."""
    if model_name not in _TRANSCRIBER_CACHE:
        _TRANSCRIBER_CACHE[model_name] = pipeline(
            "automatic-speech-recognition", model=model_name
        )
    return _TRANSCRIBER_CACHE[model_name]


def _write_pdf(text):
    """Render *text* into a new PDF file and return the file's path."""
    # The built-in "Arial" font can only encode Latin-1. Replace anything
    # outside that range so one unsupported character does not abort the
    # whole request and throw away the transcription.
    printable = text.encode("latin-1", errors="replace").decode("latin-1")

    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", size=12)
    pdf.multi_cell(0, 10, printable)

    # Each call gets its own directory so concurrent requests never overwrite
    # each other's output, while the file keeps the "transcription.pdf" name.
    output_pdf = os.path.join(
        tempfile.mkdtemp(prefix="transcription_"), "transcription.pdf"
    )
    pdf.output(output_pdf)
    return output_pdf


def transcribe_and_generate_pdf(audio_file, chunk_duration=30):
    """Transcribe an audio file and write the transcription to a PDF.

    The audio is split into consecutive chunks of ``chunk_duration`` seconds,
    each chunk is transcribed on its own, and the non-empty chunk texts are
    joined with single spaces.

    Args:
        audio_file: Path of the audio file to transcribe.
        chunk_duration: Length of each chunk in seconds; must be positive.
            Fractional values are accepted.

    Returns:
        A ``(transcription, pdf_path)`` tuple on success. On any failure the
        tuple is ``("An error occurred: <reason>", None)``; exceptions are
        reported through the return value rather than raised, because the
        result is displayed directly in the UI.
    """
    try:
        # Validate before the (slow) model load so bad input fails fast.
        if not audio_file:
            raise ValueError("no audio file was provided")
        if chunk_duration <= 0:
            raise ValueError("chunk_duration must be a positive number of seconds")

        transcriber = _get_transcriber()

        # A raw array handed to the pipeline is taken to already be at the
        # model's sampling rate, so resample while loading instead of keeping
        # the file's native rate.
        target_sr = transcriber.feature_extractor.sampling_rate
        audio, sr = librosa.load(audio_file, sr=target_sr)

        # int() keeps the slice bounds integral for fractional durations;
        # stepping through range() never yields an empty trailing chunk.
        samples_per_chunk = max(1, int(sr * chunk_duration))
        pieces = []
        for start in range(0, len(audio), samples_per_chunk):
            chunk = audio[start:start + samples_per_chunk]
            text = transcriber(chunk, return_timestamps=True)["text"].strip()
            if text:
                pieces.append(text)
        transcription = " ".join(pieces)

        return transcription, _write_pdf(transcription)
    except Exception as e:
        return f"An error occurred: {e}", None
# --- Gradio UI wiring -------------------------------------------------------
# One audio input (configured with type="filepath") feeds
# transcribe_and_generate_pdf; its two return values map, in order, onto the
# text box and the file-download component below.
_audio_input = gr.Audio(type="filepath")
_transcription_output = gr.Textbox(label="Transcription")
_pdf_output = gr.File(label="Download PDF")

interface = gr.Interface(
    fn=transcribe_and_generate_pdf,
    inputs=_audio_input,
    outputs=[_transcription_output, _pdf_output],
    title="Audio-to-Text and PDF Generator",
    description="Upload an audio file to get its transcription and download the PDF.",
)

# Start the web app only when this file is run as a script, not on import.
if __name__ == "__main__":
    interface.launch()