"""Gradio app: transcribe an audio file with Whisper and export the text to a PDF."""

import math

import gradio as gr
import librosa
from fpdf import FPDF
from transformers import pipeline

# Whisper models are trained on 16 kHz audio; resample all input to this rate.
TARGET_SR = 16000

# Lazily-created ASR pipeline, cached so the model is loaded only once
# instead of on every request.
_transcriber = None


def _get_transcriber():
    """Return the cached Whisper ASR pipeline, creating it on first use."""
    global _transcriber
    if _transcriber is None:
        _transcriber = pipeline(
            "automatic-speech-recognition", model="openai/whisper-small"
        )
    return _transcriber


def transcribe_and_generate_pdf(audio_file, chunk_duration=30):
    """Transcribe *audio_file* in chunks and write the text to a PDF.

    Parameters
    ----------
    audio_file : str
        Filesystem path of the audio file to transcribe (Gradio supplies this).
    chunk_duration : int, optional
        Length in seconds of each chunk fed to the model. Defaults to 30,
        Whisper's native input window.

    Returns
    -------
    tuple[str, str | None]
        ``(transcription, pdf_path)`` on success, or
        ``(error_message, None)`` on failure.
    """
    try:
        transcriber = _get_transcriber()

        # Resample to 16 kHz. The previous version loaded at the file's
        # native rate (sr=None) but fed raw samples to the pipeline without
        # a sampling rate, so the Whisper feature extractor silently treated
        # them as 16 kHz — wrong-speed audio for any other source rate.
        audio, sr = librosa.load(audio_file, sr=TARGET_SR)

        chunk_samples = sr * chunk_duration
        # ceil avoids the extra empty trailing chunk that `int(...) + 1`
        # produced when the audio length divided evenly; max(1, ...) keeps
        # one (empty-safe) pass for zero-length input.
        num_chunks = max(1, math.ceil(len(audio) / chunk_samples))

        pieces = []
        for i in range(num_chunks):
            start = i * chunk_samples
            end = min(start + chunk_samples, len(audio))
            chunk = audio[start:end]
            # Pass the sampling rate explicitly so the pipeline never guesses.
            result = transcriber(
                {"raw": chunk, "sampling_rate": sr}, return_timestamps=True
            )
            pieces.append(result["text"])
        transcription = " ".join(pieces).strip()

        output_pdf = "transcription.pdf"
        pdf = FPDF()
        pdf.add_page()
        pdf.set_font("Arial", size=12)
        # FPDF's core fonts are latin-1 only; replace unsupported characters
        # instead of crashing on non-latin transcriptions.
        safe_text = transcription.encode("latin-1", errors="replace").decode("latin-1")
        pdf.multi_cell(0, 10, safe_text)
        pdf.output(output_pdf)

        return transcription, output_pdf
    except Exception as e:
        # Top-level UI boundary: surface the error message in the Gradio
        # textbox rather than crashing the worker.
        return f"An error occurred: {e}", None


interface = gr.Interface(
    fn=transcribe_and_generate_pdf,
    inputs=gr.Audio(type="filepath"),
    outputs=[
        gr.Textbox(label="Transcription"),
        gr.File(label="Download PDF"),
    ],
    title="Audio-to-Text and PDF Generator",
    description="Upload an audio file to get its transcription and download the PDF.",
)

if __name__ == "__main__":
    interface.launch()