# Spaces: Running
import os
import tempfile

import gradio as gr
import whisper
from pydub import AudioSegment
def convert_video_to_audio(video_file):
    """Decode a media file and write its audio as a WAV file next to it.

    Args:
        video_file: Path to the source media file.

    Returns:
        Path of the WAV file that was written: the source path with its
        extension (if any) replaced by ``.wav``.
    """
    # splitext only treats a dot in the final path component as an extension
    # separator, so "dir.v1/clip" maps to "dir.v1/clip.wav" rather than
    # "dir.wav" as a plain rsplit('.') would produce.
    audio_file = os.path.splitext(video_file)[0] + ".wav"
    audio = AudioSegment.from_file(video_file)
    # export() returns the still-open output handle; close it so the data is
    # flushed and the file can be deleted later on every platform.
    audio.export(audio_file, format="wav").close()
    return audio_file
def chunk_audio(audio_file, chunk_size_mb=25, max_duration_seconds=900):
    """Split an audio file into consecutive chunks bounded by size and duration.

    Every chunk is at most ``max_duration_seconds`` long and holds at most
    about ``chunk_size_mb`` megabytes of raw sample data. The chunks cover the
    whole recording in order, so nothing is discarded.

    Args:
        audio_file: Path to the audio file to load.
        chunk_size_mb: Upper bound on the raw sample data per chunk, in MiB.
        max_duration_seconds: Upper bound on the duration of each chunk.

    Returns:
        A list of audio segments. Audio that already satisfies both limits
        (including empty audio) is returned as a single-element list.

    Raises:
        ValueError: If either limit is not positive.
    """
    if chunk_size_mb <= 0 or max_duration_seconds <= 0:
        raise ValueError("chunk_size_mb and max_duration_seconds must be positive")

    audio = AudioSegment.from_file(audio_file)
    total_ms = len(audio)
    total_bytes = audio.frame_count() * audio.frame_width
    if total_ms == 0 or total_bytes == 0:
        return [audio]

    chunk_size_bytes = chunk_size_mb * 1024 * 1024
    max_duration_ms = int(max_duration_seconds * 1000)
    # Raw PCM size grows linearly with duration, so the byte budget converts
    # directly into a duration budget.
    size_limited_ms = int(chunk_size_bytes * total_ms / total_bytes)
    # Never let the step fall to zero, which would make range() fail.
    chunk_duration_ms = max(1, min(max_duration_ms, size_limited_ms))

    if total_ms <= chunk_duration_ms:
        return [audio]

    return [
        audio[start:start + chunk_duration_ms]
        for start in range(0, total_ms, chunk_duration_ms)
    ]
# Loaded once at import time; transcribe_audio reuses it for every request.
model = whisper.load_model("base")

# Extensions whose audio track is extracted to WAV before transcription.
_VIDEO_EXTENSIONS = (".mp4", ".avi", ".mov")


def transcribe_audio(file):
    """Transcribe an uploaded audio or video file and save the text to disk.

    Args:
        file: The upload to transcribe. Either a filesystem path (``str`` or
            path-like) or an object exposing the path as ``.name``; ``None``
            means nothing was uploaded.

    Returns:
        A ``(transcription, transcript_path)`` tuple. ``transcript_path`` is
        a ``.txt`` file in the current working directory named after the
        upload. When ``file`` is ``None`` the tuple is
        ``("Error: No file uploaded.", None)``.
    """
    if file is None:
        return "Error: No file uploaded.", None

    # Accept both a plain path and a file-like wrapper carrying ``.name``.
    if isinstance(file, (str, os.PathLike)):
        source_path = os.fspath(file)
    else:
        source_path = file.name

    file_extension = os.path.splitext(source_path)[1].lower()
    is_video = file_extension in _VIDEO_EXTENSIONS
    audio_file = convert_video_to_audio(source_path) if is_video else source_path

    transcriptions = []
    try:
        chunks = chunk_audio(audio_file)
        # A private scratch directory keeps concurrent requests from
        # overwriting each other's chunk files and guarantees cleanup even
        # if transcription raises.
        with tempfile.TemporaryDirectory() as work_dir:
            for i, chunk in enumerate(chunks):
                chunk_file = os.path.join(work_dir, f"chunk_{i}.wav")
                # export() returns the open output handle; close it before
                # the model reads the file.
                chunk.export(chunk_file, format="wav").close()
                result = model.transcribe(chunk_file)
                # Strip per-chunk padding so the joined text is separated by
                # exactly one space.
                transcriptions.append(result["text"].strip())
                # Free disk space eagerly; long recordings yield many chunks.
                os.remove(chunk_file)
    finally:
        # The extracted WAV is an intermediate artefact of this call only.
        if is_video and os.path.exists(audio_file):
            os.remove(audio_file)

    full_transcription = " ".join(transcriptions)
    output_filename = os.path.splitext(os.path.basename(source_path))[0] + ".txt"
    # Explicit UTF-8 so non-ASCII transcriptions do not depend on the
    # platform's default encoding.
    with open(output_filename, "w", encoding="utf-8") as text_file:
        text_file.write(full_transcription)
    return full_transcription, output_filename
# Test function
def test_transcription(file_path):
    """Run ``transcribe_audio`` on a local file and print what it returns.

    Args:
        file_path: Path to the audio or video file to transcribe.
    """

    class _UploadStub:
        """Stand-in for an upload: exposes the path as ``.name``."""

        def __init__(self, path):
            self.name = path

    text, transcript_path = transcribe_audio(_UploadStub(file_path))
    print("Transcription:", text, sep="\n")
    print("\nOutput file:", transcript_path)
# Example usage (uncomment and replace with your file path)
# test_transcription("/path/to/your/audio_or_video_file.mp3")
# Gradio interface: one file upload in, transcription text plus a
# downloadable transcript file out.
_upload_input = gr.File(label="Upload Audio/Video File")
_transcript_box = gr.Textbox(label="Transcription")
_transcript_download = gr.File(label="Download Transcript")

_DESCRIPTION = (
    "Upload an audio or video file to get its transcription. "
    "The transcript will be displayed and available for download. "
    "Files will be processed in chunks of 25MB or 15 minutes maximum. "
    "Please use responsibly."
)

iface = gr.Interface(
    fn=transcribe_audio,
    inputs=_upload_input,
    outputs=[_transcript_box, _transcript_download],
    title="QuickTranscribe AI",
    description=_DESCRIPTION,
)

# Start the web server and request a public share link.
iface.launch(share=True)