Siddiqui Qamar committed
Commit fb62578 · verified · 1 Parent(s): bfc2061

Create app.py

Files changed (1)
  1. app.py +86 -0
app.py ADDED
@@ -0,0 +1,86 @@
import gradio as gr
import whisper
import os
from moviepy.editor import VideoFileClip
from pydub import AudioSegment

# Load the Whisper model once at startup ("base" balances speed and accuracy).
model = whisper.load_model("base")

def convert_video_to_audio(video_file):
    # Extract the audio track from a video file and save it alongside it as WAV.
    video = VideoFileClip(video_file)
    audio = video.audio
    audio_file = video_file.rsplit('.', 1)[0] + ".wav"
    audio.write_audiofile(audio_file)
    video.close()
    return audio_file

def chunk_audio(audio_file, chunk_size_mb=25, max_duration_seconds=900):
    # Split the audio into chunks of at most max_duration_seconds and at most
    # chunk_size_mb of raw PCM data, so long files are transcribed piece by piece.
    audio = AudioSegment.from_file(audio_file)
    chunk_size_bytes = chunk_size_mb * 1024 * 1024
    bytes_per_ms = audio.frame_rate * audio.frame_width / 1000
    max_ms_by_size = int(chunk_size_bytes / bytes_per_ms)
    chunk_duration_ms = min(max_duration_seconds * 1000, max_ms_by_size)
    return [audio[i:i + chunk_duration_ms] for i in range(0, len(audio), chunk_duration_ms)]

def transcribe_audio(file):
    if file is None:
        return "Error: No file uploaded.", None

    # gr.File may pass a plain path string or a tempfile-like object with a
    # .name attribute, depending on the Gradio version.
    file_path = file if isinstance(file, str) else file.name
    file_extension = os.path.splitext(file_path)[1].lower()

    # For video uploads, extract the audio track first.
    if file_extension in ['.mp4', '.avi', '.mov']:
        audio_file = convert_video_to_audio(file_path)
    else:
        audio_file = file_path

    chunks = chunk_audio(audio_file)
    transcriptions = []

    # Transcribe each chunk separately, then stitch the texts together.
    for i, chunk in enumerate(chunks):
        chunk_file = f"chunk_{i}.wav"
        chunk.export(chunk_file, format="wav")
        result = model.transcribe(chunk_file)
        transcriptions.append(result["text"])
        os.remove(chunk_file)

    # Clean up the intermediate audio file extracted from a video upload.
    if file_extension in ['.mp4', '.avi', '.mov']:
        os.remove(audio_file)

    full_transcription = " ".join(transcriptions)

    output_filename = os.path.splitext(os.path.basename(file_path))[0] + ".txt"
    with open(output_filename, "w") as text_file:
        text_file.write(full_transcription)

    return full_transcription, output_filename

# Test function
def test_transcription(file_path):
    class MockFile:
        def __init__(self, path):
            self.name = path

    mock_file = MockFile(file_path)
    transcription, output_file = transcribe_audio(mock_file)
    print("Transcription:")
    print(transcription)
    print("\nOutput file:", output_file)

# Example usage (uncomment and replace with your file path)
# test_transcription("/path/to/your/audio_or_video_file.mp3")

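# Optional sanity check for the chunking behaviour described in the interface below.
# A minimal sketch: the path is a placeholder and assumes ffmpeg is available for pydub.
# for i, chunk in enumerate(chunk_audio("/path/to/your/long_recording.wav")):
#     print(f"chunk {i}: {len(chunk) / 1000:.1f} seconds")
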
# Gradio interface
iface = gr.Interface(
    fn=transcribe_audio,
    inputs=gr.File(label="Upload Audio/Video File"),
    outputs=[
        gr.Textbox(label="Transcription"),
        gr.File(label="Download Transcript")
    ],
    title="QuickTranscribe AI",
    description="Upload an audio or video file to get its transcription. The transcript will be displayed and available for download. Files will be processed in chunks of 25MB or 15 minutes maximum. Please use responsibly."
)

iface.launch(share=True)