Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,90 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import whisper
|
2 |
+
import multiprocessing
|
3 |
+
import os
|
4 |
+
from pydub import AudioSegment
|
5 |
+
from typing import List
|
6 |
+
|
7 |
+
model = whisper.load_model("base")
|
8 |
+
|
9 |
+
def convert_to_text(audio_path: str) -> str:
    """Split *audio_path* into fixed-length chunks and return the first chunk's path.

    The chunks are written to disk by ``split_audio``. Parallel transcription
    of all chunks (the pool code disabled in the original) is not performed
    here; the caller feeds the returned chunk path to ``process_segment``.

    Args:
        audio_path: Path of the source audio file (any format pydub can read).

    Returns:
        Filesystem path of the first chunk file.

    Raises:
        IndexError: If the audio produced no chunks (e.g. unreadable file).
    """
    # The module-level `model` is loaded once at import time. The original
    # reloaded the whisper model here into an unused local, doubling an
    # expensive load for no benefit — removed.
    chunk_size = 30  # length of each segment, in seconds

    audio_segments = split_audio(audio_path, chunk_size)

    # Only the first chunk path is handed back; transcribing every chunk in
    # a multiprocessing pool was sketched in the original but never enabled,
    # so the dead commented-out pool/join code was deleted.
    return audio_segments[0]
|
31 |
+
|
32 |
+
import os
|
33 |
+
from pydub import AudioSegment
|
34 |
+
|
35 |
+
def split_audio(audio_path: str, chunk_size: int) -> List[str]:
    """Split an audio file into chunks of at most ``chunk_size`` seconds.

    Each chunk is exported as WAV into the ``segmented_audio`` directory
    (created if missing); the list of chunk file paths is returned in
    playback order.

    Args:
        audio_path: Path of the source audio file.
        chunk_size: Maximum length of each chunk, in seconds.

    Returns:
        Paths of the exported chunk files.
    """
    output_dir = "segmented_audio"
    os.makedirs(output_dir, exist_ok=True)

    audio = AudioSegment.from_file(audio_path)

    # pydub lengths and slice indices are in milliseconds.
    duration = len(audio) / 1000
    chunk_ms = chunk_size * 1000

    # BUG FIX: the original used int(duration / chunk_size), which silently
    # dropped the trailing partial chunk (e.g. the last 25 s of a 145 s file
    # was never exported or transcribed). Ceiling division keeps the
    # remainder as a final, shorter chunk.
    num_chunks = -(-len(audio) // chunk_ms)

    print(f"Chunk : Duration : {duration} : Number : {num_chunks}")

    audio_segments = []
    for i in range(num_chunks):
        start_time = i * chunk_ms
        end_time = (i + 1) * chunk_ms  # pydub clamps a slice past the end

        chunk = audio[start_time:end_time]

        chunk_path = os.path.join(output_dir, f"chunk_{i}.wav")
        chunk.export(chunk_path, format="wav")

        print(f"Chunk number {i} path : {chunk_path}")
        audio_segments.append(chunk_path)

    print(f"Audio split into : {len(audio_segments)}")

    return audio_segments
|
68 |
+
|
69 |
+
|
70 |
+
def process_segment(segment_path: str, q=None) -> str:
    """Transcribe one audio chunk with the module-level whisper model.

    Args:
        segment_path: Path of the audio chunk to transcribe.
        q: Optional ``multiprocessing.Queue``. When given, the transcribed
            text is also put on it so a parent process can collect the
            result (a child process's return value is otherwise discarded).
            The default lets this function also be used with ``pool.map``,
            which passes only the path.

    Returns:
        The transcribed text.
    """
    print(f"Processing segment : {segment_path}")

    # Transcribe with the model loaded once at module import.
    result = model.transcribe(segment_path)
    text = result["text"]
    print(text)

    # BUG FIX: the original accepted `q` but never used it, so the parent's
    # q.get() in __main__ deadlocked forever. Publish the result when a
    # queue is supplied.
    if q is not None:
        q.put(text)

    return text
|
79 |
+
|
80 |
+
if __name__ == '__main__':

    # Hard-coded input path (Colab-style '/content' mount) — adjust per
    # environment.
    path = '/content/The genius of Satya Nadella Sam Altman and Lex Fridman.mp3'

    # convert_to_text() splits the audio and returns the path of the first
    # chunk file only (full parallel transcription is disabled in it).
    seg = convert_to_text(path)
    # Transcribe that single chunk in a child process; the queue is meant to
    # carry the transcribed text back to this parent.
    q = multiprocessing.Queue()
    p = multiprocessing.Process(target=process_segment, args=(seg,q))
    p.start()
    # NOTE(review): process_segment as written never calls q.put(), so this
    # q.get() blocks forever — fix process_segment to publish its result
    # before relying on this script.
    print(q.get())
    p.join()
|
90 |
+
|