iamAI123 committed on
Commit
26089e3
·
1 Parent(s): 2856267

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +90 -0
app.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import whisper
2
+ import multiprocessing
3
+ import os
4
+ from pydub import AudioSegment
5
+ from typing import List
6
+
7
+ model = whisper.load_model("base")
8
+
9
+ def convert_to_text(audio_path: str) -> str:
10
+ # Load the model outside the function if possible, so it's only loaded once
11
+ model = whisper.load_model("base")
12
+
13
+ # Split the audio into segments/chunks
14
+ chunk_size = 30 # Length of each segment in seconds
15
+ audio_segments = split_audio(audio_path, chunk_size)
16
+
17
+
18
+ # Process segments in parallel using multiprocessing
19
+ """ctx = multiprocessing.get_context('spawn')
20
+ pool = ctx.Pool()
21
+ print("Starting the processes....")
22
+ results = pool.map(process_segment, audio_segments)
23
+ pool.close()
24
+ pool.join() """
25
+
26
+ return audio_segments[0]
27
+
28
+ # Combine the results
29
+ #text = ' '.join(results)
30
+ #return text
31
+
32
+ import os
33
+ from pydub import AudioSegment
34
+
35
+ def split_audio(audio_path: str, chunk_size: int) -> List[str]:
36
+ # Create a directory to store the segmented audio files
37
+ output_dir = "segmented_audio"
38
+ os.makedirs(output_dir, exist_ok=True)
39
+
40
+ # Open the audio file using pydub
41
+ audio = AudioSegment.from_file(audio_path)
42
+
43
+ # Calculate the number of chunks
44
+ duration = len(audio) / 1000 # Convert to seconds
45
+ num_chunks = int(duration / chunk_size)
46
+
47
+ print(f"Chunk : Duration : {duration} : Number : {num_chunks}")
48
+
49
+ # Split the audio into chunks
50
+ audio_segments = []
51
+ for i in range(num_chunks):
52
+ start_time = i * chunk_size * 1000 # Convert to milliseconds
53
+ end_time = (i + 1) * chunk_size * 1000
54
+
55
+ # Extract the chunk from the audio file
56
+ chunk = audio[start_time:end_time]
57
+
58
+ # Create a temporary file to store the chunk
59
+ chunk_path = os.path.join(output_dir, f"chunk_{i}.wav")
60
+ chunk.export(chunk_path, format="wav")
61
+
62
+ print(f"Chunk number {i} path : {chunk_path}")
63
+ audio_segments.append(chunk_path)
64
+
65
+ print(f"Audio split into : {len(audio_segments)}")
66
+
67
+ return audio_segments
68
+
69
+
70
+ def process_segment(segment_path: str, q) -> str:
71
+ # Load the model for each process if necessary
72
+
73
+ print(f"Processing segment : {segment_path}")
74
+
75
+ # Process the segment and return the transcribed text
76
+ result = model.transcribe(segment_path)
77
+ print(result['text'])
78
+ return result["text"]
79
+
80
+ if __name__ == '__main__':
81
+
82
+ path = '/content/The genius of Satya Nadella Sam Altman and Lex Fridman.mp3'
83
+
84
+ seg = convert_to_text(path)
85
+ q = multiprocessing.Queue()
86
+ p = multiprocessing.Process(target=process_segment, args=(seg,q))
87
+ p.start()
88
+ print(q.get())
89
+ p.join()
90
+