# NOTE(review): the three lines that were here were scrape artifacts from the
# hosting page (file-size banner, git-blame commit hashes, line-number gutter),
# not Python source; converted to a comment so the file parses.
import whisper
import multiprocessing
import os
from pydub import AudioSegment
from typing import List
import gradio as gr
# Load the Whisper model once at module import time. Worker processes created
# by multiprocessing re-import this module (under the 'spawn' start method) and
# therefore each get their own copy of `model`.
model = whisper.load_model("base")
def convert_to_text(audio_path: str) -> str:
    """Transcribe an audio file by splitting it into fixed-length chunks
    and transcribing the chunks in parallel worker processes.

    Args:
        audio_path: Path to the input audio file.

    Returns:
        The full transcription, with per-chunk texts joined by single spaces.
    """
    # NOTE(review): the previous version loaded a second Whisper model into a
    # local variable here and never used it — workers rely on the module-level
    # `model` — so that redundant (and expensive) load has been removed.
    chunk_size = 30  # length of each segment in seconds
    audio_segments = split_audio(audio_path, chunk_size)

    # Fan the chunk paths out to a process pool; `map` preserves input order,
    # so the joined text stays in chronological order.
    print("Starting the processes....")
    with multiprocessing.Pool() as pool:
        results = pool.map(process_segment, audio_segments)

    return ' '.join(results)
import os
from pydub import AudioSegment
def split_audio(audio_path: str, chunk_size: int) -> List[str]:
    """Split an audio file into fixed-length WAV chunk files on disk.

    Args:
        audio_path: Path to the input audio file (any format pydub can read).
        chunk_size: Target length of each chunk in seconds.

    Returns:
        Paths of the exported chunk files, in chronological order.
    """
    # Directory that holds the exported chunk files.
    output_dir = "segmented_audio"
    os.makedirs(output_dir, exist_ok=True)

    audio = AudioSegment.from_file(audio_path)

    duration = len(audio) / 1000  # pydub lengths are in milliseconds
    chunk_ms = chunk_size * 1000
    # Ceiling division: the previous int(duration / chunk_size) floored and
    # silently dropped the trailing partial chunk (up to chunk_size-1 seconds
    # of audio was never transcribed).
    num_chunks = -(-len(audio) // chunk_ms)
    print(f"Chunk : Duration : {duration} : Number : {num_chunks}")

    audio_segments = []
    for i in range(num_chunks):
        start_time = i * chunk_ms
        end_time = (i + 1) * chunk_ms
        # Slicing past the end is safe: pydub clamps slices to the audio length,
        # so the final chunk may simply be shorter than chunk_size.
        chunk = audio[start_time:end_time]
        chunk_path = os.path.join(output_dir, f"chunk_{i}.wav")
        chunk.export(chunk_path, format="wav")
        print(f"Chunk number {i} path : {chunk_path}")
        audio_segments.append(chunk_path)

    print(f"Audio split into : {len(audio_segments)}")
    return audio_segments
def process_segment(segment_path: str) -> str:
    """Transcribe a single audio chunk with the module-level Whisper model.

    Args:
        segment_path: Path to the chunk file to transcribe.

    Returns:
        The transcribed text for this chunk.
    """
    print(f"Processing segment : {segment_path}")
    # `model` is the module-level Whisper instance loaded at import time;
    # each worker process gets its own copy when the module is re-imported.
    transcription = model.transcribe(segment_path)
    text = transcription["text"]
    print(text)
    return text
def get_results(path: str) -> str:
    """Transcribe the audio file at *path* and report completion.

    Fixes a real bug: the previous version spawned an extra Process with
    ``args=(seg, q)``, but ``process_segment`` takes a single argument, so
    the child raised TypeError — and since nothing ever put a value on the
    Queue, ``q.get()`` then deadlocked the parent. ``convert_to_text``
    already returns the finished transcript, so no extra process is needed.

    Args:
        path: Path to the audio file to transcribe.

    Returns:
        The literal string "complete" once transcription finishes.
    """
    transcript = convert_to_text(path)
    print(transcript)
    return "complete"
# Gradio UI wiring: an audio-upload widget whose file path is fed straight
# into convert_to_text, with the transcript shown as plain text.
# NOTE(review): the Interface targets convert_to_text, so get_results above
# appears to be unused from this view — confirm against other callers.
ad = gr.components.Audio(type='filepath')
iface = gr.Interface(fn=convert_to_text, inputs=ad, outputs="text")
iface.launch()