DebasishDhal99 committed on
Commit
887d074
·
1 Parent(s): d005ec9

add pydub library for audio manipulation

Browse files
Files changed (1) hide show
  1. backend/audio_to_tgt.py +12 -12
backend/audio_to_tgt.py CHANGED
@@ -1,17 +1,17 @@
1
- from openai import OpenAI
2
  import whisper
3
- import os
 
4
 
5
- # whisper_key = os.getenv("OPENAI_API_KEY")
6
- # client = OpenAI(api_key = whisper_key)
 
 
7
 
 
8
 
9
  def src_audio_to_eng_translator(audio_file_input):
10
- with open(audio_file_input, "rb") as audio_file:
11
- # transcription = client.audio.translations.create(
12
- # model="whisper-1",
13
- # file=audio_file
14
- # )
15
- model = whisper.load_model("turbo")
16
- result = model.transcribe(audio_file)
17
- return result.text
 
 
1
  import whisper
2
+ import numpy as np
3
+ from pydub import AudioSegment
4
 
5
def audio_to_numpy(audio_file_input):
    """Decode an audio file into a mono, 16 kHz float32 NumPy array.

    Args:
        audio_file_input: Passed unchanged to ``AudioSegment.from_file``
            (presumably a path or file-like object -- confirm against
            callers).

    Returns:
        A one-dimensional ``np.float32`` array of samples, scaled by the
        full-scale magnitude of the decoded integer sample type so that
        every value lies in ``[-1.0, 1.0]``.
    """
    audio = AudioSegment.from_file(audio_file_input)
    # Downmix to a single channel and resample to 16 kHz before pulling out
    # the raw integer samples.
    audio = audio.set_channels(1).set_frame_rate(16000)
    samples = np.array(audio.get_array_of_samples(), dtype=np.float32)

    # Scale by the magnitude of the most negative representable sample
    # (e.g. 32768 for 16-bit audio) rather than by ``max`` (32767): dividing
    # by ``max`` lets a full-scale negative sample land just below -1.0.
    full_scale = -np.iinfo(audio.array_type).min
    return samples / full_scale
11
 
12
# Loaded Whisper models keyed by model name, so the weights are loaded once
# per process instead of on every call.
_WHISPER_MODELS = {}


def src_audio_to_eng_translator(audio_file_input):
    """Run the Whisper "turbo" model on an audio file and return its text.

    Args:
        audio_file_input: Audio source forwarded to ``audio_to_numpy``,
            which decodes it into the sample array given to Whisper.

    Returns:
        The value stored under the ``'text'`` key of the result returned by
        ``model.transcribe``.
    """
    audio_data = audio_to_numpy(audio_file_input)

    # Reuse an already-loaded model; loading it again for every request
    # repeats the (expensive) weight load for no benefit.
    model = _WHISPER_MODELS.get("turbo")
    if model is None:
        model = whisper.load_model("turbo")
        _WHISPER_MODELS["turbo"] = model

    # NOTE(review): the function name says "eng_translator", but no ``task``
    # argument is passed here, so this relies on Whisper's default task.
    # Confirm whether translation to English is expected at this step, and
    # whether the "turbo" checkpoint supports it, before changing this call.
    result = model.transcribe(audio_data)
    return result['text']