CXDJY committed on
Commit
8d49ccc
·
1 Parent(s): a709495

Added prediction logic

Browse files
Files changed (2) hide show
  1. app.py +40 -2
  2. requirements.txt +5 -1
app.py CHANGED
@@ -1,15 +1,53 @@
1
  import gradio as gr
2
  import librosa
 
 
3
 
4
  def load_audio_to_tensor(filename):
5
  audio, sampling_rate = librosa.load(filename, sr=None, mono=True) # load audio and convert to mono
6
  wave = librosa.resample(audio, orig_sr=sampling_rate, target_sr=16000) # resample to 16KHz
7
  return wave
8
 
 
 
 
 
 
 
 
 
 
 
9
  def greet(name):
10
  wave = load_audio_to_tensor(name)
11
- # return "Hello " + str(name[1]) + "!!"
12
- return wave
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
  iface = gr.Interface(fn=greet, inputs="file", outputs="text")
15
  # iface = gr.Interface(fn=greet, inputs="audio", outputs="text")
 
1
from itertools import groupby

import gradio as gr
import librosa
import numpy as np
import tensorflow as tf
from huggingface_hub import from_pretrained_keras
5
 
6
def load_audio_to_tensor(filename):
    """Load an audio file as a mono waveform resampled to 16 kHz.

    Parameters
    ----------
    filename : str or path-like
        Path to an audio file readable by librosa.

    Returns
    -------
    numpy.ndarray
        1-D float waveform at a 16 kHz sampling rate.
    """
    # Decode at the file's native rate, folding all channels down to mono.
    mono_signal, native_rate = librosa.load(filename, sr=None, mono=True)
    # Normalize every input to the fixed 16 kHz rate used downstream.
    return librosa.resample(mono_signal, orig_sr=native_rate, target_sr=16000)
10
 
11
def preprocess_mp3(sample, index):
    """Convert one windowed audio slice into a magnitude spectrogram.

    Parameters
    ----------
    sample : tf.Tensor
        Windowed samples; only ``sample[0]`` is used (assumes a leading
        batch axis of size 1 from the dataset — TODO confirm).
    index : tf.Tensor
        Unused. Present so the signature matches the (inputs, targets)
        pairs yielded by ``timeseries_dataset_from_array``.

    Returns
    -------
    tf.Tensor
        Spectrogram of shape (frames, bins, 1).
    """
    window = tf.cast(sample[0], tf.float32)
    # Left-pad with zeros so every window is exactly 16000 samples long.
    padding = tf.zeros([16000] - tf.shape(window), dtype=tf.float32)
    padded = tf.concat([padding, window], 0)
    # Short-time Fourier transform; keep only the magnitudes.
    magnitudes = tf.abs(tf.signal.stft(padded, frame_length=320, frame_step=32))
    # Append a trailing channel axis.
    return tf.expand_dims(magnitudes, axis=2)
20
+
21
def greet(name):
    """Run snore detection over an uploaded audio file.

    Parameters
    ----------
    name : str or file-like
        Audio file handed over by the Gradio "file" input component.

    Returns
    -------
    list[int]
        Per-window 0/1 predictions (1 = prediction above the 0.99
        threshold) with consecutive duplicates collapsed, so each run of
        identical predictions appears once.
    """
    wave = load_audio_to_tensor(name)

    # Mean signal power is the average of the SQUARED samples.
    # BUG FIX: the original used `sum(wave * 2) / len(wave)`, which merely
    # doubles each sample instead of squaring it; that sum can be ~0 or
    # negative, making `noise_power ** 0.5` below NaN.
    power = np.mean(wave ** 2)
    SNR = 3.5                      # target signal-to-noise ratio in dB
    SNR_linear = 10 ** (SNR / 10)  # convert dB to a linear power ratio
    noise_power = power / SNR_linear

    # Add Gaussian noise to simulate a noisy environment, then scale to
    # the 16-bit sample range.
    noise = np.random.normal(0, noise_power ** 0.5, wave.shape)
    wave = (wave + noise) * 32768.0

    # Slide a 16000-sample (1 s at 16 kHz) window over the signal. For
    # clips no longer than one window, a stride of 16000-1 still yields a
    # single slice.
    sequence_stride = 16000 if len(wave) > 16000 else 16000 - 1
    audio_slices = tf.keras.utils.timeseries_dataset_from_array(
        wave,
        wave,
        sequence_length=16000,
        sequence_stride=sequence_stride,
        batch_size=1,
    )

    # Spectrogram each window, then batch for inference.
    audio_slices = audio_slices.map(preprocess_mp3).batch(64)

    model = from_pretrained_keras("CXDJY/snore_ai")
    yhat = model.predict(audio_slices)

    # Binarize with a high-confidence threshold, then collapse runs of
    # consecutive identical predictions via itertools.groupby.
    yhat = [1 if prediction > 0.99 else 0 for prediction in yhat]
    return [key for key, group in groupby(yhat)]
51
 
52
# Wire the prediction function to a Gradio UI: a file upload in, the
# collapsed 0/1 prediction list rendered as text out.
iface = gr.Interface(fn=greet, inputs="file", outputs="text")
# iface = gr.Interface(fn=greet, inputs="audio", outputs="text")
# NOTE(review): no iface.launch() call is visible in this chunk — confirm
# the hosting environment starts the interface automatically.
requirements.txt CHANGED
@@ -1 +1,5 @@
1
- librosa==0.10.1
 
 
 
 
 
1
+ librosa==0.10.1
2
+ huggingface_hub==0.20.1
3
+ numpy==1.26.4
4
+ tensorflow==2.15.0
5
+ tensorflow_intel==2.15.0