File size: 1,131 Bytes
bbcc5b2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
import tensorflow as tf
import numpy as np
import librosa
import pickle
import io

# Load the YAMNet model from the SavedModel at the relative path
# 'yamnet_saved_model'. This runs once, at import time, and the resulting
# module-level object is what extract_audio_embeddings() calls.
yamnet_model = tf.saved_model.load('yamnet_saved_model')

# Compute YAMNet embeddings for an in-memory audio file
def extract_audio_embeddings(audio_binary):
    """Return YAMNet embeddings for encoded audio held in memory.

    Args:
        audio_binary: Raw bytes of an audio file. They are wrapped in an
            ``io.BytesIO`` and decoded by ``librosa.load``, so this must be
            a bytes-like object, not a file path.

    Returns:
        The model's ``embeddings`` output converted to nested Python lists
        via ``.numpy().tolist()``.
    """
    # Decode and resample in one step; YAMNet requires a 16 kHz sample rate.
    waveform, _ = librosa.load(io.BytesIO(audio_binary), sr=16000)
    waveform_tensor = tf.convert_to_tensor(waveform, dtype=tf.float32)
    # The model returns (scores, embeddings, spectrogram); only the
    # embeddings are used here.
    _, frame_embeddings, _ = yamnet_model(waveform_tensor)
    return frame_embeddings.numpy().tolist()

# Example usage
if __name__ == "__main__":
    image_audio_path = "pictures/users/1a.mp3"
    # extract_audio_embeddings() wraps its argument in io.BytesIO, so it needs
    # the file's raw bytes rather than the path string (passing the path
    # raises TypeError). Read the file in binary mode first.
    with open(image_audio_path, "rb") as audio_file:
        audio_bytes = audio_file.read()
    # Extract embeddings from image audio file
    image_audio_embeddings = extract_audio_embeddings(audio_bytes)
    print("Embeddings for", image_audio_path)
    print(image_audio_embeddings)
# Printed on import as well as on direct execution, since it sits outside the
# __main__ guard.
print("audio embedding model loaded succesfully")