// Source: Comparative-Analysis-of-Speech-Synthesis-Models/TensorFlowTTS/examples/cpptflite/src/AudioFile.h
//=======================================================================
/** @file AudioFile.h
 *  @author Adam Stark
 *  @copyright Copyright (C) 2017 Adam Stark
 *
 * This file is part of the 'AudioFile' library
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
//=======================================================================
// Silence a handful of conversion / shadowing warnings for the rest of this
// header. Each compiler only understands its own pragma spelling, so the two
// sets have to be selected by the preprocessor instead of being emitted
// unconditionally.
// NOTE(review): the matching "pop" is expected at the end of the file under
// the same guards - it is not visible from here, please confirm.
#if defined (_MSC_VER)
__pragma(warning (push))
__pragma(warning (disable : 4244))  // conversion, possible loss of data
__pragma(warning (disable : 4457))  // declaration hides function parameter
__pragma(warning (disable : 4458))  // declaration hides class member
__pragma(warning (disable : 4389))  // signed/unsigned mismatch
__pragma(warning (disable : 4996))  // deprecated / "unsafe" function
#elif defined (__GNUC__)
_Pragma("GCC diagnostic push")
_Pragma("GCC diagnostic ignored \"-Wconversion\"")
_Pragma("GCC diagnostic ignored \"-Wsign-compare\"")
_Pragma("GCC diagnostic ignored \"-Wshadow\"")
#endif
//============================================================= | |
/** Identifies the container format of an audio file.
 *
 *  Besides the real formats there are two bookkeeping values: one that
 *  signals a failure to load a file and one that means no file has been
 *  loaded yet.
 */
enum class AudioFileFormat
{
    Error     = 0,  // a load attempt failed
    NotLoaded = 1,  // nothing has been loaded yet
    Wave      = 2,
    Aiff      = 3
};
//============================================================= | |
template <class T> | |
class AudioFile | |
{ | |
public: | |
//============================================================= | |
typedef std::vector<std::vector<T> > AudioBuffer; | |
//============================================================= | |
/** Constructor */ | |
AudioFile(); | |
//============================================================= | |
/** Loads an audio file from a given file path. | |
* @Returns true if the file was successfully loaded | |
*/ | |
bool load (std::string filePath); | |
/** Saves an audio file to a given file path. | |
* @Returns true if the file was successfully saved | |
*/ | |
bool save (std::string filePath, AudioFileFormat format = AudioFileFormat::Wave); | |
//============================================================= | |
/** @Returns the sample rate */ | |
uint32_t getSampleRate() const; | |
/** @Returns the number of audio channels in the buffer */ | |
int getNumChannels() const; | |
/** @Returns true if the audio file is mono */ | |
bool isMono() const; | |
/** @Returns true if the audio file is stereo */ | |
bool isStereo() const; | |
/** @Returns the bit depth of each sample */ | |
int getBitDepth() const; | |
/** @Returns the number of samples per channel */ | |
int getNumSamplesPerChannel() const; | |
/** @Returns the length in seconds of the audio file based on the number of samples and sample rate */ | |
double getLengthInSeconds() const; | |
/** Prints a summary of the audio file to the console */ | |
void printSummary() const; | |
//============================================================= | |
/** Set the audio buffer for this AudioFile by copying samples from another buffer. | |
* @Returns true if the buffer was copied successfully. | |
*/ | |
bool setAudioBuffer (AudioBuffer& newBuffer); | |
/** Sets the audio buffer to a given number of channels and number of samples per channel. This will try to preserve | |
* the existing audio, adding zeros to any new channels or new samples in a given channel. | |
*/ | |
void setAudioBufferSize (int numChannels, int numSamples); | |
/** Sets the number of samples per channel in the audio buffer. This will try to preserve | |
* the existing audio, adding zeros to new samples in a given channel if the number of samples is increased. | |
*/ | |
void setNumSamplesPerChannel (int numSamples); | |
/** Sets the number of channels. New channels will have the correct number of samples and be initialised to zero */ | |
void setNumChannels (int numChannels); | |
/** Sets the bit depth for the audio file. If you use the save() function, this bit depth rate will be used */ | |
void setBitDepth (int numBitsPerSample); | |
/** Sets the sample rate for the audio file. If you use the save() function, this sample rate will be used */ | |
void setSampleRate (uint32_t newSampleRate); | |
//============================================================= | |
/** Sets whether the library should log error messages to the console. By default this is true */ | |
void shouldLogErrorsToConsole (bool logErrors); | |
//============================================================= | |
/** A vector of vectors holding the audio samples for the AudioFile. You can | |
* access the samples by channel and then by sample index, i.e: | |
* | |
* samples[channel][sampleIndex] | |
*/ | |
AudioBuffer samples; | |
//============================================================= | |
/** An optional iXML chunk that can be added to the AudioFile. | |
*/ | |
std::string iXMLChunk; | |
private: | |
//============================================================= | |
enum class Endianness | |
{ | |
LittleEndian, | |
BigEndian | |
}; | |
//============================================================= | |
AudioFileFormat determineAudioFileFormat (std::vector<uint8_t>& fileData); | |
bool decodeWaveFile (std::vector<uint8_t>& fileData); | |
bool decodeAiffFile (std::vector<uint8_t>& fileData); | |
//============================================================= | |
bool saveToWaveFile (std::string filePath); | |
bool saveToAiffFile (std::string filePath); | |
//============================================================= | |
void clearAudioBuffer(); | |
//============================================================= | |
int32_t fourBytesToInt (std::vector<uint8_t>& source, int startIndex, Endianness endianness = Endianness::LittleEndian); | |
int16_t twoBytesToInt (std::vector<uint8_t>& source, int startIndex, Endianness endianness = Endianness::LittleEndian); | |
int getIndexOfString (std::vector<uint8_t>& source, std::string s); | |
int getIndexOfChunk (std::vector<uint8_t>& source, const std::string& chunkHeaderID, int startIndex, Endianness endianness = Endianness::LittleEndian); | |
//============================================================= | |
T sixteenBitIntToSample (int16_t sample); | |
int16_t sampleToSixteenBitInt (T sample); | |
//============================================================= | |
uint8_t sampleToSingleByte (T sample); | |
T singleByteToSample (uint8_t sample); | |
uint32_t getAiffSampleRate (std::vector<uint8_t>& fileData, int sampleRateStartIndex); | |
bool tenByteMatch (std::vector<uint8_t>& v1, int startIndex1, std::vector<uint8_t>& v2, int startIndex2); | |
void addSampleRateToAiffData (std::vector<uint8_t>& fileData, uint32_t sampleRate); | |
T clamp (T v1, T minValue, T maxValue); | |
//============================================================= | |
void addStringToFileData (std::vector<uint8_t>& fileData, std::string s); | |
void addInt32ToFileData (std::vector<uint8_t>& fileData, int32_t i, Endianness endianness = Endianness::LittleEndian); | |
void addInt16ToFileData (std::vector<uint8_t>& fileData, int16_t i, Endianness endianness = Endianness::LittleEndian); | |
//============================================================= | |
bool writeDataToFile (std::vector<uint8_t>& fileData, std::string filePath); | |
//============================================================= | |
void reportError (std::string errorMessage); | |
//============================================================= | |
AudioFileFormat audioFileFormat; | |
uint32_t sampleRate; | |
int bitDepth; | |
bool logErrorsToConsole {true}; | |
}; | |
//============================================================= | |
// Pre-defined 10-byte representations of common sample rates.
// Layout of each entry: two exponent bytes followed by eight mantissa bytes,
// most significant byte first. For these rates the leading mantissa bytes are
// simply the rate shifted left until its top bit is set (e.g. 44100 = 0xAC44).
static std::unordered_map <uint32_t, std::vector<uint8_t>> aiffSampleRateTable = {
    {    8000, {0x40, 0x0B, 0xFA, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}},
    {   11025, {0x40, 0x0C, 0xAC, 0x44, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}},
    {   16000, {0x40, 0x0C, 0xFA, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}},
    {   22050, {0x40, 0x0D, 0xAC, 0x44, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}},
    {   32000, {0x40, 0x0D, 0xFA, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}},
    {   37800, {0x40, 0x0E, 0x93, 0xA8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}},
    {   44056, {0x40, 0x0E, 0xAC, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}},
    {   44100, {0x40, 0x0E, 0xAC, 0x44, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}},
    {   47250, {0x40, 0x0E, 0xB8, 0x92, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}},
    {   48000, {0x40, 0x0E, 0xBB, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}},
    {   50000, {0x40, 0x0E, 0xC3, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}},
    {   50400, {0x40, 0x0E, 0xC4, 0xE0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}},
    {   88200, {0x40, 0x0F, 0xAC, 0x44, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}},
    {   96000, {0x40, 0x0F, 0xBB, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}},
    {  176400, {0x40, 0x10, 0xAC, 0x44, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}},
    {  192000, {0x40, 0x10, 0xBB, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}},
    {  352800, {0x40, 0x11, 0xAC, 0x44, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}},
    { 2822400, {0x40, 0x14, 0xAC, 0x44, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}},
    { 5644800, {0x40, 0x15, 0xAC, 0x44, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}
};
//============================================================= | |
/** Audio format codes compared against the format field read from a WAV "fmt " chunk */
enum WavAudioFormat
{
    PCM        = 1,      // 0x0001
    IEEEFloat  = 3,      // 0x0003
    ALaw       = 6,      // 0x0006
    MULaw      = 7,      // 0x0007
    Extensible = 65534   // 0xFFFE
};
//============================================================= | |
/** Flavour of an AIFF file, derived from the form type in its header */
enum AIFFAudioFormat
{
    Uncompressed = 0,  // form type "AIFF"
    Compressed   = 1,  // form type "AIFC"
    Error        = 2   // any other form type
};
//============================================================= | |
/* IMPLEMENTATION */ | |
//============================================================= | |
//============================================================= | |
template <class T> | |
AudioFile<T>::AudioFile() | |
{ | |
static_assert(std::is_floating_point<T>::value, "ERROR: This version of AudioFile only supports floating point sample formats"); | |
bitDepth = 16; | |
sampleRate = 44100; | |
samples.resize (1); | |
samples[0].resize (0); | |
audioFileFormat = AudioFileFormat::NotLoaded; | |
} | |
//============================================================= | |
template <class T> | |
uint32_t AudioFile<T>::getSampleRate() const | |
{ | |
return sampleRate; | |
} | |
//============================================================= | |
template <class T> | |
int AudioFile<T>::getNumChannels() const | |
{ | |
return (int)samples.size(); | |
} | |
//============================================================= | |
template <class T> | |
bool AudioFile<T>::isMono() const | |
{ | |
return getNumChannels() == 1; | |
} | |
//============================================================= | |
template <class T> | |
bool AudioFile<T>::isStereo() const | |
{ | |
return getNumChannels() == 2; | |
} | |
//============================================================= | |
template <class T> | |
int AudioFile<T>::getBitDepth() const | |
{ | |
return bitDepth; | |
} | |
//============================================================= | |
template <class T> | |
int AudioFile<T>::getNumSamplesPerChannel() const | |
{ | |
if (samples.size() > 0) | |
return (int) samples[0].size(); | |
else | |
return 0; | |
} | |
//============================================================= | |
template <class T> | |
double AudioFile<T>::getLengthInSeconds() const | |
{ | |
return (double)getNumSamplesPerChannel() / (double)sampleRate; | |
} | |
//============================================================= | |
template <class T> | |
void AudioFile<T>::printSummary() const | |
{ | |
std::cout << "|======================================|" << std::endl; | |
std::cout << "Num Channels: " << getNumChannels() << std::endl; | |
std::cout << "Num Samples Per Channel: " << getNumSamplesPerChannel() << std::endl; | |
std::cout << "Sample Rate: " << sampleRate << std::endl; | |
std::cout << "Bit Depth: " << bitDepth << std::endl; | |
std::cout << "Length in Seconds: " << getLengthInSeconds() << std::endl; | |
std::cout << "|======================================|" << std::endl; | |
} | |
//============================================================= | |
template <class T> | |
bool AudioFile<T>::setAudioBuffer (AudioBuffer& newBuffer) | |
{ | |
int numChannels = (int)newBuffer.size(); | |
if (numChannels <= 0) | |
{ | |
assert (false && "The buffer your are trying to use has no channels"); | |
return false; | |
} | |
size_t numSamples = newBuffer[0].size(); | |
// set the number of channels | |
samples.resize (newBuffer.size()); | |
for (int k = 0; k < getNumChannels(); k++) | |
{ | |
assert (newBuffer[k].size() == numSamples); | |
samples[k].resize (numSamples); | |
for (size_t i = 0; i < numSamples; i++) | |
{ | |
samples[k][i] = newBuffer[k][i]; | |
} | |
} | |
return true; | |
} | |
//============================================================= | |
template <class T> | |
void AudioFile<T>::setAudioBufferSize (int numChannels, int numSamples) | |
{ | |
samples.resize (numChannels); | |
setNumSamplesPerChannel (numSamples); | |
} | |
//============================================================= | |
template <class T> | |
void AudioFile<T>::setNumSamplesPerChannel (int numSamples) | |
{ | |
int originalSize = getNumSamplesPerChannel(); | |
for (int i = 0; i < getNumChannels();i++) | |
{ | |
samples[i].resize (numSamples); | |
// set any new samples to zero | |
if (numSamples > originalSize) | |
std::fill (samples[i].begin() + originalSize, samples[i].end(), (T)0.); | |
} | |
} | |
//============================================================= | |
template <class T> | |
void AudioFile<T>::setNumChannels (int numChannels) | |
{ | |
int originalNumChannels = getNumChannels(); | |
int originalNumSamplesPerChannel = getNumSamplesPerChannel(); | |
samples.resize (numChannels); | |
// make sure any new channels are set to the right size | |
// and filled with zeros | |
if (numChannels > originalNumChannels) | |
{ | |
for (int i = originalNumChannels; i < numChannels; i++) | |
{ | |
samples[i].resize (originalNumSamplesPerChannel); | |
std::fill (samples[i].begin(), samples[i].end(), (T)0.); | |
} | |
} | |
} | |
//============================================================= | |
template <class T> | |
void AudioFile<T>::setBitDepth (int numBitsPerSample) | |
{ | |
bitDepth = numBitsPerSample; | |
} | |
//============================================================= | |
template <class T> | |
void AudioFile<T>::setSampleRate (uint32_t newSampleRate) | |
{ | |
sampleRate = newSampleRate; | |
} | |
//============================================================= | |
template <class T> | |
void AudioFile<T>::shouldLogErrorsToConsole (bool logErrors) | |
{ | |
logErrorsToConsole = logErrors; | |
} | |
//============================================================= | |
template <class T> | |
bool AudioFile<T>::load (std::string filePath) | |
{ | |
std::ifstream file (filePath, std::ios::binary); | |
// check the file exists | |
if (! file.good()) | |
{ | |
reportError ("ERROR: File doesn't exist or otherwise can't load file\n" + filePath); | |
return false; | |
} | |
file.unsetf (std::ios::skipws); | |
std::istream_iterator<uint8_t> begin (file), end; | |
std::vector<uint8_t> fileData (begin, end); | |
// get audio file format | |
audioFileFormat = determineAudioFileFormat (fileData); | |
if (audioFileFormat == AudioFileFormat::Wave) | |
{ | |
return decodeWaveFile (fileData); | |
} | |
else if (audioFileFormat == AudioFileFormat::Aiff) | |
{ | |
return decodeAiffFile (fileData); | |
} | |
else | |
{ | |
reportError ("Audio File Type: Error"); | |
return false; | |
} | |
} | |
//============================================================= | |
template <class T> | |
bool AudioFile<T>::decodeWaveFile (std::vector<uint8_t>& fileData) | |
{ | |
// ----------------------------------------------------------- | |
// HEADER CHUNK | |
std::string headerChunkID (fileData.begin(), fileData.begin() + 4); | |
//int32_t fileSizeInBytes = fourBytesToInt (fileData, 4) + 8; | |
std::string format (fileData.begin() + 8, fileData.begin() + 12); | |
// ----------------------------------------------------------- | |
// try and find the start points of key chunks | |
int indexOfDataChunk = getIndexOfChunk (fileData, "data", 12); | |
int indexOfFormatChunk = getIndexOfChunk (fileData, "fmt ", 12); | |
int indexOfXMLChunk = getIndexOfChunk (fileData, "iXML", 12); | |
// if we can't find the data or format chunks, or the IDs/formats don't seem to be as expected | |
// then it is unlikely we'll able to read this file, so abort | |
if (indexOfDataChunk == -1 || indexOfFormatChunk == -1 || headerChunkID != "RIFF" || format != "WAVE") | |
{ | |
reportError ("ERROR: this doesn't seem to be a valid .WAV file"); | |
return false; | |
} | |
// ----------------------------------------------------------- | |
// FORMAT CHUNK | |
int f = indexOfFormatChunk; | |
std::string formatChunkID (fileData.begin() + f, fileData.begin() + f + 4); | |
//int32_t formatChunkSize = fourBytesToInt (fileData, f + 4); | |
int16_t audioFormat = twoBytesToInt (fileData, f + 8); | |
int16_t numChannels = twoBytesToInt (fileData, f + 10); | |
sampleRate = (uint32_t) fourBytesToInt (fileData, f + 12); | |
int32_t numBytesPerSecond = fourBytesToInt (fileData, f + 16); | |
int16_t numBytesPerBlock = twoBytesToInt (fileData, f + 20); | |
bitDepth = (int) twoBytesToInt (fileData, f + 22); | |
int numBytesPerSample = bitDepth / 8; | |
// check that the audio format is PCM or Float | |
if (audioFormat != WavAudioFormat::PCM && audioFormat != WavAudioFormat::IEEEFloat) | |
{ | |
reportError ("ERROR: this .WAV file is encoded in a format that this library does not support at present"); | |
return false; | |
} | |
// check the number of channels is mono or stereo | |
if (numChannels < 1 || numChannels > 128) | |
{ | |
reportError ("ERROR: this WAV file seems to be an invalid number of channels (or corrupted?)"); | |
return false; | |
} | |
// check header data is consistent | |
if ((numBytesPerSecond != (numChannels * sampleRate * bitDepth) / 8) || (numBytesPerBlock != (numChannels * numBytesPerSample))) | |
{ | |
reportError ("ERROR: the header data in this WAV file seems to be inconsistent"); | |
return false; | |
} | |
// check bit depth is either 8, 16, 24 or 32 bit | |
if (bitDepth != 8 && bitDepth != 16 && bitDepth != 24 && bitDepth != 32) | |
{ | |
reportError ("ERROR: this file has a bit depth that is not 8, 16, 24 or 32 bits"); | |
return false; | |
} | |
// ----------------------------------------------------------- | |
// DATA CHUNK | |
int d = indexOfDataChunk; | |
std::string dataChunkID (fileData.begin() + d, fileData.begin() + d + 4); | |
int32_t dataChunkSize = fourBytesToInt (fileData, d + 4); | |
int numSamples = dataChunkSize / (numChannels * bitDepth / 8); | |
int samplesStartIndex = indexOfDataChunk + 8; | |
clearAudioBuffer(); | |
samples.resize (numChannels); | |
for (int i = 0; i < numSamples; i++) | |
{ | |
for (int channel = 0; channel < numChannels; channel++) | |
{ | |
int sampleIndex = samplesStartIndex + (numBytesPerBlock * i) + channel * numBytesPerSample; | |
if (bitDepth == 8) | |
{ | |
T sample = singleByteToSample (fileData[sampleIndex]); | |
samples[channel].push_back (sample); | |
} | |
else if (bitDepth == 16) | |
{ | |
int16_t sampleAsInt = twoBytesToInt (fileData, sampleIndex); | |
T sample = sixteenBitIntToSample (sampleAsInt); | |
samples[channel].push_back (sample); | |
} | |
else if (bitDepth == 24) | |
{ | |
int32_t sampleAsInt = 0; | |
sampleAsInt = (fileData[sampleIndex + 2] << 16) | (fileData[sampleIndex + 1] << 8) | fileData[sampleIndex]; | |
if (sampleAsInt & 0x800000) // if the 24th bit is set, this is a negative number in 24-bit world | |
sampleAsInt = sampleAsInt | ~0xFFFFFF; // so make sure sign is extended to the 32 bit float | |
T sample = (T)sampleAsInt / (T)8388608.; | |
samples[channel].push_back (sample); | |
} | |
else if (bitDepth == 32) | |
{ | |
int32_t sampleAsInt = fourBytesToInt (fileData, sampleIndex); | |
T sample; | |
if (audioFormat == WavAudioFormat::IEEEFloat) | |
sample = (T)reinterpret_cast<float&> (sampleAsInt); | |
else // assume PCM | |
sample = (T) sampleAsInt / static_cast<float> (std::numeric_limits<std::int32_t>::max()); | |
samples[channel].push_back (sample); | |
} | |
else | |
{ | |
assert (false); | |
} | |
} | |
} | |
// ----------------------------------------------------------- | |
// iXML CHUNK | |
if (indexOfXMLChunk != -1) | |
{ | |
int32_t chunkSize = fourBytesToInt (fileData, indexOfXMLChunk + 4); | |
iXMLChunk = std::string ((const char*) &fileData[indexOfXMLChunk + 8], chunkSize); | |
} | |
return true; | |
} | |
//============================================================= | |
template <class T> | |
bool AudioFile<T>::decodeAiffFile (std::vector<uint8_t>& fileData) | |
{ | |
// ----------------------------------------------------------- | |
// HEADER CHUNK | |
std::string headerChunkID (fileData.begin(), fileData.begin() + 4); | |
//int32_t fileSizeInBytes = fourBytesToInt (fileData, 4, Endianness::BigEndian) + 8; | |
std::string format (fileData.begin() + 8, fileData.begin() + 12); | |
int audioFormat = format == "AIFF" ? AIFFAudioFormat::Uncompressed : format == "AIFC" ? AIFFAudioFormat::Compressed : AIFFAudioFormat::Error; | |
// ----------------------------------------------------------- | |
// try and find the start points of key chunks | |
int indexOfCommChunk = getIndexOfChunk (fileData, "COMM", 12, Endianness::BigEndian); | |
int indexOfSoundDataChunk = getIndexOfChunk (fileData, "SSND", 12, Endianness::BigEndian); | |
int indexOfXMLChunk = getIndexOfChunk (fileData, "iXML", 12, Endianness::BigEndian); | |
// if we can't find the data or format chunks, or the IDs/formats don't seem to be as expected | |
// then it is unlikely we'll able to read this file, so abort | |
if (indexOfSoundDataChunk == -1 || indexOfCommChunk == -1 || headerChunkID != "FORM" || audioFormat == AIFFAudioFormat::Error) | |
{ | |
reportError ("ERROR: this doesn't seem to be a valid AIFF file"); | |
return false; | |
} | |
// ----------------------------------------------------------- | |
// COMM CHUNK | |
int p = indexOfCommChunk; | |
std::string commChunkID (fileData.begin() + p, fileData.begin() + p + 4); | |
//int32_t commChunkSize = fourBytesToInt (fileData, p + 4, Endianness::BigEndian); | |
int16_t numChannels = twoBytesToInt (fileData, p + 8, Endianness::BigEndian); | |
int32_t numSamplesPerChannel = fourBytesToInt (fileData, p + 10, Endianness::BigEndian); | |
bitDepth = (int) twoBytesToInt (fileData, p + 14, Endianness::BigEndian); | |
sampleRate = getAiffSampleRate (fileData, p + 16); | |
// check the sample rate was properly decoded | |
if (sampleRate == 0) | |
{ | |
reportError ("ERROR: this AIFF file has an unsupported sample rate"); | |
return false; | |
} | |
// check the number of channels is mono or stereo | |
if (numChannels < 1 ||numChannels > 2) | |
{ | |
reportError ("ERROR: this AIFF file seems to be neither mono nor stereo (perhaps multi-track, or corrupted?)"); | |
return false; | |
} | |
// check bit depth is either 8, 16, 24 or 32-bit | |
if (bitDepth != 8 && bitDepth != 16 && bitDepth != 24 && bitDepth != 32) | |
{ | |
reportError ("ERROR: this file has a bit depth that is not 8, 16, 24 or 32 bits"); | |
return false; | |
} | |
// ----------------------------------------------------------- | |
// SSND CHUNK | |
int s = indexOfSoundDataChunk; | |
std::string soundDataChunkID (fileData.begin() + s, fileData.begin() + s + 4); | |
int32_t soundDataChunkSize = fourBytesToInt (fileData, s + 4, Endianness::BigEndian); | |
int32_t offset = fourBytesToInt (fileData, s + 8, Endianness::BigEndian); | |
//int32_t blockSize = fourBytesToInt (fileData, s + 12, Endianness::BigEndian); | |
int numBytesPerSample = bitDepth / 8; | |
int numBytesPerFrame = numBytesPerSample * numChannels; | |
int totalNumAudioSampleBytes = numSamplesPerChannel * numBytesPerFrame; | |
int samplesStartIndex = s + 16 + (int)offset; | |
// sanity check the data | |
if ((soundDataChunkSize - 8) != totalNumAudioSampleBytes || totalNumAudioSampleBytes > static_cast<long>(fileData.size() - samplesStartIndex)) | |
{ | |
reportError ("ERROR: the metadatafor this file doesn't seem right"); | |
return false; | |
} | |
clearAudioBuffer(); | |
samples.resize (numChannels); | |
for (int i = 0; i < numSamplesPerChannel; i++) | |
{ | |
for (int channel = 0; channel < numChannels; channel++) | |
{ | |
int sampleIndex = samplesStartIndex + (numBytesPerFrame * i) + channel * numBytesPerSample; | |
if (bitDepth == 8) | |
{ | |
int8_t sampleAsSigned8Bit = (int8_t)fileData[sampleIndex]; | |
T sample = (T)sampleAsSigned8Bit / (T)128.; | |
samples[channel].push_back (sample); | |
} | |
else if (bitDepth == 16) | |
{ | |
int16_t sampleAsInt = twoBytesToInt (fileData, sampleIndex, Endianness::BigEndian); | |
T sample = sixteenBitIntToSample (sampleAsInt); | |
samples[channel].push_back (sample); | |
} | |
else if (bitDepth == 24) | |
{ | |
int32_t sampleAsInt = 0; | |
sampleAsInt = (fileData[sampleIndex] << 16) | (fileData[sampleIndex + 1] << 8) | fileData[sampleIndex + 2]; | |
if (sampleAsInt & 0x800000) // if the 24th bit is set, this is a negative number in 24-bit world | |
sampleAsInt = sampleAsInt | ~0xFFFFFF; // so make sure sign is extended to the 32 bit float | |
T sample = (T)sampleAsInt / (T)8388608.; | |
samples[channel].push_back (sample); | |
} | |
else if (bitDepth == 32) | |
{ | |
int32_t sampleAsInt = fourBytesToInt (fileData, sampleIndex, Endianness::BigEndian); | |
T sample; | |
if (audioFormat == AIFFAudioFormat::Compressed) | |
sample = (T)reinterpret_cast<float&> (sampleAsInt); | |
else // assume uncompressed | |
sample = (T) sampleAsInt / static_cast<float> (std::numeric_limits<std::int32_t>::max()); | |
samples[channel].push_back (sample); | |
} | |
else | |
{ | |
assert (false); | |
} | |
} | |
} | |
// ----------------------------------------------------------- | |
// iXML CHUNK | |
if (indexOfXMLChunk != -1) | |
{ | |
int32_t chunkSize = fourBytesToInt (fileData, indexOfXMLChunk + 4); | |
iXMLChunk = std::string ((const char*) &fileData[indexOfXMLChunk + 8], chunkSize); | |
} | |
return true; | |
} | |
//============================================================= | |
template <class T> | |
uint32_t AudioFile<T>::getAiffSampleRate (std::vector<uint8_t>& fileData, int sampleRateStartIndex) | |
{ | |
for (auto it : aiffSampleRateTable) | |
{ | |
if (tenByteMatch (fileData, sampleRateStartIndex, it.second, 0)) | |
return it.first; | |
} | |
return 0; | |
} | |
//============================================================= | |
template <class T> | |
bool AudioFile<T>::tenByteMatch (std::vector<uint8_t>& v1, int startIndex1, std::vector<uint8_t>& v2, int startIndex2) | |
{ | |
for (int i = 0; i < 10; i++) | |
{ | |
if (v1[startIndex1 + i] != v2[startIndex2 + i]) | |
return false; | |
} | |
return true; | |
} | |
//============================================================= | |
template <class T> | |
void AudioFile<T>::addSampleRateToAiffData (std::vector<uint8_t>& fileData, uint32_t sampleRate) | |
{ | |
if (aiffSampleRateTable.count (sampleRate) > 0) | |
{ | |
for (int i = 0; i < 10; i++) | |
fileData.push_back (aiffSampleRateTable[sampleRate][i]); | |
} | |
} | |
//============================================================= | |
template <class T> | |
bool AudioFile<T>::save (std::string filePath, AudioFileFormat format) | |
{ | |
if (format == AudioFileFormat::Wave) | |
{ | |
return saveToWaveFile (filePath); | |
} | |
else if (format == AudioFileFormat::Aiff) | |
{ | |
return saveToAiffFile (filePath); | |
} | |
return false; | |
} | |
//============================================================= | |
template <class T> | |
bool AudioFile<T>::saveToWaveFile (std::string filePath) | |
{ | |
std::vector<uint8_t> fileData; | |
int32_t dataChunkSize = getNumSamplesPerChannel() * (getNumChannels() * bitDepth / 8); | |
int16_t audioFormat = bitDepth == 32 ? WavAudioFormat::IEEEFloat : WavAudioFormat::PCM; | |
int32_t formatChunkSize = audioFormat == WavAudioFormat::PCM ? 16 : 18; | |
int32_t iXMLChunkSize = static_cast<int32_t> (iXMLChunk.size()); | |
// ----------------------------------------------------------- | |
// HEADER CHUNK | |
addStringToFileData (fileData, "RIFF"); | |
// The file size in bytes is the header chunk size (4, not counting RIFF and WAVE) + the format | |
// chunk size (24) + the metadata part of the data chunk plus the actual data chunk size | |
int32_t fileSizeInBytes = 4 + formatChunkSize + 8 + 8 + dataChunkSize; | |
if (iXMLChunkSize > 0) | |
{ | |
fileSizeInBytes += (8 + iXMLChunkSize); | |
} | |
addInt32ToFileData (fileData, fileSizeInBytes); | |
addStringToFileData (fileData, "WAVE"); | |
// ----------------------------------------------------------- | |
// FORMAT CHUNK | |
addStringToFileData (fileData, "fmt "); | |
addInt32ToFileData (fileData, formatChunkSize); // format chunk size (16 for PCM) | |
addInt16ToFileData (fileData, audioFormat); // audio format | |
addInt16ToFileData (fileData, (int16_t)getNumChannels()); // num channels | |
addInt32ToFileData (fileData, (int32_t)sampleRate); // sample rate | |
int32_t numBytesPerSecond = (int32_t) ((getNumChannels() * sampleRate * bitDepth) / 8); | |
addInt32ToFileData (fileData, numBytesPerSecond); | |
int16_t numBytesPerBlock = getNumChannels() * (bitDepth / 8); | |
addInt16ToFileData (fileData, numBytesPerBlock); | |
addInt16ToFileData (fileData, (int16_t)bitDepth); | |
if (audioFormat == WavAudioFormat::IEEEFloat) | |
addInt16ToFileData (fileData, 0); // extension size | |
// ----------------------------------------------------------- | |
// DATA CHUNK | |
addStringToFileData (fileData, "data"); | |
addInt32ToFileData (fileData, dataChunkSize); | |
for (int i = 0; i < getNumSamplesPerChannel(); i++) | |
{ | |
for (int channel = 0; channel < getNumChannels(); channel++) | |
{ | |
if (bitDepth == 8) | |
{ | |
uint8_t byte = sampleToSingleByte (samples[channel][i]); | |
fileData.push_back (byte); | |
} | |
else if (bitDepth == 16) | |
{ | |
int16_t sampleAsInt = sampleToSixteenBitInt (samples[channel][i]); | |
addInt16ToFileData (fileData, sampleAsInt); | |
} | |
else if (bitDepth == 24) | |
{ | |
int32_t sampleAsIntAgain = (int32_t) (samples[channel][i] * (T)8388608.); | |
uint8_t bytes[3]; | |
bytes[2] = (uint8_t) (sampleAsIntAgain >> 16) & 0xFF; | |
bytes[1] = (uint8_t) (sampleAsIntAgain >> 8) & 0xFF; | |
bytes[0] = (uint8_t) sampleAsIntAgain & 0xFF; | |
fileData.push_back (bytes[0]); | |
fileData.push_back (bytes[1]); | |
fileData.push_back (bytes[2]); | |
} | |
else if (bitDepth == 32) | |
{ | |
int32_t sampleAsInt; | |
if (audioFormat == WavAudioFormat::IEEEFloat) | |
sampleAsInt = (int32_t) reinterpret_cast<int32_t&> (samples[channel][i]); | |
else // assume PCM | |
sampleAsInt = (int32_t) (samples[channel][i] * std::numeric_limits<int32_t>::max()); | |
addInt32ToFileData (fileData, sampleAsInt, Endianness::LittleEndian); | |
} | |
else | |
{ | |
assert (false && "Trying to write a file with unsupported bit depth"); | |
return false; | |
} | |
} | |
} | |
// ----------------------------------------------------------- | |
// iXML CHUNK | |
if (iXMLChunkSize > 0) | |
{ | |
addStringToFileData (fileData, "iXML"); | |
addInt32ToFileData (fileData, iXMLChunkSize); | |
addStringToFileData (fileData, iXMLChunk); | |
} | |
// check that the various sizes we put in the metadata are correct | |
if (fileSizeInBytes != static_cast<int32_t> (fileData.size() - 8) || dataChunkSize != (getNumSamplesPerChannel() * getNumChannels() * (bitDepth / 8))) | |
{ | |
reportError ("ERROR: couldn't save file to " + filePath); | |
return false; | |
} | |
// try to write the file | |
return writeDataToFile (fileData, filePath); | |
} | |
//============================================================= | |
template <class T> | |
bool AudioFile<T>::saveToAiffFile (std::string filePath) | |
{ | |
std::vector<uint8_t> fileData; | |
int32_t numBytesPerSample = bitDepth / 8; | |
int32_t numBytesPerFrame = numBytesPerSample * getNumChannels(); | |
int32_t totalNumAudioSampleBytes = getNumSamplesPerChannel() * numBytesPerFrame; | |
int32_t soundDataChunkSize = totalNumAudioSampleBytes + 8; | |
int32_t iXMLChunkSize = static_cast<int32_t> (iXMLChunk.size()); | |
// ----------------------------------------------------------- | |
// HEADER CHUNK | |
addStringToFileData (fileData, "FORM"); | |
// The file size in bytes is the header chunk size (4, not counting FORM and AIFF) + the COMM | |
// chunk size (26) + the metadata part of the SSND chunk plus the actual data chunk size | |
int32_t fileSizeInBytes = 4 + 26 + 16 + totalNumAudioSampleBytes; | |
if (iXMLChunkSize > 0) | |
{ | |
fileSizeInBytes += (8 + iXMLChunkSize); | |
} | |
addInt32ToFileData (fileData, fileSizeInBytes, Endianness::BigEndian); | |
addStringToFileData (fileData, "AIFF"); | |
// ----------------------------------------------------------- | |
// COMM CHUNK | |
addStringToFileData (fileData, "COMM"); | |
addInt32ToFileData (fileData, 18, Endianness::BigEndian); // commChunkSize | |
addInt16ToFileData (fileData, getNumChannels(), Endianness::BigEndian); // num channels | |
addInt32ToFileData (fileData, getNumSamplesPerChannel(), Endianness::BigEndian); // num samples per channel | |
addInt16ToFileData (fileData, bitDepth, Endianness::BigEndian); // bit depth | |
addSampleRateToAiffData (fileData, sampleRate); | |
// ----------------------------------------------------------- | |
// SSND CHUNK | |
addStringToFileData (fileData, "SSND"); | |
addInt32ToFileData (fileData, soundDataChunkSize, Endianness::BigEndian); | |
addInt32ToFileData (fileData, 0, Endianness::BigEndian); // offset | |
addInt32ToFileData (fileData, 0, Endianness::BigEndian); // block size | |
for (int i = 0; i < getNumSamplesPerChannel(); i++) | |
{ | |
for (int channel = 0; channel < getNumChannels(); channel++) | |
{ | |
if (bitDepth == 8) | |
{ | |
uint8_t byte = sampleToSingleByte (samples[channel][i]); | |
fileData.push_back (byte); | |
} | |
else if (bitDepth == 16) | |
{ | |
int16_t sampleAsInt = sampleToSixteenBitInt (samples[channel][i]); | |
addInt16ToFileData (fileData, sampleAsInt, Endianness::BigEndian); | |
} | |
else if (bitDepth == 24) | |
{ | |
int32_t sampleAsIntAgain = (int32_t) (samples[channel][i] * (T)8388608.); | |
uint8_t bytes[3]; | |
bytes[0] = (uint8_t) (sampleAsIntAgain >> 16) & 0xFF; | |
bytes[1] = (uint8_t) (sampleAsIntAgain >> 8) & 0xFF; | |
bytes[2] = (uint8_t) sampleAsIntAgain & 0xFF; | |
fileData.push_back (bytes[0]); | |
fileData.push_back (bytes[1]); | |
fileData.push_back (bytes[2]); | |
} | |
else if (bitDepth == 32) | |
{ | |
// write samples as signed integers (no implementation yet for floating point, but looking at WAV implementation should help) | |
int32_t sampleAsInt = (int32_t) (samples[channel][i] * std::numeric_limits<int32_t>::max()); | |
addInt32ToFileData (fileData, sampleAsInt, Endianness::BigEndian); | |
} | |
else | |
{ | |
assert (false && "Trying to write a file with unsupported bit depth"); | |
return false; | |
} | |
} | |
} | |
// ----------------------------------------------------------- | |
// iXML CHUNK | |
if (iXMLChunkSize > 0) | |
{ | |
addStringToFileData (fileData, "iXML"); | |
addInt32ToFileData (fileData, iXMLChunkSize); | |
addStringToFileData (fileData, iXMLChunk); | |
} | |
// check that the various sizes we put in the metadata are correct | |
if (fileSizeInBytes != static_cast<int32_t> (fileData.size() - 8) || soundDataChunkSize != getNumSamplesPerChannel() * numBytesPerFrame + 8) | |
{ | |
reportError ("ERROR: couldn't save file to " + filePath); | |
return false; | |
} | |
// try to write the file | |
return writeDataToFile (fileData, filePath); | |
} | |
//============================================================= | |
template <class T> | |
bool AudioFile<T>::writeDataToFile (std::vector<uint8_t>& fileData, std::string filePath) | |
{ | |
std::ofstream outputFile (filePath, std::ios::binary); | |
if (outputFile.is_open()) | |
{ | |
for (size_t i = 0; i < fileData.size(); i++) | |
{ | |
char value = (char) fileData[i]; | |
outputFile.write (&value, sizeof (char)); | |
} | |
outputFile.close(); | |
return true; | |
} | |
return false; | |
} | |
//============================================================= | |
template <class T> | |
void AudioFile<T>::addStringToFileData (std::vector<uint8_t>& fileData, std::string s) | |
{ | |
for (size_t i = 0; i < s.length();i++) | |
fileData.push_back ((uint8_t) s[i]); | |
} | |
//============================================================= | |
template <class T> | |
void AudioFile<T>::addInt32ToFileData (std::vector<uint8_t>& fileData, int32_t i, Endianness endianness) | |
{ | |
uint8_t bytes[4]; | |
if (endianness == Endianness::LittleEndian) | |
{ | |
bytes[3] = (i >> 24) & 0xFF; | |
bytes[2] = (i >> 16) & 0xFF; | |
bytes[1] = (i >> 8) & 0xFF; | |
bytes[0] = i & 0xFF; | |
} | |
else | |
{ | |
bytes[0] = (i >> 24) & 0xFF; | |
bytes[1] = (i >> 16) & 0xFF; | |
bytes[2] = (i >> 8) & 0xFF; | |
bytes[3] = i & 0xFF; | |
} | |
for (int i = 0; i < 4; i++) | |
fileData.push_back (bytes[i]); | |
} | |
//============================================================= | |
template <class T> | |
void AudioFile<T>::addInt16ToFileData (std::vector<uint8_t>& fileData, int16_t i, Endianness endianness) | |
{ | |
uint8_t bytes[2]; | |
if (endianness == Endianness::LittleEndian) | |
{ | |
bytes[1] = (i >> 8) & 0xFF; | |
bytes[0] = i & 0xFF; | |
} | |
else | |
{ | |
bytes[0] = (i >> 8) & 0xFF; | |
bytes[1] = i & 0xFF; | |
} | |
fileData.push_back (bytes[0]); | |
fileData.push_back (bytes[1]); | |
} | |
//============================================================= | |
template <class T> | |
void AudioFile<T>::clearAudioBuffer() | |
{ | |
for (size_t i = 0; i < samples.size();i++) | |
{ | |
samples[i].clear(); | |
} | |
samples.clear(); | |
} | |
//============================================================= | |
template <class T> | |
AudioFileFormat AudioFile<T>::determineAudioFileFormat (std::vector<uint8_t>& fileData) | |
{ | |
std::string header (fileData.begin(), fileData.begin() + 4); | |
if (header == "RIFF") | |
return AudioFileFormat::Wave; | |
else if (header == "FORM") | |
return AudioFileFormat::Aiff; | |
else | |
return AudioFileFormat::Error; | |
} | |
//============================================================= | |
template <class T> | |
int32_t AudioFile<T>::fourBytesToInt (std::vector<uint8_t>& source, int startIndex, Endianness endianness) | |
{ | |
int32_t result; | |
if (endianness == Endianness::LittleEndian) | |
result = (source[startIndex + 3] << 24) | (source[startIndex + 2] << 16) | (source[startIndex + 1] << 8) | source[startIndex]; | |
else | |
result = (source[startIndex] << 24) | (source[startIndex + 1] << 16) | (source[startIndex + 2] << 8) | source[startIndex + 3]; | |
return result; | |
} | |
//============================================================= | |
template <class T> | |
int16_t AudioFile<T>::twoBytesToInt (std::vector<uint8_t>& source, int startIndex, Endianness endianness) | |
{ | |
int16_t result; | |
if (endianness == Endianness::LittleEndian) | |
result = (source[startIndex + 1] << 8) | source[startIndex]; | |
else | |
result = (source[startIndex] << 8) | source[startIndex + 1]; | |
return result; | |
} | |
//============================================================= | |
template <class T> | |
int AudioFile<T>::getIndexOfString (std::vector<uint8_t>& source, std::string stringToSearchFor) | |
{ | |
int index = -1; | |
int stringLength = (int)stringToSearchFor.length(); | |
for (size_t i = 0; i < source.size() - stringLength;i++) | |
{ | |
std::string section (source.begin() + i, source.begin() + i + stringLength); | |
if (section == stringToSearchFor) | |
{ | |
index = static_cast<int> (i); | |
break; | |
} | |
} | |
return index; | |
} | |
//============================================================= | |
template <class T> | |
int AudioFile<T>::getIndexOfChunk (std::vector<uint8_t>& source, const std::string& chunkHeaderID, int startIndex, Endianness endianness) | |
{ | |
constexpr int dataLen = 4; | |
if (chunkHeaderID.size() != dataLen) | |
{ | |
assert (false && "Invalid chunk header ID string"); | |
return -1; | |
} | |
int i = startIndex; | |
while (i < source.size() - dataLen) | |
{ | |
if (memcmp (&source[i], chunkHeaderID.data(), dataLen) == 0) | |
{ | |
return i; | |
} | |
i += dataLen; | |
auto chunkSize = fourBytesToInt (source, i, endianness); | |
i += (dataLen + chunkSize); | |
} | |
return -1; | |
} | |
//============================================================= | |
template <class T> | |
T AudioFile<T>::sixteenBitIntToSample (int16_t sample) | |
{ | |
return static_cast<T> (sample) / static_cast<T> (32768.); | |
} | |
//============================================================= | |
template <class T> | |
int16_t AudioFile<T>::sampleToSixteenBitInt (T sample) | |
{ | |
sample = clamp (sample, -1., 1.); | |
return static_cast<int16_t> (sample * 32767.); | |
} | |
//============================================================= | |
template <class T> | |
uint8_t AudioFile<T>::sampleToSingleByte (T sample) | |
{ | |
sample = clamp (sample, -1., 1.); | |
sample = (sample + 1.) / 2.; | |
return static_cast<uint8_t> (sample * 255.); | |
} | |
//============================================================= | |
template <class T> | |
T AudioFile<T>::singleByteToSample (uint8_t sample) | |
{ | |
return static_cast<T> (sample - 128) / static_cast<T> (128.); | |
} | |
//============================================================= | |
template <class T> | |
T AudioFile<T>::clamp (T value, T minValue, T maxValue) | |
{ | |
value = std::min (value, maxValue); | |
value = std::max (value, minValue); | |
return value; | |
} | |
//============================================================= | |
template <class T> | |
void AudioFile<T>::reportError (std::string errorMessage) | |
{ | |
if (logErrorsToConsole) | |
std::cout << errorMessage << std::endl; | |
} | |
__pragma(warning (pop)) | |
_Pragma("GCC diagnostic pop") | |