From 25bb2ef4add2f5ecc84a9e4a0da435a2379ee2f3 Mon Sep 17 00:00:00 2001 From: pbethge Date: Tue, 7 Sep 2021 11:58:45 +0200 Subject: [PATCH 1/8] use the correct number of input channels --- src/ofApp.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ofApp.cpp b/src/ofApp.cpp index b076cc4..8629f08 100644 --- a/src/ofApp.cpp +++ b/src/ofApp.cpp @@ -68,7 +68,7 @@ void ofApp::setup() { settings.setInListener(this); settings.sampleRate = sampleRate; settings.numOutputChannels = 0; - settings.numInputChannels = device.inputChannels; + settings.numInputChannels = inputChannel+1; settings.bufferSize = bufferSize; if(!soundStream.setup(settings)) { ofLogError(PACKAGE) << "audio input device " << inputDevice << " setup failed"; -- GitLab From 81b97b47b52c7676bc88e507e87c2a46937e0606 Mon Sep 17 00:00:00 2001 From: pbethge Date: Tue, 7 Sep 2021 11:59:55 +0200 Subject: [PATCH 2/8] fix: actually sample from the mono buffer --- src/ofApp.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ofApp.cpp b/src/ofApp.cpp index 8629f08..ad4e6e3 100644 --- a/src/ofApp.cpp +++ b/src/ofApp.cpp @@ -294,7 +294,7 @@ void ofApp::audioIn(ofSoundBuffer & input) { // if recording: save the incoming buffer to the recording // then trigger the neural network if(recording) { - sampleBuffers.push(input.getBuffer()); + sampleBuffers.push(monoBuffer); recordingCounter++; if(recordingCounter >= numBuffers) { recording = false; -- GitLab From ec397ec2afda73e2fe7309e2a207909232dc2fa9 Mon Sep 17 00:00:00 2001 From: pbethge Date: Tue, 7 Sep 2021 12:01:06 +0200 Subject: [PATCH 3/8] some logging --- src/ofApp.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/ofApp.cpp b/src/ofApp.cpp index ad4e6e3..2406d2b 100644 --- a/src/ofApp.cpp +++ b/src/ofApp.cpp @@ -36,6 +36,9 @@ void ofApp::setup() { numBuffers = sampleRate * inputSeconds / bufferSize; previousBuffers.setMaxLen(numPreviousBuffers); sampleBuffers.setMaxLen(numBuffers); + ofLogVerbose(PACKAGE) << "Looking " << std::to_string(numPreviousBuffers) << " into the past" + << " and recording a total of " << std::to_string(numBuffers) << " buffers" + << " each with " << std::to_string(bufferSize) << " samples"; // apply settings to soundStream ofSoundStreamSettings settings; -- GitLab From 13e034bb1b230b1125048ca6a6696870593f274d Mon Sep 17 00:00:00 2001 From: pbethge Date: Tue, 7 Sep 2021 12:02:11 +0200 Subject: [PATCH 4/8] add some comments on audio handling --- src/ofApp.cpp | 4 ++-- src/ofApp.h | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/ofApp.cpp b/src/ofApp.cpp index 2406d2b..00bba77 100644 --- a/src/ofApp.cpp +++ b/src/ofApp.cpp @@ -284,8 +284,8 @@ void ofApp::audioIn(ofSoundBuffer & input) { ofLogVerbose(PACKAGE) << "Start recording..."; // copy previous buffers to the recording sampleBuffers = previousBuffers; - sampleBuffers.setMaxLen(numBuffers); // just to make sure (not tested) - recordingCounter = sampleBuffers.size(); + sampleBuffers.setMaxLen(numBuffers); // hacky: last step overwrites maxLen + recordingCounter = sampleBuffers.size(); // we already have the previous buffer // trigger recording in the next function call recording = true; recordingStarted = true; diff --git a/src/ofApp.h b/src/ofApp.h index be94d63..943aebc 100644 --- a/src/ofApp.h +++ b/src/ofApp.h @@ -67,9 +67,9 @@ class ofApp : public ofBaseApp { // audio ofSoundStream soundStream; - int inputDevice = -1; - int inputChannel = 0; - std::vector monoBuffer; //< mono inputChannel stream buffer + int inputDevice = -1; // -1 means search for default device + int inputChannel = 0; // 0 means mono, 1 means stereo + std::vector monoBuffer; //< mono inputChannel stream buffer bool listening = true; // neural network input parameters -- GitLab From f414d8d3c96dce5286bbaff66989bb4b9964173f Mon Sep 17 00:00:00 2001 From: pbethge Date: Tue, 7 Sep 2021 12:35:29 +0200 Subject: [PATCH 5/8] convert muti to mono channel --- src/ofApp.cpp | 22 +++++++++++----------- src/ofApp.h | 7 ++++--- 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/src/ofApp.cpp b/src/ofApp.cpp index 00bba77..5a06c65 100644 --- a/src/ofApp.cpp +++ b/src/ofApp.cpp @@ -64,15 +64,17 @@ void ofApp::setup() { ofLogWarning(PACKAGE) << "audio input device does not have enough input channels"; inputChannel = 0; } - ofLogNotice(PACKAGE) << "audio input channel: " << inputChannel+1; + numInputChannels = inputChannel + 1; + recordedSamplesPerBuffer = bufferSize * numInputChannels; + ofLogNotice(PACKAGE) << "audio input channel: " << numInputChannels; ofLogNotice(PACKAGE) << "audio input samplerate: " << sampleRate; - ofLogNotice(PACKAGE) << "audio input buffer size: " << bufferSize; + ofLogNotice(PACKAGE) << "audio input buffer size: " << recordedSamplesPerBuffer; settings.setInDevice(device); settings.setInListener(this); settings.sampleRate = sampleRate; settings.numOutputChannels = 0; - settings.numInputChannels = inputChannel+1; - settings.bufferSize = bufferSize; + settings.numInputChannels = numInputChannels; + settings.bufferSize = bufferSize*numInputChannels; if(!soundStream.setup(settings)) { ofLogError(PACKAGE) << "audio input device " << inputDevice << " setup failed"; ofLogError(PACKAGE) << "perhaps try a different device or samplerate?"; @@ -254,16 +256,14 @@ void ofApp::exit() { //-------------------------------------------------------------- void ofApp::audioIn(ofSoundBuffer & input) { - // this shouldn't happen... but we don't let it blow up - if(input.getNumFrames() != monoBuffer.size()) { - ofLogWarning(PACKAGE) << "resizing mono input buffer to " << input.getNumFrames(); - monoBuffer.resize(input.getNumFrames()); - } - // copy desired channel out of interleaved stream into mono buffer, // assume input stream has enough channels... for(std::size_t i = 0; i < input.getNumFrames(); i++) { - monoBuffer[i] = input[i + inputChannel]; + float sum = 0; + for(std::size_t j = 0; j < numInputChannels; j++) { + sum = input[i*numInputChannels + j]; + } + monoBuffer[i] = sum / numInputChannels; } // calculate the root mean square which is a rough way to calculate volume diff --git a/src/ofApp.h b/src/ofApp.h index 943aebc..9e7ddbd 100644 --- a/src/ofApp.h +++ b/src/ofApp.h @@ -69,7 +69,6 @@ class ofApp : public ofBaseApp { ofSoundStream soundStream; int inputDevice = -1; // -1 means search for default device int inputChannel = 0; // 0 means mono, 1 means stereo - std::vector monoBuffer; //< mono inputChannel stream buffer bool listening = true; // neural network input parameters @@ -80,8 +79,9 @@ class ofApp : public ofBaseApp { std::size_t bufferSize = 1024; std::size_t sampleRate = 48000; std::size_t downsamplingFactor = 3; + std::size_t recordedSamplesPerBuffer; + std::size_t numInputChannels; - static const std::size_t modelSampleRate; //< sample rate expected by model // since volume detection has some latency we keep a history of buffers AudioBufferFifo previousBuffers; @@ -89,6 +89,7 @@ class ofApp : public ofBaseApp { // sampleBuffers acts as a buffer for recording (could be fused) AudioBufferFifo sampleBuffers; std::size_t numBuffers; + SimpleAudioBuffer monoBuffer; //< mono inputChannel stream buffer // volume float curVol = 0.0; @@ -104,9 +105,9 @@ class ofApp : public ofBaseApp { AudioClassifier model; cppflow::tensor output; std::size_t inputSeconds = 5; - const std::size_t inputSamplingRate = 16000; // AI was trained on 16kHz std::size_t inputSize; float minConfidence = 0.75; + static const std::size_t modelSampleRate; //< sample rate expected by model // neural network control logic std::size_t recordingCounter = 0; -- GitLab From a577fba0842cb3534dced8c188c8084a3b0e3cb3 Mon Sep 17 00:00:00 2001 From: pbethge Date: Tue, 7 Sep 2021 14:27:22 +0200 Subject: [PATCH 6/8] cosmetic --- src/ofApp.cpp | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/src/ofApp.cpp b/src/ofApp.cpp index 5a06c65..14cced2 100644 --- a/src/ofApp.cpp +++ b/src/ofApp.cpp @@ -64,9 +64,9 @@ void ofApp::setup() { ofLogWarning(PACKAGE) << "audio input device does not have enough input channels"; inputChannel = 0; } - numInputChannels = inputChannel + 1; + numInputChannels = inputChannel+1; recordedSamplesPerBuffer = bufferSize * numInputChannels; - ofLogNotice(PACKAGE) << "audio input channel: " << numInputChannels; + ofLogNotice(PACKAGE) << "audio input channel: " << inputChannel; ofLogNotice(PACKAGE) << "audio input samplerate: " << sampleRate; ofLogNotice(PACKAGE) << "audio input buffer size: " << recordedSamplesPerBuffer; settings.setInDevice(device); @@ -74,7 +74,7 @@ void ofApp::setup() { settings.sampleRate = sampleRate; settings.numOutputChannels = 0; settings.numInputChannels = numInputChannels; - settings.bufferSize = bufferSize*numInputChannels; + settings.bufferSize = recordedSamplesPerBuffer; if(!soundStream.setup(settings)) { ofLogError(PACKAGE) << "audio input device " << inputDevice << " setup failed"; ofLogError(PACKAGE) << "perhaps try a different device or samplerate?"; @@ -259,11 +259,7 @@ void ofApp::audioIn(ofSoundBuffer & input) { // copy desired channel out of interleaved stream into mono buffer, // assume input stream has enough channels... for(std::size_t i = 0; i < input.getNumFrames(); i++) { - float sum = 0; - for(std::size_t j = 0; j < numInputChannels; j++) { - sum = input[i*numInputChannels + j]; - } - monoBuffer[i] = sum / numInputChannels; + monoBuffer[i] = input[i*numInputChannels+inputChannel]; } // calculate the root mean square which is a rough way to calculate volume -- GitLab From 98174ef5aac516d5291cc967ceaa721e3808e6a4 Mon Sep 17 00:00:00 2001 From: Dan Wilcox Date: Tue, 7 Sep 2021 17:10:45 +0200 Subject: [PATCH 7/8] tiny format fix --- src/WavFileWriterBeta.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/WavFileWriterBeta.h b/src/WavFileWriterBeta.h index a6b6d3f..d48f43d 100644 --- a/src/WavFileWriterBeta.h +++ b/src/WavFileWriterBeta.h @@ -19,7 +19,7 @@ class WavFileWriterBeta { std::string filename; - /// open file for a fixed-length number of samples + /// open file for a fixed-length number of samples WavFileWriterBeta(std::string filename, unsigned short numChannels, unsigned long sampleRate, unsigned short bytesPerSample, -- GitLab From fb135c0ab96eff6af0407c31dea705a6da484355 Mon Sep 17 00:00:00 2001 From: Dan Wilcox Date: Tue, 7 Sep 2021 18:54:14 +0200 Subject: [PATCH 8/8] fixed buffer size issue when using input chan > 1, removed redundant member vars --- src/ofApp.cpp | 17 ++++++++++------- src/ofApp.h | 13 +++++-------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/src/ofApp.cpp b/src/ofApp.cpp index 14cced2..9b0d91b 100644 --- a/src/ofApp.cpp +++ b/src/ofApp.cpp @@ -64,17 +64,15 @@ void ofApp::setup() { ofLogWarning(PACKAGE) << "audio input device does not have enough input channels"; inputChannel = 0; } - numInputChannels = inputChannel+1; - recordedSamplesPerBuffer = bufferSize * numInputChannels; ofLogNotice(PACKAGE) << "audio input channel: " << inputChannel; ofLogNotice(PACKAGE) << "audio input samplerate: " << sampleRate; - ofLogNotice(PACKAGE) << "audio input buffer size: " << recordedSamplesPerBuffer; + ofLogNotice(PACKAGE) << "audio input buffer size: " << bufferSize; settings.setInDevice(device); settings.setInListener(this); settings.sampleRate = sampleRate; settings.numOutputChannels = 0; - settings.numInputChannels = numInputChannels; - settings.bufferSize = recordedSamplesPerBuffer; + settings.numInputChannels = inputChannel + 1; + settings.bufferSize = bufferSize * (inputChannel + 1); if(!soundStream.setup(settings)) { ofLogError(PACKAGE) << "audio input device " << inputDevice << " setup failed"; ofLogError(PACKAGE) << "perhaps try a different device or samplerate?"; @@ -84,6 +82,9 @@ void ofApp::setup() { if(!listening) { soundStream.stop(); } + if(soundStream.getSampleRate() == 44100) { + ofLogWarning(PACKAGE) << "treating sample rate of 44100 as 48000, may or may not affect detection"; + } // display volHistory.assign(400, 0.0); @@ -255,11 +256,13 @@ void ofApp::exit() { //-------------------------------------------------------------- void ofApp::audioIn(ofSoundBuffer & input) { + // beh, ofSoundBuffer::getNumFrames() actually returns the buffer size? + std::size_t numFrames = input.getNumFrames() / input.getNumChannels(); // copy desired channel out of interleaved stream into mono buffer, // assume input stream has enough channels... - for(std::size_t i = 0; i < input.getNumFrames(); i++) { - monoBuffer[i] = input[i*numInputChannels+inputChannel]; + for(std::size_t i = 0; i < numFrames; i++) { + monoBuffer[i] = input[(i*input.getNumChannels())+inputChannel]; } // calculate the root mean square which is a rough way to calculate volume diff --git a/src/ofApp.h b/src/ofApp.h index 9e7ddbd..66075f7 100644 --- a/src/ofApp.h +++ b/src/ofApp.h @@ -67,8 +67,8 @@ class ofApp : public ofBaseApp { // audio ofSoundStream soundStream; - int inputDevice = -1; // -1 means search for default device - int inputChannel = 0; // 0 means mono, 1 means stereo + int inputDevice = -1; // -1 means search for default device + int inputChannel = 0; // 0 - chan 1 (left), 1 - chan 2 (right), 2 - chan 3, etc bool listening = true; // neural network input parameters @@ -76,20 +76,17 @@ class ofApp : public ofBaseApp { // we want to keep the buffersize a multiple of the downsampling factor // downsamplingFactor = sampleRate / modelSampleRate // downsampling is required for microphones that do not have 16kHz sampling - std::size_t bufferSize = 1024; + std::size_t bufferSize = 1024; //< in this case, number of sample frames std::size_t sampleRate = 48000; std::size_t downsamplingFactor = 3; - std::size_t recordedSamplesPerBuffer; - std::size_t numInputChannels; - - // since volume detection has some latency we keep a history of buffers + // since volume detection has some latency,d we keep a history of buffers AudioBufferFifo previousBuffers; std::size_t numPreviousBuffers = 10; // how many buffers to save before trigger happens // sampleBuffers acts as a buffer for recording (could be fused) AudioBufferFifo sampleBuffers; std::size_t numBuffers; - SimpleAudioBuffer monoBuffer; //< mono inputChannel stream buffer + SimpleAudioBuffer monoBuffer; //< mono inputChannel stream buffer // volume float curVol = 0.0; -- GitLab