Commit a9f638ee authored by Dan Wilcox
Browse files

added input channel handling, added --inputchan and --samplerate options

parent d229200f
......@@ -28,7 +28,7 @@
#include "WavFileWriterBeta.h"
#endif
// a simple Fifo with adjustable max length
/// a simple Fifo with adjustable max length
template <typename T, typename Container=std::deque<T>>
class FixedFifo : public std::queue<T, Container> {
......@@ -55,7 +55,7 @@ class FixedFifo : public std::queue<T, Container> {
typedef std::vector<float> SimpleAudioBuffer;
typedef FixedFifo<SimpleAudioBuffer> AudioBufferFifo;
// custom ofxTF2::Model implementation to handle audio sample conversion, etc
/// custom ofxTF2::Model implementation to handle audio sample conversion, etc
class AudioClassifier : public ofxTF2::Model {
public:
......
......@@ -26,6 +26,8 @@ bool Commandline::parse(int argc, char **argv) {
bool list = false;
int inputNum = -1;
std::string inputName = "";
int inputChannel = 0;
int sampleRate = 0;
bool verbose = false;
bool version = false;
......@@ -39,6 +41,9 @@ bool Commandline::parse(int argc, char **argv) {
parser.add_flag( "-l,--list", list, "list audio input devices and exit");
parser.add_option("--inputdev", inputNum, "audio input device number");
parser.add_option("--inputname", inputName, "audio input device name, can do partial match, ex. \"Microphone\"");
parser.add_option("--inputchan", inputChannel, "audio input device channel, default 1");
// input samplerate: 44100 is accepted as a special case, otherwise the rate
// must be an integer multiple of the model samplerate
parser.add_option("-r,--samplerate", sampleRate, "audio input device samplerate, can be 44100 or a multiple of " +
ofToString(ofApp::modelSampleRate) + ", default " + ofToString(app->sampleRate));
parser.add_flag( "-v,--verbose", verbose, "verbose printing");
parser.add_flag( "--version", version, "print version and exit");
......@@ -114,6 +119,31 @@ bool Commandline::parse(int argc, char **argv) {
}
}
// set audio input channel
if(inputChannel > 0) {
app->inputChannel = inputChannel-1; // 1-index to 0-index
}
// set audio input rate, a value <= 0 keeps the app defaults
if(sampleRate > 0) {
    if(sampleRate == 44100) {
        // treat as 48k default, pitch change is minimal enough to not affect detection
        // and we don't handle non-integer downsampling factors
        app->sampleRate = sampleRate;
        app->downsamplingFactor = 3;
    }
    else if(sampleRate % ofApp::modelSampleRate != 0) {
        // reject: leave the app's sampleRate & downsamplingFactor untouched,
        // otherwise the integer division below would truncate (or yield 0
        // for rates lower than the model rate)
        ofLogWarning(PACKAGE) << "ignoring input sample rate which is not a multiple of "
            << ofApp::modelSampleRate << ": " << sampleRate;
    }
    else {
        // exact integer multiple of the model rate
        app->sampleRate = sampleRate;
        app->downsamplingFactor = sampleRate / ofApp::modelSampleRate;
    }
}
// parse sender host strings
// split string by last : to get address & port pair,
// handle bracketed IPv6 hostnames: [::1]:8081
......
......@@ -15,6 +15,8 @@
#include "ofApp.h"
const std::size_t ofApp::modelSampleRate = 16000;
//--------------------------------------------------------------
void ofApp::setup() {
ofSetFrameRate(60);
......@@ -31,7 +33,7 @@ void ofApp::setup() {
}
// recording settings
numBuffers = samplingRate * inputSeconds / bufferSize;
numBuffers = sampleRate * inputSeconds / bufferSize;
previousBuffers.setMaxLen(numPreviousBuffers);
sampleBuffers.setMaxLen(numBuffers);
......@@ -53,17 +55,27 @@ void ofApp::setup() {
}
}
auto devices = soundStream.getDeviceList();
ofLogNotice(PACKAGE) << "audio input device: " << inputDevice << " " << devices[inputDevice].name;
settings.setInDevice(devices[inputDevice]);
ofSoundDevice &device = devices[inputDevice];
ofLogNotice(PACKAGE) << "audio input device: " << inputDevice << " " << device.name;
if(inputChannel >= device.inputChannels) {
ofLogWarning(PACKAGE) << "audio input device does not have enough input channels";
inputChannel = 0;
}
ofLogNotice(PACKAGE) << "audio input channel: " << inputChannel+1;
ofLogNotice(PACKAGE) << "audio input samplerate: " << sampleRate;
ofLogNotice(PACKAGE) << "audio input buffer size: " << bufferSize;
settings.setInDevice(device);
settings.setInListener(this);
settings.sampleRate = samplingRate;
settings.sampleRate = sampleRate;
settings.numOutputChannels = 0;
settings.numInputChannels = 1;
settings.numInputChannels = device.inputChannels;
settings.bufferSize = bufferSize;
if(!soundStream.setup(settings)) {
ofLogError(PACKAGE) << "audio input device " << inputDevice << " setup failed";
ofLogError(PACKAGE) << "perhaps try a different device or samplerate?";
std::exit(EXIT_FAILURE);
}
monoBuffer.resize(bufferSize);
// display
volHistory.assign(400, 0.0);
......@@ -219,15 +231,27 @@ void ofApp::exit() {
//--------------------------------------------------------------
void ofApp::audioIn(ofSoundBuffer & input) {
// this shouldn't happen... but we don't let it blow up
if(input.getNumFrames() != monoBuffer.size()) {
ofLogWarning(PACKAGE) << "resizing mono input buffer to " << input.getNumFrames();
monoBuffer.resize(input.getNumFrames());
}
// copy desired channel out of interleaved stream into mono buffer:
// the sample for frame i of channel c is at index (i * numChannels) + c,
// assume input stream has enough channels...
for(std::size_t i = 0; i < input.getNumFrames(); i++) {
    monoBuffer[i] = input[i * input.getNumChannels() + inputChannel];
}
// calculate the root mean square which is a rough way to calculate volume
float sumVol = 0.0;
for(size_t i = 0; i < input.getNumFrames(); i++) {
float vol = input[i];
for(std::size_t i = 0; i < monoBuffer.size(); i++) {
float vol = monoBuffer[i];
sumVol += vol * vol;
}
curVol = sumVol / (float)input.getNumFrames();
curVol = sumVol / (float)monoBuffer.size();
curVol = sqrt(curVol);
// smoothen the volume
// smooth the volume
smoothedVol *= 0.5;
smoothedVol += 0.5 * curVol;
......@@ -260,7 +284,7 @@ void ofApp::audioIn(ofSoundBuffer & input) {
}
// if not recording: save the incoming buffer to the previous buffer fifo
else {
previousBuffers.push(input.getBuffer());
previousBuffers.push(monoBuffer);
}
}
}
......
......@@ -53,16 +53,19 @@ class ofApp : public ofBaseApp {
// audio
ofSoundStream soundStream;
int inputDevice = -1;
int inputChannel = 0;
std::vector<float> monoBuffer; //< mono inputChannel stream buffer
// neural network input parameters
// for ease of use:
// we want to keep the buffersize a multiple of the downsampling factor
// downsamplingFactor = micSamplingRate / neuralNetworkInputSamplingRate
std::size_t bufferSize = 1024;
std::size_t samplingRate = 48000;
// downsamplingFactor must be an integer of samplingRate / inputSamplingeRate
// downsamplingFactor = sampleRate / modelSampleRate
// downsampling is required for microphones that do not have 16kHz sampling
std::size_t bufferSize = 1024;
std::size_t sampleRate = 48000;
std::size_t downsamplingFactor = 3;
static const std::size_t modelSampleRate; //< sample rate expected by model
// since volume detection has some latency we keep a history of buffers
AudioBufferFifo previousBuffers;
......@@ -80,7 +83,6 @@ class ofApp : public ofBaseApp {
// display
std::vector<float> volHistory;
std::string displayLabel = " ";
float minConfidence = 0.75;
// neural network
AudioClassifier model;
......@@ -88,6 +90,7 @@ class ofApp : public ofBaseApp {
std::size_t inputSeconds = 5;
const std::size_t inputSamplingRate = 16000; // AI was trained on 16kHz
std::size_t inputSize;
float minConfidence = 0.75;
// neural network control logic
std::size_t recordingCounter = 0;
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment