utils.h 3.5 KB
Newer Older
Paul Bethge's avatar
init  
Paul Bethge committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
/*
 * ofxTensorFlow2
 *
 * Copyright (c) 2021 ZKM | Hertz-Lab
 * Paul Bethge <bethge@zkm.de>
 * Dan Wilcox <dan.wilcox@zkm.de>
 *
 * BSD Simplified License.
 * For information on usage and redistribution, and for a DISCLAIMER OF ALL
 * WARRANTIES, see the file, "LICENSE.txt," in this distribution.
 *
 * This code has been developed at ZKM | Hertz-Lab as part of „The Intelligent 
 * Museum“ generously funded by the German Federal Cultural Foundation.
 */

#pragma once

#include "ofxTensorFlow2.h"

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <deque>
#include <iostream>
#include <iterator>
#include <queue>
#include <string>
#include <vector>


// a simple Fifo with adjustable max length: once the limit is reached, pushing
// a new element discards the oldest one(s) so the limit is never exceeded
// note: only push(const T&) enforces the limit, std::queue::emplace() is not
// wrapped here and bypasses it
template <typename T, typename Container=std::deque<T>>
class FixedFifo : public std::queue<T, Container> {

	public:
		// maxLength: maximum number of elements kept in the Fifo
		FixedFifo(const std::size_t maxLength=10) : maxLen(maxLength) {}

		// append a copy of value, dropping the oldest elements first if the
		// Fifo is full; with a max length of 0 the value is discarded
		void push(const T& value) {
			if(maxLen == 0) {
				// nothing may be stored, never pop from an empty container
				trim(0);
				return;
			}
			// make room for exactly one new element, this also recovers when
			// the Fifo currently holds more than maxLen elements
			trim(maxLen - 1);
			std::queue<T, Container>::push(value);
		}

		// change the max length, the oldest elements are dropped right away
		// if the Fifo currently holds more than maxLength elements
		void setMaxLen(const std::size_t maxLength) {
			maxLen = maxLength;
			trim(maxLen);
		}

	private:
		// pop the oldest elements until at most limit elements remain
		void trim(const std::size_t limit) {
			while(this->size() > limit) {
				this->pop();
			}
		}

		std::size_t maxLen;
};

// one block of audio samples stored as floats
using SimpleAudioBuffer = std::vector<float>;
// length-limited queue of audio blocks
using AudioBufferFifo = FixedFifo<SimpleAudioBuffer>;

// custom ofxTF2::Model implementation to handle audio sample conversion, etc
class AudioClassifier : public ofxTF2::Model {

	public:

		void classify(AudioBufferFifo & bufferFifo, const std::size_t downsamplingFactor,
					  int & argMax, float & prob) {

			SimpleAudioBuffer sample;

			// downsample and empty the incoming Fifo
			downsample(bufferFifo, sample, downsamplingFactor);

Paul Bethge's avatar
Paul Bethge committed
63
64
65
66
			ofLog() << std::to_string(sample[0]);
			normalize(sample);
			ofLog() << std::to_string(sample[0]);

Paul Bethge's avatar
init  
Paul Bethge committed
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
			// convert recorded sample to a batch of size one
			ofxTF2::shapeVector tensorShape {1, static_cast<ofxTF2::shape_t>(sample.size()), 1};
			auto input = ofxTF2::vectorToTensor(sample, tensorShape);

			// inference
			auto output = runModel(input);

			// convert the output to std::vector
			std::vector<float> outputVector;
			ofxTF2::tensorToVector(output, outputVector);

			// get element with highest probabilty
			auto maxIt = std::max_element(outputVector.begin(), outputVector.end());
			argMax = std::distance(outputVector.begin(), maxIt);
			prob = *maxIt;
		}

	private:

Paul Bethge's avatar
Paul Bethge committed
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
		// inplace normalization
		void normalize(SimpleAudioBuffer & sample) {
			// find absolute maximum value
			float max = 0.0;
			for (const auto& s : sample) {
				if (abs(s) > max) {
					max = abs(s);
				}
			}
			if (max == 0.0)
				return;
			for (auto&& s : sample) {
				s /= max;
			}
		}


Paul Bethge's avatar
init  
Paul Bethge committed
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
		// downsample by an integer
		void downsample(AudioBufferFifo & bufferFifo, SimpleAudioBuffer & sample,
						const std::size_t downsamplingFactor) {

			// get the size of an element
			const std::size_t bufferSize = bufferFifo.front().size();
			const std::size_t bufferSizeDownsampled = bufferSize / downsamplingFactor;

			// allocate memory if neccessary
			sample.resize(bufferFifo.size() * bufferSizeDownsampled);

			// pop elements from the bufferFifo, downsample and save to flat buffer
			std::size_t i = 0;
			while(!bufferFifo.empty()) {

				// get a buffer from fifo
				const SimpleAudioBuffer & buffer = bufferFifo.front();

				// downsample by integer
				for(std::size_t j = 0; j < bufferSizeDownsampled; j++) {
					std::size_t offset = j * downsamplingFactor;
					float sum = 0.0;
					for(std::size_t k = 0; k < downsamplingFactor; k++) {
						sum += buffer[offset+k];
					}
					sample[i*bufferSizeDownsampled + j] = sum / downsamplingFactor;
				}
				// remove buffer from fifo
				bufferFifo.pop();
				i++;
			}
		}
};