Commit ce6d957b authored by Paul Bethge's avatar Paul Bethge

init

parent 6d5c3e42
// TODO: please fill me
model/
# Python
__pycache__
......
# Attempt to load a config.make file.
# If none is found, project defaults in config.project.make will be used.
ifneq ($(wildcard config.make),)
include config.make
endif
# make sure the OF_ROOT location is defined
ifndef OF_ROOT
OF_ROOT=$(realpath ../../..)
endif
# call the project makefile!
include $(OF_ROOT)/libs/openFrameworksCompiled/project/makefileCommon/compile.project.mk
# ofxTensorFlow2
include $(OF_ROOT)/addons/ofxTensorFlow2/addon_targets.mk
# Language Identification within OpenFrameworks
Identification of chosen languages from 5s long audio snippets.
This code base has been developed by [ZKM | Hertz-Lab](https://zkm.de/en/about-the-zkm/organization/hertz-lab) as part of the project [»The Intelligent Museum«](#the-intelligent-museum).
Please raise issues, ask questions, throw in ideas or submit code, as this repository is intended to be an open platform to collaboratively improve language identification.
##### Target Platform
Tested under Ubuntu 18.04 using Python 3.7 and TensorFlow 2.3.
##### Features
* identifies the spoken language from 5 second long audio snippets
* currently distinguishes English, French, German and Russian (see `src/labels.h`)
##### Tested Platforms
* MacBook Pro 2017, macOS 10.15 & openFrameworks 0.11
## Installation
Download and install [Anaconda](https://www.anaconda.com/products/individual). Afterwards, create and activate a virtual environment:
```
$ conda create -n "name" python=3.7
$ conda activate "name"
$ pip install -r requirements.txt
```
##### Structure
* src/: contains the C++ Code that interfaces with OpenFrameworks
* bin/: contains the SavedModel trained with TensorFlow2
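For orientation, a sketch of the expected layout (folder names illustrative; openFrameworks resolves `model.load("model")` inside `bin/data/`):
```
project/
├── src/           # ofApp.h/.cpp, labels.h, utils.h
├── bin/
│   └── data/
│       └── model/ # TensorFlow2 SavedModel
├── Makefile
└── config.make
```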
### Installation
As with all openFrameworks examples, put this project in a folder where '../../../'
is the root folder of your openFrameworks installation.
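For example, assuming a stock openFrameworks source tree (folder names illustrative):
```shell
# from the openFrameworks root
mv language_identification apps/myApps/
cd apps/myApps/language_identification
```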
## Usage
### Run this code
##### Compile
```shell
cd $ROOT_OF_THIS_PROJECT
make
```
##### Execute
```shell
make RunRelease
```
## License
BSD Simplified License.
Copyright (c) 2021 ZKM | Karlsruhe.
For information on usage and redistribution, and for a DISCLAIMER OF ALL WARRANTIES, see the file "LICENSE.txt" in this distribution.
## Further Reading
Repo for the training code and other models coming soon...
## Contribute
Contributions are very welcome!
Please send an email to author@zkm.de
## The Intelligent Museum
An artistic-curatorial field of experimentation for deep learning and visitor participation
The [ZKM | Center for Art and Media](https://zkm.de/en) and the [Deutsches Museum Nuremberg](https://www.deutsches-museum.de/en/nuernberg/information/) cooperate with the goal of implementing an AI-supported exhibition. Together with researchers and international artists, new AI-based works of art will be realized during the next four years (2020-2023). They will be embedded in the AI-supported exhibition in both houses. The Project „The Intelligent Museum“ is funded by the Digital Culture Programme of the [Kulturstiftung des Bundes](https://www.kulturstiftung-des-bundes.de/en) (German Federal Cultural Foundation).
......
ofxTensorFlow2
################################################################################
# CONFIGURE PROJECT MAKEFILE (optional)
# This file is where we make project specific configurations.
################################################################################
################################################################################
# OF ROOT
# The location of your root openFrameworks installation
# (default) OF_ROOT = ../../..
################################################################################
# OF_ROOT = ../../..
################################################################################
# PROJECT ROOT
# The location of the project - a starting place for searching for files
# (default) PROJECT_ROOT = . (this directory)
#
################################################################################
# PROJECT_ROOT = .
################################################################################
# PROJECT SPECIFIC CHECKS
# This is a project defined section to create internal makefile flags to
# conditionally enable or disable the addition of various features within
# this makefile. For instance, if you want to make changes based on whether
# GTK is installed, one might test that here and create a variable to check.
################################################################################
# None
################################################################################
# PROJECT EXTERNAL SOURCE PATHS
# These are fully qualified paths that are not within the PROJECT_ROOT folder.
# Like source folders in the PROJECT_ROOT, these paths are subject to
# exclusion via the PROJECT_EXCLUSIONS list.
#
# (default) PROJECT_EXTERNAL_SOURCE_PATHS = (blank)
#
# Note: Leave a leading space when adding list items with the += operator
################################################################################
# PROJECT_EXTERNAL_SOURCE_PATHS =
################################################################################
# PROJECT EXCLUSIONS
# These makefiles assume that all folders in your current project directory
# and any listed in the PROJECT_EXTERNAL_SOURCE_PATHS are valid locations
# to look for source code. Any folders or files that match any of the
# items in the PROJECT_EXCLUSIONS list below will be ignored.
#
# Each item in the PROJECT_EXCLUSIONS list will be treated as a complete
# string unless the user adds a wildcard (%) operator to match subdirectories.
# GNU make only allows one wildcard for matching. The second wildcard (%) is
# treated literally.
#
# (default) PROJECT_EXCLUSIONS = (blank)
#
# Will automatically exclude the following:
#
# $(PROJECT_ROOT)/bin%
# $(PROJECT_ROOT)/obj%
# $(PROJECT_ROOT)/%.xcodeproj
#
# Note: Leave a leading space when adding list items with the += operator
################################################################################
# PROJECT_EXCLUSIONS =
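# For example (folder names hypothetical), following the wildcard and
# leading-space rules described above:
#
#   PROJECT_EXCLUSIONS = $(PROJECT_ROOT)/docs%
#   PROJECT_EXCLUSIONS += $(PROJECT_ROOT)/scripts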
################################################################################
# PROJECT LINKER FLAGS
# These flags will be sent to the linker when compiling the executable.
#
# (default) PROJECT_LDFLAGS = -Wl,-rpath=./libs
#
# Note: Leave a leading space when adding list items with the += operator
################################################################################
# Currently, shared libraries that are needed are copied to the
# $(PROJECT_ROOT)/bin/libs directory. The following LDFLAGS tell the linker to
# add a runtime path to search for those shared libraries, since they aren't
# incorporated directly into the final executable application binary.
# TODO: should this be a default setting?
# PROJECT_LDFLAGS=-Wl,-rpath=./libs
################################################################################
# PROJECT DEFINES
# Create a space-delimited list of DEFINES. The list will be converted into
# CFLAGS with the "-D" flag later in the makefile.
#
# (default) PROJECT_DEFINES = (blank)
#
# Note: Leave a leading space when adding list items with the += operator
################################################################################
# PROJECT_DEFINES =
################################################################################
# PROJECT CFLAGS
# This is a list of fully qualified CFLAGS required when compiling for this
# project. These CFLAGS will be used IN ADDITION TO the PLATFORM_CFLAGS
# defined in your platform specific core configuration files. These flags are
# presented to the compiler BEFORE the PROJECT_OPTIMIZATION_CFLAGS below.
#
# (default) PROJECT_CFLAGS = (blank)
#
# Note: Before adding PROJECT_CFLAGS, note that the PLATFORM_CFLAGS defined in
# your platform specific configuration file will be applied by default and
# further flags here may not be needed.
#
# Note: Leave a leading space when adding list items with the += operator
################################################################################
# PROJECT_CFLAGS =
################################################################################
# PROJECT OPTIMIZATION CFLAGS
# These are lists of CFLAGS that are target-specific. While any flags could
# be conditionally added, they are usually limited to optimization flags.
# These flags are added BEFORE the PROJECT_CFLAGS.
#
# PROJECT_OPTIMIZATION_CFLAGS_RELEASE flags are only applied to RELEASE targets.
#
# (default) PROJECT_OPTIMIZATION_CFLAGS_RELEASE = (blank)
#
# PROJECT_OPTIMIZATION_CFLAGS_DEBUG flags are only applied to DEBUG targets.
#
# (default) PROJECT_OPTIMIZATION_CFLAGS_DEBUG = (blank)
#
# Note: Before adding PROJECT_OPTIMIZATION_CFLAGS, please note that the
# PLATFORM_OPTIMIZATION_CFLAGS defined in your platform specific configuration
# file will be applied by default and further optimization flags here may not
# be needed.
#
# Note: Leave a leading space when adding list items with the += operator
################################################################################
# PROJECT_OPTIMIZATION_CFLAGS_RELEASE =
# PROJECT_OPTIMIZATION_CFLAGS_DEBUG =
################################################################################
# PROJECT COMPILERS
# Custom compilers can be set for CC and CXX
# (default) PROJECT_CXX = (blank)
# (default) PROJECT_CC = (blank)
# Note: Leave a leading space when adding list items with the += operator
################################################################################
# PROJECT_CXX =
# PROJECT_CC =
/*
* ofxTensorFlow2
*
* Copyright (c) 2021 ZKM | Hertz-Lab
* Paul Bethge <bethge@zkm.de>
* Dan Wilcox <dan.wilcox@zkm.de>
*
* BSD Simplified License.
* For information on usage and redistribution, and for a DISCLAIMER OF ALL
* WARRANTIES, see the file, "LICENSE.txt," in this distribution.
*
* This code has been developed at ZKM | Hertz-Lab as part of „The Intelligent
* Museum“ generously funded by the German Federal Cultural Foundation.
*/
#pragma once
#include <map>
#include <string>
typedef std::map<int, std::string> Labels;
static Labels labelsMap = {
{0, "english"},
{1, "french"},
{2, "german"},
{3, "russian"},
};
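// usage sketch: translate the network's argmax index into a readable label;
// argMax here is hypothetical, in the app it comes from model.classify():
//   int argMax = 2;
//   std::string lang = labelsMap[argMax]; // "german"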
#include "ofMain.h"
#include "ofApp.h"
//========================================================================
int main() {
// setup the GL context: a 500x260 window (can be OF_WINDOW or OF_FULLSCREEN)
ofSetupOpenGL(500, 260, OF_WINDOW);
// this kicks off the running of the app
ofRunApp(new ofApp());
}
/*
* ofxTensorFlow2
*
* Copyright (c) 2021 ZKM | Hertz-Lab
* Paul Bethge <bethge@zkm.de>
* Dan Wilcox <dan.wilcox@zkm.de>
*
* BSD Simplified License.
* For information on usage and redistribution, and for a DISCLAIMER OF ALL
* WARRANTIES, see the file, "LICENSE.txt," in this distribution.
*
* This code has been developed at ZKM | Hertz-Lab as part of „The Intelligent
* Museum“ generously funded by the German Federal Cultural Foundation.
*/
#include "ofApp.h"
//--------------------------------------------------------------
void ofApp::setup() {
ofSetFrameRate(60);
ofSetVerticalSync(true);
ofSetWindowTitle("example_language_identification");
ofSetCircleResolution(80);
ofBackground(54, 54, 54);
// load the model, bail out on error
if(!model.load("model")) {
std::exit(EXIT_FAILURE);
}
// setup: define the input and output names
std::vector<std::string> inputNames = {
"serving_default_input",
};
std::vector<std::string> outputNames = {
"StatefulPartitionedCall"
};
model.setup(inputNames, outputNames);
// audio stream settings
bufferSize = 1023;
samplingRate = 48000; // use 16 kHz if available, then set downsamplingFactor to 1
// neural network input parameters:
// downsamplingFactor must equal samplingRate / inputSamplingRate (an integer),
// e.g. 48000 / 16000 = 3; downsampling is required for microphones that
// cannot record at 16 kHz directly
downsamplingFactor = 3;
inputSeconds = 5;
inputSamplingRate = 16000; // do not change: the model was trained on 16 kHz audio
// recording settings
numPreviousBuffers = 10; // how many buffers to save before trigger happens
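// sanity check (sketch): numBuffers below is 48000 Hz * 5 s / 1023 samples
// per buffer = 234 buffers (integer division), i.e. ~4.99 s of recorded audio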
numBuffers = samplingRate * inputSeconds / bufferSize;
previousBuffers.setMaxLen(numPreviousBuffers);
sampleBuffers.setMaxLen(numBuffers);
// display
volHistory.assign(400, 0.0);
// apply settings to soundStream
soundStream.printDeviceList();
ofSoundStreamSettings settings;
auto devices = soundStream.getMatchingDevices("default");
if(!devices.empty()) {
settings.setInDevice(devices[0]);
}
settings.setInListener(this);
settings.sampleRate = samplingRate;
settings.numOutputChannels = 0;
settings.numInputChannels = 1;
settings.bufferSize = bufferSize;
soundStream.setup(settings);
// print the languages we can identify
ofLog() << "From src/labels.h:";
ofLog() << "----> languages to detect";
for(const auto & label : labelsMap) {
ofLog() << label.second;
}
ofLog() << "<---- languages to detect";
// warm up: the initial inference involves initialization (takes longer)
// input shape: {batch, samples, channels} = {1, 80000, 1}; 80000 = 5 s * 16 kHz
auto test = cppflow::fill({1, 80000, 1}, 1.0f);
output = model.runModel(test);
ofLog() << "Setup done";
ofLog() << "============================";
}
//--------------------------------------------------------------
void ofApp::update() {
// scale the volume to a 0-1 range
scaledVol = ofMap(smoothedVol, 0.0, 0.17, 0.0, 1.0, true);
// record the volume into the history array
volHistory.push_back(scaledVol);
// if the history is larger than the display size, drop the oldest value
if(volHistory.size() >= 400) {
volHistory.erase(volHistory.begin(), volHistory.begin()+1);
}
if(trigger) {
// inference, sets argMax and prob after running model
int argMax;
float prob;
model.classify(sampleBuffers, downsamplingFactor, argMax, prob);
// only display the label when the probability is high enough
if(prob >= minConfidence) {
displayLabel = labelsMap[argMax];
}
else {
displayLabel = " ";
}
// look up label
ofLog() << "Label: " << labelsMap[argMax];
ofLog() << "Probabilty: " << prob;
ofLog() << "============================";
// release the trigger signal and emit enable
trigger = false;
enable = true;
}
}
//--------------------------------------------------------------
void ofApp::draw() {
std::size_t historyWidth = 400;
std::size_t historyHeight = 150;
// draw current label
ofSetColor(64, 245, 221);
ofNoFill();
ofDrawBitmapString(displayLabel, 50, 50);
// draw the average volume
ofPushStyle();
ofPushMatrix();
ofTranslate(50, 50);
// draw the threshold line
ofDrawLine(0, historyHeight - volThreshold,
historyWidth, historyHeight - volThreshold);
ofSetColor(255);
// draw the volume history as a graph
ofBeginShape();
for(unsigned int i = 0; i < volHistory.size(); i++) {
if(i == 0) {
ofVertex(i, historyHeight);
}
ofVertex(i, historyHeight - volHistory[i] * 100);
if(i == volHistory.size() - 1) {
ofVertex(i, historyHeight);
}
}
ofEndShape(false);
ofPopMatrix();
ofPopStyle();
}
//--------------------------------------------------------------
void ofApp::audioIn(ofSoundBuffer & input) {
// calculate the root mean square, a rough way to measure volume
float sumVol = 0.0;
for(size_t i = 0; i < input.getNumFrames(); i++) {
float vol = input[i];
sumVol += vol * vol;
}
curVol = sumVol / (float)input.getNumFrames();
curVol = sqrt(curVol);
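// i.e. curVol = sqrt(sum(x[i]^2) / numFrames), the RMS of the input buffer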
// smooth the volume with a simple exponential moving average
smoothedVol *= 0.5;
smoothedVol += 0.5 * curVol;
// trigger recording if the smoothed volume (mapped to 0-100) exceeds the threshold
if(ofMap(smoothedVol, 0.0, 0.17, 0.0, 1.0, true) * 100 >= volThreshold && enable) {
enable = false;
ofLog() << "Start recording...";
// copy previous buffers to the recording
sampleBuffers = previousBuffers;
sampleBuffers.setMaxLen(numBuffers); // just to make sure (not tested)
recordingCounter = sampleBuffers.size();
// trigger recording in the next function call
recording = true;
}
// if we didn't just trigger
else {
// if recording: save the incoming buffer to the recording
// then trigger the neural network
if(recording) {
sampleBuffers.push(input.getBuffer());
recordingCounter++;
if(recordingCounter >= numBuffers) {
recording = false;
trigger = true;
ofLog() << "Done!";
}
}
// if not recording: save the incoming buffer to the previous buffer fifo
else {
previousBuffers.push(input.getBuffer());
}
}
}
//--------------------------------------------------------------
void ofApp::keyPressed(int key) {
}
//--------------------------------------------------------------
void ofApp::keyReleased(int key) {
}
//--------------------------------------------------------------
void ofApp::mouseMoved(int x, int y) {
}
//--------------------------------------------------------------
void ofApp::mouseDragged(int x, int y, int button) {
}
//--------------------------------------------------------------
void ofApp::mousePressed(int x, int y, int button) {
}
//--------------------------------------------------------------
void ofApp::mouseReleased(int x, int y, int button) {
}
//--------------------------------------------------------------
void ofApp::mouseEntered(int x, int y) {
}
//--------------------------------------------------------------
void ofApp::mouseExited(int x, int y) {
}
//--------------------------------------------------------------
void ofApp::windowResized(int w, int h) {
}
//--------------------------------------------------------------
void ofApp::gotMessage(ofMessage msg) {
}
//--------------------------------------------------------------
void ofApp::dragEvent(ofDragInfo dragInfo) {
}
/*
* ofxTensorFlow2
*
* Copyright (c) 2021 ZKM | Hertz-Lab
* Paul Bethge <bethge@zkm.de>
* Dan Wilcox <dan.wilcox@zkm.de>
*
* BSD Simplified License.
* For information on usage and redistribution, and for a DISCLAIMER OF ALL
* WARRANTIES, see the file, "LICENSE.txt," in this distribution.
*
* This code has been developed at ZKM | Hertz-Lab as part of „The Intelligent
* Museum“ generously funded by the German Federal Cultural Foundation.
*/
#pragma once
#include "ofMain.h"
#include "ofxTensorFlow2.h"
#include "labels.h"
// AudioBufferFifo & AudioClassifier model defined in utils.h
#include "utils.h"
class ofApp : public ofBaseApp {
public:
void setup();
void update();
void draw();
void audioIn(ofSoundBuffer & input);
void keyPressed(int key);
void keyReleased(int key);
void mouseMoved(int x, int y);
void mouseDragged(int x, int y, int button);
void mousePressed(int x, int y, int button);
void mouseReleased(int x, int y, int button);
void mouseEntered(int x, int y);
void mouseExited(int x, int y);
void windowResized(int w, int h);
void dragEvent(ofDragInfo dragInfo);
void gotMessage(ofMessage msg);
// audio
ofSoundStream soundStream;
// for ease of use:
// we want to keep the buffer size a multiple of the downsampling factor
// downsamplingFactor = micSamplingRate / neuralNetworkInputSamplingRate
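// e.g. 48000 / 16000 = 3, matching the defaults set in ofApp::setup()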
std::size_t downsamplingFactor;
std::size_t bufferSize;
std::size_t samplingRate;
// since volume detection has some latency we keep a history of buffers
AudioBufferFifo previousBuffers;
std::size_t numPreviousBuffers;
// sampleBuffers acts as a buffer for recording (could be fused)
AudioBufferFifo sampleBuffers;
std::size_t numBuffers;
// volume
float curVol = 0.0;
float smoothedVol = 0.0;
float scaledVol = 0.0;
float volThreshold = 25;
// display
std::vector<float> volHistory;
std::string displayLabel = " ";
float minConfidence = 0.75;
// neural network
AudioClassifier model;
cppflow::tensor output;
std::size_t inputSeconds;
std::size_t inputSamplingRate;