Commit ca98b8e9 authored by pbethge's avatar pbethge
Browse files

add writing of the result to a file

parent 248fe20a
......@@ -16,8 +16,7 @@ pip install -r requirements
The following parameters may be useful to look at
```python
vad_threshold = 0.8 # minimum confidence of the VAD to trigger KWS
kws_threshold = 0.95 # minimum confidence of the KWS to detect a word
kws_required_size = 4 # number of chunks -1 to feed into the KWS
lid_required_size = 4 # number of chunks -1 to feed into the LID
frame_duration_ms = 250 # chunks size for the VAD in milliseconds (250ms is min)
```
The sample rate should be kept at 16 kHz for both neural networks. If recordings require a higher sample rate, consider downsampling the audio to 16 kHz before feeding it to the networks.
......
......@@ -51,21 +51,18 @@ def processAudio(config, q):
data = q.get()
data = [item for sublist in data for item in sublist]
data_tensor = torch.tensor(data, dtype=float)
print(data_tensor)
# data_tensor /= 32768.0
data_tensor = torch.unsqueeze(data_tensor, 0)
out = model.classify_batch(data_tensor)
print(out[3])
# out = lid_model(data_tensor)[0].numpy()
# index = tf.math.argmax(out).numpy()
language = out[3][0]
print(language)
# if out[index] >= lid_threshold:
# print(classes[index])
# if you want to see the data please uncomment
# wav.write('results/'+classes[index]+'.wav', SAMPLE_RATE, np.asarray(data))
fname = str(time.ctime(time.time())) + "_" + language + '.wav'
wav.write(fname, SAMPLE_RATE, np.asarray(data))
except Exception as e:
print("Ooopsi: ", e)
q.task_done()
......@@ -83,8 +80,6 @@ got_voice = False
# wait a bit for libraries to load
print("Loading libraries...")
print("This may take up to 10 seconds!")
time.sleep(7)
stream = audio.open(format=FORMAT,
channels=CHANNELS,
rate=SAMPLE_RATE,
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment