Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
Hertz-Lab
Research
Intelligent Museum
language-identification
Commits
651799bd
Commit
651799bd
authored
Jul 16, 2021
by
Paul Bethge
Browse files
simplify padding
parent
6f1c1db3
Changes
2
Hide whitespace changes
Inline
Side-by-side
src/audio/chop_up.py
View file @
651799bd
...
...
@@ -12,7 +12,7 @@ import os
import
time
import
auditok
from
.utils
import
pad
_with_data
,
pad_with_noise
,
pad_with_silence
,
to_array
from
src.audio
.utils
import
pad
,
to_array
def
chop_up_audio
(
file_name
,
desired_length_s
=
5
,
...
...
@@ -35,16 +35,8 @@ def chop_up_audio (file_name, desired_length_s = 5,
# extend tokens to desired length
audio_cuttings
=
[]
for
i
,
r
in
enumerate
(
regions
):
numpy_data
=
to_array
(
r
.
_data
,
2
,
1
)
if
padding
==
"Silence"
:
extended_token
=
pad_with_silence
(
numpy_data
,
nn_input_len
)
elif
padding
==
"Data"
:
extended_token
=
pad_with_data
(
numpy_data
,
nn_input_len
)
else
:
extended_token
=
pad_with_noise
(
numpy_data
,
nn_input_len
)
extended_token
=
pad
(
numpy_data
,
nn_input_len
,
padding
)
file_name_out
=
os
.
path
.
split
(
file_name
)[
-
1
][:
-
4
]
+
"_"
+
str
(
i
)
data_tuple
=
(
file_name_out
,
sample_rate
,
extended_token
)
audio_cuttings
.
append
(
data_tuple
)
...
...
src/audio/silero_vad/utils_vad.py
View file @
651799bd
...
...
@@ -719,7 +719,7 @@ def drop_chunks(tss: List[dict],
import
os
from
src.audio.utils
import
pad
_with_data
,
pad_with_noise
,
pad_with_silence
,
to_array
from
src.audio.utils
import
pad
from
src.audio.utils
import
LogicDataSource
,
LogicValidater
from
auditok.core
import
StreamTokenizer
...
...
@@ -759,20 +759,11 @@ class VADTokenizer():
# reconstruct audio regions from index regions
regions
=
[
wav
[
index
[
1
]
*
self
.
vad_resolution
:
index
[
2
]
*
self
.
vad_resolution
]
for
index
in
indices
]
# extend tokens to desired length
audio_cuttings
=
[]
for
i
,
r
in
enumerate
(
regions
):
numpy_data
=
r
.
cpu
().
detach
().
numpy
()
if
padding
==
"Silence"
:
extended_token
=
pad_with_silence
(
numpy_data
,
self
.
nn_input_len
)
elif
padding
==
"Data"
:
extended_token
=
pad_with_data
(
numpy_data
,
self
.
nn_input_len
)
else
:
extended_token
=
pad_with_noise
(
numpy_data
,
self
.
nn_input_len
)
extended_token
=
pad
(
numpy_data
,
self
.
nn_input_len
,
padding
)
file_name_out
=
os
.
path
.
split
(
file_path
)[
-
1
][:
-
4
]
+
"_"
+
str
(
i
)
data_tuple
=
(
file_name_out
,
self
.
sample_rate
,
extended_token
)
audio_cuttings
.
append
(
data_tuple
)
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment