Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
Hertz-Lab
Research
Intelligent Museum
language-identification
Commits
40267ee9
Commit
40267ee9
authored
Oct 08, 2021
by
paul
Browse files
add uid to output
parent
77f5fd75
Changes
1
Hide whitespace changes
Inline
Side-by-side
data/common-voice/cv_to_wav.py
View file @
40267ee9
...
...
@@ -163,6 +163,7 @@ def traverse_csv(language, input_dir, output_dir, max_chops,
line
=
genders
[
gender_selected
].
pop
()
# get meta data
uid
=
line
[
0
]
mp3_filename
=
line
[
1
]
age
=
line
[
5
]
gender
=
line
[
6
]
...
...
@@ -202,7 +203,7 @@ def traverse_csv(language, input_dir, output_dir, max_chops,
wav_path
=
os
.
path
.
join
(
output_dir_wav
,
chip_name
+
".wav"
)
wav
.
write
(
wav_path
,
chip_fs
,
chip_data
)
output_clips
.
append
([
chip_name
+
".wav"
,
age
,
gender
,
accent
,
locale
])
output_clips
.
append
([
chip_name
+
".wav"
,
age
,
gender
,
accent
,
locale
,
uid
])
gender_counter
[
gender_selected
]
+=
1
# remove the intermediate file
...
...
@@ -216,7 +217,7 @@ def traverse_csv(language, input_dir, output_dir, max_chops,
# --- end loop ---
# write to csv
column_names
=
[
"path"
,
"age"
,
"gender"
,
"accent"
,
"locale"
]
column_names
=
[
"path"
,
"age"
,
"gender"
,
"accent"
,
"locale"
,
"uid"
]
pd
.
DataFrame
(
output_clips
,
columns
=
column_names
).
to_csv
(
output_clips_file
,
sep
=
'
\t
'
)
produced_files
=
sum
(
gender_counter
)
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment