NeonBohdan
committed on
Commit
•
598e67e
1
Parent(s):
1e4ef2c
Updated hifi-V3 config
Browse files
- config.json +21 -29
- speaker_ids.json +1 -1
config.json
CHANGED
@@ -23,7 +23,7 @@
|
|
23 |
"distributed_url": "tcp://localhost:54321",
|
24 |
"mixed_precision": true,
|
25 |
"epochs": 1000,
|
26 |
-
"batch_size":
|
27 |
"eval_batch_size": 4,
|
28 |
"grad_clip": [
|
29 |
1000,
|
@@ -45,7 +45,7 @@
|
|
45 |
"use_grad_scaler": false,
|
46 |
"cudnn_enable": true,
|
47 |
"cudnn_deterministic": false,
|
48 |
-
"cudnn_benchmark":
|
49 |
"training_seed": 54321,
|
50 |
"model": "vits",
|
51 |
"num_loader_workers": 8,
|
@@ -58,7 +58,7 @@
|
|
58 |
"frame_shift_ms": null,
|
59 |
"frame_length_ms": null,
|
60 |
"stft_pad_mode": "reflect",
|
61 |
-
"sample_rate":
|
62 |
"resample": false,
|
63 |
"preemphasis": 0.0,
|
64 |
"ref_level_db": 20,
|
@@ -78,7 +78,7 @@
|
|
78 |
"do_amp_to_db_mel": true,
|
79 |
"pitch_fmax": 640.0,
|
80 |
"pitch_fmin": 0.0,
|
81 |
-
"signal_norm":
|
82 |
"min_level_db": -100,
|
83 |
"symmetric_norm": true,
|
84 |
"max_norm": 4.0,
|
@@ -100,7 +100,7 @@
|
|
100 |
"eos": "<EOS>",
|
101 |
"bos": "<BOS>",
|
102 |
"blank": "<BLNK>",
|
103 |
-
"characters": "\u0430\u0431\u0432\u0433\u0491\u0434\u0435\u0454\u0436\u0437\u0438\u0456\u0457\u0439\u043a\u043b\u043c\u043d\u043e\u043f\u0440\u0441\u0442\u0443\u0444\u0445\u0446\u0447\u0448\u0449\u044c\u044e\u044f\u044d\u0451\u044b\u044a",
|
104 |
"punctuations": "!'(),-.:;? ",
|
105 |
"phonemes": null,
|
106 |
"is_unique": true,
|
@@ -111,7 +111,7 @@
|
|
111 |
"loss_masking": null,
|
112 |
"sort_by_audio_len": false,
|
113 |
"min_audio_len": 32768,
|
114 |
-
"max_audio_len":
|
115 |
"min_text_len": 1,
|
116 |
"max_text_len": Infinity,
|
117 |
"compute_f0": false,
|
@@ -120,13 +120,10 @@
|
|
120 |
"start_by_longest": false,
|
121 |
"datasets": [
|
122 |
{
|
123 |
-
"name": "
|
124 |
-
"path": "./
|
125 |
-
"meta_file_train": "",
|
126 |
-
"ignored_speakers":
|
127 |
-
"obruchov",
|
128 |
-
"shepel"
|
129 |
-
],
|
130 |
"language": "uk",
|
131 |
"meta_file_val": "",
|
132 |
"meta_file_attn_mask": ""
|
@@ -135,7 +132,7 @@
|
|
135 |
"test_sentences": [
|
136 |
[
|
137 |
"\u0412\u0435\u0441\u0435\u043b\u043a\u0430, \u0442\u0430\u043a\u043e\u0436 \u0440\u0430\u0439\u0434\u0443\u0433\u0430 \u043e\u043f\u0442\u0438\u0447\u043d\u0435 \u044f\u0432\u0438\u0449\u0435 \u0432 \u0430\u0442\u043c\u043e\u0441\u0444\u0435\u0440\u0456, \u0449\u043e \u044f\u0432\u043b\u044f\u0454 \u0441\u043e\u0431\u043e\u044e \u043e\u0434\u043d\u0443, \u0434\u0432\u0456 \u0447\u0438 \u0434\u0435\u043a\u0456\u043b\u044c\u043a\u0430 \u0440\u0456\u0437\u043d\u043e\u043a\u043e\u043b\u044c\u043e\u0440\u043e\u0432\u0438\u0445 \u0434\u0443\u0433.",
|
138 |
-
"
|
139 |
null,
|
140 |
"uk"
|
141 |
]
|
@@ -147,7 +144,7 @@
|
|
147 |
"use_language_weighted_sampler": true,
|
148 |
"language_weighted_sampler_alpha": 1.0,
|
149 |
"model_args": {
|
150 |
-
"num_chars":
|
151 |
"out_channels": 513,
|
152 |
"spec_segment_size": 32,
|
153 |
"hidden_channels": 192,
|
@@ -166,38 +163,33 @@
|
|
166 |
"resblock_type_decoder": "2",
|
167 |
"resblock_kernel_sizes_decoder": [
|
168 |
3,
|
169 |
-
|
170 |
-
|
171 |
],
|
172 |
"resblock_dilation_sizes_decoder": [
|
173 |
[
|
174 |
1,
|
175 |
-
|
176 |
-
5
|
177 |
],
|
178 |
[
|
179 |
-
|
180 |
-
|
181 |
-
5
|
182 |
],
|
183 |
[
|
184 |
-
1,
|
185 |
3,
|
186 |
-
|
187 |
]
|
188 |
],
|
189 |
"upsample_rates_decoder": [
|
190 |
8,
|
191 |
8,
|
192 |
-
|
193 |
-
2
|
194 |
],
|
195 |
-
"upsample_initial_channel_decoder":
|
196 |
"upsample_kernel_sizes_decoder": [
|
197 |
16,
|
198 |
16,
|
199 |
-
|
200 |
-
4
|
201 |
],
|
202 |
"use_sdp": true,
|
203 |
"noise_scale": 1.0,
|
|
|
23 |
"distributed_url": "tcp://localhost:54321",
|
24 |
"mixed_precision": true,
|
25 |
"epochs": 1000,
|
26 |
+
"batch_size": 32,
|
27 |
"eval_batch_size": 4,
|
28 |
"grad_clip": [
|
29 |
1000,
|
|
|
45 |
"use_grad_scaler": false,
|
46 |
"cudnn_enable": true,
|
47 |
"cudnn_deterministic": false,
|
48 |
+
"cudnn_benchmark": false,
|
49 |
"training_seed": 54321,
|
50 |
"model": "vits",
|
51 |
"num_loader_workers": 8,
|
|
|
58 |
"frame_shift_ms": null,
|
59 |
"frame_length_ms": null,
|
60 |
"stft_pad_mode": "reflect",
|
61 |
+
"sample_rate": 22050,
|
62 |
"resample": false,
|
63 |
"preemphasis": 0.0,
|
64 |
"ref_level_db": 20,
|
|
|
78 |
"do_amp_to_db_mel": true,
|
79 |
"pitch_fmax": 640.0,
|
80 |
"pitch_fmin": 0.0,
|
81 |
+
"signal_norm": true,
|
82 |
"min_level_db": -100,
|
83 |
"symmetric_norm": true,
|
84 |
"max_norm": 4.0,
|
|
|
100 |
"eos": "<EOS>",
|
101 |
"bos": "<BOS>",
|
102 |
"blank": "<BLNK>",
|
103 |
+
"characters": "\u0430\u0431\u0432\u0433\u0491\u0434\u0435\u0454\u0436\u0437\u0438\u0456\u0457\u0439\u043a\u043b\u043c\u043d\u043e\u043f\u0440\u0441\u0442\u0443\u0444\u0445\u0446\u0447\u0448\u0449\u044c\u044e\u044f\u044d\u0451\u044b\u044a+",
|
104 |
"punctuations": "!'(),-.:;? ",
|
105 |
"phonemes": null,
|
106 |
"is_unique": true,
|
|
|
111 |
"loss_masking": null,
|
112 |
"sort_by_audio_len": false,
|
113 |
"min_audio_len": 32768,
|
114 |
+
"max_audio_len": 132300,
|
115 |
"min_text_len": 1,
|
116 |
"max_text_len": Infinity,
|
117 |
"compute_f0": false,
|
|
|
120 |
"start_by_longest": false,
|
121 |
"datasets": [
|
122 |
{
|
123 |
+
"name": "ljspeech",
|
124 |
+
"path": "./datasets/uk_Mykyta",
|
125 |
+
"meta_file_train": "metadata_not.csv",
|
126 |
+
"ignored_speakers": null,
|
|
|
|
|
|
|
127 |
"language": "uk",
|
128 |
"meta_file_val": "",
|
129 |
"meta_file_attn_mask": ""
|
|
|
132 |
"test_sentences": [
|
133 |
[
|
134 |
"\u0412\u0435\u0441\u0435\u043b\u043a\u0430, \u0442\u0430\u043a\u043e\u0436 \u0440\u0430\u0439\u0434\u0443\u0433\u0430 \u043e\u043f\u0442\u0438\u0447\u043d\u0435 \u044f\u0432\u0438\u0449\u0435 \u0432 \u0430\u0442\u043c\u043e\u0441\u0444\u0435\u0440\u0456, \u0449\u043e \u044f\u0432\u043b\u044f\u0454 \u0441\u043e\u0431\u043e\u044e \u043e\u0434\u043d\u0443, \u0434\u0432\u0456 \u0447\u0438 \u0434\u0435\u043a\u0456\u043b\u044c\u043a\u0430 \u0440\u0456\u0437\u043d\u043e\u043a\u043e\u043b\u044c\u043e\u0440\u043e\u0432\u0438\u0445 \u0434\u0443\u0433.",
|
135 |
+
"ljspeech",
|
136 |
null,
|
137 |
"uk"
|
138 |
]
|
|
|
144 |
"use_language_weighted_sampler": true,
|
145 |
"language_weighted_sampler_alpha": 1.0,
|
146 |
"model_args": {
|
147 |
+
"num_chars": 53,
|
148 |
"out_channels": 513,
|
149 |
"spec_segment_size": 32,
|
150 |
"hidden_channels": 192,
|
|
|
163 |
"resblock_type_decoder": "2",
|
164 |
"resblock_kernel_sizes_decoder": [
|
165 |
3,
|
166 |
+
5,
|
167 |
+
7
|
168 |
],
|
169 |
"resblock_dilation_sizes_decoder": [
|
170 |
[
|
171 |
1,
|
172 |
+
2
|
|
|
173 |
],
|
174 |
[
|
175 |
+
2,
|
176 |
+
6
|
|
|
177 |
],
|
178 |
[
|
|
|
179 |
3,
|
180 |
+
12
|
181 |
]
|
182 |
],
|
183 |
"upsample_rates_decoder": [
|
184 |
8,
|
185 |
8,
|
186 |
+
4
|
|
|
187 |
],
|
188 |
+
"upsample_initial_channel_decoder": 256,
|
189 |
"upsample_kernel_sizes_decoder": [
|
190 |
16,
|
191 |
16,
|
192 |
+
8
|
|
|
193 |
],
|
194 |
"use_sdp": true,
|
195 |
"noise_scale": 1.0,
|
speaker_ids.json
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
{
|
2 |
"uk": 0,
|
3 |
-
"
|
4 |
}
|
|
|
1 |
{
|
2 |
"uk": 0,
|
3 |
+
"mykyta": 0
|
4 |
}
|