# Gin configuration for model.RAVE.
#
# Wires a variational encoder (blocks.VariationalEncoder wrapping a scoped
# `variational/blocks.EncoderV2`), a blocks.GeneratorV2 decoder, a
# pqmf.CachedPQMF filter bank and a
# descript_discriminator.DescriptDiscriminator into model.RAVE.
# Shared hyperparameters are declared once as macros below and referenced
# with `%NAME`; `@name` passes a configurable by reference and `@name()`
# passes the result of calling it.

from __gin__ import dynamic_registration
import cached_conv as cc
from cached_conv import convs
import rave
from rave import blocks
from rave import core
from rave import dataset
from rave import descript_discriminator
from rave import discriminator
from rave import model
from rave import pqmf
import torch
import torch.nn as nn

# Macros:
# ==============================================================================
ACTIVATION = @blocks.Snake
CAPACITY = 96
DILATIONS = [[1, 3, 9], [1, 3, 9], [1, 3, 9], [1, 3]]
KERNEL_SIZE = 3
LATENT_SIZE = 128
N_BAND = 16
NOISE_AUGMENTATION = 0
PHASE_1_DURATION = 0
RATIOS = [4, 4, 4, 2]
SAMPLING_RATE = 48000

# Parameters for blocks.AdaptiveInstanceNormalization:
# ==============================================================================
# None.

# Parameters for variational/blocks.AdaptiveInstanceNormalization:
# ==============================================================================
# None.

# Parameters for core.AudioDistanceV1:
# ==============================================================================
core.AudioDistanceV1.log_epsilon = 1e-07
core.AudioDistanceV1.multiscale_stft = @core.MultiScaleSTFT

# Parameters for model.BetaWarmupCallback:
# ==============================================================================
model.BetaWarmupCallback.initial_value = 1e-06
model.BetaWarmupCallback.target_value = 0.005
model.BetaWarmupCallback.warmup_len = 20000

# Parameters for pqmf.CachedPQMF:
# ==============================================================================
pqmf.CachedPQMF.attenuation = 100
pqmf.CachedPQMF.n_band = %N_BAND

# Parameters for cc.Conv1d:
# ==============================================================================
cc.Conv1d.bias = False

# Parameters for variational/cc.Conv1d:
# ==============================================================================
variational/cc.Conv1d.bias = False

# Parameters for cc.ConvTranspose1d:
# ==============================================================================
cc.ConvTranspose1d.bias = False

# Parameters for descript_discriminator.DescriptDiscriminator:
# ==============================================================================
descript_discriminator.DescriptDiscriminator.bands = \
    [(0.0, 0.1), (0.1, 0.25), (0.25, 0.5), (0.5, 0.75), (0.75, 1.0)]
descript_discriminator.DescriptDiscriminator.fft_sizes = [2048, 1024, 512]
descript_discriminator.DescriptDiscriminator.periods = [2, 3, 5, 7, 11]
descript_discriminator.DescriptDiscriminator.rates = []
# NOTE(review): this is a literal 44100 while SAMPLING_RATE is 48000 and the
# other sample-rate bindings in this file use %SAMPLING_RATE. Confirm whether
# this should be %SAMPLING_RATE; the value is left unchanged here.
descript_discriminator.DescriptDiscriminator.sample_rate = 44100

# Parameters for variational/blocks.EncoderV2:
# ==============================================================================
variational/blocks.EncoderV2.activation = %ACTIVATION
variational/blocks.EncoderV2.adain = @blocks.AdaptiveInstanceNormalization
variational/blocks.EncoderV2.capacity = %CAPACITY
variational/blocks.EncoderV2.data_size = %N_BAND
variational/blocks.EncoderV2.dilations = %DILATIONS
variational/blocks.EncoderV2.keep_dim = False
variational/blocks.EncoderV2.kernel_size = %KERNEL_SIZE
variational/blocks.EncoderV2.latent_size = %LATENT_SIZE
variational/blocks.EncoderV2.n_out = 2
variational/blocks.EncoderV2.ratios = %RATIOS
variational/blocks.EncoderV2.recurrent_layer = None
variational/blocks.EncoderV2.spectrogram = None

# Parameters for blocks.GeneratorV2:
# ==============================================================================
blocks.GeneratorV2.activation = %ACTIVATION
blocks.GeneratorV2.adain = @blocks.AdaptiveInstanceNormalization
blocks.GeneratorV2.amplitude_modulation = True
blocks.GeneratorV2.capacity = %CAPACITY
blocks.GeneratorV2.causal_convtranspose = False
blocks.GeneratorV2.data_size = %N_BAND
blocks.GeneratorV2.dilations = %DILATIONS
blocks.GeneratorV2.keep_dim = False
blocks.GeneratorV2.kernel_size = %KERNEL_SIZE
# Evaluated reference: the decoder receives the value returned by
# core.get_augmented_latent_size (configured below), not the function itself.
blocks.GeneratorV2.latent_size = @core.get_augmented_latent_size()
blocks.GeneratorV2.noise_module = None
blocks.GeneratorV2.ratios = %RATIOS
blocks.GeneratorV2.recurrent_layer = None

# Parameters for core.get_augmented_latent_size:
# ==============================================================================
core.get_augmented_latent_size.latent_size = %LATENT_SIZE
core.get_augmented_latent_size.noise_augmentation = %NOISE_AUGMENTATION

# Parameters for convs.get_padding:
# ==============================================================================
convs.get_padding.dilation = 1
convs.get_padding.mode = 'causal'
convs.get_padding.stride = 1

# Parameters for variational/convs.get_padding:
# ==============================================================================
variational/convs.get_padding.dilation = 1
variational/convs.get_padding.mode = 'causal'
variational/convs.get_padding.stride = 1

# Parameters for core.MultiScaleSTFT:
# ==============================================================================
core.MultiScaleSTFT.magnitude = True
core.MultiScaleSTFT.normalized = False
core.MultiScaleSTFT.num_mels = None
core.MultiScaleSTFT.random_crop = True
core.MultiScaleSTFT.sample_rate = %SAMPLING_RATE
core.MultiScaleSTFT.scales = [2048, 1024, 512, 256, 128]

# Parameters for blocks.normalization:
# ==============================================================================
blocks.normalization.mode = 'weight_norm'

# Parameters for variational/blocks.normalization:
# ==============================================================================
variational/blocks.normalization.mode = 'weight_norm'

# Parameters for model.RAVE:
# ==============================================================================
model.RAVE.audio_distance = @core.AudioDistanceV1
model.RAVE.decoder = @blocks.GeneratorV2
model.RAVE.discriminator = @descript_discriminator.DescriptDiscriminator
model.RAVE.enable_pqmf_decode = True
model.RAVE.enable_pqmf_encode = True
model.RAVE.encoder = @blocks.VariationalEncoder
model.RAVE.feature_matching_fun = @feature_matching/core.mean_difference
model.RAVE.freeze_encoder = False
model.RAVE.gan_loss = @core.hinge_gan
model.RAVE.latent_size = %LATENT_SIZE
model.RAVE.multiband_audio_distance = @core.AudioDistanceV1
model.RAVE.num_skipped_features = 1
model.RAVE.phase_1_duration = %PHASE_1_DURATION
model.RAVE.pqmf = @pqmf.CachedPQMF
model.RAVE.sampling_rate = %SAMPLING_RATE
model.RAVE.update_discriminator_every = 4
model.RAVE.valid_signal_crop = True
model.RAVE.warmup_quantize = None
model.RAVE.weights = {'feature_matching': 20}

# Parameters for blocks.Snake:
# ==============================================================================
# None.

# Parameters for variational/blocks.Snake:
# ==============================================================================
# None.

# Parameters for dataset.split_dataset:
# ==============================================================================
dataset.split_dataset.max_residual = 1000

# Parameters for blocks.VariationalEncoder:
# ==============================================================================
blocks.VariationalEncoder.encoder = @variational/blocks.EncoderV2