diff --git a/bins/svc/inference.py b/bins/svc/inference.py
index da0031d1..dbe4fc71 100644
--- a/bins/svc/inference.py
+++ b/bins/svc/inference.py
@@ -50,7 +50,7 @@ def prepare_for_audio_file(args, cfg, num_workers=1):
     acoustic_extractor.extract_utt_acoustic_features_serial(
         metadata, temp_audio_dir, cfg
     )
-    if cfg.preprocess.use_min_max_norm_mel == True:
+    if cfg.preprocess.use_min_max_norm_mel is True:
         acoustic_extractor.cal_mel_min_max(
             dataset=audio_name, output_path=preprocess_path, cfg=cfg, metadata=metadata
         )
diff --git a/bins/svc/preprocess.py b/bins/svc/preprocess.py
index 453b5001..1c2080b0 100644
--- a/bins/svc/preprocess.py
+++ b/bins/svc/preprocess.py
@@ -101,7 +101,7 @@ def preprocess(cfg, args):
             new_datasets_list.extend(new_datasets)
         cfg.dataset.extend(new_datasets_list)
         print("Augmentation datasets: ", cfg.dataset)
-    except:
+    except Exception:  # TODO: better exception handling
         print("No Data Augmentation.")
 
     # Dump metadata of datasets (singers, train/test durations, etc.)
@@ -145,7 +145,7 @@ def preprocess(cfg, args):
             continue
         dataset_dir = os.path.join(output_path, dataset)
         metadata = []
-        for split in ["train", "test"] if not "eval" in dataset else ["test"]:
+        for split in ["train", "test"] if "eval" not in dataset else ["test"]:
             metadata_file_path = os.path.join(src_dataset_dir, "{}.json".format(split))
             with open(metadata_file_path, "r") as f:
                 metadata.extend(json.load(f))
diff --git a/bins/svc/train.py b/bins/svc/train.py
index 0c20d5b4..a581ba97 100644
--- a/bins/svc/train.py
+++ b/bins/svc/train.py
@@ -80,7 +80,7 @@ def main():
 
     # Data Augmentation
     if (
-        type(cfg.preprocess.data_augment) == list
+        isinstance(cfg.preprocess.data_augment, list)
         and len(cfg.preprocess.data_augment) > 0
     ):
         new_datasets_list = []
diff --git a/bins/tta/preprocess.py b/bins/tta/preprocess.py
index 58c73bf7..34627a6f 100644
--- a/bins/tta/preprocess.py
+++ b/bins/tta/preprocess.py
@@ -108,7 +108,7 @@ def preprocess(cfg, args):
             new_datasets_list.extend(new_datasets)
         cfg.dataset.extend(new_datasets_list)
         print("Augmentation datasets: ", cfg.dataset)
-    except:
+    except Exception:  # TODO: better exception handling
         print("No Data Augmentation.")
 
     # Dump metadata of datasets (singers, train/test durations, etc.)
@@ -157,7 +157,7 @@ def preprocess(cfg, args):
             continue
         dataset_dir = os.path.join(output_path, dataset)
         metadata = []
-        for split in ["train", "test"] if not "eval" in dataset else ["test"]:
+        for split in ["train", "test"] if "eval" not in dataset else ["test"]:
             metadata_file_path = os.path.join(src_dataset_dir, "{}.json".format(split))
             with open(metadata_file_path, "r") as f:
                 metadata.extend(json.load(f))
diff --git a/bins/tts/preprocess.py b/bins/tts/preprocess.py
index 914c0b44..5d793c8e 100644
--- a/bins/tts/preprocess.py
+++ b/bins/tts/preprocess.py
@@ -134,7 +134,7 @@ def preprocess(cfg, args):
             new_datasets_list.extend(new_datasets)
         cfg.dataset.extend(new_datasets_list)
         print("Augmentation datasets: ", cfg.dataset)
-    except:
+    except Exception:  # TODO: better exception handling
         print("No Data Augmentation.")
 
     # json files
@@ -198,7 +198,7 @@ def preprocess(cfg, args):
             continue
         dataset_dir = os.path.join(output_path, dataset)
         metadata = []
-        for split in ["train", "test"] if not "eval" in dataset else ["test"]:
+        for split in ["train", "test"] if "eval" not in dataset else ["test"]:
            metadata_file_path = os.path.join(src_dataset_dir, "{}.json".format(split))
            with open(metadata_file_path, "r") as f:
                metadata.extend(json.load(f))
diff --git a/bins/tts/train.py b/bins/tts/train.py
index 241ee933..f3e9839b 100644
--- a/bins/tts/train.py
+++ b/bins/tts/train.py
@@ -79,7 +79,7 @@ def main():
 
     # Data Augmentation
     if (
-        type(cfg.preprocess.data_augment) == list
+        isinstance(cfg.preprocess.data_augment, list)
         and len(cfg.preprocess.data_augment) > 0
     ):
         new_datasets_list = []
diff --git a/bins/vocoder/preprocess.py b/bins/vocoder/preprocess.py
index 23f756dd..132fc874 100644
--- a/bins/vocoder/preprocess.py
+++ b/bins/vocoder/preprocess.py
@@ -78,7 +78,7 @@ def preprocess(cfg, args):
             new_datasets_list.extend(new_datasets)
         cfg.dataset.extend(new_datasets_list)
         print("Augmentation datasets: ", cfg.dataset)
-    except:
+    except Exception:  # TODO: better exception handling
         print("No Data Augmentation.")
 
     # Dump metadata of datasets (singers, train/test durations, etc.)
@@ -119,7 +119,7 @@ def preprocess(cfg, args):
             continue
         dataset_dir = os.path.join(output_path, dataset)
         metadata = []
-        for split in ["train", "test"] if not "eval" in dataset else ["test"]:
+        for split in ["train", "test"] if "eval" not in dataset else ["test"]:
             metadata_file_path = os.path.join(src_dataset_dir, "{}.json".format(split))
             with open(metadata_file_path, "r") as f:
                 metadata.extend(json.load(f))
diff --git a/evaluation/features/long_term_average_spectrum.py b/evaluation/features/long_term_average_spectrum.py
index b6724eec..e8bf65b4 100644
--- a/evaluation/features/long_term_average_spectrum.py
+++ b/evaluation/features/long_term_average_spectrum.py
@@ -9,7 +9,7 @@
 
 def extract_ltas(audio, fs=None, n_fft=1024, hop_length=256):
     """Extract Long-Term Average Spectrum for a given audio."""
-    if fs != None:
+    if fs is not None:
         y, _ = librosa.load(audio, sr=fs)
     else:
         y, fs = librosa.load(audio)
diff --git a/evaluation/features/signal_to_noise_ratio.py b/evaluation/features/signal_to_noise_ratio.py
index 42abf0e1..a9ff64d1 100644
--- a/evaluation/features/signal_to_noise_ratio.py
+++ b/evaluation/features/signal_to_noise_ratio.py
@@ -79,7 +79,7 @@ def getHarmonics(fund, sr, nHarmonics=6, aliased=False):
 
 def extract_snr(audio, sr=None):
     """Extract Signal-to-Noise Ratio for a given audio."""
-    if sr != None:
+    if sr is not None:
         audio, _ = librosa.load(audio, sr=sr)
     else:
         audio, sr = librosa.load(audio, sr=sr)
diff --git a/evaluation/features/singing_power_ratio.py b/evaluation/features/singing_power_ratio.py
index 60051016..c86a42d2 100644
--- a/evaluation/features/singing_power_ratio.py
+++ b/evaluation/features/singing_power_ratio.py
@@ -37,7 +37,7 @@ def extract_spr(
         pitch_min: lower limit for f0 quantization.
     """
     # Load audio
-    if fs != None:
+    if fs is not None:
         audio, _ = librosa.load(audio, sr=fs)
     else:
         audio, fs = librosa.load(audio)
diff --git a/evaluation/metrics/energy/energy_pearson_coefficients.py b/evaluation/metrics/energy/energy_pearson_coefficients.py
index 55df77e9..b0596604 100644
--- a/evaluation/metrics/energy/energy_pearson_coefficients.py
+++ b/evaluation/metrics/energy/energy_pearson_coefficients.py
@@ -42,7 +42,7 @@ def extract_energy_pearson_coeffcients(
     pearson = PearsonCorrCoef()
 
     # Load audio
-    if fs != None:
+    if fs is not None:
         audio_ref, _ = librosa.load(audio_ref, sr=fs)
         audio_deg, _ = librosa.load(audio_deg, sr=fs)
     else:
diff --git a/evaluation/metrics/energy/energy_rmse.py b/evaluation/metrics/energy/energy_rmse.py
index 92e1f67e..263489e9 100644
--- a/evaluation/metrics/energy/energy_rmse.py
+++ b/evaluation/metrics/energy/energy_rmse.py
@@ -37,7 +37,7 @@ def extract_energy_rmse(
     db_scale = kwargs["db_scale"]
 
     # Load audio
-    if fs != None:
+    if fs is not None:
         audio_ref, _ = librosa.load(audio_ref, sr=fs)
         audio_deg, _ = librosa.load(audio_deg, sr=fs)
     else:
diff --git a/evaluation/metrics/f0/f0_pearson_coefficients.py b/evaluation/metrics/f0/f0_pearson_coefficients.py
index 6ab3c065..c5a97816 100644
--- a/evaluation/metrics/f0/f0_pearson_coefficients.py
+++ b/evaluation/metrics/f0/f0_pearson_coefficients.py
@@ -46,7 +46,7 @@ def extract_fpc(
     pearson = PearsonCorrCoef()
 
     # Load audio
-    if fs != None:
+    if fs is not None:
         audio_ref, _ = librosa.load(audio_ref, sr=fs)
         audio_deg, _ = librosa.load(audio_deg, sr=fs)
     else:
diff --git a/evaluation/metrics/f0/f0_periodicity_rmse.py b/evaluation/metrics/f0/f0_periodicity_rmse.py
index 3f1db492..1ae7829c 100644
--- a/evaluation/metrics/f0/f0_periodicity_rmse.py
+++ b/evaluation/metrics/f0/f0_periodicity_rmse.py
@@ -31,7 +31,7 @@ def extract_f0_periodicity_rmse(
     method = kwargs["method"]
 
     # Load audio
-    if fs != None:
+    if fs is not None:
         audio_ref, _ = librosa.load(audio_ref, sr=fs)
         audio_deg, _ = librosa.load(audio_deg, sr=fs)
     else:
diff --git a/evaluation/metrics/f0/f0_rmse.py b/evaluation/metrics/f0/f0_rmse.py
index 337e9ae3..a89c6377 100644
--- a/evaluation/metrics/f0/f0_rmse.py
+++ b/evaluation/metrics/f0/f0_rmse.py
@@ -45,7 +45,7 @@ def extract_f0rmse(
     need_mean = kwargs["need_mean"]
 
     # Load audio
-    if fs != None:
+    if fs is not None:
         audio_ref, _ = librosa.load(audio_ref, sr=fs)
         audio_deg, _ = librosa.load(audio_deg, sr=fs)
     else:
diff --git a/evaluation/metrics/f0/v_uv_f1.py b/evaluation/metrics/f0/v_uv_f1.py
index c81c0c84..2087993a 100644
--- a/evaluation/metrics/f0/v_uv_f1.py
+++ b/evaluation/metrics/f0/v_uv_f1.py
@@ -44,7 +44,7 @@ def extract_f1_v_uv(
     method = kwargs["method"]
 
     # Load audio
-    if fs != None:
+    if fs is not None:
         audio_ref, _ = librosa.load(audio_ref, sr=fs)
         audio_deg, _ = librosa.load(audio_deg, sr=fs)
     else:
diff --git a/evaluation/metrics/similarity/speaker_similarity.py b/evaluation/metrics/similarity/speaker_similarity.py
index ce90779f..535c0d21 100644
--- a/evaluation/metrics/similarity/speaker_similarity.py
+++ b/evaluation/metrics/similarity/speaker_similarity.py
@@ -108,7 +108,7 @@ def extract_similarity(path_ref, path_deg, **kwargs):
             "microsoft/wavlm-base-plus-sv"
         )
         model = WavLMForXVector.from_pretrained("microsoft/wavlm-base-plus-sv")
-    except:
+    except Exception:  # TODO: better exception handling
         feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(
             "pretrained/wavlm", sampling_rate=16000
         )
diff --git a/evaluation/metrics/spectrogram/mel_cepstral_distortion.py b/evaluation/metrics/spectrogram/mel_cepstral_distortion.py
index d4e4825f..7b476dc7 100644
--- a/evaluation/metrics/spectrogram/mel_cepstral_distortion.py
+++ b/evaluation/metrics/spectrogram/mel_cepstral_distortion.py
@@ -17,7 +17,7 @@ def extract_mcd(audio_ref, audio_deg, **kwargs):
     fs = kwargs["fs"]
 
     mcd_toolbox = Calculate_MCD(MCD_mode="dtw_sl")
-    if fs != None:
+    if fs is not None:
         mcd_toolbox.SAMPLING_RATE = fs
 
     mcd_value = mcd_toolbox.calculate_mcd(audio_ref, audio_deg)
diff --git a/evaluation/metrics/spectrogram/multi_resolution_stft_distance.py b/evaluation/metrics/spectrogram/multi_resolution_stft_distance.py
index 2cbece73..1107d10e 100644
--- a/evaluation/metrics/spectrogram/multi_resolution_stft_distance.py
+++ b/evaluation/metrics/spectrogram/multi_resolution_stft_distance.py
@@ -29,7 +29,7 @@ def extract_mstft(
     method = kwargs["method"]
 
     # Load audio
-    if fs != None:
+    if fs is not None:
         audio_ref, _ = librosa.load(audio_ref, sr=fs)
         audio_deg, _ = librosa.load(audio_deg, sr=fs)
     else:
diff --git a/evaluation/metrics/spectrogram/pesq.py b/evaluation/metrics/spectrogram/pesq.py
index 5c71d16a..1d4738e5 100644
--- a/evaluation/metrics/spectrogram/pesq.py
+++ b/evaluation/metrics/spectrogram/pesq.py
@@ -24,7 +24,7 @@ def extract_pesq(audio_ref, audio_deg, **kwargs):
     method = kwargs["method"]
 
     # Load audio
-    if fs != None:
+    if fs is not None:
         audio_ref, _ = librosa.load(audio_ref, sr=fs)
         audio_deg, _ = librosa.load(audio_deg, sr=fs)
     else:
diff --git a/evaluation/metrics/spectrogram/scale_invariant_signal_to_distortion_ratio.py b/evaluation/metrics/spectrogram/scale_invariant_signal_to_distortion_ratio.py
index 3a16f8c2..f24a6308 100644
--- a/evaluation/metrics/spectrogram/scale_invariant_signal_to_distortion_ratio.py
+++ b/evaluation/metrics/spectrogram/scale_invariant_signal_to_distortion_ratio.py
@@ -19,7 +19,7 @@ def extract_si_sdr(audio_ref, audio_deg, **kwargs):
 
     si_sdr = ScaleInvariantSignalDistortionRatio()
 
-    if fs != None:
+    if fs is not None:
         audio_ref, _ = librosa.load(audio_ref, sr=fs)
         audio_deg, _ = librosa.load(audio_deg, sr=fs)
     else:
diff --git a/evaluation/metrics/spectrogram/scale_invariant_signal_to_noise_ratio.py b/evaluation/metrics/spectrogram/scale_invariant_signal_to_noise_ratio.py
index 2748021e..90416360 100644
--- a/evaluation/metrics/spectrogram/scale_invariant_signal_to_noise_ratio.py
+++ b/evaluation/metrics/spectrogram/scale_invariant_signal_to_noise_ratio.py
@@ -19,7 +19,7 @@ def extract_si_snr(audio_ref, audio_deg, **kwargs):
 
     si_snr = ScaleInvariantSignalNoiseRatio()
 
-    if fs != None:
+    if fs is not None:
         audio_ref, _ = librosa.load(audio_ref, sr=fs)
         audio_deg, _ = librosa.load(audio_deg, sr=fs)
     else:
diff --git a/evaluation/metrics/spectrogram/short_time_objective_intelligibility.py b/evaluation/metrics/spectrogram/short_time_objective_intelligibility.py
index e493ec43..21845640 100644
--- a/evaluation/metrics/spectrogram/short_time_objective_intelligibility.py
+++ b/evaluation/metrics/spectrogram/short_time_objective_intelligibility.py
@@ -25,7 +25,7 @@ def extract_stoi(audio_ref, audio_deg, **kwargs):
     method = kwargs["method"]
 
     # Load audio
-    if fs != None:
+    if fs is not None:
         audio_ref, _ = librosa.load(audio_ref, sr=fs)
         audio_deg, _ = librosa.load(audio_deg, sr=fs)
     else:
diff --git a/models/base/base_inference.py b/models/base/base_inference.py
index 2713f19a..54e4c845 100644
--- a/models/base/base_inference.py
+++ b/models/base/base_inference.py
@@ -14,7 +14,6 @@
 from tqdm import tqdm
 
 from models.vocoders.vocoder_inference import synthesis
-from torch.utils.data import DataLoader
 from utils.util import set_all_random_seed
 from utils.util import load_config
diff --git a/models/codec/ns3_codec/quantize/rvq.py b/models/codec/ns3_codec/quantize/rvq.py
index d22d88d5..241e7367 100644
--- a/models/codec/ns3_codec/quantize/rvq.py
+++ b/models/codec/ns3_codec/quantize/rvq.py
@@ -15,7 +15,7 @@ class ResidualVQ(nn.Module):
     def __init__(self, *, num_quantizers, codebook_size, **kwargs):
         super().__init__()
         VQ = FactorizedVectorQuantize
-        if type(codebook_size) == int:
+        if isinstance(codebook_size, int):
             codebook_size = [codebook_size] * num_quantizers
         self.layers = nn.ModuleList(
             [VQ(codebook_size=2**size, **kwargs) for size in codebook_size]
         )
diff --git a/models/codec/ns3_codec/transformer.py b/models/codec/ns3_codec/transformer.py
index 146d0f36..83623555 100644
--- a/models/codec/ns3_codec/transformer.py
+++ b/models/codec/ns3_codec/transformer.py
@@ -129,7 +129,7 @@ def forward(self, x, key_padding_mask, conditon=None):
         else:
             x = self.ln_1(x)
 
-        if key_padding_mask != None:
+        if key_padding_mask is not None:
             key_padding_mask_input = ~(key_padding_mask.bool())
         else:
             key_padding_mask_input = None
@@ -186,7 +186,7 @@ def __init__(
         )
         self.use_cln = use_cln if use_cln is not None else cfg.use_cln
 
-        if enc_emb_tokens != None:
+        if enc_emb_tokens is not None:
             self.use_enc_emb = True
             self.enc_emb_tokens = enc_emb_tokens
         else:
diff --git a/models/svc/base/svc_dataset.py b/models/svc/base/svc_dataset.py
index d8ef7d5a..ffd0bd01 100644
--- a/models/svc/base/svc_dataset.py
+++ b/models/svc/base/svc_dataset.py
@@ -317,7 +317,7 @@ def __init__(self, args, cfg, infer_type):
         target_singer = args.target_singer
         self.cfg = cfg
         self.trans_key = args.trans_key
-        assert type(target_singer) == str
+        assert isinstance(target_singer, str)
         self.target_singer = target_singer.split("_")[-1]
         self.target_dataset = target_singer.replace(
@@ -481,9 +481,9 @@ def __getitem__(self, index):
         if self.trans_key:
             try:
                 self.trans_key = int(self.trans_key)
-            except:
+            except Exception:  # TODO: better exception handling
                 pass
-            if type(self.trans_key) == int:
+            if isinstance(self.trans_key, int):
                 frame_pitch = transpose_key(frame_pitch, self.trans_key)
             elif self.trans_key:
                 assert self.target_singer
diff --git a/models/tta/autoencoder/autoencoder_dataset.py b/models/tta/autoencoder/autoencoder_dataset.py
index 8b9b4bdf..539eb486 100644
--- a/models/tta/autoencoder/autoencoder_dataset.py
+++ b/models/tta/autoencoder/autoencoder_dataset.py
@@ -77,9 +77,6 @@ def __getitem__(self, index):
     def __len__(self):
         return len(self.metadata)
 
-    def __len__(self):
-        return len(self.metadata)
-
 
 class AutoencoderKLCollator(BaseOfflineCollator):
     def __init__(self, cfg):
diff --git a/models/tta/autoencoder/autoencoder_trainer.py b/models/tta/autoencoder/autoencoder_trainer.py
index 1faf02fa..862003c0 100644
--- a/models/tta/autoencoder/autoencoder_trainer.py
+++ b/models/tta/autoencoder/autoencoder_trainer.py
@@ -91,7 +91,7 @@ def build_criterion(self):
         return AutoencoderLossWithDiscriminator(self.cfg.model.loss)
 
     def get_state_dict(self):
-        if self.scheduler != None:
+        if self.scheduler is not None:
             state_dict = {
                 "model": self.model.state_dict(),
                 "optimizer_ae": self.optimizer["opt_ae"].state_dict(),
@@ -119,7 +119,7 @@ def load_model(self, checkpoint):
         self.model.load_state_dict(checkpoint["model"])
         self.optimizer["opt_ae"].load_state_dict(checkpoint["optimizer_ae"])
         self.optimizer["opt_disc"].load_state_dict(checkpoint["optimizer_disc"])
-        if self.scheduler != None:
+        if self.scheduler is not None:
             self.scheduler.load_state_dict(checkpoint["scheduler"])
 
     def build_model(self):
diff --git a/models/tta/ldm/audioldm_trainer.py b/models/tta/ldm/audioldm_trainer.py
index bd4a2418..6b67f871 100644
--- a/models/tta/ldm/audioldm_trainer.py
+++ b/models/tta/ldm/audioldm_trainer.py
@@ -15,7 +15,6 @@
 from torch.utils.data import ConcatDataset, DataLoader
 from transformers import T5EncoderModel
-from diffusers import DDPMScheduler
 
 
 class AudioLDMTrainer(BaseTrainer):
@@ -122,7 +121,7 @@ def build_criterion(self):
         return criterion
 
     def get_state_dict(self):
-        if self.scheduler != None:
+        if self.scheduler is not None:
             state_dict = {
                 "model": self.model.state_dict(),
                 "optimizer": self.optimizer.state_dict(),
@@ -147,7 +146,7 @@ def load_model(self, checkpoint):
 
         self.model.load_state_dict(checkpoint["model"])
         self.optimizer.load_state_dict(checkpoint["optimizer"])
-        if self.scheduler != None:
+        if self.scheduler is not None:
             self.scheduler.load_state_dict(checkpoint["scheduler"])
 
     def build_model(self):
diff --git a/models/tta/ldm/inference_utils/vocoder.py b/models/tta/ldm/inference_utils/vocoder.py
index 19e17c1e..2e07e65a 100644
--- a/models/tta/ldm/inference_utils/vocoder.py
+++ b/models/tta/ldm/inference_utils/vocoder.py
@@ -216,7 +216,7 @@ class DiscriminatorP(torch.nn.Module):
     def __init__(self, period, kernel_size=5, stride=3, use_spectral_norm=False):
         super(DiscriminatorP, self).__init__()
         self.period = period
-        norm_f = weight_norm if use_spectral_norm == False else spectral_norm
+        norm_f = weight_norm if use_spectral_norm is False else spectral_norm
         self.convs = nn.ModuleList(
             [
                 norm_f(
@@ -314,7 +314,7 @@ def forward(self, y, y_hat):
 
 class DiscriminatorS(torch.nn.Module):
     def __init__(self, use_spectral_norm=False):
         super(DiscriminatorS, self).__init__()
-        norm_f = weight_norm if use_spectral_norm == False else spectral_norm
+        norm_f = weight_norm if use_spectral_norm is False else spectral_norm
         self.convs = nn.ModuleList(
             [
                 norm_f(Conv1d(1, 128, 15, 1, padding=7)),
diff --git a/models/tts/base/tts_inferece.py b/models/tts/base/tts_inferece.py
index f49ace0f..9d7f5d2c 100644
--- a/models/tts/base/tts_inferece.py
+++ b/models/tts/base/tts_inferece.py
@@ -158,7 +158,7 @@ def _load_model(
             assert checkpoint_dir is not None
             # Load the latest accelerator state dicts
             ls = [
-                str(i) for i in Path(checkpoint_dir).glob("*") if not "audio" in str(i)
+                str(i) for i in Path(checkpoint_dir).glob("*") if "audio" not in str(i)
             ]
             ls.sort(key=lambda x: int(x.split("_")[-3].split("-")[-1]), reverse=True)
             checkpoint_path = ls[0]
diff --git a/models/tts/base/tts_trainer.py b/models/tts/base/tts_trainer.py
index c565ab2f..aa669ece 100644
--- a/models/tts/base/tts_trainer.py
+++ b/models/tts/base/tts_trainer.py
@@ -9,7 +9,6 @@
 import torch
 import time
 from pathlib import Path
-import torch
 from tqdm import tqdm
 import re
 import logging
@@ -175,7 +174,7 @@ def _check_resume(self):
                 self.args.resume_type = "finetune"
                 checkpoint_dir = self.args.ar_model_ckpt_dir
                 self.logger.info(
-                    f"Training NAR model at stage 2 using the checkpoint of AR model at stage 1."
+                    "Training NAR model at stage 2 using the checkpoint of AR model at stage 1."
                 )
                 self.logger.info(f"Resuming from checkpoint: {checkpoint_dir}")
diff --git a/models/tts/naturalspeech2/ns2_dataset.py b/models/tts/naturalspeech2/ns2_dataset.py
index df10f3fa..99a02e69 100644
--- a/models/tts/naturalspeech2/ns2_dataset.py
+++ b/models/tts/naturalspeech2/ns2_dataset.py
@@ -36,7 +36,7 @@ def __init__(self, cfg, dataset, is_valid=False):
 
         self.cfg = cfg
 
-        assert cfg.preprocess.use_mel == False
+        assert cfg.preprocess.use_mel is False
         if cfg.preprocess.use_mel:
             self.utt2melspec_path = {}
             for utt_info in self.metadata:
@@ -52,7 +52,7 @@ def __init__(self, cfg, dataset, is_valid=False):
                     uid + ".npy",
                 )
 
-        assert cfg.preprocess.use_code == True
+        assert cfg.preprocess.use_code is True
         if cfg.preprocess.use_code:
             self.utt2code_path = {}
             for utt_info in self.metadata:
@@ -68,7 +68,7 @@ def __init__(self, cfg, dataset, is_valid=False):
                     uid + ".npy",
                 )
 
-        assert cfg.preprocess.use_spkid == True
+        assert cfg.preprocess.use_spkid is True
         if cfg.preprocess.use_spkid:
             self.utt2spkid = {}
             for utt_info in self.metadata:
@@ -78,7 +78,7 @@ def __init__(self, cfg, dataset, is_valid=False):
 
                 self.utt2spkid[utt] = utt_info["speaker"]
 
-        assert cfg.preprocess.use_pitch == True
+        assert cfg.preprocess.use_pitch is True
         if cfg.preprocess.use_pitch:
             self.utt2pitch_path = {}
             for utt_info in self.metadata:
@@ -94,7 +94,7 @@ def __init__(self, cfg, dataset, is_valid=False):
                     uid + ".npy",
                 )
 
-        assert cfg.preprocess.use_duration == True
+        assert cfg.preprocess.use_duration is True
         if cfg.preprocess.use_duration:
             self.utt2duration_path = {}
             for utt_info in self.metadata:
@@ -110,7 +110,7 @@ def __init__(self, cfg, dataset, is_valid=False):
                     uid + ".npy",
                 )
 
-        assert cfg.preprocess.use_phone == True
+        assert cfg.preprocess.use_phone is True
         if cfg.preprocess.use_phone:
             self.utt2phone = {}
             for utt_info in self.metadata:
@@ -120,7 +120,7 @@ def __init__(self, cfg, dataset, is_valid=False):
 
                 self.utt2phone[utt] = utt_info["phones"]
 
-        assert cfg.preprocess.use_len == True
+        assert cfg.preprocess.use_len is True
         if cfg.preprocess.use_len:
             self.utt2len = {}
             for utt_info in self.metadata:
diff --git a/models/tts/naturalspeech2/wavenet.py b/models/tts/naturalspeech2/wavenet.py
index bc964ea6..70215a0a 100644
--- a/models/tts/naturalspeech2/wavenet.py
+++ b/models/tts/naturalspeech2/wavenet.py
@@ -98,7 +98,7 @@ def forward(self, x, x_mask, cond, diffusion_step, spk_query_emb):
         cond = self.cond_proj(cond)  # (B, 2*d, T)
 
         y = x + diffusion_step
-        if x_mask != None:
+        if x_mask is not None:
             y = y * x_mask.to(y.dtype)[:, None, :]  # (B, 2*d, T)
 
         if self.has_cattn:
@@ -120,7 +120,7 @@ def forward(self, x, x_mask, cond, diffusion_step, spk_query_emb):
 
         residual, skip = torch.chunk(y, 2, dim=1)
 
-        if x_mask != None:
+        if x_mask is not None:
             residual = residual * x_mask.to(y.dtype)[:, None, :]
             skip = skip * x_mask.to(y.dtype)[:, None, :]
diff --git a/models/tts/valle/valle_dataset.py b/models/tts/valle/valle_dataset.py
index 6e605508..daebf244 100644
--- a/models/tts/valle/valle_dataset.py
+++ b/models/tts/valle/valle_dataset.py
@@ -35,7 +35,7 @@ def __init__(self, cfg, dataset, is_valid=False):
 
         assert isinstance(dataset, str)
 
-        assert cfg.preprocess.use_acoustic_token == True
+        assert cfg.preprocess.use_acoustic_token is True
         if cfg.preprocess.use_acoustic_token:
             self.utt2acousticToken_path = {}
             for utt_info in self.metadata:
@@ -121,7 +121,7 @@ def __init__(self, args, cfg):
         super().__init__(args, cfg)
 
         # prepare data
-        assert cfg.preprocess.use_acoustic_token == True
+        assert cfg.preprocess.use_acoustic_token is True
         if cfg.preprocess.use_acoustic_token:
             self.utt2acousticToken = {}
             for utt_info in self.metadata:
diff --git a/models/vocoders/diffusion/diffusion_vocoder_inference.py b/models/vocoders/diffusion/diffusion_vocoder_inference.py
index f9a1afbc..3131e75a 100644
--- a/models/vocoders/diffusion/diffusion_vocoder_inference.py
+++ b/models/vocoders/diffusion/diffusion_vocoder_inference.py
@@ -87,10 +87,10 @@ def synthesis_audios(cfg, model, mels, f0s=None, batch_size=None, fast_inference
     # Pad the given list into tensors
     mel_batches, mel_frames = pad_mels_to_tensors(mels, batch_size)
 
-    if f0s != None:
+    if f0s is not None:
         f0_batches = pad_f0_to_tensors(f0s, batch_size)
 
-    if f0s == None:
+    if f0s is None:
         for mel_batch, mel_frame in zip(mel_batches, mel_frames):
             for i in range(mel_batch.shape[0]):
                 mel = mel_batch[i]
diff --git a/models/vocoders/dsp/world/world.py b/models/vocoders/dsp/world/world.py
index 59f28e8e..92d2595c 100644
--- a/models/vocoders/dsp/world/world.py
+++ b/models/vocoders/dsp/world/world.py
@@ -15,7 +15,6 @@
 import pickle
 import json
 import re
-import torchaudio
 
 from cuhkszsvc.configs.config_parse import get_wav_path, get_wav_file_path
 from utils.io import has_existed
diff --git a/models/vocoders/gan/discriminator/mpd.py b/models/vocoders/gan/discriminator/mpd.py
index f28711d1..6bbe9d71 100644
--- a/models/vocoders/gan/discriminator/mpd.py
+++ b/models/vocoders/gan/discriminator/mpd.py
@@ -8,7 +8,6 @@
 import torch.nn as nn
 from torch.nn import Conv2d, Conv1d
 from torch.nn.utils import weight_norm, spectral_norm
-from torch import nn
 from modules.vocoder_blocks import *
 
 LRELU_SLOPE = 0.1
@@ -19,7 +18,7 @@ def __init__(self, cfg, period, kernel_size=5, stride=3, use_spectral_norm=False
         super(DiscriminatorP, self).__init__()
         self.period = period
         self.d_mult = cfg.model.mpd.discriminator_channel_mult_factor
-        norm_f = weight_norm if use_spectral_norm == False else spectral_norm
+        norm_f = weight_norm if use_spectral_norm is False else spectral_norm
         self.convs = nn.ModuleList(
             [
                 norm_f(
@@ -130,7 +129,7 @@ def __init__(self, period, kernel_size=5, stride=3, use_spectral_norm=False):
         super(DiscriminatorP_vits, self).__init__()
         self.period = period
         self.use_spectral_norm = use_spectral_norm
-        norm_f = weight_norm if use_spectral_norm == False else spectral_norm
+        norm_f = weight_norm if use_spectral_norm is False else spectral_norm
         self.convs = nn.ModuleList(
             [
                 norm_f(
@@ -207,7 +206,7 @@ def forward(self, x):
 class DiscriminatorS(torch.nn.Module):
     def __init__(self, use_spectral_norm=False):
         super(DiscriminatorS, self).__init__()
-        norm_f = weight_norm if use_spectral_norm == False else spectral_norm
+        norm_f = weight_norm if use_spectral_norm is False else spectral_norm
         self.convs = nn.ModuleList(
             [
                 norm_f(Conv1d(1, 16, 15, 1, padding=7)),
diff --git a/models/vocoders/gan/discriminator/mrd.py b/models/vocoders/gan/discriminator/mrd.py
index 38ee80bf..6c5a1cc4 100644
--- a/models/vocoders/gan/discriminator/mrd.py
+++ b/models/vocoders/gan/discriminator/mrd.py
@@ -7,7 +7,6 @@
 import torch.nn.functional as F
 import torch.nn as nn
 from torch.nn.utils import weight_norm, remove_weight_norm, spectral_norm
-from torch import nn
 
 LRELU_SLOPE = 0.1
 
@@ -27,7 +26,7 @@ def __init__(self, cfg, resolution):
         self.lrelu_slope = LRELU_SLOPE
 
         norm_f = (
-            weight_norm if cfg.model.mrd.use_spectral_norm == False else spectral_norm
+            weight_norm if cfg.model.mrd.use_spectral_norm is False else spectral_norm
         )
         if cfg.model.mrd.mrd_override:
             print(
@@ -37,7 +36,7 @@ def __init__(self, cfg, resolution):
             )
             norm_f = (
                 weight_norm
-                if cfg.model.mrd.mrd_use_spectral_norm == False
+                if cfg.model.mrd.mrd_use_spectral_norm is False
                 else spectral_norm
             )
         self.d_mult = cfg.model.mrd.discriminator_channel_mult_factor
diff --git a/models/vocoders/gan/discriminator/msd.py b/models/vocoders/gan/discriminator/msd.py
index 4c1556ae..6e28984a 100644
--- a/models/vocoders/gan/discriminator/msd.py
+++ b/models/vocoders/gan/discriminator/msd.py
@@ -8,7 +8,6 @@
 import torch.nn as nn
 from torch.nn import Conv1d, AvgPool1d
 from torch.nn.utils import weight_norm, spectral_norm
-from torch import nn
 
 from modules.vocoder_blocks import *
 
@@ -19,7 +18,7 @@ class DiscriminatorS(nn.Module):
     def __init__(self, use_spectral_norm=False):
         super(DiscriminatorS, self).__init__()
-        norm_f = weight_norm if use_spectral_norm == False else spectral_norm
+        norm_f = weight_norm if use_spectral_norm is False else spectral_norm
 
         self.convs = nn.ModuleList(
             [
diff --git a/models/vocoders/gan/discriminator/mssbcqtd.py b/models/vocoders/gan/discriminator/mssbcqtd.py
index 213de544..b7eb8bd2 100644
--- a/models/vocoders/gan/discriminator/mssbcqtd.py
+++ b/models/vocoders/gan/discriminator/mssbcqtd.py
@@ -6,7 +6,6 @@
 import torch
 import torch.nn.functional as F
 import torch.nn as nn
-from torch import nn
 
 from modules.vocoder_blocks import *
 from einops import rearrange
diff --git a/models/vocoders/gan/gan_vocoder_inference.py b/models/vocoders/gan/gan_vocoder_inference.py
index 4354d5b5..5395a670 100644
--- a/models/vocoders/gan/gan_vocoder_inference.py
+++ b/models/vocoders/gan/gan_vocoder_inference.py
@@ -19,10 +19,10 @@ def vocoder_inference(cfg, model, mels, f0s=None, device=None, fast_inference=Fa
     with torch.no_grad():
         mels = mels.to(device)
 
-        if f0s != None:
+        if f0s is not None:
             f0s = f0s.to(device)
 
-        if f0s == None and not cfg.preprocess.extract_amplitude_phase:
+        if f0s is None and not cfg.preprocess.extract_amplitude_phase:
             output = model.forward(mels)
         elif cfg.preprocess.extract_amplitude_phase:
             (
@@ -52,10 +52,10 @@ def synthesis_audios(cfg, model, mels, f0s=None, batch_size=None, fast_inference
     # Pad the given list into tensors
     mel_batches, mel_frames = pad_mels_to_tensors(mels, batch_size)
 
-    if f0s != None:
+    if f0s is not None:
         f0_batches = pad_f0_to_tensors(f0s, batch_size)
 
-    if f0s == None:
+    if f0s is None:
         for mel_batch, mel_frame in zip(mel_batches, mel_frames):
             for i in range(mel_batch.shape[0]):
                 mel = mel_batch[i]
diff --git a/models/vocoders/gan/generator/melgan.py b/models/vocoders/gan/generator/melgan.py
index d13c5fe6..b6ecde2f 100644
--- a/models/vocoders/gan/generator/melgan.py
+++ b/models/vocoders/gan/generator/melgan.py
@@ -3,9 +3,6 @@
 # This source code is licensed under the MIT license found in the
 # LICENSE file in the root directory of this source tree.
 
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
diff --git a/models/vocoders/vocoder_inference.py b/models/vocoders/vocoder_inference.py
index 95e354c5..e160b6a2 100644
--- a/models/vocoders/vocoder_inference.py
+++ b/models/vocoders/vocoder_inference.py
@@ -281,7 +281,7 @@ def _load_model(self, checkpoint_dir, from_multi_gpu=False):
             ls = [
                 str(i)
                 for i in Path(checkpoint_dir).glob("*")
-                if not "audio" in str(i)
+                if "audio" not in str(i)
             ]
             ls.sort(
                 key=lambda x: int(x.split("/")[-1].split("_")[0].split("-")[-1]),
@@ -443,7 +443,7 @@ def load_nnvocoder(
     else:
         # Load from accelerator state dict
         weights_file = os.path.join(weights_file, "checkpoint")
-        ls = [str(i) for i in Path(weights_file).glob("*") if not "audio" in str(i)]
+        ls = [str(i) for i in Path(weights_file).glob("*") if "audio" not in str(i)]
         ls.sort(key=lambda x: int(x.split("_")[-3].split("-")[-1]), reverse=True)
         checkpoint_path = ls[0]
         accelerator = accelerate.Accelerator()
@@ -461,7 +461,7 @@ def tensorize(data, device, n_samples):
     """
     data: a list of numpy array
     """
-    assert type(data) == list
+    assert isinstance(data, list)
     if n_samples:
         data = data[:n_samples]
     data = [torch.as_tensor(x, device=device) for x in data]
diff --git a/modules/naturalpseech2/transformers.py b/modules/naturalpseech2/transformers.py
index d094c465..2eea77fc 100644
--- a/modules/naturalpseech2/transformers.py
+++ b/modules/naturalpseech2/transformers.py
@@ -130,7 +130,7 @@ def forward(self, x, key_padding_mask, conditon=None):
         else:
             x = self.ln_1(x)
 
-        if key_padding_mask != None:
+        if key_padding_mask is not None:
             key_padding_mask_input = ~(key_padding_mask.bool())
         else:
             key_padding_mask_input = None
@@ -187,7 +187,7 @@ def __init__(
         )
         self.use_cln = use_cln if use_cln is not None else cfg.use_cln
 
-        if enc_emb_tokens != None:
+        if enc_emb_tokens is not None:
             self.use_enc_emb = True
             self.enc_emb_tokens = enc_emb_tokens
         else:
diff --git a/modules/wenet_extractor/efficient_conformer/encoder.py b/modules/wenet_extractor/efficient_conformer/encoder.py
index 8b4a91b8..abf799b5 100755
--- a/modules/wenet_extractor/efficient_conformer/encoder.py
+++ b/modules/wenet_extractor/efficient_conformer/encoder.py
@@ -158,11 +158,13 @@ def __init__(
 
         # efficient conformer configs
         self.stride_layer_idx = (
-            [stride_layer_idx] if type(stride_layer_idx) == int else stride_layer_idx
+            [stride_layer_idx]
+            if isinstance(stride_layer_idx, int)
+            else stride_layer_idx
         )
-        self.stride = [stride] if type(stride) == int else stride
+        self.stride = [stride] if isinstance(stride, int) else stride
         self.group_layer_idx = (
-            [group_layer_idx] if type(group_layer_idx) == int else group_layer_idx
+            [group_layer_idx] if isinstance(group_layer_idx, int) else group_layer_idx
         )
         self.grouped_size = group_size  # group size of every GroupedAttention layer
 
diff --git a/modules/wenet_extractor/squeezeformer/encoder.py b/modules/wenet_extractor/squeezeformer/encoder.py
index a51568bc..455c43d1 100755
--- a/modules/wenet_extractor/squeezeformer/encoder.py
+++ b/modules/wenet_extractor/squeezeformer/encoder.py
@@ -108,10 +108,10 @@ def __init__(
         super(SqueezeformerEncoder, self).__init__()
         self.global_cmvn = global_cmvn
         self.reduce_idx: Optional[Union[int, List[int]]] = (
-            [reduce_idx] if type(reduce_idx) == int else reduce_idx
+            [reduce_idx] if isinstance(reduce_idx, int) else reduce_idx
         )
         self.recover_idx: Optional[Union[int, List[int]]] = (
-            [recover_idx] if type(recover_idx) == int else recover_idx
+            [recover_idx] if isinstance(recover_idx, int) else recover_idx
         )
         self.check_ascending_list()
         if reduce_idx is None:
diff --git a/preprocessors/bigdata.py b/preprocessors/bigdata.py
index da541191..f1d47743 100644
--- a/preprocessors/bigdata.py
+++ b/preprocessors/bigdata.py
@@ -5,7 +5,6 @@
 
 import os
 import json
-import os
 from collections import defaultdict
 from tqdm import tqdm
diff --git a/preprocessors/csd.py b/preprocessors/csd.py
index 645a8b3d..4fd0462e 100644
--- a/preprocessors/csd.py
+++ b/preprocessors/csd.py
@@ -5,8 +5,6 @@
 
 import os
 import json
-import os
-import glob
 from tqdm import tqdm
 import torchaudio
 import pandas as pd
diff --git a/preprocessors/ljspeech.py b/preprocessors/ljspeech.py
index f3c19be8..b0945955 100644
--- a/preprocessors/ljspeech.py
+++ b/preprocessors/ljspeech.py
@@ -98,9 +98,9 @@ def get_uid2utt(ljspeech_path, dataset, cfg):
 def split_dataset(
     lines, test_rate=0.05, valid_rate=0.05, test_size=None, valid_size=None
 ):
-    if test_size == None:
+    if test_size is None:
         test_size = int(len(lines) * test_rate)
-    if valid_size == None:
+    if valid_size is None:
         valid_size = int(len(lines) * valid_rate)
     random.shuffle(lines)
diff --git a/preprocessors/opera.py b/preprocessors/opera.py
index c421fbb8..1581e326 100644
--- a/preprocessors/opera.py
+++ b/preprocessors/opera.py
@@ -5,7 +5,6 @@
 
 import os
 import json
-import os
 from tqdm import tqdm
 import torchaudio
 from glob import glob
diff --git a/processors/content_extractor.py b/processors/content_extractor.py
index 34b54917..d49cf710 100644
--- a/processors/content_extractor.py
+++ b/processors/content_extractor.py
@@ -129,7 +129,7 @@ def offline_resolution_transformation(self, content, target_len):
         try:
             with open(err_log_dir, "r") as f:
                 err_num = int(f.read())
-        except:
+        except Exception:  # TODO: better exception handling
             with open(err_log_dir, "w") as f:
                 f.write("0")
             err_num = 0
@@ -154,7 +154,7 @@ def log_for_ReTrans(self, err):
         try:
             with open(err_log_dir, "r") as f:
                 err_num = int(f.read())
-        except:
+        except Exception:  # TODO: better exception handling
             with open(err_log_dir, "w") as f:
                 f.write("0")
             err_num = 0
@@ -249,7 +249,7 @@ def save_feature(self, utt, content_feature):
             content_feature (tensor): content feature of one utterance
         """
         uid = utt["Uid"]
-        assert self.extractor_type != None
+        assert self.extractor_type is not None
         out_dir = os.path.join(
             self.cfg.preprocess.processed_dir, utt["Dataset"], self.extractor_type
         )
@@ -315,7 +315,7 @@ def __init__(self, cfg):
         super(ContentvecExtractor, self).__init__(cfg, extractor_type="contentvec")
 
     def load_model(self):
-        assert self.model == None
+        assert self.model is None
         # Load model
         ckpt_path = self.cfg.preprocess.contentvec_file
         print("Load Contentvec Model...")
@@ -445,8 +445,8 @@ def __init__(self, cfg):
         self.preprocessor = None
 
     def load_model(self):
-        assert self.model == None
-        assert self.preprocessor == None
+        assert self.model is None
+        assert self.preprocessor is None
 
         print("Loading MERT Model: ...", self.cfg.preprocess.mert_model)
diff --git a/processors/data_augment.py b/processors/data_augment.py
index 2fc18336..e56459a5 100644
--- a/processors/data_augment.py
+++ b/processors/data_augment.py
@@ -278,10 +278,10 @@ def wav_manipulation(
     ], "aug_type must be one of formant_shift, pitch_shift, time_stretch, equalizer"
 
     assert aug_type == "None" or (
-        formant_shift == False
-        and pitch_shift == False
-        and time_stretch == False
-        and equalizer == False
+        formant_shift is False
+        and pitch_shift is False
+        and time_stretch is False
+        and equalizer is False
     ), "if aug_type is specified, other argument must be False"
 
     if aug_type != "None":
diff --git a/utils/HyperParams/hps.py b/utils/HyperParams/hps.py
index cc6f474c..737b71c4 100644
--- a/utils/HyperParams/hps.py
+++ b/utils/HyperParams/hps.py
@@ -14,7 +14,7 @@ class HyperParams:
     def __init__(self, **kwargs):
         for k, v in kwargs.items():
-            if type(v) == dict:
+            if isinstance(v, dict):
                 v = HyperParams(**v)
             self[k] = v
diff --git a/utils/data_utils.py b/utils/data_utils.py
index 8c0bc2ff..756afd86 100644
--- a/utils/data_utils.py
+++ b/utils/data_utils.py
@@ -149,7 +149,7 @@ def load_frame_pitch(
     pitch_statistic = []
     for utt_info in meta_data:
         utt = utt_info["Dataset"] + "_" + utt_info["Uid"]
-        if not utt2spk[utt] in spk2utt:
+        if utt2spk[utt] not in spk2utt:
             spk2utt[utt2spk[utt]] = []
         spk2utt[utt2spk[utt]].append(utt)
@@ -242,7 +242,7 @@ def load_phone_pitch(
     pitch_statistic = []
     for utt_info in tqdm(meta_data):
         utt = utt_info["Dataset"] + "_" + utt_info["Uid"]
-        if not utt2spk[utt] in spk2utt:
+        if utt2spk[utt] not in spk2utt:
             spk2utt[utt2spk[utt]] = []
         spk2utt[utt2spk[utt]].append(utt)
@@ -364,7 +364,7 @@ def load_energy(
     energy_statistic = []
     for utt_info in meta_data:
         utt = utt_info["Dataset"] + "_" + utt_info["Uid"]
-        if not utt2spk[utt] in spk2utt:
+        if utt2spk[utt] not in spk2utt:
             spk2utt[utt2spk[utt]] = []
         spk2utt[utt2spk[utt]].append(utt)
@@ -438,7 +438,7 @@ def load_frame_energy(
     energy_statistic = []
     for utt_info in meta_data:
         utt = utt_info["Dataset"] + "_" + utt_info["Uid"]
-        if not utt2spk[utt] in spk2utt:
+        if utt2spk[utt] not in spk2utt:
             spk2utt[utt2spk[utt]] = []
         spk2utt[utt2spk[utt]].append(utt)
diff --git a/utils/f0.py b/utils/f0.py
index 169b1403..dfefe1c9 100644
--- a/utils/f0.py
+++ b/utils/f0.py
@@ -91,7 +91,7 @@ def get_f0_features_using_pyin(audio, cfg):
         hop_length=cfg.hop_size,
     )
    # Set nan to 0; voiced_flag is a NumPy array, so use an elementwise mask
    # ("is False" would compare identity and silently stop masking anything)
-    f0[voiced_flag == False] = 0
+    f0[~voiced_flag] = 0
     return f0
diff --git a/utils/hubert.py b/utils/hubert.py
index 84b509fb..47774d90 100644
--- a/utils/hubert.py
+++ b/utils/hubert.py
@@ -11,7 +11,6 @@
 import numpy as np
 from fairseq import checkpoint_utils
 from tqdm import tqdm
-import torch
 
 
 def load_hubert_model(hps):
diff --git a/utils/stft.py b/utils/stft.py
index bcec4c84..14b27d62 100644
--- a/utils/stft.py
+++ b/utils/stft.py
@@ -10,10 +10,7 @@
 from librosa.util import pad_center, tiny
 from librosa.filters import mel as librosa_mel_fn
 
-import torch
-import numpy as np
 import librosa.util as librosa_util
-from scipy.signal import get_window
 
 
 def window_sumsquare(
diff --git a/utils/util.py b/utils/util.py
index b7eaf1aa..e26b017f 100644
--- a/utils/util.py
+++ b/utils/util.py
@@ -14,13 +14,12 @@
 
 import json5
 import numpy as np
-import glob
 from torch.nn import functional as F
 
 try:
     from ruamel.yaml import YAML as yaml
-except:
+except ImportError:
     from ruamel_yaml import YAML as yaml
 
 import torch
@@ -62,7 +61,7 @@ def pad_f0_to_tensors(f0s, batched=None):
     # Initialize
     tensors = []
 
-    if batched == None:
+    if batched is None:
         # Get the max frame for padding
         size = -1
         for f0 in f0s:
@@ -124,7 +123,7 @@ def pad_mels_to_tensors(mels, batched=None):
     mel_frames = []
 
     # Split mel-specs into batches to avoid cuda memory exceed
-    if batched == None:
+    if batched is None:
         # Get the max frame for padding
         size = -1
         for mel in mels:
@@ -393,7 +392,7 @@ def override_config(base_config, new_config):
         dict: updated configuration dict
     """
     for k, v in new_config.items():
-        if type(v) == dict:
+        if isinstance(v, dict):
             if k not in base_config.keys():
                 base_config[k] = {}
             base_config[k] = override_config(base_config[k], v)
@@ -413,7 +412,7 @@ def get_lowercase_keys_config(cfg):
     """
     updated_cfg = dict()
     for k, v in cfg.items():
-        if type(v) == dict:
+        if isinstance(v, dict):
             v = get_lowercase_keys_config(v)
         updated_cfg[k.lower()] = v
     return updated_cfg
@@ -475,7 +474,7 @@ def save_config(save_path, cfg):
 class JsonHParams:
     def __init__(self, **kwargs):
         for k, v in kwargs.items():
-            if type(v) == dict:
+            if isinstance(v, dict):
                 v = JsonHParams(**v)
             self[k] = v
diff --git a/utils/world.py b/utils/world.py
index ce5f61bd..40d75b91 100644
--- a/utils/world.py
+++ b/utils/world.py
@@ -13,7 +13,6 @@
 import os
 from tqdm import tqdm
 import pickle
-import torchaudio
 
 
 def get_mcep_params(fs):