diff --git a/CMakeLists.txt b/CMakeLists.txt index f8461547..aa021eb8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required (VERSION 3.1) +cmake_minimum_required(VERSION 3.1) if(NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE Release) @@ -7,13 +7,19 @@ endif() set(PROJECT_NAME PRSice) project(${PROJECT_NAME}) set(CMAKE_CXX_FLAGS "-g -Wall") - +include(CheckCXXCompilerFlag) +CHECK_CXX_COMPILER_FLAG("-std=c++17" COMPILER_SUPPORTS_CXX17) +if(COMPILER_SUPPORTS_CXX17) + message(STATUS "${COMPILER_SUPPORTS_CXX17}") +else(COMPILER_SUPPORTS_CXX17) + message(FATAL_ERROR "${COMPILER_SUPPORTS_CXX17}") +endif(COMPILER_SUPPORTS_CXX17) option(march "Use --march." OFF) if(march) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native") endif() # Use C++11 -set(CMAKE_CXX_STANDARD 11) +set(CMAKE_CXX_STANDARD 17) # Require (at least) it set(CMAKE_CXX_STANDARD_REQUIRED ON) # Don't use e.g. GNU extension (like -std=gnu++11) for portability diff --git a/PRSice.R b/PRSice.R index bd39171a..e9fd8c99 100755 --- a/PRSice.R +++ b/PRSice.R @@ -19,7 +19,7 @@ In_Regression <- R2 <- print.p <- R <- P <- value <- Phenotype <- Set <- PRS.R2 <- LCI <- UCI <- quant.ref <- NULL -r.version <- "2.2.12" +r.version <- "2.2.13" # Help Messages -------------------------------------- help_message <- "usage: Rscript PRSice.R [options] <-b base_file> <-t target_file> <--prsice prsice_location>\n @@ -298,7 +298,7 @@ help_message <- --keep-ambig Keep ambiguous SNPs. 
Only use this option\n if you are certain that the base and target\n has the same A1 and A2 alleles\n - --keep-ambig-as-is Will not flip ambiguous SNPs when they are kept.\n + --flip-ambig Force flipping of ambiguous SNPs when they are kept.\n Will also set the --keep-ambig flag\n --id-delim This parameter causes sample IDs to be parsed as\n ; the default delimiter\n @@ -541,7 +541,7 @@ option_list <- list( make_option(c("--id-delim"), type = "character"), make_option(c("--logit-perm"), action = "store_true", dest = "logit_perm"), make_option(c("--keep-ambig"), action = "store_true", dest = "keep_ambig"), - make_option(c("--keep-ambig-as-is"), action = "store_true", dest = "keep_ambig_as_is"), + make_option(c("--flip-ambig"), action = "store_true", dest = "flip_ambig"), make_option(c("--memory"), type = "character", dest="memory"), make_option(c("-o", "--out"), type = "character", default = "PRSice"), make_option(c("--perm"), type = "numeric"), @@ -691,7 +691,7 @@ flags <- "ignore-fid", "index", "keep-ambig", - "keep-ambig-as-is", + "flip-ambig", "logit-perm", "no-clump", "no-default", diff --git a/Release/packing.sh b/Release/packing.sh index ceb4ac1e..22887812 100644 --- a/Release/packing.sh +++ b/Release/packing.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +rm *.zip zip PRSice_linux.zip PRSice.R TOY* PRSice_linux zip PRSice_mac.zip PRSice.R TOY* PRSice_mac zip PRSice_win64.zip PRSice.R TOY* PRSice_win64.exe diff --git a/helpers/help_messages.txt b/helpers/help_messages.txt index 9beb49df..1e8b0237 100644 --- a/helpers/help_messages.txt +++ b/helpers/help_messages.txt @@ -2,23 +2,23 @@ m_help_message = "usage: PRSice [options] <-b base_file> <-t target_file>\n" // Base file "\nBase File:\n" - " --A1 Column header containing allele 1 (effective allele)\n" + " --a1 Column header containing allele 1 (effective allele)\n" " Default: A1\n" - " --A2 Column header containing allele 2 (non-effective allele)\n" + " --a2 Column header containing allele 2 (non-effective 
allele)\n" " Default: A2\n" " --base | -b Base association file\n" " --base-info Base INFO score filtering. Format should be\n" - " ,. SNPs with info \n" + " :. SNPs with info \n" " score less than will be ignored\n" " Column name default: INFO\n" " Threshold default: 0.9\n" " --base-maf Base MAF filtering. Format should be\n" - " ,. SNPs with maf\n" + " :. SNPs with maf\n" " less than will be ignored. An\n" " additional column can also be added (e.g.\n" " also filter MAF for cases), using the\n" " following format:\n" - " ,:,\n" + " :,:\n" " --beta Whether the test statistic is in the form of \n" " BETA or OR. If set, test statistic is assume\n" " to be in the form of BETA. Mutually exclusive\n" @@ -259,8 +259,6 @@ m_help_message = " --keep-ambig Keep ambiguous SNPs. Only use this option\n" " if you are certain that the base and target\n" " has the same A1 and A2 alleles\n" - " --keep-ambig-as-is Will not flip ambiguous SNPs when they are kept.\n" - " Will also set the --keep-ambig flag\n" " --logit-perm When performing permutation, still use logistic\n" " regression instead of linear regression. 
This\n" " will substantially slow down PRSice\n" diff --git a/inc/commander.hpp b/inc/commander.hpp index 778c4802..06a42352 100644 --- a/inc/commander.hpp +++ b/inc/commander.hpp @@ -43,13 +43,13 @@ #endif const std::string version = "2.2.13"; -const std::string date = "2020-02-27"; +const std::string date = "2020-03-10"; class Commander { public: Commander(); virtual ~Commander(); - bool init(int argc, char* argv[], Reporter& reporter); + bool process_command(int argc, char* argv[], Reporter& reporter); std::string get_base_name() const { return misc::remove_extension( @@ -93,7 +93,6 @@ class Commander bool keep_ambig() const { return m_keep_ambig; } bool nonfounders() const { return m_include_nonfounders; } bool ultra_aggressive() const { return m_ultra_aggressive; } - bool ambig_no_flip() const { return m_ambig_no_flip; } protected: const std::vector supported_types = {"bed", "ped", "bgen"}; @@ -105,7 +104,6 @@ class Commander std::string m_help_message; size_t m_memory = 1e10; int m_allow_inter = false; - int m_ambig_no_flip = false; int m_include_nonfounders = false; int m_keep_ambig = false; int m_print_all_scores = false; @@ -114,7 +112,7 @@ class Commander int m_user_no_default = false; bool m_provided_memory = false; bool m_set_delim = false; - + bool m_ran_base_check = false; BaseFile m_base_info; CalculatePRS m_prs_info; Clumping m_clump_info; @@ -145,14 +143,31 @@ class Commander * \param reporter is the object to report all messages * \return true if we want to continue the program */ + + bool init(int argc, char* argv[], bool& early_termination, + Reporter& reporter); bool parse_command(int argc, char* argv[], const char* optString, - const struct option longOpts[], Reporter& reporter); - void usage(); + const struct option longOpts[], bool& early_termination, + Reporter& reporter); + void set_help_message(); bool clump_check(); bool ref_check(); bool covariate_check(); + std::unordered_set get_cov_names(); + std::tuple, + std::unordered_map> + 
get_covariate_header(); + size_t + find_cov_idx(const std::unordered_set& included, + const std::unordered_map& ref_index, + std::string& missing); + void reorganize_cov_name(const std::vector& cov_header); + bool + process_factor_cov(const std::unordered_set& included, + const std::unordered_map& ref_index, + const std::unordered_set ori_input); bool filter_check(); bool misc_check(); bool prset_check(); @@ -164,28 +179,16 @@ class Commander static std::vector transform_covariate(const std::string& cov_in); - + static size_t find_first_end(const std::string_view& cov, const size_t idx); + static std::vector parse_range(const std::string_view& cov); + static std::vector get_range(const std::string_view& cov, + const size_t start, const size_t end); + static void update_covariate_range(const std::vector& range, + std::vector& res); ///////////////////////////////////////////////// /// REFACTORED FUNCTIONS ///////////////////////////////////////////////// - int32_t maximum_thread() - { - int32_t max_threads = 1; -#if defined(WIN32) || defined(_WIN32) \ - || defined(__WIN32) && !defined(__CYGWIN__) - // max thread estimation using windows - SYSTEM_INFO sysinfo; - GetSystemInfo(&sysinfo); - max_threads = sysinfo.dwNumberOfProcessors; - int32_t known_procs = max_threads; -#else - int32_t known_procs = - static_cast(sysconf(_SC_NPROCESSORS_ONLN)); - max_threads = (known_procs == -1) ? 
1 : known_procs; -#endif - return max_threads; - } inline void set_string(const std::string& input, const std::string& c, size_t base_index) { @@ -210,10 +213,15 @@ class Commander target = input; target_boolean = true; } + template inline bool convert_to_numeric_vector(const std::vector& token, std::vector& target) { + if (target.empty()) + target.reserve(token.size()); + else + target.reserve(target.size() + token.size()); try { for (auto&& bar : token) target.push_back(misc::convert(bar)); @@ -231,9 +239,7 @@ class Commander { // should always have an input if (input.empty()) return false; - std::string comma = ""; - if (m_parameter_log.find(c) != m_parameter_log.end()) { comma = ","; } - m_parameter_log[c].append(comma + input); + append_log(c, input); if (!input.empty() && input.back() == ',') { m_error_message.append( @@ -271,6 +277,7 @@ class Commander } throw std::runtime_error("Error: Undefined input"); } + size_t number_boolean(const std::string& input, bool& result) { size_t bool_length = 0; @@ -286,8 +293,16 @@ class Commander { if (bool_length != input.length()) { - return misc::string_to_size_t( + // if the boolean string doesn't take up the whole of the input + // string + size_t num_repeat = misc::convert( input.substr(0, input.length() - bool_length).c_str()); + if (static_cast(num_repeat) < 0) + { + throw std::runtime_error( + "Error: Negative number of boolean required. 
"); + } + return num_repeat; } else { @@ -299,6 +314,7 @@ class Commander throw std::runtime_error("Error: None Numeric Pattern"); } } + inline bool validate_command(Reporter& reporter); inline bool parse_binary_vector(const std::string& input, const std::string& c, std::vector& target) @@ -320,7 +336,7 @@ class Commander for (auto&& bin : token) { // check if this is true or false, if, not, try parsing - std::transform(bin.begin(), bin.end(), bin.begin(), ::toupper); + misc::to_upper(bin); try { bool value = false; @@ -349,6 +365,7 @@ class Commander return true; } + // return false when we can't extract the unit inline bool extract_unit(const std::string& input, double& value, std::string& unit) { @@ -367,14 +384,19 @@ class Commander { } } - if (!valid) return false; - if (unit_length == 0) { unit = "b"; } + if (!valid) + return false; + else if (unit_length == 0) + { + unit = ""; + } else { unit = input.substr(input.length() - unit_length); } return true; } + inline size_t unit_power(const std::string& unit) { const std::unordered_map unit_map = { @@ -389,7 +411,7 @@ class Commander { check_duplicate(c); std::string in = input; - std::transform(in.begin(), in.end(), in.begin(), ::tolower); + misc::to_lower(in); m_parameter_log[c] = in; const size_t weight = memory ? 1024 : 1000; double value; @@ -399,18 +421,44 @@ class Commander m_error_message.append("Error: Invalid input: " + in + "\n"); return false; } + if (value <= 0) + { + m_error_message.append("Error: Non-zero positive number required. " + + misc::to_string(target) + + " provided, please check if you have " + "provided the correct input\n"); + return false; + } size_t unit_power_level; try { - unit_power_level = unit_power(unit) + default_power; + // only use default when unit isn't provided + if (unit.empty()) { unit_power_level = default_power; } + else + unit_power_level = unit_power(unit); } catch (...) 
{ m_error_message.append("Error: Invalid input: " + in + "\n"); return false; } - value *= pow(weight, unit_power_level); - if (trunc(value) != value && value < 0) + double power = pow(weight, unit_power_level); + if (value > std::numeric_limits::max() + || power > std::numeric_limits::max() + || misc::overflow(value, power)) + { + m_error_message.append("Error: Value input is exceptionally large. " + "PRSice won't be able to handle this\n"); + return false; + } + value *= power; + if (value > std::numeric_limits::max()) + { + m_error_message.append("Error: Value input is exceptionally large. " + "PRSice won't be able to handle this\n"); + return false; + } + if (trunc(value) != value) { m_error_message.append("Error: Non-integer value obtained: " + misc::to_string(target) + "\n"); @@ -456,18 +504,21 @@ class Commander return false; } } - - inline void load_string_vector(const std::string& input, - const std::string& c, - std::vector& target) + inline void append_log(const std::string& c, const std::string& input) { - if (input.empty()) return; if (m_parameter_log.find(c) == m_parameter_log.end()) { m_parameter_log[c] = input; } else { m_parameter_log[c] = "," + input; } + } + inline void load_string_vector(const std::string& input, + const std::string& c, + std::vector& target) + { + if (input.empty()) return; + append_log(c, input); if (!input.empty() && input.back() == ',') { m_error_message.append( @@ -481,15 +532,14 @@ class Commander inline bool set_memory(const std::string& input) { - bool error = parse_unit_value(input, "memory", 2, m_memory, true); - return !error; + return parse_unit_value(input, "memory", 2, m_memory, true); } inline bool set_missing(const std::string& in) { std::string input = in; check_duplicate("missing"); - std::transform(input.begin(), input.end(), input.begin(), ::tolower); + misc::to_lower(input); switch (input.at(0)) { case 'c': @@ -519,7 +569,7 @@ class Commander inline bool set_model(const std::string& in) { std::string input = 
in; - std::transform(input.begin(), input.end(), input.begin(), ::tolower); + misc::to_lower(input); check_duplicate("model"); switch (input.at(0)) { @@ -549,7 +599,7 @@ class Commander inline bool set_score(const std::string& in) { std::string input = in; - std::transform(input.begin(), input.end(), input.begin(), ::tolower); + misc::to_lower(input); check_duplicate("score"); if (input == "avg") { m_prs_info.scoring_method = SCORING::AVERAGE; } else if (input == "std") @@ -579,7 +629,7 @@ class Commander bool no_default, bool case_sensitive = true, bool print_error = true) { - if ((no_default && !static_cast(m_base_info.has_column[index])) + if ((no_default && !m_base_info.has_column[index]) || m_base_info.column_name[index].empty()) { m_base_info.has_column[index] = false; @@ -591,6 +641,7 @@ class Commander if (has_col) { m_base_info.column_index[index] = col_index; } else if (m_base_info.has_column[index] && print_error) { + // cannot find column but user has provided a column name m_error_message.append(warning + ": " + m_base_info.column_name[index] + " not found in base file\n"); @@ -604,52 +655,57 @@ class Commander inline bool set_base_info_threshold(const std::vector& ref) { const std::vector info = - misc::split(m_base_info.column_name[+BASE_INDEX::INFO], ","); + misc::split(m_base_info.column_name[+BASE_INDEX::INFO], ":"); + if (info.size() != 2) + { + m_error_message.append("Error: Invalid format of " + "--base-info. 
Should be " + "ColName:Threshold.\n"); + return false; + } const bool has_input = m_base_info.has_column[+BASE_INDEX::INFO]; size_t index; + // first, try and see if the header can be found in base const bool found = index_check(info[0], ref, index); if (found) m_base_info.column_index[+BASE_INDEX::INFO] = index; m_base_info.has_column[+BASE_INDEX::INFO] = found; - if (!found) + if (!has_input && !found) + { + // do nothing, because we can't find the default + } + else { - if (has_input) + if (!found) { m_error_message.append("Warning: INFO field not found in base " "file, will ignore INFO filtering\n"); } - } - else if (info.size() != 2) // assume default always valid - { - m_error_message.append("Error: Invalid format of " - "--base-info. Should be " - "ColName,Threshold.\n"); - return false; - } - try - { - m_base_filter.info_score = misc::convert(info[1]); - if (!misc::within_bound(m_base_filter.info_score, 0.0, 1.0)) + else { - if (has_input) + // we have found valid formatted info, now check if threshold is + // correct + try { - m_error_message.append("Error: Base INFO threshold " - "must be within 0 and 1!\n"); + m_base_filter.info_score = misc::convert(info[1]); + if (!misc::within_bound(m_base_filter.info_score, + 0.0, 1.0)) + { + m_error_message.append("Error: Base INFO threshold " + "must be within 0 and 1!\n"); + return false; + } + } + catch (...) + { + m_error_message.append( + "Error: Invalid argument passed to --base-info: " + + m_base_info.column_name[+BASE_INDEX::INFO] + + "! Second argument must be numeric\n"); return false; } } } - catch (...) - { - if (has_input) - { - m_error_message.append( - "Error: Invalid argument passed to --base-info: " - + m_base_info.column_name[+BASE_INDEX::INFO] - + "! 
Second argument must be numeric\n"); - return false; - } - } return true; } /** @@ -659,6 +715,13 @@ class Commander const std::vector& detail, size_t& column_index, int& has_column, double& maf) { + if (detail.size() != 2) + { + throw std::runtime_error( + "Error: Invalid format of --base-maf. " + "Should be ColName:Threshold." + "or ColName:Threshold,ColName:Threshold.\n"); + } size_t index = 0; bool found = index_check(detail[0], ref, index); has_column = found; @@ -668,7 +731,7 @@ class Commander m_error_message.append( "Warning: MAF field not found in base file. " "Will not perform MAF filtering on the base file\n"); - return true; + return false; } double cur_maf; try @@ -676,18 +739,16 @@ class Commander cur_maf = misc::convert(detail[1]); if (!misc::within_bound(cur_maf, 0.0, 1.0)) { - m_error_message.append("Error: Base MAF threshold must " - "be within 0 and 1!\n"); - return false; + throw std::runtime_error("Error: Base MAF threshold must " + "be within 0 and 1!\n"); } } catch (...) { - m_error_message.append( + throw std::runtime_error( "Error: Invalid argument passed to --base-maf: " + m_base_info.column_name[+BASE_INDEX::MAF] + "! Threshold must be numeric\n"); - return false; } maf = cur_maf; return true; @@ -697,35 +758,53 @@ class Commander { const std::string maf_error = "Error: Invalid format of --base-maf. " - "Should be ColName,Threshold." - "or ColName,Threshold:ColName,Threshold.\n"; + "Should be ColName:Threshold." 
+ "or ColName:Threshold,ColName:Threshold.\n"; std::vector case_control = - misc::split(m_base_info.column_name[+BASE_INDEX::MAF], ":"); - const bool print_error = m_base_info.has_column[+BASE_INDEX::MAF]; + misc::split(m_base_info.column_name[+BASE_INDEX::MAF], ","); + const bool user_require_maf_filter = + m_base_info.has_column[+BASE_INDEX::MAF]; // only process the maf filter if it is provided - if (!print_error) return true; + if (!user_require_maf_filter) return true; if (case_control.size() > 2) { - if (print_error) { m_error_message.append(maf_error); } + if (user_require_maf_filter) { m_error_message.append(maf_error); } return false; } std::vector detail; - if (case_control.size() > 0) + // process the control filter threshold + detail = misc::split(case_control.front(), ":"); + try { - detail = misc::split(case_control.front(), ","); - return process_maf( + // TODO: Still think the best course of action is to error out + // instead of silently dropping a filtering option + bool parse_control_ok = process_maf( ref, detail, m_base_info.column_index[+BASE_INDEX::MAF], m_base_info.has_column[+BASE_INDEX::MAF], m_base_filter.maf); + // if we can't parse the control, we will return true (say + // everything is ok) but will ignore MAF filtering + if (!parse_control_ok) return true; + if (case_control.size() == 2) + { + detail = misc::split(case_control.back(), ":"); + bool parse_case_ok = + process_maf(ref, detail, + m_base_info.column_index[+BASE_INDEX::MAF_CASE], + m_base_info.has_column[+BASE_INDEX::MAF_CASE], + m_base_filter.maf_case); + // if we can't parse the case MAF filtering threshold, we will + // also disable the fitering of the control + if (!parse_case_ok) + m_base_info.has_column[+BASE_INDEX::MAF] = false; + return true; + } + return true; } - if (case_control.size() == 2) + catch (const std::runtime_error& er) { - detail = misc::split(case_control.back(), ","); - return process_maf(ref, detail, - m_base_info.column_index[+BASE_INDEX::MAF_CASE], 
- m_base_info.has_column[+BASE_INDEX::MAF_CASE], - m_base_filter.maf_case); + m_error_message.append(er.what()); + return false; } - return true; } /*! * \brief Get the column index based on file header and the input string @@ -742,10 +821,7 @@ class Commander for (size_t i = 0; i < ref.size(); ++i) { tmp = ref[i]; - if (!case_sensitive) - { - std::transform(tmp.begin(), tmp.end(), tmp.begin(), ::toupper); - } + if (!case_sensitive) { misc::to_upper(tmp); } if (target == tmp) { index = i; @@ -757,8 +833,28 @@ class Commander bool get_statistic_column(const std::vector& column_names); bool base_check(); + bool base_column_check(std::vector& column_names); bool get_statistic_flag(); std::string get_program_header(const std::string& name); + + + int32_t maximum_thread() + { + int32_t max_threads = 1; +#if defined(WIN32) || defined(_WIN32) \ + || defined(__WIN32) && !defined(__CYGWIN__) + // max thread estimation using windows + SYSTEM_INFO sysinfo; + GetSystemInfo(&sysinfo); + max_threads = sysinfo.dwNumberOfProcessors; + int32_t known_procs = max_threads; +#else + int32_t known_procs = + static_cast(sysconf(_SC_NPROCESSORS_ONLN)); + max_threads = (known_procs == -1) ? 
1 : known_procs; +#endif + return max_threads; + } }; #endif // COMMANDER_H diff --git a/inc/genotype.hpp b/inc/genotype.hpp index 06d0dec7..2b33c043 100644 --- a/inc/genotype.hpp +++ b/inc/genotype.hpp @@ -428,11 +428,6 @@ class Genotype m_keep_ambig = keep; return *this; } - Genotype& ambig_no_flip(bool keep) - { - m_ambig_no_flip = keep; - return *this; - } Genotype& reference() { m_is_ref = true; @@ -555,7 +550,6 @@ class Genotype uint32_t m_num_female = 0; uint32_t m_num_ambig_sex = 0; uint32_t m_num_non_founder = 0; - bool m_ambig_no_flip = false; bool m_genotype_stored = false; bool m_use_proxy = false; bool m_has_prs_instruction = false; diff --git a/inc/misc.hpp b/inc/misc.hpp index 669fd9ce..c8584bcb 100644 --- a/inc/misc.hpp +++ b/inc/misc.hpp @@ -28,6 +28,7 @@ #include #include #include +#include #include #if defined __APPLE__ @@ -159,9 +160,10 @@ inline bool to_bool(const std::string& input) } template -inline bool within_bound(const T &input, const T& low_bound, const T &up_bound){ +inline bool within_bound(const T& input, const T& low_bound, const T& up_bound) +{ assert(low_bound <= up_bound); - return !(input < low_bound && input > up_bound); + return !(input < low_bound || input > up_bound); } // TODO: Delete this, doesn't seems to give robust answer @@ -297,10 +299,27 @@ inline std::vector split(const std::string& seq, return result; } + +inline std::vector split(std::string_view str, + std::string_view delims = " ") +{ + std::vector output; + // output.reserve(str.size() / 2); + for (auto first = str.data(), second = str.data(), + last = first + str.size(); + second != last && first != last; first = second + 1) + { + second = std::find_first_of(first, last, std::cbegin(delims), + std::cend(delims)); + if (first != second) output.emplace_back(first, second - first); + } + return output; +} + inline void split(std::vector& result, const std::string& seq, const std::string& separators = "\t ") { - std::size_t prev = 0, pos, idx= 0; + std::size_t 
prev = 0, pos, idx = 0; const size_t init_size = result.size(); // assuming we have the same size // result.clear(); @@ -328,10 +347,9 @@ inline void split(std::vector& result, const std::string& seq, } ++idx; } - if(idx < init_size){ - result.resize(idx); - } + if (idx < init_size) { result.resize(idx); } } + template inline T convert(const std::string& str) { @@ -343,6 +361,7 @@ inline T convert(const std::string& str) { throw std::runtime_error("Unable to convert the input"); } return obj; } + template inline std::string to_string(T value) { @@ -350,6 +369,35 @@ inline std::string to_string(T value) out << value; return out.str(); } + +// NOTE: Didn't work for non-ASCII characters +inline void to_upper(std::string& str) +{ + std::transform(str.begin(), str.end(), str.begin(), ::toupper); +} +inline void to_lower(std::string& str) +{ + std::transform(str.begin(), str.end(), str.begin(), ::tolower); +} +inline void to_upper(const std::string& input, std::string& out) +{ + out.resize(input.size()); + std::transform(input.begin(), input.end(), out.begin(), ::toupper); +} +inline void to_lower(const std::string& input, std::string& out) +{ + out.resize(input.size()); + std::transform(input.begin(), input.end(), out.begin(), ::tolower); +} +template +inline bool overflow(const T a, const T b) +{ + if (a == 0 || b == 0) return false; + T result = a * b; + return !(a == result / b); +} + + // trim functions from https://stackoverflow.com/a/217605 // trim from start (in place) inline void ltrim(std::string& s) @@ -1100,11 +1148,18 @@ inline size_t string_to_size_t(const char* p) // from https://stackoverflow.com/a/874160 inline bool hasEnding(const std::string& fullString, const std::string& ending) { - if (fullString.length() >= ending.length()) + if (fullString.empty()) + throw std::runtime_error( + "Error: Cannot look for ending of an empty string"); + else if (ending.empty()) + throw std::runtime_error( + "Error: Undefined behaviour. 
Cannot look for empty ending in " + "string"); + else if (fullString.length() >= ending.length()) { - return (0 - == fullString.compare(fullString.length() - ending.length(), - ending.length(), ending)); + return (fullString.compare(fullString.length() - ending.length(), + ending.length(), ending) + == 0); } else { diff --git a/inc/reporter.hpp b/inc/reporter.hpp index 0c66763d..83122b3b 100644 --- a/inc/reporter.hpp +++ b/inc/reporter.hpp @@ -30,7 +30,9 @@ class Reporter { public: Reporter() {} - Reporter(const std::string& log_name, size_t width = 60) : m_width(width) + Reporter(bool test) : m_unit_test(test) {} + Reporter(const std::string& log_name, size_t width = 60, bool test = false) + : m_width(width), m_unit_test(test) { m_log_file.open(log_name.c_str()); if (!m_log_file.is_open()) @@ -59,6 +61,7 @@ class Reporter const size_t m_error_prefix_size = 6; const size_t m_warning_prefix_size = 8; size_t m_width = 60; + bool m_unit_test = false; #if defined(WIN32) || defined(_WIN32) \ || defined(__WIN32) && !defined(__CYGWIN__) const std::string m_error_color_start = ""; diff --git a/inc/storage.hpp b/inc/storage.hpp index 3c5d7638..f4ddf24d 100644 --- a/inc/storage.hpp +++ b/inc/storage.hpp @@ -68,7 +68,7 @@ struct BaseFile std::vector column_index = std::vector(+BASE_INDEX::MAX + 1, 0); std::vector column_name = { - "CHR", "A2", "BP", "SE", "INFO,0.9", "", "", "A1", "SNP", "P", ""}; + "CHR", "A2", "BP", "SE", "INFO:0.9", "", "", "A1", "SNP", "P", ""}; // use int as vector is abnormal std::vector has_column = std::vector(+BASE_INDEX::MAX + 1, false); std::string file_name; @@ -189,7 +189,7 @@ struct QCFiltering struct Clumping { double r2 = 0.1; - double proxy = -1; + double proxy = 0.0; double pvalue = 1; size_t distance = 250000; int no_clump = false; diff --git a/src/binarygen.cpp b/src/binarygen.cpp index 0b2ad339..0b1d90d5 100644 --- a/src/binarygen.cpp +++ b/src/binarygen.cpp @@ -669,8 +669,9 @@ void BinaryGen::gen_snp_vector( } else { - if (ambig && 
flipping && m_ambig_no_flip) flipping = false; processed_snps.insert(cur_id); + if (ambig) + { flipping = (A1 != m_existed_snps[target_index].ref()); } genotype->m_existed_snps[target_index].add_snp_info( file_idx, byte_pos, chr_num, SNP_position, A1, A2, flipping, m_is_ref); diff --git a/src/binaryplink.cpp b/src/binaryplink.cpp index 437d5b86..18cbb1fd 100644 --- a/src/binaryplink.cpp +++ b/src/binaryplink.cpp @@ -487,7 +487,11 @@ void BinaryPlink::gen_snp_vector( byte_pos = static_cast( bed_offset + ((num_snp_read - 1) * (unfiltered_sample_ct4))); - if (flipping && ambig && m_ambig_no_flip) flipping = false; + if (ambig) + { + flipping = (bim_token[+BIM::A1] + != m_existed_snps[base_idx->second].ref()); + } genotype->m_existed_snps[base_idx->second].add_snp_info( idx, byte_pos, chr_num, loc, bim_token[+BIM::A1], bim_token[+BIM::A2], flipping, m_is_ref); diff --git a/src/commander.cpp b/src/commander.cpp index 204dfad7..3b46f11a 100644 --- a/src/commander.cpp +++ b/src/commander.cpp @@ -16,13 +16,37 @@ #include "commander.hpp" -Commander::Commander() { set_help_message(); } +Commander::Commander() +{ + set_help_message(); + m_reference.is_ref = true; +} -bool Commander::init(int argc, char* argv[], Reporter& reporter) +bool Commander::process_command(int argc, char* argv[], Reporter& reporter) { + bool early_termination = false; + bool error = init(argc, argv, early_termination, reporter); + if (early_termination) return false; + error |= validate_command(reporter); + std::string message = get_program_header(argv[0]); + for (auto&& com : m_parameter_log) + { message.append(" \\\n --" + com.first + " " + com.second); } + message.append("\n"); + reporter.report(message, false); + if (error) throw std::runtime_error(m_error_message); + if (!m_error_message.empty()) reporter.report(m_error_message); + return true; +} +bool Commander::init(int argc, char* argv[], bool& early_termination, + Reporter& reporter) +{ + // initialize get_opt. 
To be honest, with PRSice usage, this shouldn't be + // required, but then it is required for our unit test where we repeatedly + // test the get_opt + optind = 0; if (argc <= 1) { - usage(); + reporter.report(m_help_message); throw std::runtime_error("Please provide the required parameters"); } const char* optString = "b:B:c:C:f:F:g:h?i:k:l:L:m:n:o:p:s:t:u:v"; @@ -56,12 +80,11 @@ bool Commander::init(int argc, char* argv[], Reporter& reporter) {"all-score", no_argument, &m_print_all_scores, 1}, {"beta", no_argument, &m_base_info.is_beta, 1}, {"fastscore", no_argument, &m_p_thresholds.fastscore, 1}, - {"full-back", required_argument, &m_prset.full_as_background, 1}, + {"full-back", no_argument, &m_prset.full_as_background, 1}, {"hard", no_argument, &m_target.hard_coded, 1}, {"ignore-fid", no_argument, &m_pheno_info.ignore_fid, 1}, {"index", no_argument, &m_base_info.is_index, 1}, {"keep-ambig", no_argument, &m_keep_ambig, 1}, - {"keep-ambig-as-is", no_argument, &m_ambig_no_flip, 1}, {"logit-perm", no_argument, &m_perm_info.logit_perm, 1}, {"no-clump", no_argument, &m_clump_info.no_clump, 1}, {"non-cumulate", no_argument, &m_prs_info.non_cumulate, 1}, @@ -76,6 +99,8 @@ bool Commander::init(int argc, char* argv[], Reporter& reporter) // long flags, need to work on them {"A1", required_argument, nullptr, 0}, {"A2", required_argument, nullptr, 0}, + {"a1", required_argument, nullptr, 0}, + {"a2", required_argument, nullptr, 0}, {"background", required_argument, nullptr, 0}, {"bar-levels", required_argument, nullptr, 0}, {"base-info", required_argument, nullptr, 0}, @@ -124,12 +149,13 @@ bool Commander::init(int argc, char* argv[], Reporter& reporter) {"wind-3", required_argument, nullptr, 0}, {"x-range", required_argument, nullptr, 0}, {nullptr, 0, nullptr, 0}}; - return parse_command(argc, argv, optString, longOpts, reporter); + return parse_command(argc, argv, optString, longOpts, early_termination, + reporter); } bool Commander::parse_command(int argc, char* argv[], 
const char* optString, const struct option longOpts[], - Reporter& reporter) + bool& early_termination, Reporter& reporter) { int32_t max_threads = maximum_thread(); int longIndex = 0; @@ -146,9 +172,9 @@ bool Commander::parse_command(int argc, char* argv[], const char* optString, command = longOpts[longIndex].name; if (longOpts[longIndex].flag != nullptr) break; // reorganize all long ops according to alphabetical order - else if (command == "A1") + else if (command == "A1" || command == "a1") set_string(optarg, command, +BASE_INDEX::EFFECT); - else if (command == "A2") + else if (command == "A2" || command == "a2") set_string(optarg, command, +BASE_INDEX::NONEFFECT); else if (command == "background") set_string(optarg, command, m_prset.background); @@ -171,7 +197,7 @@ bool Commander::parse_command(int argc, char* argv[], const char* optString, set_string(optarg, command, +BASE_INDEX::CHR); else if (command == "clump-kb") { - error |= !parse_unit_value(optarg, command, 2, + error |= !parse_unit_value(optarg, command, 1, m_clump_info.distance); m_clump_info.provided_distance = true; } @@ -352,17 +378,46 @@ bool Commander::parse_command(int argc, char* argv[], const char* optString, m_p_thresholds.set_threshold); break; case 'h': - case '?': usage(); return false; + reporter.report(m_help_message); + early_termination = true; + return true; case 'v': std::cerr << version << " (" << date << ") " << std::endl; - return false; + early_termination = true; + return true; + case '?': default: - throw "Error: Undefined operator, please use --help for more " + throw "Error: Undefined operator, please use " + "--help for more " "information!"; } opt = getopt_long(argc, argv, optString, longOpts, &longIndex); } - error |= !base_check(); + if (m_allow_inter) m_parameter_log["allow-inter"] = ""; + if (m_p_thresholds.fastscore) m_parameter_log["fastscore"] = ""; + if (m_pheno_info.ignore_fid) m_parameter_log["ignore-fid"] = ""; + if (m_include_nonfounders) 
m_parameter_log["nonfounders"] = ""; + if (m_base_info.is_index) m_parameter_log["index"] = ""; + if (m_keep_ambig) m_parameter_log["keep-ambig"] = ""; + if (m_perm_info.logit_perm) m_parameter_log["logit-perm"] = ""; + if (m_clump_info.no_clump) m_parameter_log["no-clump"] = ""; + if (m_p_thresholds.no_full) m_parameter_log["no-full"] = ""; + if (m_prs_info.no_regress) m_parameter_log["no-regress"] = ""; + if (m_prs_info.non_cumulate) m_parameter_log["non-cumulate"] = ""; + if (m_print_all_scores) m_parameter_log["all-score"] = ""; + if (m_print_snp) m_parameter_log["print-snp"] = ""; + if (m_base_info.is_beta) m_parameter_log["beta"] = ""; + if (m_base_info.is_or) m_parameter_log["or"] = ""; + if (m_target.hard_coded) m_parameter_log["hard"] = ""; + if (m_ultra_aggressive) m_parameter_log["ultra"] = ""; + if (m_prs_info.use_ref_maf) m_parameter_log["use-ref-maf"] = ""; + if (m_user_no_default) m_parameter_log["no-default"] = ""; + return error; +} + +bool Commander::validate_command(Reporter& reporter) +{ + bool error = !base_check(); error |= !clump_check(); error |= !covariate_check(); error |= !filter_check(); @@ -388,36 +443,9 @@ bool Commander::parse_command(int argc, char* argv[], const char* optString, reporter.report(error_reason + ". 
Maybe the path to file does not exists?"); else - return false; + return true; } - if (m_allow_inter) m_parameter_log["allow-inter"] = ""; - if (m_p_thresholds.fastscore) m_parameter_log["fastscore"] = ""; - if (m_pheno_info.ignore_fid) m_parameter_log["ignore-fid"] = ""; - if (m_include_nonfounders) m_parameter_log["nonfounders"] = ""; - if (m_base_info.is_index) m_parameter_log["index"] = ""; - if (m_keep_ambig) m_parameter_log["keep-ambig"] = ""; - if (m_perm_info.logit_perm) m_parameter_log["logit-perm"] = ""; - if (m_clump_info.no_clump) m_parameter_log["no-clump"] = ""; - if (m_p_thresholds.no_full) m_parameter_log["no-full"] = ""; - if (m_prs_info.no_regress) m_parameter_log["no-regress"] = ""; - if (m_prs_info.non_cumulate) m_parameter_log["non-cumulate"] = ""; - if (m_print_all_scores) m_parameter_log["all-score"] = ""; - if (m_print_snp) m_parameter_log["print-snp"] = ""; - if (m_base_info.is_beta) m_parameter_log["beta"] = ""; - if (m_base_info.is_or) m_parameter_log["or"] = ""; - if (m_target.hard_coded) m_parameter_log["hard"] = ""; - if (m_ultra_aggressive) m_parameter_log["ultra"] = ""; - if (m_prs_info.use_ref_maf) m_parameter_log["use-ref-maf"] = ""; - if (m_user_no_default) m_parameter_log["no-default"] = ""; - std::string message = get_program_header(argv[0]); - for (auto&& com : m_parameter_log) - { message.append(" \\\n --" + com.first + " " + com.second); } - message.append("\n"); - reporter.report(message, false); - - if (!m_error_message.empty()) reporter.report(m_error_message); - if (error) throw std::runtime_error(m_error_message); - return true; + return error; } std::string Commander::get_program_header(const std::string& name) @@ -429,7 +457,7 @@ std::string Commander::get_program_header(const std::string& name) char buffer[80]; timeinfo = localtime(&start_time); strftime(buffer, sizeof(buffer), "%Y-%m-%d %H:%M:%S", timeinfo); - std::string message = "\nPRSice " + version + " (" + date + ") \n"; + std::string message = "\n\nPRSice " + 
version + " (" + date + ") \n"; message.append("https://github.com/choishingwan/PRSice\n"); message.append("(C) 2016-2020 Shing Wan (Sam) Choi and Paul F. O'Reilly\n"); message.append("GNU General Public License v3\n\n"); @@ -457,31 +485,31 @@ void Commander::set_help_message() "usage: PRSice [options] <-b base_file> <-t target_file>\n" // Base file "\nBase File:\n" - " --A1 Column header containing allele 1 " + " --a1 Column header containing allele 1 " "(effective allele)\n" " Default: A1\n" - " --A2 Column header containing allele 2 " + " --a2 Column header containing allele 2 " "(non-effective allele)\n" " Default: A2\n" " --base | -b Base association file\n" " --base-info Base INFO score filtering. Format should " "be\n" - " ,. SNPs with info " + " :. SNPs with info " "\n" " score less than will be " "ignored\n" " Column name default: INFO\n" " Threshold default: 0.9\n" " --base-maf Base MAF filtering. Format should be\n" - " ,. SNPs with maf\n" + " :. SNPs with maf\n" " less than will be ignored. " "An\n" " additional column can also be added " "(e.g.\n" " also filter MAF for cases), using the\n" " following format:\n" - " ,:,\n" + " :,:\n" " --beta Whether the test statistic is in the form " "of \n" " BETA or OR. 
If set, test statistic is " @@ -891,8 +919,6 @@ void Commander::set_help_message() " if you are certain that the base and " "target\n" " has the same A1 and A2 alleles\n" - " --keep-ambig-as-is Will not flip ambiguous SNPs when they " - "are kept.\n" " Will also set the --keep-ambig flag\n" " --logit-perm When performing permutation, still use " "logistic\n" @@ -949,10 +975,6 @@ void Commander::set_help_message() " --help | -h Display this help message\n"; } -// Print the help message -void Commander::usage() { fprintf(stderr, "%s\n", m_help_message.c_str()); } - - std::vector get_base_header(const std::string& file) { if (file.empty()) @@ -993,9 +1015,11 @@ bool Commander::get_statistic_column( const std::vector& column_names) { bool has_col; + // don't allow both OR and BETA to be set if (m_base_info.is_or && m_base_info.is_beta) return false; if (m_base_info.is_or || m_base_info.is_beta) { + // guess default based on --or and --beta const std::string target = m_base_info.is_or ? "OR" : "BETA"; m_base_info.column_name[+BASE_INDEX::STAT] = target; has_col = in_file(column_names, +BASE_INDEX::STAT, "Error", @@ -1037,11 +1061,12 @@ bool Commander::get_statistic_column( } } } + bool Commander::get_statistic_flag() { std::string stat_temp = m_base_info.column_name[+BASE_INDEX::STAT]; - std::transform(stat_temp.begin(), stat_temp.end(), stat_temp.begin(), - ::toupper); + // guess flag based on stat provided + misc::to_upper(stat_temp); if (stat_temp == "OR") { m_base_info.is_or = true; @@ -1061,11 +1086,19 @@ bool Commander::get_statistic_flag() } return true; } + bool Commander::base_check() { - bool error = false; + m_ran_base_check = true; std::vector column_names = get_base_header(m_base_info.file_name); + return base_column_check(column_names); +} + +bool Commander::base_column_check(std::vector& column_names) +{ + + bool error = false; for (auto&& c : column_names) { misc::trim(c); } if (m_base_info.is_index) { @@ -1113,8 +1146,11 @@ bool Commander::base_check() 
if (!m_user_no_default && !has_col) { error |= !get_statistic_column(column_names); } // Statistic is ok, but beta and or not provided + // use has_column vector instead of has_col as get_statistic_column might + // have found the state column? if (m_base_info.has_column[+BASE_INDEX::STAT]) { + // flag not provided, need to guess if (!(m_base_info.is_or || m_base_info.is_beta)) { error |= !get_statistic_flag(); } } @@ -1127,7 +1163,6 @@ bool Commander::clump_check() { bool error = false; if (m_clump_info.no_clump) return true; - if (m_clump_info.use_proxy && !misc::within_bound(m_clump_info.proxy, 0.0, 1.0)) { @@ -1179,7 +1214,7 @@ bool Commander::ref_check() "Error: Unsupported LD format: " + m_reference.type + "\n"); } } - if (m_ref_filter.geno < 0 || m_ref_filter.geno > 1) + if (!misc::within_bound(m_ref_filter.geno, 0.0, 1.0)) { error = true; m_error_message.append("Error: LD genotype missingness threshold " @@ -1204,6 +1239,12 @@ bool Commander::ref_check() m_error_message.append("Error: LD hard threshold must be larger " "than 0 and smaller than 1!\n"); } + if (!misc::within_bound(m_ref_filter.dose_threshold, 0.0, 1.0)) + { + error = true; + m_error_message.append("Error: LD dosage threshold must be larger " + "than 0 and smaller than 1!\n"); + } else if (!m_reference.file_name.empty() || m_reference.file_name.empty()) { @@ -1219,6 +1260,12 @@ bool Commander::ref_check() m_parameter_log["dose-thres"] = std::to_string(m_target_filter.dose_threshold); } + if (!misc::within_bound(m_ref_filter.info_score, 0.0, 1.0)) + { + error = true; + m_error_message.append("Error: LD INFO score threshold must be " + "larger than 0 and smaller than 1!\n"); + } } if (!misc::within_bound(m_ref_filter.maf, 0.0, 1.0)) { @@ -1226,14 +1273,142 @@ bool Commander::ref_check() m_error_message.append("Error: LD MAF threshold must be larger than " "0 and smaller than 1!\n"); } - if (!misc::within_bound(m_ref_filter.info_score, 0.0, 1.0)) + return !error; +} + +size_t 
Commander::find_first_end(const std::string_view& cov, const size_t idx) +{ + if (cov.at(idx) != '[') + { + throw std::runtime_error( + "Error: Invalid format. Expect string to start with ["); + } + for (size_t i = idx + 1; i < cov.length(); ++i) { - error = true; - m_error_message.append("Error: LD INFO score threshold must be " - "larger than 0 and smaller than 1!\n"); + if (cov.at(i) == ']') return i; + if (cov.at(i) == '[') + throw std::runtime_error( + "Error: Invalid format, we don't allow embedded range"); } - return !error; + throw std::runtime_error( + "Error: Invalid format, [ must accompany with a ]"); } +std::vector Commander::parse_range(const std::string_view& cov) +{ + std::vector res; + if (cov.at(0) == '-' || cov.find("--") != std::string::npos) + { throw std::runtime_error("Error: Do not accept negative ranges"); } + std::vector token = misc::split(cov, "-"); + // from_chars will be faster, but less robust (1.5 will be converted to 1) + if (token.size() == 1) { res = {misc::convert(std::string(cov))}; } + else + { + size_t start, end; + start = misc::convert(std::string(token.front())); + end = misc::convert(std::string(token.back())); + if (start > end) { std::swap(start, end); } + res.resize(end - start + 1, start); + std::iota(res.begin(), res.end(), start); + } + return res; +} +std::vector Commander::get_range(const std::string_view& cov, + const size_t start, const size_t end) +{ + // need to remove [] + if (end >= cov.length() || start >= end) + { throw std::runtime_error("Error: Wrong start and end format"); } + if (!(cov.at(start) == '[' && cov.at(end) == ']')) + { + throw std::runtime_error("Error: Invalid input. 
Expect something " + "starts with [ and end with ]"); + } + std::vector token = + misc::split(cov.substr(start + 1, end - start - 1), "."); + std::vector results, tmp; + for (auto&& value : token) + { + // now try to account for - + tmp = parse_range(value); + results.insert(results.end(), tmp.begin(), tmp.end()); + } + std::sort(results.begin(), results.end()); + results.erase(std::unique(results.begin(), results.end()), results.end()); + return results; +} + +void Commander::update_covariate_range(const std::vector& range, + std::vector& res) +{ + if (range.empty()) + { + throw std::runtime_error( + "Error: Invalid input. Something is wrong with Sam"); + } + if (res.empty()) + { + res.reserve(range.size()); + for (auto&& value : range) { res.push_back(std::to_string(value)); } + } + else + { + // there are content in res, so we will duplicate it w.r.t number in + // range + std::vector tmp; + tmp.reserve(res.size() * range.size()); + for (auto&& r : res) + { + for (auto&& value : range) + { tmp.push_back(r + std::to_string(value)); } + } + res.clear(); + res = tmp; + } +} + +std::vector +Commander::transform_covariate(const std::string& cov_in) +{ + // do not allow embedded range + if (cov_in.empty() || cov_in.at(0) != '@') + { return std::vector {cov_in}; } + // remove first @ + std::string cov = cov_in; + cov.erase(0, 1); + std::vector result; + std::vector range; + std::size_t prev = 0, pos; + while ((pos = cov.find_first_of("[", prev)) != std::string::npos) + { + if (pos > prev) + { + std::string_view substring = cov.substr(prev, pos - prev); + if (result.empty()) + result.emplace_back(substring); + else + { + for (size_t i = 0; i < result.size(); ++i) + { result[i] = result[i].append(substring); } + } + size_t end = find_first_end(cov, pos); + update_covariate_range(get_range(cov, pos, end), result); + pos = end; + } + prev = pos + 1; + } + if (prev < cov.length()) + { + if (result.empty()) + result.emplace_back(cov.substr(prev, std::string::npos)); + else + 
{ + std::string_view substring = cov.substr(prev, std::string::npos); + for (auto&& c : result) { c.append(substring); } + } + } + return result; +} +/* std::vector Commander::transform_covariate(const std::string& cov_in) { @@ -1247,7 +1422,7 @@ Commander::transform_covariate(const std::string& cov_in) std::vector range; std::string cov = cov_in; std::string prefix, suffix; - // simplify to reasonable use cases + // Remove the first @ cov.erase(0, 1); // find the start of range by identifying [ open = misc::split(cov, "["); @@ -1315,12 +1490,10 @@ Commander::transform_covariate(const std::string& cov_in) } } return final_covariates; -} +}*/ -bool Commander::covariate_check() +std::unordered_set Commander::get_cov_names() { - // it is valid to have empty covariate file - if (m_pheno_info.cov_file.empty()) return true; // first, transform all the covariates // the actual column name to be included (after parsing) std::unordered_set included; @@ -1335,15 +1508,18 @@ bool Commander::covariate_check() transformed_cov = transform_covariate(cov); for (auto&& trans : transformed_cov) { included.insert(trans); } } - bool error = false; - // now try to read the header of the covariate file + return included; +} +std::tuple, std::unordered_map> +Commander::get_covariate_header() +{ std::ifstream cov_file; cov_file.open(m_pheno_info.cov_file.c_str()); if (!cov_file.is_open()) { m_error_message.append("Error: Cannot open covariate file: " + m_pheno_info.cov_file + "\n"); - return false; + throw std::runtime_error("Cannot open"); } std::string line; std::getline(cov_file, line); @@ -1353,85 +1529,124 @@ bool Commander::covariate_check() { m_error_message.append( "Error: First line of covariate file is empty!\n"); - return false; + throw std::runtime_error("Empty line"); } - const std::vector cov_header = misc::split(line); - std::string missing = ""; std::unordered_map ref_index; - // now get the index for each column name in the covariate file + auto cov_header = 
misc::split(line); for (size_t i = 0; i < cov_header.size(); ++i) { ref_index[cov_header[i]] = i; } - // when user provide a covariate file but not the covariate name, we - // will just read in every covariates - if (m_pheno_info.cov_colname.size() == 0) - { - for (size_t i = (1 + !m_pheno_info.ignore_fid); i < cov_header.size(); - ++i) - { included.insert(cov_header[i]); } - } - size_t valid_cov = 0; + return {cov_header, ref_index}; +} +size_t Commander::find_cov_idx( + const std::unordered_set& included, + const std::unordered_map& ref_index, + std::string& missing) +{ + missing = ""; m_pheno_info.col_index_of_cov.clear(); + size_t valid_cov = 0; + std::string comma = ""; for (auto&& cov : included) { // now for each covariate found in the covariate file, we add their // index to the storage - if (ref_index.find(cov) != ref_index.end()) + auto idx = ref_index.find(cov); + if (idx != ref_index.end()) { - m_pheno_info.col_index_of_cov.push_back(ref_index[cov]); + m_pheno_info.col_index_of_cov.push_back(idx->second); ++valid_cov; } - else if (missing.empty()) - { - missing = cov; - } else { - missing.append("," + cov); + missing.append(comma + cov); + comma = ","; } } - if (!missing.empty()) - { - m_error_message.append("Warning: Covariate(s) missing from file: " - + missing + ". 
Header of file is: " + line - + "\n"); - } - if (valid_cov == 0) - { - error = true; - m_error_message.append("Error: No valid Covariate!\n"); - } - // we will now push back the covariate name according to the order they - // appeared in the file + return valid_cov; +} + +void Commander::reorganize_cov_name(const std::vector& cov_header) +{ m_pheno_info.cov_colname.clear(); std::sort(m_pheno_info.col_index_of_cov.begin(), m_pheno_info.col_index_of_cov.end()); for (auto&& c : m_pheno_info.col_index_of_cov) { m_pheno_info.cov_colname.push_back(cov_header[c]); } +} +bool Commander::process_factor_cov( + const std::unordered_set& included, + const std::unordered_map& ref_index, + const std::unordered_set ori_input) +{ // now start to process the factor covariates + + std::vector transformed_cov; for (auto cov : m_pheno_info.factor_cov) { if (cov.empty()) continue; transformed_cov = transform_covariate(cov); for (auto&& trans : transformed_cov) { - if (included.find(trans) != included.end()) - { - m_pheno_info.col_index_of_factor_cov.push_back( - ref_index[trans]); - } - else if (ori_input.find(cov) == ori_input.end()) + auto&& ref = ref_index.find(trans); + if (included.find(trans) != included.end() + && ref != ref_index.end()) + { m_pheno_info.col_index_of_factor_cov.push_back(ref->second); } + else if (ori_input.find(trans) == ori_input.end()) { - // only complain if untransform input isn't found in cov-col - error = true; + // only complain if transform input isn't found in transformed + // --cov-col + // so if @PC[1.3.5] isn't found, and cov-col is @PC[1-10], then + // we still allow such input m_error_message.append("Error: All factor covariates must be " "found in covariate list. 
" + trans + " not found in covariate list"); + return false; } } } std::sort(m_pheno_info.col_index_of_factor_cov.begin(), m_pheno_info.col_index_of_factor_cov.end()); + return true; +} +bool Commander::covariate_check() +{ + // it is valid to have empty covariate file + if (m_pheno_info.cov_file.empty()) return true; + bool error = false; + // now try to read the header of the covariate file + // first, transform all the covariates + // the actual column name to be included (after parsing) + std::unordered_set included = get_cov_names(); + std::unordered_set ori_input = included; + try + { + auto [cov_header, ref_index] = get_covariate_header(); + if (m_pheno_info.cov_colname.size() == 0) + { + for (size_t i = (1 + !m_pheno_info.ignore_fid); + i < cov_header.size(); ++i) + { included.insert(cov_header[i]); } + } + std::string missing; + size_t valid_cov = find_cov_idx(included, ref_index, missing); + if (!missing.empty()) + { + m_error_message.append( + "Warning: Covariate(s) missing from file: " + missing + ".\n"); + } + if (valid_cov == 0) + { + error = true; + m_error_message.append("Error: No valid Covariate!\n"); + } + reorganize_cov_name(cov_header); + error |= !process_factor_cov(included, ref_index, ori_input); + } + catch (std::runtime_error&) + { + return false; + } return !error; } @@ -1454,13 +1669,6 @@ bool Commander::filter_check() "imputation input.\n"); } } - if (m_target.type == "bgen" - && !misc::within_bound(m_target_filter.hard_threshold, 0.0, 1.0)) - { - error = true; - m_error_message.append( - "Error: Hard threshold must be between 0 and 1!\n"); - } if (!m_extract_file.empty() && !m_exclude_file.empty()) { error = true; @@ -1468,13 +1676,30 @@ bool Commander::filter_check() "Error: Can only use --extract or --exclude but not both\n"); } - if (!misc::within_bound(m_target_filter.info_score, 0.0, 1.0)) + if (m_target.type == "bgen") { - error = true; - m_error_message.append( - "Error: INFO score threshold cannot be bigger than 1.0 " - "or 
smaller than 0.0\n"); + if (!misc::within_bound(m_target_filter.info_score, 0.0, 1.0)) + { + error = true; + m_error_message.append( + "Error: INFO score threshold cannot be bigger than 1.0 " + "or smaller than 0.0\n"); + } + if (!misc::within_bound(m_target_filter.hard_threshold, 0.0, 1.0)) + { + error = true; + m_error_message.append( + "Error: Hard threshold must be between 0 and 1!\n"); + } + if (!misc::within_bound(m_target_filter.dose_threshold, 0.0, 1.0)) + { + error = true; + m_error_message.append( + "Error: Dosage threshold must be between 0 and 1!\n"); + } } + + if (!misc::within_bound(m_target_filter.geno, 0.0, 1.0)) { error = true; @@ -1501,6 +1726,15 @@ bool Commander::misc_check() m_error_message.append( "Error: Number of thread must be larger than 1\n"); } + if (m_keep_ambig) + { + m_error_message.append( + "Warning: By selecting --keep-ambig, PRSice assume the base and " + "target are reporting alleles on the same strand and will " + "therefore only perform dosage flip for the ambiguous SNPs. 
If you " + "are unsure of what the strand is, then you should not select the " + "--keep-ambig option\n"); + } if (!m_perm_info.run_perm && !m_perm_info.run_set_perm && m_perm_info.logit_perm) { @@ -1514,7 +1748,7 @@ bool Commander::misc_check() // of thread used m_parameter_log["thread"] = std::to_string(m_prs_info.thread); m_parameter_log["out"] = m_out_prefix; - bool use_reference = + const bool use_reference = !(m_reference.file_list.empty() && m_reference.file_name.empty()); if (m_prs_info.use_ref_maf && !use_reference) { @@ -1527,10 +1761,15 @@ bool Commander::misc_check() } if (m_allow_inter) { - if ((m_target.type != "bgen" && m_reference.type != "bgen") + if ((m_target.type != "bgen" + && m_reference.type != "bgen") // none are bgen || (use_reference && m_reference.type != "bgen" - && !m_target.hard_coded) - || (!use_reference && m_target.type != "bgen")) + && !m_target.hard_coded) // reference file isn't bgen and not + // require hard coding PRS + || (!use_reference + && m_target.type != "bgen") // Doesn't have a reference file and + // target isn't bgen either + ) { m_allow_inter = false; m_error_message.append( @@ -1551,7 +1790,6 @@ bool Commander::misc_check() "hard-coded bgen file. 
Will disable it\n"); m_ultra_aggressive = false; } - if (m_ambig_no_flip) m_keep_ambig = true; return !error; } @@ -1568,10 +1806,7 @@ bool Commander::prset_check() if (m_prset.feature.empty() && !m_prset.gtf.empty()) { - m_prset.feature.push_back("exon"); - m_prset.feature.push_back("gene"); - m_prset.feature.push_back("protein_coding"); - m_prset.feature.push_back("CDS"); + m_prset.feature = {"exon", "gene", "protein_coding", "CDS"}; m_parameter_log["feature"] = "exon,gene,protein_coding,CDS"; } if (m_perm_info.run_perm && m_perm_info.run_set_perm) @@ -1641,6 +1876,22 @@ bool Commander::prsice_check() std::unique(m_p_thresholds.bar_levels.begin(), m_p_thresholds.bar_levels.end()), m_p_thresholds.bar_levels.end()); + // now check if there are any negative / out bound in bar level + auto max_threshold = *max_element(m_p_thresholds.bar_levels.begin(), + m_p_thresholds.bar_levels.end()); + auto min_threshold = *min_element(m_p_thresholds.bar_levels.begin(), + m_p_thresholds.bar_levels.end()); + if (max_threshold > 1.0) + { + error = true; + m_error_message.append("Error: Cannot have p-value level > 1\n"); + } + if (min_threshold < 0.0) + { + error = true; + m_error_message.append( + "Error: Cannot have p-value level less than 0\n"); + } std::string bar_message = ""; for (auto&& b : m_p_thresholds.bar_levels) { @@ -1671,6 +1922,11 @@ bool Commander::prsice_check() m_error_message.append( "Error: Invalid p-value threshold boundary!\n"); } + if (!misc::within_bound(m_p_thresholds.inter, 0.0, 1.0)) + { + error = true; + m_error_message.append("Error: Invalid p-value step-size!\n"); + } m_parameter_log["interval"] = misc::to_string(m_p_thresholds.inter); m_parameter_log["lower"] = misc::to_string(m_p_thresholds.lower); m_parameter_log["upper"] = misc::to_string(m_p_thresholds.upper); @@ -1731,29 +1987,55 @@ bool Commander::target_check() bool Commander::pheno_check() { + assert(m_ran_base_check); // pheno check must be performed after base check bool error = false; - if 
(m_pheno_info.pheno_col.size() != 0 && m_pheno_info.pheno_file.empty()) + if (!m_pheno_info.pheno_col.empty() && m_pheno_info.pheno_file.empty()) { error = true; m_error_message.append("Error: You must provide a phenotype file for " "multiple phenotype analysis"); + return !error; + } + // check for duplicates + if (!m_pheno_info.pheno_col.empty()) + { + std::unordered_set phenos(m_pheno_info.pheno_col.begin(), + m_pheno_info.pheno_col.end()); + if (phenos.size() != m_pheno_info.pheno_col.size()) + { + error = true; + m_error_message.append( + "Error: Duplicated phenotype column detected. Please make sure " + "you have provided the correct input\n"); + return !error; + } } if (m_pheno_info.binary.empty()) { // add the default + const size_t repeat = + m_pheno_info.pheno_col.empty() ? 1 : m_pheno_info.pheno_col.size(); if (m_base_info.is_beta) { m_parameter_log["binary-target"] = "F"; - m_pheno_info.binary.push_back(false); + if (repeat > 1) + { + m_parameter_log["binary-target"] = std::to_string(repeat) + "F"; + } + m_pheno_info.binary.resize(repeat, false); } else { m_parameter_log["binary-target"] = "T"; - m_pheno_info.binary.push_back(true); + if (repeat > 1) + { + m_parameter_log["binary-target"] = std::to_string(repeat) + "T"; + } + m_pheno_info.binary.resize(repeat, true); } } - // now check if the bar-level is sensible + // now check if binary-target is sensible if (m_pheno_info.pheno_col.size() != m_pheno_info.binary.size()) { if (m_pheno_info.pheno_col.empty() && m_pheno_info.binary.size() == 1) diff --git a/src/main.cpp b/src/main.cpp index 67d0ede9..849b2382 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -50,7 +50,7 @@ int main(int argc, char* argv[]) Commander commander; try { - if (!commander.init(argc, argv, reporter)) + if (!commander.process_command(argc, argv, reporter)) { return 0; // only require the usage information } @@ -77,7 +77,6 @@ int main(int argc, char* argv[]) target_file = &target_file->keep_nonfounder(commander.nonfounders()) 
.keep_ambig(commander.keep_ambig()) - .ambig_no_flip(commander.ambig_no_flip()) .intermediate(commander.use_inter()) .set_prs_instruction(commander.get_prs_instruction()) .set_weight(); diff --git a/src/reporter.cpp b/src/reporter.cpp index 88a8d6d2..da218c88 100644 --- a/src/reporter.cpp +++ b/src/reporter.cpp @@ -30,6 +30,7 @@ void split(std::vector& result, const char* str, char c = ' ') void Reporter::report(const std::string& input, bool wrap) { + if (m_unit_test) return; // split by new line std::vector paragraph; std::vector line; diff --git a/test/src/binplink_test.cpp b/test/src/binplink_test.cpp index 02da94d2..c74830e0 100644 --- a/test/src/binplink_test.cpp +++ b/test/src/binplink_test.cpp @@ -28,7 +28,7 @@ class BPLINK_GEN_SAMPLE_TARGET : public ::testing::Test geno.is_ref = false; Phenotype pheno; pheno.ignore_fid = false; - reporter = new Reporter(std::string(path + "LOG")); + reporter = new Reporter(std::string(path + "LOG"), true); plink = new BinaryPlink(geno, pheno, " ", reporter); } void TearDown() override @@ -131,7 +131,7 @@ TEST(BPLINK_EXTERNAL, EXTERNAL_SAMPLE) uint32_t thread = 1; bool ignore_fid = false, keep_ambig = false, keep_nonfounder = false, is_ref = false; - Reporter reporter(std::string(path + "LOG")); + Reporter reporter(std::string(path + "LOG"), true); BinaryPlink plinkBinary(file_list, file, thread, ignore_fid, keep_nonfounder, keep_ambig, is_ref, &reporter); plinkBinary.load_samples("", "", delim, true); @@ -146,7 +146,7 @@ TEST(BPLINK_SAMPLE_CHECK, DUPLICATE_SAMPLE) size_t thread = 1; bool ignore_fid = false, keep_ambig = false, keep_nonfounder = false, is_ref = false; - Reporter reporter(std::string(path + "LOG")); + Reporter reporter(std::string(path + "LOG"), true); BinaryPlink plinkBinary(file_list, file, thread, ignore_fid, keep_nonfounder, keep_ambig, is_ref, &reporter); try @@ -171,7 +171,7 @@ TEST(BPLINK_FOUNDER, FOUNDER_REMOVE) uint32_t thread = 1; bool ignore_fid = false, keep_ambig = false, keep_nonfounder = 
false, is_ref = false; - Reporter reporter(std::string(path + "LOG")); + Reporter reporter(std::string(path + "LOG"), true); BinaryPlink plinkBinary(file_list, file, thread, ignore_fid, keep_nonfounder, keep_ambig, is_ref, &reporter); plinkBinary.load_samples("", "", delim, true); @@ -199,7 +199,7 @@ class BPLINK_GEN_SNP_TARGET : public ::testing::Test uint32_t thread = 1; bool ignore_fid = false, keep_ambig = false, keep_nonfounder = false, is_ref = false; - Reporter reporter(std::string(path + "LOG")); + Reporter reporter(std::string(path + "LOG"), true); plink = new BinaryPlink(file_list, file, thread, ignore_fid, keep_nonfounder, keep_ambig, is_ref, reporter); plink->load_samples("", "", true, reporter); @@ -210,7 +210,7 @@ class BPLINK_GEN_SNP_TARGET : public ::testing::Test TEST_F(BPLINK_GEN_SNP_TARGET, SIMPLE_READ) { std::string out = path + "test"; - Reporter reporter(std::string(path + "LOG")); + Reporter reporter(std::string(path + "LOG"), true); Region exclusion_region("", reporter); double maf = 0.0; @@ -238,7 +238,7 @@ TEST_F(BPLINK_GEN_SNP_TARGET, SIMPLE_READ) TEST_F(BPLINK_GEN_SNP_TARGET, MAF_FILTERING_1) { std::string out = path + "test"; - Reporter reporter(std::string(path + "LOG")); + Reporter reporter(std::string(path + "LOG"), true); Region exclusion_region("", reporter); double maf = 0.2; @@ -266,7 +266,7 @@ TEST_F(BPLINK_GEN_SNP_TARGET, MAF_FILTERING_1) TEST_F(BPLINK_GEN_SNP_TARGET, MAF_FILTERING_2) { std::string out = path + "test"; - Reporter reporter(std::string(path + "LOG")); + Reporter reporter(std::string(path + "LOG"), true); Region exclusion_region("", reporter); double maf = 0.2068; @@ -293,7 +293,7 @@ TEST_F(BPLINK_GEN_SNP_TARGET, MAF_FILTERING_2) TEST_F(BPLINK_GEN_SNP_TARGET, GENO_FILTERING) { std::string out = path + "test"; - Reporter reporter(std::string(path + "LOG")); + Reporter reporter(std::string(path + "LOG"), true); Region exclusion_region("", reporter); double maf = 0.2068; @@ -321,7 +321,7 @@ 
TEST_F(BPLINK_GEN_SNP_TARGET, GENO_FILTERING) TEST_F(BPLINK_GEN_SNP_TARGET, MAF_GENO_FILTERING) { std::string out = path + "test"; - Reporter reporter(std::string(path + "LOG")); + Reporter reporter(std::string(path + "LOG"), true); Region exclusion_region("", reporter); double maf = 0.2068; @@ -352,7 +352,7 @@ TEST_F(BPLINK_GEN_SNP_TARGET, TEST_EXCLUSION_FUNCTION) // function. We don't need to test the exclusion performance as that should // be tested in region's unit testing std::string out = path + "test"; - Reporter reporter(std::string(path + "LOG")); + Reporter reporter(std::string(path + "LOG"), true); // NOTE: The end range boundary is exclusion. Any SNP with exact match on // that number will not be excluded. E.g. SNP on chr1 2842568 Region exclusion_region("1:2832179-2842568", reporter); @@ -386,7 +386,7 @@ TEST(BPLINK_GEN_SNP, DUP_SNP) uint32_t thread = 1; bool ignore_fid = false, keep_ambig = false, keep_nonfounder = false, is_ref = false; - Reporter reporter(std::string(path + "LOG")); + Reporter reporter(std::string(path + "LOG"), true); BinaryPlink plink(file_list, file, thread, ignore_fid, keep_nonfounder, keep_ambig, is_ref, reporter); plink.load_samples("", "", true, reporter); @@ -438,7 +438,7 @@ TEST(BPLINK_GEN_SNP, SEQ_INPUT) uint32_t thread = 1; bool ignore_fid = false, keep_ambig = false, keep_nonfounder = false, is_ref = false; - Reporter reporter(std::string(path + "LOG")); + Reporter reporter(std::string(path + "LOG"), true); BinaryPlink plink(file_list, file, thread, ignore_fid, keep_nonfounder, keep_ambig, is_ref, reporter); plink.load_samples("", "", true, reporter); @@ -478,7 +478,7 @@ class BPLINK_BASE_READ : public ::testing::Test uint32_t thread = 1; bool ignore_fid = false, keep_ambig = false, keep_nonfounder = false, is_ref = false; - Reporter reporter(std::string(path + "LOG")); + Reporter reporter(std::string(path + "LOG"), true); plink = new BinaryPlink(file_list, file, thread, ignore_fid, keep_nonfounder, keep_ambig, is_ref, 
reporter); plink->load_samples("", "", true, reporter); @@ -524,7 +524,7 @@ TEST_F(BPLINK_BASE_READ, SIMPLE_BETA) bool perform_shrinkage = false; std::vector feature; Region region(feature, 0, 0, false, false); - Reporter reporter(std::string(path + "LOG")); + Reporter reporter(std::string(path + "LOG"), true); try { plink->read_base(base, out, index, barlevels, lower, inter, upper, @@ -567,7 +567,7 @@ TEST_F(BPLINK_BASE_READ, SIMPLE_OR) bool perform_shrinkage = false; std::vector feature; Region region(feature, 0, 0, false, false); - Reporter reporter(std::string(path + "LOG")); + Reporter reporter(std::string(path + "LOG"), true); try { plink->read_base(base, out, index, barlevels, lower, inter, upper, @@ -607,7 +607,7 @@ TEST_F(BPLINK_BASE_READ, PROBLEM_OR) bool perform_shrinkage = false; std::vector feature; Region region(feature, 0, 0, false, false); - Reporter reporter(std::string(path + "LOG")); + Reporter reporter(std::string(path + "LOG"), true); try { plink->read_base(base, out, index, barlevels, lower, inter, upper, @@ -652,7 +652,7 @@ TEST_F(BPLINK_BASE_READ, WRONG_P) bool perform_shrinkage = false; std::vector feature; Region region(feature, 0, 0, false, false); - Reporter reporter(std::string(path + "LOG")); + Reporter reporter(std::string(path + "LOG"), true); try { plink->read_base(base, out, index, barlevels, lower, inter, upper, @@ -697,7 +697,7 @@ TEST_F(BPLINK_BASE_READ, WRONG_COORDINATE) bool perform_shrinkage = false; std::vector feature; Region region(feature, 0, 0, false, false); - Reporter reporter(std::string(path + "LOG")); + Reporter reporter(std::string(path + "LOG"), true); try { plink->read_base(base, out, index, barlevels, lower, inter, upper, diff --git a/test/src/commander_test.cpp b/test/src/commander_test.cpp index 0e377b48..6f72845a 100644 --- a/test/src/commander_test.cpp +++ b/test/src/commander_test.cpp @@ -5,6 +5,7 @@ #include "reporter.hpp" #include "storage.hpp" #include "gtest/gtest.h" +#include TEST(COMMANDER_BASIC, 
INIT) { @@ -22,63 +23,1085 @@ TEST(COMMANDER_BASIC, INIT) ASSERT_DOUBLE_EQ(commander.max_memory(2.0), 2.0); } -TEST(COMMANDER_BASIC, USAGE) +class mockCommander : public Commander { - Commander commander; - Reporter reporter(std::string(path + "LOG")); - int argc = 2; - char name[7], help[7]; - strcpy(name, "PRSice"); - strcpy(help, "--help"); - char* argv[2] = {name, help}; - try +public: + static std::vector + transform_covariate(const std::string& cov_in) { - ASSERT_FALSE(commander.init(argc, argv, reporter)); + return Commander::transform_covariate(cov_in); } - catch (...) + bool check_parse_unit_value(const std::string& input, const std::string& c, + const size_t default_power, size_t& target, + bool memory = false) { - FAIL(); + return parse_unit_value(input, c, default_power, target, memory); + } + + static bool find_first_end_wrapper(const std::string_view& cov, + const size_t idx, size_t& res) + { + try + { + res = find_first_end(cov, idx); + return true; + } + catch (const std::runtime_error&) + { + return false; + } + } + static bool parse_range_wrapper(std::string_view cov, + std::vector& res) + { + try + { + res = parse_range(cov); + return true; + } + catch (std::runtime_error&) + { + return false; + } + } + static bool get_range_wrapper(std::string_view cov, size_t start, + size_t end, std::vector& res) + { + try + { + res = get_range(cov, start, end); + return true; + } + catch (std::runtime_error&) + { + return false; + } + } + static bool + update_covariate_ranges_wrapper(std::vector& result, + std::vector ranges) + { + try + { + update_covariate_range(ranges, result); + return true; + } + catch (...) + { + return false; + } + } + static bool transform_wrapper(const std::string& str, + std::vector& result) + { + try + { + result = transform_covariate(str); + return true; + } + catch (...) 
+ { + return false; + } + } + bool parse_command_wrapper(const std::string& command) + { + bool early_terminate = false; + return parse_command_wrapper(command, early_terminate); + } + bool parse_command_wrapper(const std::string& command, + bool& early_terminate) + { + Reporter reporter(std::string("LOG"), 60, true); + std::vector argv_str = misc::split("PRSice " + command); + std::vector cstrings; + cstrings.reserve(argv_str.size()); + for (size_t i = 0; i < argv_str.size(); ++i) + { cstrings.push_back(const_cast(argv_str[i].c_str())); } + int argc = static_cast(argv_str.size()); + try + { + early_terminate = false; + // return false if error + return !init(argc, &cstrings[0], early_terminate, reporter); + } + catch (...) + { + // error = false + return false; + } } + + bool no_default() const { return m_user_no_default; } + bool target_check_wrapper() { return target_check(); } + bool prsice_check_wrapper() { return prsice_check(); } + bool clump_check_wrapper() { return clump_check(); } + bool ref_check_wrapper() { return ref_check(); } + bool misc_check_wrapper() { return misc_check(); } + bool filter_check_wrapper() { return filter_check(); } + bool prset_check_wrapper() { return prset_check(); } + bool base_check_wrapper() + { + try + { + return base_check(); + } + catch (const std::runtime_error&) + { + return false; + } + } + bool base_column_check_wrapper(std::vector& column_names) + { + return base_column_check(column_names); + } + bool pheno_check_wrapper(bool is_beta) + { + m_ran_base_check = true; + m_base_info.is_beta = is_beta; + m_base_info.is_or = !is_beta; + return pheno_check(); + } + std::string get_error() const { return m_error_message; } + int32_t max_thread() { return maximum_thread(); } + auto get_cov_names_wrap() { return get_cov_names(); } + size_t + find_cov_idx_wrap(const std::unordered_set& included, + const std::unordered_map& ref_index, + std::string& missing) + { + return find_cov_idx(included, ref_index, missing); + } + void 
reorganize_cov_name_wrap(const std::vector& cov_header) + { + reorganize_cov_name(cov_header); + } + bool process_factor_cov_wrap( + const std::unordered_set& included, + const std::unordered_map& ref_index, + const std::unordered_set& ori_input) + { + return process_factor_cov(included, ref_index, ori_input); + } +}; + +TEST(COMMAND_PARSING, USAGE) +{ + mockCommander commander; + bool early_terminate = false; + ASSERT_FALSE(commander.parse_command_wrapper("--help", early_terminate)); + ASSERT_TRUE(early_terminate); + ASSERT_FALSE(commander.parse_command_wrapper("-h", early_terminate)); + ASSERT_TRUE(early_terminate); + // this is a throw error + ASSERT_FALSE(commander.parse_command_wrapper("", early_terminate)); + ASSERT_FALSE(early_terminate); + // this should fail, as ? is reserved for invalid operators + ASSERT_FALSE(commander.parse_command_wrapper("-?", early_terminate)); + ASSERT_FALSE(early_terminate); + // version check should be similar to --help + ASSERT_FALSE(commander.parse_command_wrapper("-v", early_terminate)); + ASSERT_TRUE(early_terminate); + ASSERT_FALSE(commander.parse_command_wrapper("--version", early_terminate)); + ASSERT_TRUE(early_terminate); +} + +void check_bar_threshold(const std::string& command, + const std::vector& expected, + const bool expect_fail) +{ + mockCommander commander; + if (expect_fail) { ASSERT_FALSE(commander.parse_command_wrapper(command)); } + else + { + ASSERT_TRUE(commander.parse_command_wrapper(command)); + auto res = commander.get_p_threshold(); + ASSERT_EQ(expected.size(), res.bar_levels.size()); + for (size_t i = 0; i < res.bar_levels.size(); ++i) + { ASSERT_DOUBLE_EQ(res.bar_levels[i], expected[i]); } + } +} +TEST(COMMAND_PARSING, BAR_LEVELS_VALID) +{ + // valid + check_bar_threshold("--bar-levels 0.1,0.2,0.3,0.4,0.5", + std::vector {0.1, 0.2, 0.3, 0.4, 0.5}, false); + // we have not deal with duplicates yet + check_bar_threshold("--bar-levels 0.1,0.2,0.3,0.3,0.4,0.5", + std::vector {0.1, 0.2, 0.3, 0.3, 0.4, 
0.5}, + false); + // Have not sorted either + check_bar_threshold("--bar-levels 0.5,0.2,0.3,0.3,0.4,0.1", + std::vector {0.5, 0.2, 0.3, 0.3, 0.4, 0.1}, + false); + // supposed to fail but init didn't check for these either + // negative number is no no + check_bar_threshold("--bar-levels 0.1,-0.2,0.3,0.4,0.5", + std::vector {0.1, -0.2, 0.3, 0.4, 0.5}, false); + // No zero surely? + check_bar_threshold("--bar-levels 0,0.2,0.3,0.3,0.4,0.5", + std::vector {0, 0.2, 0.3, 0.3, 0.4, 0.5}, + false); + // Number that is too big is also prohibited + check_bar_threshold("--bar-levels 0.5,0.2,0.3,0.3,0.4,0.1,2", + std::vector {0.5, 0.2, 0.3, 0.3, 0.4, 0.1, 2}, + false); +} +TEST(COMMAND_PARSING, BAR_LEVELS_INVALID) +{ + // the only situation where this will fail in init is if there are + // non-numeric inputs + check_bar_threshold("--bar-levels 0.1,0.2,0.3,a,0.4,0.5", + std::vector {}, true); + // if the value overflow,it should also error out + check_bar_threshold("--bar-levels 0.1,0.2,0.3,0.4,1.79769e+309", + std::vector {}, true); +} +TEST(COMMAND_PARSING, FASTSCORE) +{ + mockCommander set; + std::string command = "--fastscore"; + // default is not using --fastscore + ASSERT_FALSE(set.get_p_threshold().fastscore); + ASSERT_TRUE(set.parse_command_wrapper(command)); + ASSERT_TRUE(set.get_p_threshold().fastscore); +} +TEST(COMMAND_PARSING, NO_FULL) +{ + mockCommander set; + std::string command = "--no-full"; + ASSERT_FALSE(set.get_p_threshold().no_full); + ASSERT_TRUE(set.parse_command_wrapper(command)); + ASSERT_TRUE(set.get_p_threshold().no_full); +} +TEST(COMMAND_PARSING, NO_CLUMP) +{ + mockCommander set; + std::string command = "--no-clump"; + ASSERT_FALSE(set.get_clump_info().no_clump); + ASSERT_TRUE(set.parse_command_wrapper(command)); + ASSERT_TRUE(set.get_clump_info().no_clump); +} +TEST(COMMAND_PARSING, HARD_CODED) +{ + mockCommander set; + std::string command = "--hard"; + ASSERT_FALSE(set.get_target().hard_coded); + ASSERT_TRUE(set.parse_command_wrapper(command)); + 
ASSERT_TRUE(set.get_target().hard_coded); +} +TEST(COMMAND_PARSING, ALLOW_INTER) +{ + mockCommander set; + std::string command = "--allow-inter"; + ASSERT_FALSE(set.use_inter()); + ASSERT_TRUE(set.parse_command_wrapper(command)); + ASSERT_TRUE(set.use_inter()); +} +TEST(COMMAND_PARSING, NON_FOUNDERS) +{ + mockCommander set; + std::string command = "--nonfounders"; + ASSERT_FALSE(set.nonfounders()); + ASSERT_TRUE(set.parse_command_wrapper(command)); + ASSERT_TRUE(set.nonfounders()); +} +TEST(COMMAND_PARSING, BETA) +{ + mockCommander set; + std::string command = "--beta"; + ASSERT_FALSE(set.get_base().is_beta); + ASSERT_FALSE(set.get_base().is_or); + ASSERT_TRUE(set.parse_command_wrapper(command)); + ASSERT_TRUE(set.get_base().is_beta); + ASSERT_FALSE(set.get_base().is_or); +} +TEST(COMMAND_PARSING, OR) +{ + mockCommander set; + std::string command = "--or"; + ASSERT_FALSE(set.get_base().is_or); + ASSERT_FALSE(set.get_base().is_beta); + ASSERT_TRUE(set.parse_command_wrapper(command)); + ASSERT_TRUE(set.get_base().is_or); + ASSERT_FALSE(set.get_base().is_beta); +} +TEST(COMMAND_PARSING, INDEX) +{ + mockCommander set; + std::string command = "--index"; + ASSERT_FALSE(set.get_base().is_index); + ASSERT_TRUE(set.parse_command_wrapper(command)); + ASSERT_TRUE(set.get_base().is_index); +} +TEST(COMMAND_PARSING, ALLSCORE) +{ + mockCommander set; + std::string command = "--all-score"; + // default is not using --allscore + ASSERT_FALSE(set.all_scores()); + ASSERT_TRUE(set.parse_command_wrapper(command)); + ASSERT_TRUE(set.all_scores()); +} +TEST(COMMAND_PARSING, IGNORE_FID) +{ + mockCommander set; + std::string command = "--ignore-fid"; + ASSERT_FALSE(set.get_pheno().ignore_fid); + ASSERT_TRUE(set.parse_command_wrapper(command)); + ASSERT_TRUE(set.get_pheno().ignore_fid); +} +TEST(COMMAND_PARSING, KEEP_AMBIG) +{ + mockCommander set; + std::string command = "--keep-ambig"; + ASSERT_FALSE(set.keep_ambig()); + ASSERT_TRUE(set.parse_command_wrapper(command)); + 
ASSERT_TRUE(set.keep_ambig()); +} + +TEST(COMMAND_PARSING, NON_CUMULATE) +{ + mockCommander set; + std::string command = "--non-cumulate"; + ASSERT_FALSE(set.get_prs_instruction().non_cumulate); + ASSERT_TRUE(set.parse_command_wrapper(command)); + ASSERT_TRUE(set.get_prs_instruction().non_cumulate); +} +TEST(COMMAND_PARSING, NO_REGRESS) +{ + mockCommander set; + std::string command = "--no-regress"; + ASSERT_FALSE(set.get_prs_instruction().no_regress); + ASSERT_TRUE(set.parse_command_wrapper(command)); + ASSERT_TRUE(set.get_prs_instruction().no_regress); +} +TEST(COMMAND_PARSING, PRINT_SNP) +{ + mockCommander set; + std::string command = "--print-snp"; + ASSERT_FALSE(set.print_snp()); + ASSERT_TRUE(set.parse_command_wrapper(command)); + ASSERT_TRUE(set.print_snp()); +} +TEST(COMMAND_PARSING, USE_REF_MAF) +{ + mockCommander set; + std::string command = "--use-ref-maf"; + ASSERT_FALSE(set.get_prs_instruction().use_ref_maf); + ASSERT_TRUE(set.parse_command_wrapper(command)); + ASSERT_TRUE(set.get_prs_instruction().use_ref_maf); +} +TEST(COMMAND_PARSING, LOGIT_PERM) +{ + mockCommander set; + std::string command = "--logit-perm"; + ASSERT_FALSE(set.get_perm().logit_perm); + ASSERT_TRUE(set.parse_command_wrapper(command)); + ASSERT_TRUE(set.get_perm().logit_perm); +} +TEST(COMMAND_PARSING, FULL_BACK) +{ + mockCommander set; + std::string command = "--full-back"; + ASSERT_FALSE(set.get_set().full_as_background); + ASSERT_TRUE(set.parse_command_wrapper(command)); + ASSERT_TRUE(set.get_set().full_as_background); +} +TEST(COMMAND_PARSING, NO_DEFAULT) +{ + mockCommander set; + std::string command = "--no-default"; + ASSERT_FALSE(set.no_default()); + ASSERT_TRUE(set.parse_command_wrapper(command)); + ASSERT_TRUE(set.no_default()); +} +std::string get_base_name(const mockCommander& commander, size_t idx) +{ + return commander.get_base().column_name[idx]; +} +bool get_has_base(const mockCommander& commander, size_t idx) +{ + return commander.get_base().has_column[idx]; +} +void 
check_set_base_flag(const std::string& command, + const std::string& expected, + const std::string& default_str, size_t idx) +{ + mockCommander commander; + ASSERT_STREQ(get_base_name(commander, idx).c_str(), default_str.c_str()); + ASSERT_FALSE(get_has_base(commander, idx)); + if (default_str != expected) + ASSERT_STRNE(get_base_name(commander, idx).c_str(), expected.c_str()); + ASSERT_TRUE(commander.parse_command_wrapper(command + " " + expected)); + ASSERT_STREQ(get_base_name(commander, idx).c_str(), expected.c_str()); + ASSERT_TRUE(get_has_base(commander, idx)); } +TEST(COMMAND_PARSING, SET_BASE) +{ + check_set_base_flag("--A1", "a", "A1", +BASE_INDEX::EFFECT); + check_set_base_flag("--a1", "b", "A1", +BASE_INDEX::EFFECT); + check_set_base_flag("--A2", "c", "A2", +BASE_INDEX::NONEFFECT); + check_set_base_flag("--a2", "d", "A2", +BASE_INDEX::NONEFFECT); + check_set_base_flag("--stat", "statistic", "", +BASE_INDEX::STAT); + check_set_base_flag("--pvalue", "insignificant", "P", +BASE_INDEX::P); + check_set_base_flag("-p", "postdoc", "P", +BASE_INDEX::P); + check_set_base_flag("--chr", "chromosome", "CHR", +BASE_INDEX::CHR); + check_set_base_flag("--bp", "location", "BP", +BASE_INDEX::BP); + check_set_base_flag("--snp", "cnv", "SNP", +BASE_INDEX::RS); + mockCommander commander; + ASSERT_TRUE(commander.get_base().file_name.empty()); + ASSERT_TRUE(commander.parse_command_wrapper("--base BaseInfo")); + ASSERT_STREQ(commander.get_base().file_name.c_str(), "BaseInfo"); + ASSERT_TRUE(commander.parse_command_wrapper("-b Basic")); + ASSERT_STREQ(commander.get_base().file_name.c_str(), "Basic"); + check_set_base_flag("--base-info", "INFO_FILTER", "INFO:0.9", + +BASE_INDEX::INFO); + check_set_base_flag("--base-maf", "MAF_FILTER", "", +BASE_INDEX::MAF); +} -TEST(COMMANDER_BASIC, NO_ARG) +void check_binary_target(const std::string& command, + const std::vector expected, bool expect_fail) { - Commander commander; - Reporter reporter(std::string(path + "LOG")); - int argc = 1; - 
std::string name = "PRSice"; - char name_c[7]; - strcpy(name_c, name.c_str()); - char* argv[1] = {name_c}; - try + mockCommander commander; + // no default at the beginning + ASSERT_TRUE(commander.get_pheno().binary.empty()); + bool success = + commander.parse_command_wrapper("--binary-target " + command); + if (expect_fail) { ASSERT_FALSE(success); } + else { - commander.init(argc, argv, reporter); - FAIL(); + ASSERT_TRUE(success); + ASSERT_EQ(commander.get_pheno().binary.size(), expected.size()); + for (size_t i = 0; i < expected.size(); ++i) + { ASSERT_EQ(commander.get_pheno().binary[i], expected[i]); } } - catch (...) +} +TEST(COMMAND_PARSING, BINARY_TARGET_INVALID) +{ + // we no longer allow numeric representation of T/F as we need those for + // parsing + check_binary_target("1", std::vector {true}, true); + check_binary_target("0", std::vector {false}, true); + // way too much + check_binary_target("1e200T", std::vector {false}, true); + // Wrong spelling + check_binary_target("Tru", std::vector {false}, true); + // Non numeric multiplier + check_binary_target("aT", std::vector {false}, true); + // negative multiplier + check_binary_target("-1T", std::vector {false}, true); + check_binary_target("F,-10T", std::vector {false}, true); + // this in theory is correct, but as the second argument starts with -, and + // PRSice doesn't have a -1 parameter, it will cause an error + check_binary_target("F, -10T", std::vector {false}, true); + // this is "valid" but wrong in the sense that 3F will not be processed and + // PRSice can in theory continue to run until we reach check + check_binary_target("F,2T, 3F", std::vector {false, true, true}, + false); +} +TEST(COMMAND_PARSING, BINARY_TARGET_VALID) +{ + // try differnent form of binary target input + // valid + check_binary_target("T", std::vector {true}, false); + check_binary_target("True", std::vector {true}, false); + check_binary_target("true", std::vector {true}, false); + check_binary_target("1true", 
std::vector {true}, false); + check_binary_target("1T", std::vector {true}, false); + check_binary_target("F", std::vector {false}, false); + check_binary_target("False", std::vector {false}, false); + check_binary_target("false", std::vector {false}, false); + check_binary_target("1false", std::vector {false}, false); + check_binary_target("1F", std::vector {false}, false); + // more complex + check_binary_target("4T", std::vector {true, true, true, true}, + false); + check_binary_target( + "6F", std::vector {false, false, false, false, false, false}, + false); + check_binary_target("True,3F", + std::vector {true, false, false, false}, false); + check_binary_target("True,3F", + std::vector {true, false, false, false}, false); + // check if it append properly + mockCommander commander; + // no default at the beginning + ASSERT_TRUE(commander.get_pheno().binary.empty()); + std::vector expected = {true, true, true, false}; + ASSERT_TRUE(commander.parse_command_wrapper("--binary-target 3T,F")); + ASSERT_EQ(commander.get_pheno().binary.size(), expected.size()); + for (size_t i = 0; i < expected.size(); ++i) + { ASSERT_EQ(commander.get_pheno().binary[i], expected[i]); } + // now second invoke of --binary-target + ASSERT_TRUE(commander.parse_command_wrapper("--binary-target 2T")); + expected.push_back(true); + expected.push_back(true); + for (size_t i = 0; i < expected.size(); ++i) + { ASSERT_EQ(commander.get_pheno().binary[i], expected[i]); } +} +TEST(COMMAND_PARSING, DOSAGE) +{ + mockCommander commander; + ASSERT_DOUBLE_EQ(commander.get_target_qc().dose_threshold, 0.0); + ASSERT_DOUBLE_EQ(commander.get_target_qc().hard_threshold, 0.1); + ASSERT_TRUE(commander.parse_command_wrapper("--dose-thres 1.0")); + ASSERT_DOUBLE_EQ(commander.get_target_qc().dose_threshold, 1.0); + ASSERT_TRUE(commander.parse_command_wrapper("--hard-thres -0.1")); + ASSERT_DOUBLE_EQ(commander.get_target_qc().hard_threshold, -0.1); + // out bound check + 
ASSERT_FALSE(commander.parse_command_wrapper("--hard-thres 1e400")); + ASSERT_DOUBLE_EQ(commander.get_ref_qc().dose_threshold, 0.0); + ASSERT_DOUBLE_EQ(commander.get_ref_qc().hard_threshold, 0.1); + ASSERT_TRUE(commander.parse_command_wrapper("--ld-dose-thres 1.0")); + ASSERT_DOUBLE_EQ(commander.get_ref_qc().dose_threshold, 1.0); + ASSERT_TRUE(commander.parse_command_wrapper("--ld-hard-thres -0.1")); + ASSERT_DOUBLE_EQ(commander.get_ref_qc().hard_threshold, -0.1); + // out bound check + ASSERT_FALSE(commander.parse_command_wrapper("--ld-hard-thres 1e400")); +} +TEST(COMMAND_PARSING, TARGET_DEFAULT) +{ + mockCommander commander; + // check default values + ASSERT_FALSE(commander.get_target().is_ref); + ASSERT_DOUBLE_EQ(commander.get_target_qc().geno, 1.0); + ASSERT_DOUBLE_EQ(commander.get_target_qc().info_score, 0.0); + ASSERT_DOUBLE_EQ(commander.get_target_qc().maf, 0.0); + ASSERT_TRUE(commander.get_pheno().pheno_file.empty()); + ASSERT_TRUE(commander.get_pheno().pheno_col.empty()); + ASSERT_TRUE(commander.get_pheno().prevalence.empty()); + ASSERT_TRUE(commander.get_target().remove.empty()); + ASSERT_TRUE(commander.get_target().keep.empty()); + ASSERT_TRUE(commander.get_target().file_name.empty()); + ASSERT_TRUE(commander.get_target().file_list.empty()); + ASSERT_STREQ(commander.get_target().type.c_str(), "bed"); +} + +TEST(COMMAND_PARSING, CLUMP_DEFAULT) +{ + mockCommander commander; + // check default values + ASSERT_TRUE(commander.get_reference().is_ref); + ASSERT_DOUBLE_EQ(commander.get_target_qc().geno, 1.0); + ASSERT_DOUBLE_EQ(commander.get_target_qc().info_score, 0.0); + ASSERT_DOUBLE_EQ(commander.get_target_qc().maf, 0.0); + ASSERT_TRUE(commander.get_reference().remove.empty()); + ASSERT_TRUE(commander.get_reference().keep.empty()); + ASSERT_TRUE(commander.get_reference().file_name.empty()); + ASSERT_TRUE(commander.get_reference().file_list.empty()); + ASSERT_STREQ(commander.get_reference().type.c_str(), "bed"); + 
ASSERT_DOUBLE_EQ(commander.get_clump_info().r2, 0.1); + ASSERT_DOUBLE_EQ(commander.get_clump_info().proxy, 0.0); + ASSERT_EQ(commander.get_clump_info().distance, 250000); + ASSERT_FALSE(commander.get_clump_info().provided_distance); + ASSERT_DOUBLE_EQ(commander.get_clump_info().pvalue, 1.0); +} +TEST(COMMAND_PARSING, CLUMP_SETTINGS) +{ + mockCommander commander; + ASSERT_TRUE(commander.parse_command_wrapper("--clump-p 0.1")); + ASSERT_DOUBLE_EQ(commander.get_clump_info().pvalue, 0.1); + ASSERT_TRUE(commander.parse_command_wrapper("--clump-r2 0.5")); + ASSERT_DOUBLE_EQ(commander.get_clump_info().r2, 0.5); + ASSERT_TRUE(commander.parse_command_wrapper("--clump-kb 100")); + ASSERT_DOUBLE_EQ(commander.get_clump_info().distance, 100000); + ASSERT_TRUE(commander.get_clump_info().provided_distance); + ASSERT_TRUE(commander.parse_command_wrapper("--clump-kb 100kb")); + ASSERT_DOUBLE_EQ(commander.get_clump_info().distance, 100000); + ASSERT_TRUE(commander.parse_command_wrapper("--clump-kb 100b")); + ASSERT_DOUBLE_EQ(commander.get_clump_info().distance, 100); + ASSERT_TRUE(commander.parse_command_wrapper("--clump-kb 200mb")); + ASSERT_DOUBLE_EQ(commander.get_clump_info().distance, 200000000); + ASSERT_FALSE(commander.parse_command_wrapper("--clump-kb -100kb")); +} +TEST(COMMAND_PARSING, PRSET) +{ + // wind-3 and --wind-5 use the same function as clump + mockCommander commander; + ASSERT_EQ(commander.get_set().wind_3, 0); + ASSERT_EQ(commander.get_set().wind_5, 0); + // default is bp + ASSERT_TRUE(commander.parse_command_wrapper("--wind-5 10")); + ASSERT_EQ(commander.get_set().wind_5, 10); + ASSERT_TRUE(commander.parse_command_wrapper("--wind-3 20k")); + ASSERT_EQ(commander.get_set().wind_3, 20000); + // now check the background stuff + ASSERT_TRUE(commander.get_set().background.empty()); + ASSERT_TRUE(commander.get_set().msigdb.empty()); + ASSERT_TRUE(commander.get_set().bed.empty()); + ASSERT_TRUE(commander.get_set().snp.empty()); + 
ASSERT_TRUE(commander.get_set().feature.empty()); + ASSERT_TRUE(commander.get_set().gtf.empty()); + ASSERT_FALSE(commander.get_set().run); + ASSERT_TRUE(commander.exclusion_range().empty()); + // now check if they are loaded correctly (doesn't have to be in correct + // format at the moment) + ASSERT_TRUE(commander.parse_command_wrapper("--background Name:0")); + ASSERT_STREQ(commander.get_set().background.c_str(), "Name:0"); + ASSERT_TRUE(commander.parse_command_wrapper("--msigdb kegg")); + ASSERT_EQ(commander.get_set().msigdb.size(), 1); + ASSERT_STREQ(commander.get_set().msigdb[0].c_str(), "kegg"); + ASSERT_TRUE(commander.parse_command_wrapper("-m Reactome,MP")); + // it append + ASSERT_EQ(commander.get_set().msigdb.size(), 3); + ASSERT_STREQ(commander.get_set().msigdb[0].c_str(), "kegg"); + ASSERT_STREQ(commander.get_set().msigdb[1].c_str(), "Reactome"); + ASSERT_STREQ(commander.get_set().msigdb[2].c_str(), "MP"); + // GTF + ASSERT_TRUE(commander.parse_command_wrapper("--gtf Homo")); + ASSERT_STREQ(commander.get_set().gtf.c_str(), "Homo"); + ASSERT_TRUE(commander.parse_command_wrapper("-g Misc")); + ASSERT_STREQ(commander.get_set().gtf.c_str(), "Misc"); + // bed B + ASSERT_TRUE(commander.parse_command_wrapper("--bed File:Name")); + ASSERT_EQ(commander.get_set().bed.size(), 1); + ASSERT_STREQ(commander.get_set().bed[0].c_str(), "File:Name"); + ASSERT_TRUE(commander.parse_command_wrapper("-B Something,oK")); + ASSERT_STREQ(commander.get_set().bed[0].c_str(), "File:Name"); + ASSERT_STREQ(commander.get_set().bed[1].c_str(), "Something"); + ASSERT_STREQ(commander.get_set().bed[2].c_str(), "oK"); + // snp-set + ASSERT_TRUE(commander.parse_command_wrapper("--snp-set list,of,snp")); + ASSERT_EQ(commander.get_set().snp.size(), 3); + ASSERT_STREQ(commander.get_set().snp[0].c_str(), "list"); + ASSERT_STREQ(commander.get_set().snp[1].c_str(), "of"); + ASSERT_STREQ(commander.get_set().snp[2].c_str(), "snp"); + // feature + 
ASSERT_TRUE(commander.parse_command_wrapper("--feature gene")); + ASSERT_EQ(commander.get_set().feature.size(), 1); + ASSERT_STREQ(commander.get_set().feature[0].c_str(), "gene"); + // no duplicate check + ASSERT_TRUE(commander.parse_command_wrapper("--feature protein,gene")); + ASSERT_EQ(commander.get_set().feature.size(), 3); + ASSERT_STREQ(commander.get_set().feature[0].c_str(), "gene"); + ASSERT_STREQ(commander.get_set().feature[1].c_str(), "protein"); + ASSERT_STREQ(commander.get_set().feature[2].c_str(), "gene"); + // Exclusion range is a direct loading + ASSERT_TRUE(commander.parse_command_wrapper("--x-range chr6:1-10")); + ASSERT_STREQ(commander.exclusion_range().c_str(), "chr6:1-10"); + // we don't even tokenize it + ASSERT_TRUE( + commander.parse_command_wrapper("--x-range chr6:1-10,chr22:133:288")); + ASSERT_STREQ(commander.exclusion_range().c_str(), + "chr6:1-10,chr22:133:288"); +} + +TEST(COMMAND_PARSING, MISC) +{ + mockCommander commander; + // check defaults + ASSERT_STREQ(commander.out().c_str(), "PRSice"); + ASSERT_EQ(commander.get_prs_instruction().thread, 1); + ASSERT_EQ(commander.memory(), 1e10); + ASSERT_STREQ(commander.delim().c_str(), " "); + ASSERT_TRUE(commander.exclude_file().empty()); + ASSERT_TRUE(commander.extract_file().empty()); + ASSERT_TRUE(commander.parse_command_wrapper("--out PRSet")); + ASSERT_STREQ(commander.out().c_str(), "PRSet"); + int32_t max_thread = commander.max_thread(); + if (max_thread > 2) { - SUCCEED(); + ASSERT_TRUE(commander.parse_command_wrapper("--thread 2")); + ASSERT_EQ(commander.get_prs_instruction().thread, 2); } + ASSERT_TRUE(commander.parse_command_wrapper("--thread " + + std::to_string(max_thread))); + ASSERT_EQ(commander.get_prs_instruction().thread, max_thread); + // reset it first + ASSERT_TRUE(commander.parse_command_wrapper("--thread 1")); + ASSERT_EQ(commander.get_prs_instruction().thread, 1); + ASSERT_TRUE(commander.parse_command_wrapper("--thread max")); + 
ASSERT_EQ(commander.get_prs_instruction().thread, max_thread); + // reset again + ASSERT_TRUE(commander.parse_command_wrapper( + "--thread " + std::to_string(max_thread * 2))); + ASSERT_EQ(commander.get_prs_instruction().thread, max_thread); + ASSERT_TRUE(commander.parse_command_wrapper("--thread " + + std::to_string(max_thread))); + ASSERT_EQ(commander.get_prs_instruction().thread, max_thread); + ASSERT_TRUE(commander.parse_command_wrapper("--extract Love")); + ASSERT_STREQ(commander.extract_file().c_str(), "Love"); + ASSERT_TRUE(commander.parse_command_wrapper("--exclude Hate")); + ASSERT_STREQ(commander.exclude_file().c_str(), "Hate"); + /* + not sure how to do proper escape here, will only do very simple cases + ASSERT_TRUE(commander.parse_command_wrapper("--id-delim \"-\"")); + ASSERT_STREQ(commander.delim().c_str(), "-"); + ASSERT_TRUE(commander.parse_command_wrapper("--id-delim \" \"")); + ASSERT_STREQ(commander.delim().c_str(), " "); + */ + ASSERT_TRUE(commander.parse_command_wrapper("--id-delim -")); + ASSERT_STREQ(commander.delim().c_str(), "-"); + ASSERT_TRUE(commander.parse_command_wrapper("--memory 1k")); + ASSERT_EQ(commander.memory(), 1024); + // default is mb + ASSERT_TRUE(commander.parse_command_wrapper("--memory 10")); + ASSERT_EQ(commander.memory(), 10485760); + ASSERT_TRUE(commander.parse_command_wrapper("--memory 1gb")); + ASSERT_EQ(commander.memory(), 1073741824); + ASSERT_TRUE(commander.parse_command_wrapper("--memory 30tb")); + ASSERT_EQ(commander.memory(), 32985348833280); + // the default of seed is random, which is difficult to test. 
So we will + // just check if we set the seed correctly + ASSERT_TRUE(commander.parse_command_wrapper("--seed 123")); + ASSERT_EQ(commander.get_perm().seed, 123); + // check permutation default + ASSERT_EQ(commander.get_perm().num_permutation, 0); + ASSERT_FALSE(commander.get_perm().run_perm); + ASSERT_FALSE(commander.get_perm().run_set_perm); + ASSERT_TRUE(commander.parse_command_wrapper("--perm 100")); + ASSERT_EQ(commander.get_perm().num_permutation, 100); + ASSERT_TRUE(commander.get_perm().run_perm); + ASSERT_FALSE(commander.get_perm().run_set_perm); + ASSERT_TRUE(commander.parse_command_wrapper("--set-perm 1026")); + ASSERT_EQ(commander.get_perm().num_permutation, 1026); + ASSERT_TRUE(commander.get_perm().run_set_perm); + // we won't change the other + ASSERT_TRUE(commander.get_perm().run_perm); + // now check for overflow + ASSERT_FALSE(commander.parse_command_wrapper("--set-perm 1e200")); + // number of autosome + ASSERT_EQ(commander.get_target().num_autosome, 22); + ASSERT_EQ(commander.get_reference().num_autosome, 22); + ASSERT_TRUE(commander.parse_command_wrapper("--num-auto 1")); + ASSERT_EQ(commander.get_target().num_autosome, 1); + ASSERT_EQ(commander.get_reference().num_autosome, 1); + ASSERT_TRUE(commander.parse_command_wrapper("--num-auto -100")); + ASSERT_EQ(commander.get_target().num_autosome, -100); + ASSERT_FALSE(commander.parse_command_wrapper("--num-auto 1e100")); } -class CovariateTest : public Commander +TEST(COMMAND_PARSING, PRS_MODEL_THRESHOLD) { -public: - static std::vector - transform_covariate(const std::string& cov_in) + mockCommander commander; + ASSERT_DOUBLE_EQ(commander.get_p_threshold().inter, 0.00005); + ASSERT_DOUBLE_EQ(commander.get_p_threshold().lower, 5e-8); + ASSERT_DOUBLE_EQ(commander.get_p_threshold().upper, 0.5); + ASSERT_TRUE(commander.parse_command_wrapper("--inter 1e-10")); + ASSERT_DOUBLE_EQ(commander.get_p_threshold().inter, 1e-10); + ASSERT_TRUE(commander.parse_command_wrapper("-i 110")); + 
ASSERT_DOUBLE_EQ(commander.get_p_threshold().inter, 110); + ASSERT_TRUE(commander.parse_command_wrapper("--lower 1e-20")); + ASSERT_DOUBLE_EQ(commander.get_p_threshold().lower, 1e-20); + ASSERT_TRUE(commander.parse_command_wrapper("-l 123")); + ASSERT_DOUBLE_EQ(commander.get_p_threshold().lower, 123); + ASSERT_TRUE(commander.parse_command_wrapper("--upper 5e-70")); + ASSERT_DOUBLE_EQ(commander.get_p_threshold().upper, 5e-70); + ASSERT_TRUE(commander.parse_command_wrapper("-u 5e10")); + ASSERT_DOUBLE_EQ(commander.get_p_threshold().upper, 5e10); + // extreme value + ASSERT_TRUE(commander.parse_command_wrapper("-u 5e300")); + ASSERT_DOUBLE_EQ(commander.get_p_threshold().upper, 5e300); + // fail + ASSERT_FALSE(commander.parse_command_wrapper("-u 5e400")); + ASSERT_FALSE(commander.parse_command_wrapper("-l -5e400")); + ASSERT_FALSE(commander.parse_command_wrapper("-i hi")); + // MODEL and SCORES + ASSERT_EQ(commander.get_prs_instruction().scoring_method, SCORING::AVERAGE); + ASSERT_TRUE(commander.parse_command_wrapper("--score Sum")); + ASSERT_EQ(commander.get_prs_instruction().scoring_method, SCORING::SUM); + ASSERT_TRUE(commander.parse_command_wrapper("--score std")); + ASSERT_EQ(commander.get_prs_instruction().scoring_method, + SCORING::STANDARDIZE); + ASSERT_TRUE(commander.parse_command_wrapper("--score con-std")); + ASSERT_EQ(commander.get_prs_instruction().scoring_method, + SCORING::CONTROL_STD); + ASSERT_TRUE(commander.parse_command_wrapper("--score avg")); + ASSERT_EQ(commander.get_prs_instruction().scoring_method, SCORING::AVERAGE); + // we do exact match + ASSERT_FALSE(commander.parse_command_wrapper("--score averaging")); + + ASSERT_EQ(commander.get_prs_instruction().missing_score, + MISSING_SCORE::MEAN_IMPUTE); + ASSERT_TRUE(commander.parse_command_wrapper("--missing SET_Zero")); + ASSERT_EQ(commander.get_prs_instruction().missing_score, + MISSING_SCORE::SET_ZERO); + ASSERT_TRUE(commander.parse_command_wrapper("--missing Center")); + 
ASSERT_EQ(commander.get_prs_instruction().missing_score, + MISSING_SCORE::CENTER); + ASSERT_TRUE(commander.parse_command_wrapper("--missing mean_impute")); + ASSERT_EQ(commander.get_prs_instruction().missing_score, + MISSING_SCORE::MEAN_IMPUTE); + // Allowed, but don't think I have implemented this yet + ASSERT_TRUE(commander.parse_command_wrapper("--missing IMPUTE_CONTROL")); + ASSERT_EQ(commander.get_prs_instruction().missing_score, + MISSING_SCORE::IMPUTE_CONTROL); + // We only matched the first character + ASSERT_TRUE(commander.parse_command_wrapper("--missing cat")); + ASSERT_EQ(commander.get_prs_instruction().missing_score, + MISSING_SCORE::CENTER); + // but should fail if we have something that starts with different character + ASSERT_FALSE(commander.parse_command_wrapper("--missing beatrice")); + ASSERT_TRUE(commander.parse_command_wrapper("--model dom")); + ASSERT_EQ(commander.get_prs_instruction().genetic_model, MODEL::DOMINANT); + ASSERT_TRUE(commander.parse_command_wrapper("--model het")); + ASSERT_EQ(commander.get_prs_instruction().genetic_model, + MODEL::HETEROZYGOUS); + ASSERT_TRUE(commander.parse_command_wrapper("--model rec")); + ASSERT_EQ(commander.get_prs_instruction().genetic_model, MODEL::RECESSIVE); + ASSERT_TRUE(commander.parse_command_wrapper("--model ADD")); + ASSERT_EQ(commander.get_prs_instruction().genetic_model, MODEL::ADDITIVE); + // similar to missing + ASSERT_TRUE(commander.parse_command_wrapper("--model darwin")); + ASSERT_EQ(commander.get_prs_instruction().genetic_model, MODEL::DOMINANT); + ASSERT_FALSE(commander.parse_command_wrapper("--model mendel")); +} +void check_cov_loading(const std::string& command, + const std::vector& expected, + const bool expect_fail, const bool factor = false) +{ + mockCommander commander; + bool success = commander.parse_command_wrapper(command); + if (expect_fail) { ASSERT_FALSE(success); } + else if (!factor) { - return Commander::transform_covariate(cov_in); + ASSERT_TRUE(success); + 
ASSERT_EQ(expected.size(), commander.get_pheno().cov_colname.size()); + for (size_t i = 0; i < expected.size(); ++i) + { + ASSERT_STREQ(expected[i].c_str(), + commander.get_pheno().cov_colname[i].c_str()); + } } -}; + else + { + ASSERT_TRUE(success); + ASSERT_EQ(expected.size(), commander.get_pheno().factor_cov.size()); + for (size_t i = 0; i < expected.size(); ++i) + { + ASSERT_STREQ(expected[i].c_str(), + commander.get_pheno().factor_cov[i].c_str()); + } + } +} +TEST(COMMAND_PARSING, COVARIATE) +{ + mockCommander commander; + ASSERT_TRUE(commander.get_pheno().cov_file.empty()); + ASSERT_TRUE(commander.get_pheno().cov_colname.empty()); + ASSERT_TRUE(commander.get_pheno().factor_cov.empty()); + ASSERT_TRUE(commander.parse_command_wrapper("--cov Covar")); + ASSERT_STREQ(commander.get_pheno().cov_file.c_str(), "Covar"); + check_cov_loading("--cov-col Testing,@PC[1-55]", + std::vector {"Testing", "@PC[1-55]"}, false); + // this should be allowed + check_cov_loading("--cov-col Testing,@PC[1.3.5]", + std::vector {"Testing", "@PC[1.3.5]"}, + false); + // this will be stored but shouldn't pass the check + + check_cov_loading("--cov-col Testing,@PC[1,3,5]", + std::vector {"Testing", "@PC[1", "3", "5]"}, + false); + // cov-factor uses the same function as cov-col, so will only test if it is + // set properly + check_cov_loading("--cov-factor Sex", std::vector {"Sex"}, + false, true); + // can append + ASSERT_TRUE(commander.parse_command_wrapper("--cov-col Testing,@PC[1-55]")); + std::vector expected {"Testing", "@PC[1-55]"}; + ASSERT_EQ(expected.size(), commander.get_pheno().cov_colname.size()); + for (size_t i = 0; i < expected.size(); ++i) + { + ASSERT_STREQ(expected[i].c_str(), + commander.get_pheno().cov_colname[i].c_str()); + } + ASSERT_TRUE(commander.parse_command_wrapper("--cov-col More,Covariate")); + expected.push_back("More"); + expected.push_back("Covariate"); + ASSERT_EQ(expected.size(), commander.get_pheno().cov_colname.size()); + for (size_t i = 0; i < 
expected.size(); ++i) + { + ASSERT_STREQ(expected[i].c_str(), + commander.get_pheno().cov_colname[i].c_str()); + } +} +TEST(COMMAND_PARSING, REFERENCE_FILE) +{ + mockCommander commander; + ASSERT_TRUE(commander.get_reference().is_ref); + ASSERT_TRUE(commander.parse_command_wrapper("--ld genotype")); + ASSERT_STREQ(commander.get_reference().file_name.c_str(), "genotype"); + ASSERT_TRUE(commander.parse_command_wrapper("-L plink")); + ASSERT_STREQ(commander.get_reference().file_name.c_str(), "plink"); + ASSERT_TRUE(commander.parse_command_wrapper("--ld-list testing")); + ASSERT_STREQ(commander.get_reference().file_list.c_str(), "testing"); + // default is bed, currently don't do any check + ASSERT_TRUE(commander.parse_command_wrapper("--ld-type bgen")); + ASSERT_STREQ(commander.get_reference().type.c_str(), "bgen"); + // so in theory, we can set whatever string we like + ASSERT_TRUE(commander.parse_command_wrapper("--ld-type beatrice")); + ASSERT_STREQ(commander.get_reference().type.c_str(), "beatrice"); + // check keep and remove is correct + ASSERT_TRUE(commander.parse_command_wrapper("--ld-keep fun")); + ASSERT_STREQ(commander.get_reference().keep.c_str(), "fun"); + ASSERT_TRUE(commander.get_reference().remove.empty()); + ASSERT_TRUE(commander.parse_command_wrapper("--ld-remove depression")); + ASSERT_STREQ(commander.get_reference().remove.c_str(), "depression"); + // setting remove must not change the previously set keep + ASSERT_STREQ(commander.get_reference().keep.c_str(), "fun"); + ASSERT_TRUE(commander.get_reference().is_ref); +} +TEST(COMMAND_PARSING, TARGET_FILE) +{ + mockCommander commander; + ASSERT_FALSE(commander.get_target().is_ref); + ASSERT_TRUE(commander.parse_command_wrapper("--target genotype")); + ASSERT_STREQ(commander.get_target().file_name.c_str(), "genotype"); + ASSERT_TRUE(commander.parse_command_wrapper("-t plink")); + ASSERT_STREQ(commander.get_target().file_name.c_str(), "plink"); + ASSERT_TRUE(commander.parse_command_wrapper("--target-list
testing")); + ASSERT_STREQ(commander.get_target().file_list.c_str(), "testing"); + // default is bed, currently don't do any check + ASSERT_TRUE(commander.parse_command_wrapper("--type bgen")); + ASSERT_STREQ(commander.get_target().type.c_str(), "bgen"); + // so in theory, we can set whatever string we like + ASSERT_TRUE(commander.parse_command_wrapper("--type beatrice")); + ASSERT_STREQ(commander.get_target().type.c_str(), "beatrice"); + // check keep and remove is correct + ASSERT_TRUE(commander.parse_command_wrapper("--keep fun")); + ASSERT_STREQ(commander.get_target().keep.c_str(), "fun"); + ASSERT_TRUE(commander.get_target().remove.empty()); + ASSERT_TRUE(commander.parse_command_wrapper("--remove depression")); + ASSERT_STREQ(commander.get_target().remove.c_str(), "depression"); + // setting remove must not change the previously set keep + ASSERT_STREQ(commander.get_target().keep.c_str(), "fun"); + ASSERT_FALSE(commander.get_target().is_ref); +} +TEST(COMMAND_PARSING, PHENO_SET) +{ + mockCommander commander; + ASSERT_TRUE(commander.get_pheno().pheno_file.empty()); + ASSERT_TRUE(commander.get_pheno().pheno_col.empty()); + ASSERT_TRUE(commander.get_pheno().pheno_col_idx.empty()); + ASSERT_TRUE(commander.parse_command_wrapper("--pheno Phenotype")); + ASSERT_STREQ(commander.get_pheno().pheno_file.c_str(), "Phenotype"); + ASSERT_TRUE(commander.get_pheno().pheno_col.empty()); + ASSERT_TRUE(commander.get_pheno().pheno_col_idx.empty()); + ASSERT_TRUE(commander.parse_command_wrapper("--pheno-col A1,B1")); + std::vector expected = {"A1", "B1"}; + ASSERT_EQ(commander.get_pheno().pheno_col.size(), expected.size()); + for (size_t i = 0; i < expected.size(); ++i) + { + ASSERT_STREQ(commander.get_pheno().pheno_col[i].c_str(), + expected[i].c_str()); + } + ASSERT_TRUE(commander.get_pheno().pheno_col_idx.empty()); + // We do allow multiple use of --pheno-col, though not sure if that is a + // good idea or not + ASSERT_TRUE(commander.parse_command_wrapper("--pheno-col C2,D2")); + expected
= {"A1", "B1", "C2", "D2"}; + ASSERT_EQ(commander.get_pheno().pheno_col.size(), expected.size()); + for (size_t i = 0; i < expected.size(); ++i) + { + ASSERT_STREQ(commander.get_pheno().pheno_col[i].c_str(), + expected[i].c_str()); + } + // now check prevalence + ASSERT_TRUE(commander.get_pheno().prevalence.empty()); + ASSERT_TRUE(commander.parse_command_wrapper("-k 0.1,0.3,1,3")); + // there is no bound check yet + std::vector expected_prev = {0.1, 0.3, 1, 3}; + ASSERT_EQ(commander.get_pheno().prevalence.size(), expected_prev.size()); + for (size_t i = 0; i < expected_prev.size(); ++i) + { + ASSERT_DOUBLE_EQ(commander.get_pheno().prevalence[i], expected_prev[i]); + } + // also check long flag + // use new mockCommander, as prevalence should stack + mockCommander second_command; + ASSERT_TRUE( + second_command.parse_command_wrapper("--prevalence -0.1,0.44,1e-5")); + // there is no bound check yet + expected_prev.clear(); + expected_prev = {-0.1, 0.44, 1e-5}; + ASSERT_EQ(second_command.get_pheno().prevalence.size(), + expected_prev.size()); + for (size_t i = 0; i < expected_prev.size(); ++i) + { + ASSERT_DOUBLE_EQ(second_command.get_pheno().prevalence[i], + expected_prev[i]); + } + ASSERT_TRUE( + second_command.parse_command_wrapper("--prevalence 0.1,0.3,0.5")); + // check stacking (loop over expected_prev so the appended values are verified too) + expected_prev.push_back(0.1); + expected_prev.push_back(0.3); + expected_prev.push_back(0.5); + ASSERT_EQ(second_command.get_pheno().prevalence.size(), + expected_prev.size()); + for (size_t i = 0; i < expected_prev.size(); ++i) + { + ASSERT_DOUBLE_EQ(second_command.get_pheno().prevalence[i], + expected_prev[i]); + } + // check out of bound + ASSERT_FALSE(second_command.parse_command_wrapper("-k 1e-400")); + // non-numeric + ASSERT_FALSE(second_command.parse_command_wrapper("-k common_disease")); +} +TEST(COMMAND_PARSING, TARGET_FILTER_CHECK) +{ + // now check the get set combo works + mockCommander commander; + // first check valid inputs + ASSERT_TRUE(commander.parse_command_wrapper("--geno
0.4")); + ASSERT_DOUBLE_EQ(commander.get_target_qc().geno, 0.4); + ASSERT_TRUE(commander.parse_command_wrapper("--info 0.2")); + ASSERT_DOUBLE_EQ(commander.get_target_qc().info_score, 0.2); + ASSERT_TRUE(commander.parse_command_wrapper("--maf 0.01")); + ASSERT_DOUBLE_EQ(commander.get_target_qc().maf, 0.01); + // out of bound input (checked later, so should still be valid as of now) + ASSERT_TRUE(commander.parse_command_wrapper("--geno -0.4")); + ASSERT_DOUBLE_EQ(commander.get_target_qc().geno, -0.4); + ASSERT_TRUE(commander.parse_command_wrapper("--info 20")); + ASSERT_DOUBLE_EQ(commander.get_target_qc().info_score, 20); + ASSERT_TRUE(commander.parse_command_wrapper("--maf -10.01")); + ASSERT_DOUBLE_EQ(commander.get_target_qc().maf, -10.01); + // the invalid input, e.g. non-numeric + ASSERT_FALSE(commander.parse_command_wrapper("--geno --0.4")); + ASSERT_FALSE(commander.parse_command_wrapper("--geno geno")); + ASSERT_FALSE(commander.parse_command_wrapper("--info --0.2")); + ASSERT_FALSE(commander.parse_command_wrapper("--info test_yourself")); + ASSERT_FALSE(commander.parse_command_wrapper("--maf -+0.01")); + ASSERT_FALSE(commander.parse_command_wrapper("--maf rare")); +} +TEST(COMMAND_PARSING, REFERENCE_FILTER_CHECK) +{ + // now check the get set combo works + mockCommander commander; + // first check valid inputs + ASSERT_TRUE(commander.parse_command_wrapper("--ld-geno 0.4")); + ASSERT_DOUBLE_EQ(commander.get_ref_qc().geno, 0.4); + ASSERT_TRUE(commander.parse_command_wrapper("--ld-info 0.2")); + ASSERT_DOUBLE_EQ(commander.get_ref_qc().info_score, 0.2); + ASSERT_TRUE(commander.parse_command_wrapper("--ld-maf 0.01")); + ASSERT_DOUBLE_EQ(commander.get_ref_qc().maf, 0.01); + // out of bound input (checked later, so should still be valid as of now) + ASSERT_TRUE(commander.parse_command_wrapper("--ld-geno -0.4")); + ASSERT_DOUBLE_EQ(commander.get_ref_qc().geno, -0.4); + ASSERT_TRUE(commander.parse_command_wrapper("--ld-info 20")); +
ASSERT_DOUBLE_EQ(commander.get_ref_qc().info_score, 20); + ASSERT_TRUE(commander.parse_command_wrapper("--ld-maf -10.01")); + ASSERT_DOUBLE_EQ(commander.get_ref_qc().maf, -10.01); + // the invalid input e.g non-numeric + ASSERT_FALSE(commander.parse_command_wrapper("--ld-geno --0.4")); + ASSERT_FALSE(commander.parse_command_wrapper("--ld-geno geno")); + ASSERT_FALSE(commander.parse_command_wrapper("--ld-info --0.2")); + ASSERT_FALSE(commander.parse_command_wrapper("--ld-info test_yourself")); + ASSERT_FALSE(commander.parse_command_wrapper("--ld-maf -+0.01")); + ASSERT_FALSE(commander.parse_command_wrapper("--ld-maf rare")); +} void invalid_cov_input(const std::string& cov_string) { try { // invalid input std::vector results = - CovariateTest::transform_covariate(cov_string); + mockCommander::transform_covariate(cov_string); FAIL(); } catch (const std::runtime_error&) @@ -86,46 +1109,1091 @@ void invalid_cov_input(const std::string& cov_string) SUCCEED(); } } -TEST(COVARITE_TRANSFORM, TRANSFORMATION) -{ - // should not do transformation when not start with @ - std::string cov_string = "PC1"; - std::string expected = cov_string; - ASSERT_STREQ( - expected.c_str(), - CovariateTest::transform_covariate(cov_string).front().c_str()); - // same for empty string - cov_string = expected = ""; - ASSERT_STREQ( - expected.c_str(), - CovariateTest::transform_covariate(cov_string).front().c_str()); - // should be fine if the @ is in middle of the string - cov_string = expected = "PC1@Home"; - ASSERT_STREQ( - expected.c_str(), - CovariateTest::transform_covariate(cov_string).front().c_str()); - // when start with @ but not with any [], we will just remove the @ - cov_string = "@PC1"; - expected = "PC1"; - ASSERT_STREQ( - expected.c_str(), - CovariateTest::transform_covariate(cov_string).front().c_str()); - cov_string = "@PC[1-5]"; - // in this order - std::vector expected_outputs = {"PC1", "PC2", "PC3", "PC4", - "PC5"}; - std::vector results = - 
CovariateTest::transform_covariate(cov_string); - EXPECT_EQ(results.size(), expected_outputs.size()); - for (size_t i = 0; i < results.size(); ++i) - { EXPECT_STREQ(expected_outputs[i].c_str(), results[i].c_str()); } - invalid_cov_input("@PC[[1-5]]"); - invalid_cov_input("@PC[1-5"); - invalid_cov_input("@PC1-5]"); - invalid_cov_input("@PC[[1-5]"); - invalid_cov_input("@PC[1-5]]"); - invalid_cov_input("@PC[1-5]["); - invalid_cov_input("@PC[1-5,]"); - invalid_cov_input("@PC[,1-5]"); +TEST(COVARIATE_TRANSFORM, RANGE_CHECK) +{ + + std::string cov = "PC[1-5]"; + size_t res; + ASSERT_FALSE(mockCommander::find_first_end_wrapper(cov, 0, res)); + ASSERT_TRUE(mockCommander::find_first_end_wrapper(cov, 2, res)); + ASSERT_EQ(res, 6); + cov = "PC[1-5[1-5]]"; + ASSERT_FALSE(mockCommander::find_first_end_wrapper(cov, 0, res)); + ASSERT_FALSE(mockCommander::find_first_end_wrapper(cov, 2, res)); +} +TEST(COVARIATE_TRANSFORM, PARSE_RANGE) +{ + std::string cov = "[1-5]"; + std::vector res; + // we expect [] to be removed + ASSERT_FALSE(mockCommander::parse_range_wrapper(cov, res)); + cov = "1-5"; + res.clear(); + ASSERT_TRUE(mockCommander::parse_range_wrapper(cov, res)); + ASSERT_EQ(res.size(), 5); + for (size_t i = 0; i < res.size(); ++i) ASSERT_EQ(res[i], i + 1); + cov = "10-50"; + res.clear(); + ASSERT_TRUE(mockCommander::parse_range_wrapper(cov, res)); + ASSERT_EQ(res.size(), 41); + for (size_t i = 0; i < res.size(); ++i) ASSERT_EQ(res[i], i + 10); + cov = "50-10"; + res.clear(); + ASSERT_TRUE(mockCommander::parse_range_wrapper(cov, res)); + ASSERT_EQ(res.size(), 41); + for (size_t i = 0; i < res.size(); ++i) ASSERT_EQ(res[i], i + 10); + cov = "10"; + res.clear(); + ASSERT_TRUE(mockCommander::parse_range_wrapper(cov, res)); + ASSERT_EQ(res.size(), 1); + ASSERT_EQ(res[0], 10); + // should not work for negative + cov = "-1"; + res.clear(); + ASSERT_FALSE(mockCommander::parse_range_wrapper(cov, res)); + cov = "1--5"; + res.clear(); + 
ASSERT_FALSE(mockCommander::parse_range_wrapper(cov, res)); + cov = "-1--5"; + res.clear(); + ASSERT_FALSE(mockCommander::parse_range_wrapper(cov, res)); + res.clear(); + cov = "1,5"; + // we assume , is already dealt with + ASSERT_FALSE(mockCommander::parse_range_wrapper(cov, res)); +} + +TEST(COVARIATE_TRANSFORM, GET_RANGE) +{ + std::string cov = "PC[1-5]"; + std::vector res; + // format cannot be converted + ASSERT_FALSE(mockCommander::get_range_wrapper(cov, 0, 6, res)); + // still wrong format + ASSERT_FALSE(mockCommander::get_range_wrapper(cov, 2, 4, res)); + // out of bound + ASSERT_FALSE(mockCommander::get_range_wrapper(cov, 2, 7, res)); + res.clear(); + ASSERT_TRUE(mockCommander::get_range_wrapper(cov, 2, 6, res)); + ASSERT_EQ(res.size(), 5); + for (size_t i = 0; i < res.size(); ++i) { ASSERT_EQ(res[i], i + 1); } + // complex options + cov = "PC[1-5.8.7-10]"; + res.clear(); + ASSERT_TRUE(mockCommander::get_range_wrapper(cov, 2, 13, res)); + // should be sorted and removed the duplicates (8) + std::vector expected = {1, 2, 3, 4, 5, 7, 8, 9, 10}; + ASSERT_EQ(res.size(), expected.size()); + for (size_t i = 0; i < res.size(); ++i) { ASSERT_EQ(res[i], expected[i]); } + cov = "PC[1-5.-6]"; + // One fail, all fail + res.clear(); + ASSERT_FALSE(mockCommander::get_range_wrapper(cov, 2, 9, res)); + // as we use . to separate each input, it means double value will be parsed + // to something else. 
User will have to read the log to check if the parsing + // is correct + cov = "PC[1-5.6.0.005]"; + ASSERT_TRUE(mockCommander::get_range_wrapper(cov, 2, 14, res)); + expected.clear(); + expected = {0, 1, 2, 3, 4, 5, 6}; + ASSERT_EQ(res.size(), expected.size()); + for (size_t i = 0; i < res.size(); ++i) { ASSERT_EQ(res[i], expected[i]); } +} +TEST(COVARIATE_TRANSFORM, UPDATE_COVARIATE_WITH_RANGE) +{ + std::vector result; + std::vector range; + ASSERT_FALSE(mockCommander::update_covariate_ranges_wrapper(result, range)); + range = {1, 3, 5, 7, 9}; + result.clear(); + std::vector expected = {"1", "3", "5", "7", "9"}; + ASSERT_TRUE(mockCommander::update_covariate_ranges_wrapper(result, range)); + ASSERT_EQ(result.size(), range.size()); + for (size_t i = 0; i < result.size(); ++i) + { ASSERT_STREQ(result[i].c_str(), expected[i].c_str()); } + result.clear(); + result = {"PC1AB", "PC2AB"}; + // sequence = iterate result then range + expected = {"PC1AB1", "PC1AB3", "PC1AB5", "PC1AB7", "PC1AB9", + "PC2AB1", "PC2AB3", "PC2AB5", "PC2AB7", "PC2AB9"}; + ASSERT_TRUE(mockCommander::update_covariate_ranges_wrapper(result, range)); + ASSERT_EQ(result.size(), expected.size()); + for (size_t i = 0; i < result.size(); ++i) + { ASSERT_STREQ(result[i].c_str(), expected[i].c_str()); } +} +void transform_test(const std::string& cov, + const std::vector& expected, + bool expect_success) +{ + std::vector result; + if (!expect_success) + { ASSERT_FALSE(mockCommander::transform_wrapper(cov, result)); } + else + { + ASSERT_TRUE(mockCommander::transform_wrapper(cov, result)); + ASSERT_EQ(result.size(), expected.size()); + for (size_t i = 0; i < result.size(); ++i) + { ASSERT_STREQ(result[i].c_str(), expected[i].c_str()); } + } +} +TEST(COVARIATE_TRANSFORM, TRANSFORMATION) +{ + transform_test("@PC", std::vector {"PC"}, true); + transform_test("@@PC", std::vector {"@PC"}, true); + transform_test("PC1", std::vector {"PC1"}, true); + transform_test("PC1-5", std::vector {"PC1-5"}, true); + 
transform_test("PC1@5", std::vector {"PC1@5"}, true); + transform_test("@PC1-5", std::vector {"PC1-5"}, true); + transform_test("@PC[1-5]", + std::vector {"PC1", "PC2", "PC3", "PC4", "PC5"}, + true); + transform_test("@PC[1-2.5]", std::vector {"PC1", "PC2", "PC5"}, + true); + transform_test( + "@PC[1-2.4.3-6]", + std::vector {"PC1", "PC2", "PC3", "PC4", "PC5", "PC6"}, + true); + transform_test("@PC[1-2]A", std::vector {"PC1A", "PC2A"}, + true); + transform_test( + "@PC[1-2]A[1-2]", + std::vector {"PC1A1", "PC1A2", "PC2A1", "PC2A2"}, true); +} + +void quick_check_unit(const std::string& input_str, const size_t exp_output, + const size_t def_power = 0, const bool memory = false) +{ + mockCommander commander; + size_t value; + commander.check_parse_unit_value(input_str, "", def_power, value, memory); + ASSERT_EQ(exp_output, value); +} + +TEST(PARSE_UNIT, VALIDITY) +{ + mockCommander commander; + size_t value = 0; + // Check if valid + ASSERT_FALSE(commander.check_parse_unit_value("m", "--mem", 0, value)); + ASSERT_FALSE(commander.check_parse_unit_value("b", "--mem", 0, value)); + ASSERT_FALSE(commander.check_parse_unit_value("mb", "--mem", 0, value)); + ASSERT_FALSE(commander.check_parse_unit_value("hi", "--mem", 0, value)); + ASSERT_FALSE(commander.check_parse_unit_value("TB", "--mem", 0, value)); +} +TEST(PARSE_UNIT, OUT_BOUND) +{ + mockCommander commander; + size_t value = 0; + // out of bound + ASSERT_FALSE(commander.check_parse_unit_value("1", "", 7, value)); + ASSERT_FALSE( + commander.check_parse_unit_value("1000000000tb", "", 1, value)); +} +TEST(PARSE_UNIT, NEGATIVES) +{ + // Check for negative values + mockCommander commander; + size_t value = 0; + ASSERT_FALSE(commander.check_parse_unit_value("-1", "", 1, value)); + ASSERT_FALSE(commander.check_parse_unit_value("-1tb", "", 1, value)); +} +TEST(PARSE_UNIT, WITH_UNIT) +{ + // default value should be ignored when user provide a unit + quick_check_unit("1b", 1, 0); + quick_check_unit("1b", 1, 1); + 
quick_check_unit("1b", 1, 3); + quick_check_unit("1b", 1, 4); + quick_check_unit("1b", 1, 5); + quick_check_unit("1b", 1, 6); +} +TEST(PARSE_UNIT, DIFFERENT_UNIT) +{ + // check unit works as expected + quick_check_unit("1k", 1000, 0); + quick_check_unit("1kb", 1000, 0); + quick_check_unit("1m", 1000000, 0); + quick_check_unit("1mb", 1000000, 0); + quick_check_unit("1g", 1000000000, 0); + quick_check_unit("1gb", 1000000000, 0); + quick_check_unit("1t", 1000000000000, 0); + quick_check_unit("1tb", 1000000000000, 0); +} +TEST(PARSE_UNIT, NON_INTEGER) +{ + mockCommander commander; + size_t value = 0; + // Check non-integer scenarios + quick_check_unit("1.5k", 1500, 0); + quick_check_unit("1.004k", 1004, 0); + ASSERT_FALSE(commander.check_parse_unit_value("1.5b", "", 1, value)); +} +TEST(PARSE_UNIT, DEFAULT_VALUE) +{ + // check default value works + quick_check_unit("1", 1, 0); + quick_check_unit("1", 1000, 1); + quick_check_unit("1", 1000000000, 3); + quick_check_unit("1", 1000000000000, 4); + quick_check_unit("1", 1000000000000000, 5); + quick_check_unit("1", 1000000000000000000, 6); +} + +bool test_target_check(const std::string& command) +{ + mockCommander commander; + if (!command.empty()) commander.parse_command_wrapper(command); + return commander.target_check_wrapper(); +} +TEST(COMMAND_VALIDATION, TARGET) +{ + // Should fail, as target and target-list not provided + ASSERT_FALSE(test_target_check("")); + ASSERT_TRUE(test_target_check("--target test")); + ASSERT_TRUE(test_target_check("--target-list test")); + // but not both + ASSERT_FALSE(test_target_check("--target test --target-list listing")); + // Now check keep and remove + ASSERT_TRUE(test_target_check("--target test --remove No_more")); + ASSERT_TRUE(test_target_check("--target test --keep More")); + ASSERT_FALSE( + test_target_check("--target test --remove No_more --keep More")); + // check valid type + ASSERT_TRUE(test_target_check("--target test --type bgen")); + 
ASSERT_TRUE(test_target_check("--target test --type bed")); + ASSERT_TRUE(test_target_check("--target test --type ped")); + ASSERT_FALSE(test_target_check("--target test --type vcf")); + // now check number of autosome is correctly checked + ASSERT_TRUE(test_target_check("--target test --num-auto 24")); + ASSERT_FALSE(test_target_check("--target test --num-auto -10")); +} + +std::tuple test_prsice_check(const std::string& command) +{ + mockCommander commander; + if (!command.empty()) commander.parse_command_wrapper(command); + return {commander.prsice_check_wrapper(), commander.get_p_threshold()}; +} +void bar_check(const std::string& command, const std::vector& expected, + bool expect_fail, bool is_fast = false) +{ + auto [success, p_thres] = test_prsice_check(command); + if (expect_fail) { ASSERT_FALSE(success); } + else + { + ASSERT_TRUE(success); + ASSERT_EQ(p_thres.bar_levels.size(), expected.size()); + for (size_t i = 0; i < p_thres.bar_levels.size(); ++i) + ASSERT_DOUBLE_EQ(p_thres.bar_levels[i], expected[i]); + ASSERT_EQ(is_fast, p_thres.fastscore); + } +} +void interval_check(const std::string& command, + const std::vector& expected, bool expect_fail) +{ + auto [success, p_thres] = test_prsice_check(command); + if (expect_fail) { ASSERT_FALSE(success); } + else + { + ASSERT_TRUE(success); + ASSERT_DOUBLE_EQ(p_thres.lower, expected[0]); + ASSERT_DOUBLE_EQ(p_thres.inter, expected[1]); + ASSERT_DOUBLE_EQ(p_thres.upper, expected[2]); + } +} +TEST(COMMAND_VALIDATION, P_VALUE_THRESHOLDS) +{ + const bool EXPECT_FAIL = true; + const bool EXPECT_SUCCESS = false; + // check default + bar_check("", std::vector {0.001, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 1}, + false); + // no full default + bar_check("--no-full", + std::vector {0.001, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5}, + EXPECT_SUCCESS); + // prset + bar_check("--msigdb PRSet", std::vector {1}, EXPECT_SUCCESS, true); + // prset with any kind of thresholding + bar_check("--msigdb PRSet --fastscore", + std::vector {0.001, 0.05, 
0.1, 0.2, 0.3, 0.4, 0.5, 1}, + EXPECT_SUCCESS, true); + bar_check("--msigdb PRSet --lower 1e-5", + std::vector {0.001, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 1}, + EXPECT_SUCCESS, false); + bar_check("--msigdb PRSet --inter 1e-5", + std::vector {0.001, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 1}, + EXPECT_SUCCESS, false); + bar_check("--msigdb PRSet --upper 0.6", + std::vector {0.001, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 1}, + EXPECT_SUCCESS, false); + // no-full doesn't work with PRSet + bar_check("--msigdb PRSet --no-full", std::vector {1}, + EXPECT_SUCCESS, true); + // now check for out of bound bar levels + bar_check("--bar-levels 0.1", std::vector {0.1, 1}, EXPECT_SUCCESS, + false); + bar_check("--bar-levels -0.1", std::vector {0.1, 1}, EXPECT_FAIL, + false); + bar_check("--bar-levels 1.1", std::vector {0.1, 1}, EXPECT_FAIL, + false); + // duplicated bar levels should be removed + bar_check("--bar-levels 0.1,0.2,0.3,0.2", + std::vector {0.1, 0.2, 0.3, 1}, EXPECT_SUCCESS, false); + // and should be ordered + bar_check("--bar-levels 0.3,0.2,0.3,0.1,1", + std::vector {0.1, 0.2, 0.3, 1}, EXPECT_SUCCESS, false); + // now check threshold values + interval_check("--lower -0.01", std::vector {5e-8, 0.00005, 0.5}, + EXPECT_FAIL); + interval_check("--lower 0.01", std::vector {0.01, 0.00005, 0.5}, + EXPECT_SUCCESS); + interval_check("--lower 1.1", std::vector {5e-8, 0.00005, 0.5}, + EXPECT_FAIL); + interval_check("--upper -0.02", std::vector {5e-8, 0.00005, 0.5}, + EXPECT_FAIL); + interval_check("--upper 0.04", std::vector {5e-8, 0.00005, 0.04}, + EXPECT_SUCCESS); + interval_check("--upper 1.1", std::vector {5e-8, 0.00005, 0.5}, + EXPECT_FAIL); + // can't be lower + interval_check("--lower 0.01 --upper 0.001", + std::vector {0.01, 0.0005, 0.001}, EXPECT_FAIL); + + interval_check("--inter -0.05", std::vector {5e-8, 0.00005, 0.5}, + EXPECT_FAIL); + interval_check("--inter 1e-3", std::vector {5e-8, 1e-3, 0.5}, + EXPECT_SUCCESS); + interval_check("--inter 123", std::vector {5e-8, 0.00005, 0.5}, + 
EXPECT_FAIL); +} + +void clump_param_check(const std::string& command, + const std::vector& expected, + const size_t expected_distance, bool expect_fail, + bool provided_distance, bool use_proxy) +{ + mockCommander commander; + if (!command.empty()) commander.parse_command_wrapper(command); + bool success = commander.clump_check_wrapper(); + if (expect_fail) { ASSERT_FALSE(success); } + else + { + ASSERT_TRUE(success); + auto clump = commander.get_clump_info(); + ASSERT_DOUBLE_EQ(clump.r2, expected[0]); + ASSERT_DOUBLE_EQ(clump.pvalue, expected[1]); + ASSERT_DOUBLE_EQ(clump.proxy, expected[2]); + ASSERT_EQ(clump.provided_distance, provided_distance); + ASSERT_EQ(clump.distance, expected_distance); + ASSERT_EQ(clump.use_proxy, use_proxy); + } +} +TEST(COMMAND_VALIDATION, CLUMP_CHECK) +{ + const bool EXPECT_FAIL = true, HAS_DIST = true, HAS_PROXY = true; + // check default + clump_param_check("", std::vector {0.1, 1, 0}, 250000, !EXPECT_FAIL, + !HAS_DIST, !HAS_PROXY); + // default change if set is used + clump_param_check("--msigdb RunPRSet", std::vector {0.1, 1, 0}, + 1000000, !EXPECT_FAIL, !HAS_DIST, !HAS_PROXY); + // out of bound + clump_param_check("--clump-r2 -0.1", std::vector {}, 250000, + EXPECT_FAIL, !HAS_DIST, !HAS_PROXY); + clump_param_check("--clump-r2 3", std::vector {}, 250000, + EXPECT_FAIL, !HAS_DIST, !HAS_PROXY); + clump_param_check("--clump-p -0.3", std::vector {}, 250000, + EXPECT_FAIL, !HAS_DIST, !HAS_PROXY); + clump_param_check("--clump-p 1.1", std::vector {}, 250000, + EXPECT_FAIL, !HAS_DIST, !HAS_PROXY); + // check proxy bound + clump_param_check("--proxy 0.3", std::vector {0.1, 1, 0.3}, 250000, + !EXPECT_FAIL, !HAS_DIST, HAS_PROXY); + clump_param_check("--proxy -123", std::vector {}, 250000, + EXPECT_FAIL, !HAS_DIST, HAS_PROXY); + clump_param_check("--proxy 456", std::vector {}, 250000, + EXPECT_FAIL, !HAS_DIST, HAS_PROXY); + // Negative distance will fail early on, we don't bother to check +} + +bool test_ref_check(const std::string& command) 
+{ + mockCommander commander; + if (!command.empty()) commander.parse_command_wrapper(command); + return commander.ref_check_wrapper(); +} +TEST(COMMAND_VALIDATION, REF_CHECK) +{ + ASSERT_TRUE(test_ref_check("--ld 1000G")); + ASSERT_TRUE(test_ref_check("--ld-list 1000G-lists")); + ASSERT_FALSE(test_ref_check("--ld-list 1000G-lists --ld 1000G")); + // Without LD reference, we are not going to run any LD check, therefore we + // will never really encounter a situation within ref_check where both + // ld-list and ld are not provided + ASSERT_TRUE(test_ref_check("")); + ASSERT_FALSE(test_ref_check("--ld 1000G --ld-geno -1")); + ASSERT_FALSE(test_ref_check("--ld 1000G --ld-geno 1.1")); + ASSERT_FALSE(test_ref_check("--ld 1000G --ld-maf -20")); + ASSERT_FALSE(test_ref_check("--ld 1000G --ld-maf 101")); + // don't check info score if we are not bgen + ASSERT_TRUE(test_ref_check("--ld 1000G --ld-info 1.1")); + ASSERT_TRUE(test_ref_check("--ld 1000G --ld-hard-thres 1.1")); + ASSERT_FALSE(test_ref_check("--ld 1000G --ld-info 1.1 --ld-type bgen")); + ASSERT_FALSE(test_ref_check("--ld 1000G --ld-info -2 --ld-type bgen")); + ASSERT_FALSE( + test_ref_check("--ld 1000G --ld-hard-thres 123 --ld-type bgen")); + ASSERT_FALSE( + test_ref_check("--ld 1000G --ld-hard-thres -200 --ld-type bgen")); + ASSERT_FALSE( + test_ref_check("--ld 1000G --ld-dose-thres 321 --ld-type bgen")); + ASSERT_FALSE( + test_ref_check("--ld 1000G --ld-dose-thres -234 --ld-type bgen")); + // check valid type + ASSERT_TRUE(test_ref_check("--ld 1000G --ld-type bed")); + ASSERT_TRUE(test_ref_check("--ld 1000G --ld-type ped")); + ASSERT_TRUE(test_ref_check("--ld 1000G --ld-type bgen")); + ASSERT_FALSE(test_ref_check("--ld 1000G --ld-type vcf")); + mockCommander ref, ref_list; + ASSERT_FALSE(ref.use_ref()); + ASSERT_TRUE(ref.parse_command_wrapper("--ld test")); + ASSERT_TRUE(ref.use_ref()); + ASSERT_FALSE(ref_list.use_ref()); + ASSERT_TRUE(ref_list.parse_command_wrapper("--ld-list test_list")); + 
ASSERT_TRUE(ref_list.use_ref()); + // can do keep and remove but not both + ASSERT_TRUE(test_ref_check("--ld 1000G --ld-keep Good")); + ASSERT_TRUE(test_ref_check("--ld 1000G --ld-remove Bad")); + ASSERT_FALSE(test_ref_check("--ld 1000G --ld-keep Good --ld-remove Bad")); +} + + +bool test_misc_check(const std::string& command) +{ + mockCommander commander; + if (!command.empty()) commander.parse_command_wrapper(command); + return commander.misc_check_wrapper(); +} + +bool intermediate_check(const std::string& command) +{ + mockCommander commander; + // we assume the command is valid + commander.parse_command_wrapper(command); + commander.misc_check_wrapper(); + return commander.use_inter(); +} +TEST(COMMAND_VALIDATION, MISC_CHECK) +{ + mockCommander commander; + // no negative thread is allowed + ASSERT_FALSE(test_misc_check("--thread -10")); + // can specify --logit-perm without --perm or --set-perm, but should update + // error + ASSERT_TRUE(test_misc_check("--logit-perm")); + // Cannot use-ref-maf without --ld + ASSERT_FALSE(test_misc_check("--use-ref-maf")); + ASSERT_TRUE(test_misc_check("--use-ref-maf --ld testing")); + // both target and ref are not bgen, inter should be false + ASSERT_FALSE(intermediate_check("--allow-inter")); + // we will need intermediate because we will use the target file for ld + // construction + ASSERT_TRUE(intermediate_check("--allow-inter --type bgen")); + // we will need the intermediate because the ld file is in bgen format + ASSERT_TRUE( + intermediate_check("--allow-inter --type bed --ld-type bgen --ld ref")); + // we won't need intermediate, as we are using dosage score and reference is + // already in bed format + ASSERT_FALSE( + intermediate_check("--allow-inter --type bgen --ld-type bed --ld ref")); + // we will need intermediate for hard coded score + ASSERT_TRUE(intermediate_check( + "--allow-inter --type bgen --ld-type bed --hard --ld ref")); + // got nothing related to bgen + 
ASSERT_FALSE(intermediate_check("--allow-inter --type bed")); + // now check for ultra aggressive flag + // won't use it for dosage score + ASSERT_TRUE(commander.parse_command_wrapper("--type bgen --ultra")); + ASSERT_TRUE(commander.misc_check_wrapper()); + ASSERT_FALSE(commander.ultra_aggressive()); + // allow no regress for more than 1 pheno, but will only generate 1 file + ASSERT_TRUE( + commander.parse_command_wrapper("--pheno-col 1,2,3 --no-regress")); +} + +bool test_filter_check(const std::string& command) +{ + mockCommander commander; + if (!command.empty()) commander.parse_command_wrapper(command); + return commander.filter_check_wrapper(); } +TEST(COMMAND_VALIDATION, FILTER_CHECK) +{ + ASSERT_TRUE(test_filter_check("--extract keep_snp")); + ASSERT_TRUE(test_filter_check("--exclude remove_snp")); + // can't do both + ASSERT_FALSE(test_filter_check("--exclude remove_snp --extract keep_snp")); + ASSERT_FALSE(test_filter_check("--maf -0.1")); + ASSERT_FALSE(test_filter_check("--maf 1.1")); + ASSERT_FALSE(test_filter_check("--geno 12")); + ASSERT_FALSE(test_filter_check("--geno -1")); + // won't do info and hard-threshold filtering if bgen isn't used + ASSERT_TRUE(test_filter_check("--hard-thres -19")); + ASSERT_TRUE(test_filter_check("--hard-thres 60")); + ASSERT_TRUE(test_filter_check("--info -0.5")); + ASSERT_TRUE(test_filter_check("--info 1.2")); + ASSERT_TRUE(test_filter_check("--hard-thres -133")); + ASSERT_TRUE(test_filter_check("--hard-thres 2887")); + // but if we use bgen, we will check the threshold + + ASSERT_FALSE(test_filter_check("--type bgen --hard-thres -19")); + ASSERT_FALSE(test_filter_check("--type bgen --hard-thres 60")); + ASSERT_FALSE(test_filter_check("--type bgen --info -0.5")); + ASSERT_FALSE(test_filter_check("--type bgen --info 1.2")); + ASSERT_FALSE(test_filter_check("--type bgen --hard-thres -133")); + ASSERT_FALSE(test_filter_check("--type bgen --hard-thres 2887")); +} + + +bool test_prset_check(const std::string& command) +{ + 
mockCommander commander; + if (!command.empty()) commander.parse_command_wrapper(command); + return commander.prset_check_wrapper(); +} +TEST(COMMAND_VALIDATION, PRSET_CHECK) +{ + // can't do both (note, need --bed or --snp-set as won't do check if prset + // isn't run + ASSERT_FALSE(test_prset_check("--perm 100 --set-perm 1000 --snp-set snps")); + // require GTF + ASSERT_FALSE(test_prset_check("--msigdb kegg")); + mockCommander commander, with_feature, bed_gtf; + // test default + ASSERT_TRUE( + commander.parse_command_wrapper("--msigdb kegg --gtf Homo.gtf")); + ASSERT_TRUE(commander.prset_check_wrapper()); + std::vector expected = {"exon", "gene", "protein_coding", + "CDS"}; + ASSERT_EQ(commander.get_set().feature.size(), expected.size()); + for (size_t i = 0; i < expected.size(); ++i) + { + ASSERT_STREQ(commander.get_set().feature[i].c_str(), + expected[i].c_str()); + } + // we won't overwrite + ASSERT_TRUE(with_feature.parse_command_wrapper( + "--bed test --feature gene,intron --set-perm 10")); + ASSERT_FALSE(with_feature.get_set().full_as_background); + ASSERT_TRUE(with_feature.prset_check_wrapper()); + expected.clear(); + expected = {"gene", "intron"}; + ASSERT_EQ(with_feature.get_set().feature.size(), expected.size()); + for (size_t i = 0; i < expected.size(); ++i) + { + ASSERT_STREQ(with_feature.get_set().feature[i].c_str(), + expected[i].c_str()); + } + // now also check that the full back is set as no gtf is provided + ASSERT_TRUE(with_feature.get_set().full_as_background); + // won't be the case if gtf is provided + ASSERT_TRUE( + bed_gtf.parse_command_wrapper("--bed hi --gtf Homo.gtf --set-perm 10")); + ASSERT_FALSE(bed_gtf.get_set().full_as_background); + ASSERT_TRUE(bed_gtf.prset_check_wrapper()); + ASSERT_FALSE(bed_gtf.get_set().full_as_background); +} + +TEST(COMMAND_VALIDATION, COVARIATE_CHECK) +{ + // slightly different than others, as we will only test the sub functions to + // check and see if they function. 
If they all work, then we can say that + // the covariate check works + mockCommander commander; + ASSERT_TRUE(commander.parse_command_wrapper( + "--cov test --cov-col Sex,@PC[1-5.6],@PC[6.8.9],@Hi")); + std::unordered_set included = commander.get_cov_names_wrap(); + std::unordered_set ori_input = included; + std::vector expected = {"Sex", "PC1", "PC2", "PC3", "PC4", + "PC5", "PC6", "PC8", "PC9", "Hi"}; + std::vector unexpected = {"Age", "BMI", "Bye", "PC7"}; + for (auto&& exp : expected) + { ASSERT_TRUE(included.find(exp) != included.end()); } + for (auto&& unexp : unexpected) + { ASSERT_TRUE(included.find(unexp) == included.end()); } + // we will test different stuff + std::vector cov_name = {"PC3", "PC2", "PC1"}; + // represent the index of each col-name + std::unordered_map ref_index = { + {"PC1", 2}, {"PC2", 1}, {"PC3", 0}}; + std::string missing = ""; + size_t valid_cov = + commander.find_cov_idx_wrap(included, ref_index, missing); + // order of col_cov_idx = input order in --cov-col + std::vector idx_exp = {2, 1, 0}; + ASSERT_EQ(valid_cov, 3); + auto cov = commander.get_pheno().cov_colname; + auto cov_idx = commander.get_pheno().col_index_of_cov; + // can't really test the ordering as we use unordered_set for included + ASSERT_EQ(cov_idx.size(), idx_exp.size()); + expected.clear(); + expected = {"Sex", "@PC[1-5.6]", "@PC[6.8.9]", "@Hi"}; + ASSERT_EQ(cov.size(), expected.size()); + for (size_t i = 0; i < expected.size(); ++i) + { ASSERT_STREQ(cov[i].c_str(), expected[i].c_str()); } + + commander.reorganize_cov_name_wrap(cov_name); + cov = commander.get_pheno().cov_colname; + expected.clear(); + // now should be ordered by their appearance within the cov file + expected = {"PC3", "PC2", "PC1"}; + idx_exp.clear(); + idx_exp = {0, 1, 2}; + cov_idx = commander.get_pheno().col_index_of_cov; + ASSERT_EQ(cov.size(), expected.size()); + // now we can check the ordering as we know this should be sorted + ASSERT_EQ(cov_idx.size(), idx_exp.size()); + for (size_t i = 0; 
i < expected.size(); ++i) + { ASSERT_STREQ(cov[i].c_str(), expected[i].c_str()); } + for (size_t i = 0; i < cov_idx.size(); ++i) + { ASSERT_EQ(cov_idx[i], idx_exp[i]); } + // check that we should get the correct number (0) + included.clear(); + included = {"Sex", "Age"}; + valid_cov = commander.find_cov_idx_wrap(included, ref_index, missing); + ASSERT_EQ(valid_cov, 0); + // now reset and prepare for factor check + included.clear(); + included = {"Sex", "PC1", "PC2", "PC3", "PC4", + "PC5", "PC6", "PC8", "PC9", "Hi"}; + valid_cov = commander.find_cov_idx_wrap(included, ref_index, missing); + // and proceed + ASSERT_TRUE( + commander.parse_command_wrapper("--cov test --cov-factor @PC[6.8.9]")); + // if we don't have any of the factors in the file, but all the factors are + // provided in the cov-col, we are ok with it + ASSERT_TRUE( + commander.process_factor_cov_wrap(included, ref_index, ori_input)); + // now check situation where we do have the factor + // Note: --cov-factor is additive if I remember correctly + ASSERT_TRUE(commander.parse_command_wrapper( + "--cov test --cov-factor @PC[3.6.8.9]")); + ASSERT_TRUE( + commander.process_factor_cov_wrap(included, ref_index, ori_input)); + ASSERT_EQ(commander.get_pheno().col_index_of_factor_cov.size(), 1); + ASSERT_EQ(commander.get_pheno().col_index_of_factor_cov.front(), 0); + // now if we do have something that'd not found, we should error out + mockCommander error; + ASSERT_TRUE(error.parse_command_wrapper( + "--cov test --cov-col @PC[1-5] --cov-factor @PC[6-9]")); + included = error.get_cov_names_wrap(); + ori_input = included; + valid_cov = error.find_cov_idx_wrap(included, ref_index, missing); + error.reorganize_cov_name_wrap(cov_name); + ASSERT_FALSE(error.process_factor_cov_wrap(included, ref_index, ori_input)); + // Stuff in factor are not in col but were found in the covariate file + mockCommander factor_only; + ASSERT_TRUE(factor_only.parse_command_wrapper( + "--cov test --cov-factor @PC[1-2] --cov-col 
@PC[3-9]")); + included = factor_only.get_cov_names_wrap(); + ori_input = included; + valid_cov = factor_only.find_cov_idx_wrap(included, ref_index, missing); + factor_only.reorganize_cov_name_wrap(cov_name); + // should error out as --cov-factor should always be a subset of --cov-col + ASSERT_FALSE( + factor_only.process_factor_cov_wrap(included, ref_index, ori_input)); +} + +std::tuple test_pheno_check(const std::string& command, + const bool is_beta) +{ + mockCommander commander; + if (!command.empty()) commander.parse_command_wrapper(command); + return {commander.pheno_check_wrapper(is_beta), commander.get_pheno()}; +} +TEST(COMMAND_VALIDATION, PHENO_CHECK) +{ + const bool IS_BETA = true; + // automatically decide the phenotype type as continuous or binary using + // --beta and --or + auto [success, pheno] = test_pheno_check("--pheno Phenotype", IS_BETA); + ASSERT_TRUE(success); + ASSERT_EQ(pheno.binary.size(), 1); + ASSERT_FALSE(pheno.binary.front()); + std::tie(success, pheno) = test_pheno_check("--pheno Phenotype", !IS_BETA); + ASSERT_TRUE(success); + ASSERT_EQ(pheno.binary.size(), 1); + ASSERT_TRUE(pheno.binary.front()); + // we don't allow pheno-col when no --pheno is provided + std::tie(success, pheno) = test_pheno_check("--pheno-col A,B,C", !IS_BETA); + ASSERT_FALSE(success); + // check default also works when more than 1 phenotype is provided + std::tie(success, pheno) = + test_pheno_check("--pheno Pheno --pheno-col A,B,C,D", IS_BETA); + ASSERT_TRUE(success); + ASSERT_EQ(pheno.binary.size(), 4); + for (size_t i = 0; i < 4; ++i) { ASSERT_FALSE(pheno.binary[i]); } + std::tie(success, pheno) = + test_pheno_check("--pheno Pheno --pheno-col A,B,C", !IS_BETA); + ASSERT_TRUE(success); + ASSERT_EQ(pheno.binary.size(), 3); + for (size_t i = 0; i < 3; ++i) { ASSERT_TRUE(pheno.binary[i]); } + // and we will fail if the binary-target and pheno-col size doesn't match + std::tie(success, pheno) = test_pheno_check( + "--pheno Pheno --binary-target T --pheno-col 
A,B,C", !IS_BETA); + ASSERT_FALSE(success); + std::tie(success, pheno) = test_pheno_check( + "--pheno Pheno --binary-target 10F --pheno-col A,B,C", !IS_BETA); + ASSERT_FALSE(success); + // default should never over-rule what we have + std::tie(success, pheno) = test_pheno_check( + "--pheno Pheno --binary-target 4T --pheno-col A,B,C,E", IS_BETA); + ASSERT_TRUE(success); + ASSERT_EQ(pheno.binary.size(), 4); + for (auto bin : pheno.binary) ASSERT_TRUE(bin); + std::tie(success, pheno) = test_pheno_check( + "--pheno Pheno --binary-target 3F --pheno-col A,C,E", !IS_BETA); + ASSERT_TRUE(success); + ASSERT_EQ(pheno.binary.size(), 3); + for (auto bin : pheno.binary) ASSERT_FALSE(bin); + // now check the prevalence is alright + std::tie(success, pheno) = test_pheno_check( + "--pheno Pheno --binary-target 3F --pheno-col A,C,E --prevalence 0.4", + !IS_BETA); + // fail as we don't have binary target + ASSERT_FALSE(success); + std::tie(success, pheno) = + test_pheno_check("--pheno Pheno --binary-target F,T,F --pheno-col " + "A,C,E --prevalence 0.2", + !IS_BETA); + // this is ok + ASSERT_TRUE(success); + ASSERT_EQ(pheno.prevalence.size(), 1); + ASSERT_DOUBLE_EQ(pheno.prevalence.front(), 0.2); + // ok with multiple + std::tie(success, pheno) = + test_pheno_check("--pheno Pheno --binary-target F,T,F,T --pheno-col " + "A,C,E,F --prevalence 0.1,0.4", + !IS_BETA); + ASSERT_TRUE(success); + ASSERT_EQ(pheno.prevalence.size(), 2); + ASSERT_DOUBLE_EQ(pheno.prevalence.front(), 0.1); + ASSERT_DOUBLE_EQ(pheno.prevalence.back(), 0.4); + // not ok because prevalence out of bound + std::tie(success, pheno) = + test_pheno_check("--pheno Pheno --binary-target F,T,F,T,2F --pheno-col " + "A,C,E,F,D,H --prevalence 0.1,-0.4", + !IS_BETA); + ASSERT_FALSE(success); + std::tie(success, pheno) = + test_pheno_check("--pheno Pheno --binary-target F,T,F,T,2F --pheno-col " + "A,C,E,F,D,H --prevalence 1.2,0.4", + !IS_BETA); + ASSERT_FALSE(success); + // duplicated phenotype, we will should error out just in 
case (be extra + // causious) + + std::tie(success, pheno) = + test_pheno_check("--pheno Pheno --binary-target F,T,F,T,2F --pheno-col " + "A,C,E,F,A,H --prevalence 0.1,-0.4", + !IS_BETA); + ASSERT_FALSE(success); +} + +std::tuple +test_base_check(const std::string& command, + std::vector& column_name) +{ + mockCommander commander; + if (!command.empty()) commander.parse_command_wrapper(command); + bool success = commander.base_column_check_wrapper(column_name); + return {success, commander.get_base(), commander.get_base_qc(), + commander.get_base_name()}; +} +TEST(COMMAND_VALIDATION, BASE_CHECK) +{ + mockCommander commander; + // error out because base file not provided + ASSERT_FALSE(commander.base_check_wrapper()); + // these are the default. + std::vector col = {"P", "BETA", "CHR", "LOC", + "A1", "A2", "SNP"}; + auto [success, base, qc, name] = test_base_check("--base Base", col); + ASSERT_TRUE(success); + // no chr + std::tie(success, base, qc, name) = + test_base_check("--base Base --chr Hi", col); + ASSERT_TRUE(success); + ASSERT_FALSE(base.has_column[+BASE_INDEX::CHR]); + // no bp + std::tie(success, base, qc, name) = + test_base_check("--base Base --bp BP", col); + ASSERT_TRUE(success); + ASSERT_FALSE(base.has_column[+BASE_INDEX::BP]); + // no A2 + std::tie(success, base, qc, name) = + test_base_check("--base Base --a2 Alternative", col); + ASSERT_TRUE(success); + ASSERT_FALSE(base.has_column[+BASE_INDEX::NONEFFECT]); + // no default, so we should fail + std::tie(success, base, qc, name) = + test_base_check("--base Base --no-default", col); + ASSERT_FALSE(success); + // but won't fail if we have P BETA A1 and SNP + std::tie(success, base, qc, name) = test_base_check( + "--base Base --no-default --snp SNP --pvalue P --stat BETA --a1 A1", + col); + ASSERT_TRUE(success); + // and we should be able to get the --or and --beta automatically set + ASSERT_FALSE(base.is_or); + ASSERT_TRUE(base.is_beta); + // index check shouldn't allow non-numeric value so will 
always act as if + // --no-default is set + std::tie(success, base, qc, name) = + test_base_check("--base Base --index", col); + ASSERT_FALSE(success); + // and will fail even if we provide all column name unless they are numeric + // values + std::tie(success, base, qc, name) = test_base_check( + "--base Base --index --snp SNP --pvalue P --stat BETA --a1 A1", col); + ASSERT_FALSE(success); + // invalid because we can't guess beta or or + std::tie(success, base, qc, name) = test_base_check( + "--base Base --index --snp 1 --pvalue 2 --stat 3 --a1 4 ", col); + ASSERT_FALSE(success); + // valid index syntex + std::tie(success, base, qc, name) = test_base_check( + "--base Base --index --snp 1 --pvalue 2 --stat 3 --a1 4 --beta", col); + ASSERT_TRUE(success); + ASSERT_TRUE(base.has_column[+BASE_INDEX::RS]); + ASSERT_EQ(base.column_index[+BASE_INDEX::RS], 1); + ASSERT_TRUE(base.has_column[+BASE_INDEX::P]); + ASSERT_EQ(base.column_index[+BASE_INDEX::P], 2); + ASSERT_TRUE(base.has_column[+BASE_INDEX::STAT]); + ASSERT_EQ(base.column_index[+BASE_INDEX::STAT], 3); + ASSERT_TRUE(base.has_column[+BASE_INDEX::EFFECT]); + ASSERT_EQ(base.column_index[+BASE_INDEX::EFFECT], 4); + ASSERT_EQ(base.column_index[+BASE_INDEX::MAX], 4); + // negative index + std::tie(success, base, qc, name) = test_base_check( + "--base Base --index --snp 1 --pvalue -2 --stat 3 --a1 4", col); + ASSERT_FALSE(success); + // multiple beta or + std::tie(success, base, qc, name) = + test_base_check("--base Base --beta --or", col); + ASSERT_FALSE(success); + // now use non-default names to test if the has_column and column_index is + // set correctly + col.clear(); + col = {"pvalue", "z-score", "coordinate", "effect", "non-effect", + "empty", "pad", "check", "rsid", "chrom"}; + std::tie(success, base, qc, name) = test_base_check( + "--base Base --stat z-score --pvalue pvalue --a1 effect --a2 " + "non-effect --snp rsid --chr chrom --bp coordinate --beta", + col); + ASSERT_TRUE(success); + 
ASSERT_TRUE(base.has_column[+BASE_INDEX::STAT]); + ASSERT_EQ(base.column_index[+BASE_INDEX::STAT], 1); + ASSERT_TRUE(base.has_column[+BASE_INDEX::P]); + ASSERT_EQ(base.column_index[+BASE_INDEX::P], 0); + ASSERT_TRUE(base.has_column[+BASE_INDEX::EFFECT]); + ASSERT_EQ(base.column_index[+BASE_INDEX::EFFECT], 3); + ASSERT_TRUE(base.has_column[+BASE_INDEX::NONEFFECT]); + ASSERT_EQ(base.column_index[+BASE_INDEX::NONEFFECT], 4); + ASSERT_TRUE(base.has_column[+BASE_INDEX::RS]); + ASSERT_EQ(base.column_index[+BASE_INDEX::RS], 8); + ASSERT_TRUE(base.has_column[+BASE_INDEX::CHR]); + ASSERT_EQ(base.column_index[+BASE_INDEX::CHR], 9); + ASSERT_TRUE(base.has_column[+BASE_INDEX::BP]); + ASSERT_EQ(base.column_index[+BASE_INDEX::BP], 2); + ASSERT_EQ(base.column_index[+BASE_INDEX::MAX], 9); + // fail because we won't be able to determine beta or OR + std::tie(success, base, qc, name) = test_base_check( + "--base Base --stat z-score --pvalue pvalue --a1 effect --a2 " + "non-effect --snp rsid --chr chrom --bp coordinate", + col); + ASSERT_FALSE(success); + // fail, because stat not found + std::tie(success, base, qc, name) = test_base_check( + "--base Base --stat OR --pvalue pvalue --a1 effect --a2 " + "non-effect --snp rsid --chr chrom --bp coordinate --beta", + col); + ASSERT_FALSE(success); + // fail, because pvalue not found + std::tie(success, base, qc, name) = test_base_check( + "--base Base --stat z-score --pvalue P-value --a1 effect --a2 " + "non-effect --snp rsid --chr chrom --bp coordinate --beta", + col); + ASSERT_FALSE(success); + // fail because A1 not found + std::tie(success, base, qc, name) = test_base_check( + "--base Base --stat z-score --pvalue pvalue --a1 A1 --a2 " + "non-effect --snp rsid --chr chrom --bp coordinate --beta", + col); + ASSERT_FALSE(success); + // fail because SNP not found + std::tie(success, base, qc, name) = test_base_check( + "--base Base --stat z-score --pvalue pvalue --a1 effect --a2 " + "non-effect --snp SNP --chr chrom --bp coordinate 
--beta", + col); + ASSERT_FALSE(success); + + // check different way of determining --stat and --beta / --or + col.clear(); + col = {"P", "or", "CHR", "LOC", "A1", "A2", "SNP"}; + std::tie(success, base, qc, name) = test_base_check("--base Base", col); + ASSERT_TRUE(success); + ASSERT_TRUE(base.is_or); + ASSERT_FALSE(base.is_beta); + ASSERT_TRUE(base.has_column[+BASE_INDEX::STAT]); + ASSERT_EQ(base.column_index[+BASE_INDEX::STAT], 1); + std::tie(success, base, qc, name) = + test_base_check("--base Base --stat or", col); + ASSERT_TRUE(success); + ASSERT_TRUE(base.is_or); + ASSERT_FALSE(base.is_beta); + std::tie(success, base, qc, name) = + test_base_check("--base Base --or", col); + ASSERT_TRUE(success); + ASSERT_TRUE(base.has_column[+BASE_INDEX::STAT]); + ASSERT_EQ(base.column_index[+BASE_INDEX::STAT], 1); + // this should fail as we can't find an beta column + std::tie(success, base, qc, name) = + test_base_check("--base Base --beta", col); + ASSERT_FALSE(success); + col.clear(); + col = {"P", "CHR", "beta", "LOC", "A1", "A2", "SNP"}; + std::tie(success, base, qc, name) = test_base_check("--base Base", col); + ASSERT_TRUE(success); + ASSERT_FALSE(base.is_or); + ASSERT_TRUE(base.is_beta); + ASSERT_TRUE(base.has_column[+BASE_INDEX::STAT]); + ASSERT_EQ(base.column_index[+BASE_INDEX::STAT], 2); + std::tie(success, base, qc, name) = + test_base_check("--base Base --stat beta", col); + ASSERT_TRUE(success); + ASSERT_FALSE(base.is_or); + ASSERT_TRUE(base.is_beta); + // check stat column guessing + std::tie(success, base, qc, name) = + test_base_check("--base Base --beta", col); + ASSERT_TRUE(success); + ASSERT_TRUE(base.has_column[+BASE_INDEX::STAT]); + ASSERT_EQ(base.column_index[+BASE_INDEX::STAT], 2); + // fail because there isn't an or column + std::tie(success, base, qc, name) = + test_base_check("--base Base --or", col); + ASSERT_FALSE(success); + // fail because file contain both col.clear(); + col = {"P", "CHR", "beta", "LOC", "A1", "A2", "SNP", "or"}; + 
std::tie(success, base, qc, name) = test_base_check("--base Base", col); + ASSERT_FALSE(success); + col = {"P", "CHR", "BETA", "BP", "A1", "A2", "SNP"}; + std::tie(success, base, qc, name) = + test_base_check("--base /home/bin/Base.gz.summary", col); + ASSERT_TRUE(success); + ASSERT_STREQ(name.c_str(), "Base.gz"); +} +TEST(COMMAND_VALIDATION, BASE_QC_CHECK) +{ + std::vector col = {"P", "CHR", "BETA", "LOC", + "A1", "A2", "SNP", "INFO"}; + // check default + auto [success, base, qc, name] = test_base_check("--base Base", col); + ASSERT_TRUE(success); + ASSERT_TRUE(base.has_column[+BASE_INDEX::INFO]); + ASSERT_EQ(base.column_index[+BASE_INDEX::INFO], 7); + ASSERT_DOUBLE_EQ(qc.info_score, 0.9); + // check invalid format + col.clear(); + col = {"P", "CHR", "BETA", "INFO_Score", "LOC", "A1", "A2", "SNP"}; + std::tie(success, base, qc, name) = + test_base_check("--base Base --base-info INFO_Score,0.1", col); + ASSERT_FALSE(success); + std::tie(success, base, qc, name) = + test_base_check("--base Base --base-info INFO_Score:0.1,INFO", col); + ASSERT_FALSE(success); + // valid format + std::tie(success, base, qc, name) = + test_base_check("--base Base --base-info INFO_Score:0.1", col); + ASSERT_TRUE(success); + ASSERT_TRUE(base.has_column[+BASE_INDEX::INFO]); + ASSERT_EQ(base.column_index[+BASE_INDEX::INFO], 3); + ASSERT_DOUBLE_EQ(qc.info_score, 0.1); + // check out bound + std::tie(success, base, qc, name) = + test_base_check("--base Base --base-info INFO_Score:-0.1", col); + ASSERT_FALSE(success); + std::tie(success, base, qc, name) = + test_base_check("--base Base --base-info INFO_Score:1.1", col); + ASSERT_FALSE(success); + // check not found + std::tie(success, base, qc, name) = + test_base_check("--base Base --base-info INFO:0.1", col); + ASSERT_TRUE(success); + ASSERT_FALSE(base.has_column[+BASE_INDEX::INFO]); + // now do MAF + + col.clear(); + col = {"P", "CHR", "BETA", "INFO_Score", "LOC", + "A1", "A2", "MAF", "Cases", "SNP"}; + // there is no default + 
std::tie(success, base, qc, name) = test_base_check("--base Base", col); + ASSERT_TRUE(success); + ASSERT_FALSE(base.has_column[+BASE_INDEX::MAF]); + ASSERT_FALSE(base.has_column[+BASE_INDEX::MAF_CASE]); + // and if we don't specify, only do MAF using control + std::tie(success, base, qc, name) = + test_base_check("--base Base --base-maf MAF:0.05", col); + ASSERT_TRUE(success); + ASSERT_TRUE(base.has_column[+BASE_INDEX::MAF]); + ASSERT_FALSE(base.has_column[+BASE_INDEX::MAF_CASE]); + ASSERT_EQ(base.column_index[+BASE_INDEX::MAF], 7); + ASSERT_DOUBLE_EQ(qc.maf, 0.05); + std::tie(success, base, qc, name) = + test_base_check("--base Base --base-maf MAF:0.05,Cases:0.01", col); + ASSERT_TRUE(success); + ASSERT_TRUE(base.has_column[+BASE_INDEX::MAF]); + ASSERT_TRUE(base.has_column[+BASE_INDEX::MAF_CASE]); + ASSERT_EQ(base.column_index[+BASE_INDEX::MAF], 7); + ASSERT_DOUBLE_EQ(qc.maf, 0.05); + ASSERT_EQ(base.column_index[+BASE_INDEX::MAF_CASE], 8); + ASSERT_DOUBLE_EQ(qc.maf_case, 0.01); + // will fail if either one of them are out of bound + std::tie(success, base, qc, name) = + test_base_check("--base Base --base-maf MAF:-0.05,Cases:0.01", col); + ASSERT_FALSE(success); + std::tie(success, base, qc, name) = + test_base_check("--base Base --base-maf MAF:0.05,Cases:-0.01", col); + ASSERT_FALSE(success); + std::tie(success, base, qc, name) = + test_base_check("--base Base --base-maf MAF:1.05,Cases:0.01", col); + ASSERT_FALSE(success); + std::tie(success, base, qc, name) = + test_base_check("--base Base --base-maf MAF:0.05,Cases:1.01", col); + ASSERT_FALSE(success); + // will not fail if one of them are not found (although I do think we should + // fail this, only keeping this behaviour for Chris) + std::tie(success, base, qc, name) = + test_base_check("--base Base --base-maf maf:0.05,Cases:0.01", col); + ASSERT_TRUE(success); + ASSERT_FALSE(base.has_column[+BASE_INDEX::MAF]); + ASSERT_FALSE(base.has_column[+BASE_INDEX::MAF_CASE]); + std::tie(success, base, qc, name) = + 
test_base_check("--base Base --base-maf MAF:0.05,maf_cases:0.01", col); + ASSERT_TRUE(success); + ASSERT_FALSE(base.has_column[+BASE_INDEX::MAF]); + ASSERT_FALSE(base.has_column[+BASE_INDEX::MAF_CASE]); + // invalid format + std::tie(success, base, qc, name) = + test_base_check("--base Base --base-maf MAF,0.05:maf_cases,0.01", col); + ASSERT_FALSE(success); + std::tie(success, base, qc, name) = + test_base_check("--base Base --base-maf MAF:0.05:maf_cases,0.01", col); + ASSERT_FALSE(success); + std::tie(success, base, qc, name) = test_base_check( + "--base Base --base-maf MAF:0.05,maf_cases:0.01,Case:0.5", col); + ASSERT_FALSE(success); +} + #endif // COMMANDER_TEST_H diff --git a/test/src/misc_test.cpp b/test/src/misc_test.cpp index 6d9354c5..3907db74 100644 --- a/test/src/misc_test.cpp +++ b/test/src/misc_test.cpp @@ -5,8 +5,62 @@ #include "misc.hpp" #include "reporter.hpp" #include "gtest/gtest.h" +#include #include +TEST(HAS_ENDING, CHECK_VALIDITY) +{ + std::string full_text; + std::string ending; + try + { + misc::hasEnding(full_text, ending); + FAIL(); + } + catch (...) + { + SUCCEED(); + } + full_text = "Hello"; + try + { + misc::hasEnding(full_text, ending); + FAIL(); + } + catch (...) 
+ { + SUCCEED(); + } + ending = "lo"; + ASSERT_TRUE(misc::hasEnding(full_text, ending)); + ending = "Lo"; + ASSERT_FALSE(misc::hasEnding(full_text, ending)); + ending = "Hello"; + ASSERT_TRUE(misc::hasEnding(full_text, ending)); + ending = "HEllO"; + ASSERT_FALSE(misc::hasEnding(full_text, ending)); +} + +TEST(SPLIT, STRING_VIEW_FORMAT) +{ + std::string input = "test,the,splitter,and,make,sure-it,works well"; + std::vector token = + misc::split(input, std::string_view(",")); + ASSERT_EQ(token.size(), 7); + ASSERT_EQ(token[0], "test"); + ASSERT_EQ(token[1], "the"); + ASSERT_EQ(token[2], "splitter"); + ASSERT_EQ(token[3], "and"); + ASSERT_EQ(token[4], "make"); + ASSERT_EQ(token[5], "sure-it"); + ASSERT_EQ(token[6], "works well"); +} +TEST(OVERFLOW_CHECK, OVERFLOW_CHECK) +{ + ASSERT_FALSE(misc::overflow(10, 1)); + ASSERT_FALSE(misc::overflow(1000000000, 0)); +} + TEST(REPORTER, CHANGE_WIDTH) { try @@ -86,7 +140,7 @@ TEST(REPORTER, REPORTING_MESSAGE) try { // initialize with nothing - Reporter reporter(std::string(path + "LOG")); + Reporter reporter(std::string(path + "LOG"), 60, true); reporter.report("OUTPUT"); SUCCEED(); } @@ -101,7 +155,7 @@ TEST(REPORTER, LIST_MESSAGE) try { // initialize with nothing - Reporter reporter(std::string(path + "LOG")); + Reporter reporter(std::string(path + "LOG"), 60, true); reporter.report("1) Testing\n2)If this is ok\n"); SUCCEED(); } diff --git a/test/src/prsice_test.cpp b/test/src/prsice_test.cpp index 60884e93..c17c651e 100644 --- a/test/src/prsice_test.cpp +++ b/test/src/prsice_test.cpp @@ -11,7 +11,7 @@ TEST(PRSICE, CONSTRUCT) { // should do nothing but assignment - Reporter reporter(std::string(path + "LOG")); + Reporter reporter(std::string(path + "LOG"), 60, true); std::string output; CalculatePRS prs_info; PThresholding p_info; @@ -31,7 +31,7 @@ TEST(PRSICE, CONSTRUCT) TEST(PRSICE, PHENO_CHECK) { // Test if the phenotype checking function works - Reporter reporter(std::string(path + "LOG")); + Reporter 
reporter(std::string(path + "LOG"), 60, true); std::string output; CalculatePRS prs_info; // will still run if no_regress is use as we don't actually read the @@ -180,7 +180,7 @@ TEST(PRSICE, PHENO_CHECK) dup_pheno_valid_col.pheno_check(); ASSERT_EQ(dup_pheno_valid_col.num_phenotype(), 1); } - catch (const std::runtime_error& er) + catch (const std::runtime_error&) { FAIL(); } diff --git a/test/src/region_test.cpp b/test/src/region_test.cpp index 4bc40fc7..19719364 100644 --- a/test/src/region_test.cpp +++ b/test/src/region_test.cpp @@ -944,7 +944,7 @@ class REGION_MALFORM_BED_MASTER : public ::testing::Test bed_name = path + "Test.bed"; std::vector bed = {bed_name}; bool genome_wide_background = false; - reporter = new Reporter(std::string(path + "LOG")); + reporter = new Reporter(std::string(path + "LOG"), 60, true); region = new FAKE_REGION(bed, feature, msigdb, snp_set, background, gtf, window_5, window_3, genome_wide_background, reporter); @@ -1102,7 +1102,7 @@ TEST(REGION_MALFORM_BED, MALFORM_INPUT_SET) std::vector snp_set; std::vector bed = {bed_name}; std::string background = ""; - Reporter reporter(std::string(path + "LOG")); + Reporter reporter(std::string(path + "LOG"), 60, true); std::vector> gene_sets; std::unordered_map> snp_in_sets; try @@ -1443,7 +1443,7 @@ TEST(REGION_STD_BED_INPUT, NO_RUN) std::vector snp_set; std::vector bed; std::string background = ""; - Reporter reporter(std::string(path + "LOG")); + Reporter reporter(std::string(path + "LOG"), 60, true); FAKE_REGION region(bed, feature, msigdb, snp_set, background, gtf, window_5, window_3, genome_wide_background, &reporter); size_t num_regions = region.generate_regions(22); @@ -1493,7 +1493,7 @@ TEST(REGION_STD_BED_INPUT, WITH_HEADER_TRACE) std::vector snp_set; std::vector bed = {bed_name}; std::string background = ""; - Reporter reporter(std::string(path + "LOG")); + Reporter reporter(std::string(path + "LOG"), 60, true); try { FAKE_REGION region(bed, feature, msigdb, snp_set, background, 
gtf, @@ -1533,7 +1533,7 @@ TEST(REGION_STD_BED_INPUT, DUPLICATED_SET_NAME) std::vector snp_set; std::vector bed = {std::string(bed_name + ":Base")}; std::string background = ""; - Reporter reporter(std::string(path + "LOG")); + Reporter reporter(std::string(path + "LOG"), 60, true); try { FAKE_REGION region(bed, feature, msigdb, snp_set, background, gtf, @@ -1573,7 +1573,7 @@ TEST(REGION_STD_BED_INPUT, WITH_HEADER_BROWSER) std::vector snp_set; std::vector bed = {bed_name}; std::string background = ""; - Reporter reporter(std::string(path + "LOG")); + Reporter reporter(std::string(path + "LOG"), 60, true); try { FAKE_REGION region(bed, feature, msigdb, snp_set, background, gtf, @@ -1687,7 +1687,7 @@ TEST(REGION_MALFORM_BED, INVALID_HEADER_FOR_SET_SELECT) std::vector snp_set; std::vector bed = {bed_name}; std::string background = ""; - Reporter reporter(std::string(path + "LOG")); + Reporter reporter(std::string(path + "LOG"), 60, true); try { // malformed anything are considered as fatal @@ -1754,7 +1754,7 @@ class REGION_STD_BED : public ::testing::Test << "21 9363 49431 . . .\n" << "21 43440 82120 . . .\n"; // overlap bed_file.close(); - Reporter reporter(std::string(path + "LOG")); + Reporter reporter(std::string(path + "LOG"), 60, true); std::vector feature = {"exon", "gene", "protein_coding", "CDS"}; size_t window_5 = 0; @@ -1879,7 +1879,7 @@ class REGION_CHR_BED : public ::testing::Test << "chr21 9363 49431 . . .\n" << "chr21 43440 82120 . . .\n"; // overlap bed_file.close(); - Reporter reporter(std::string(path + "LOG")); + Reporter reporter(std::string(path + "LOG"), 60, true); std::vector feature = {"exon", "gene", "protein_coding", "CDS"}; size_t window_5 = 0; @@ -2004,7 +2004,7 @@ class REGION_STD_BED_PAD : public ::testing::Test << "21 9363 49431 . . .\n" << "21 43440 82120 . . 
.\n"; // overlap bed_file.close(); - Reporter reporter(std::string(path + "LOG")); + Reporter reporter(std::string(path + "LOG"), 60, true); std::vector feature = {"exon", "gene", "protein_coding", "CDS"}; size_t window_5 = 10; @@ -2104,7 +2104,7 @@ class REGION_MINIMUM_BED_PAD : public ::testing::Test << "21 9363 49431\n" << "21 43440 82120\n"; // overlap bed_file.close(); - Reporter reporter(std::string(path + "LOG")); + Reporter reporter(std::string(path + "LOG"), 60, true); std::vector feature = {"exon", "gene", "protein_coding", "CDS"}; size_t window_5 = 10; @@ -2161,7 +2161,7 @@ TEST_F(REGION_MINIMUM_BED_PAD, CHECK_PAD) TEST(REGION_MULTI_BED, CHECK_NAME) { - Reporter reporter(std::string(path + "LOG")); + Reporter reporter(std::string(path + "LOG"), 60, true); std::ofstream bed_file; std::string bed_name = path + "Test.bed"; std::string second_bed_name = path + "Test2.bed"; @@ -2203,7 +2203,7 @@ TEST(REGION_MULTI_BED, CHECK_NAME) TEST(REGION_MULTI_BED, CHECK_NAME2) { - Reporter reporter(std::string(path + "LOG")); + Reporter reporter(std::string(path + "LOG"), 60, true); std::ofstream bed_file; std::string bed_name = path + "Test.bed"; std::string second_bed_name = path + "Test2.bed"; @@ -2258,7 +2258,7 @@ class REGION_GTF : public ::testing::Test std::string gtf_name = path + "Test.gtf"; void SetUp() override { - reporter = new Reporter(std::string(path + "LOG")); + reporter = new Reporter(std::string(path + "LOG"), 60, true); std::vector feature = {"exon", "gene", "protein_coding", "CDS"}; size_t window_5 = 0; @@ -2814,7 +2814,7 @@ class REGION_GTF_FEATURE : public ::testing::Test "ENSG00000223973 ENSG00000255790 ENSG00000122966 " << std::endl; gmt.close(); - Reporter reporter(std::string(path + "LOG")); + Reporter reporter(std::string(path + "LOG"), 60, true); std::vector feature = {"exon", "gene", "protein_coding", "CDS"}; size_t window_5 = 0; @@ -2935,7 +2935,7 @@ class REGION_GTF_GZ : public ::testing::Test "ENSG00000223973 ENSG00000255790 
ENSG00000122966 " << std::endl; gmt.close(); - Reporter reporter(std::string(path + "LOG")); + Reporter reporter(std::string(path + "LOG"), 60, true); std::vector feature = {"exon", "gene", "protein_coding", "CDS"}; size_t window_5 = 0; @@ -3134,7 +3134,7 @@ class REGION_GTF_PAD : public ::testing::Test "ENSG00000223973 ENSG00000255790 ENSG00000122966 " << std::endl; gmt.close(); - Reporter reporter(std::string(path + "LOG")); + Reporter reporter(std::string(path + "LOG"), 60, true); std::vector feature = {"exon", "gene", "protein_coding", "CDS"}; size_t window_5 = 10; @@ -3306,7 +3306,7 @@ class REGION_GTF_MULTI_EX : public ::testing::Test gmt << "SET5 ENSG00000223973" << std::endl; gmt << "SET6 ENSG00000122966" << std::endl; gmt.close(); - Reporter reporter(std::string(path + "LOG")); + Reporter reporter(std::string(path + "LOG"), 60, true); std::vector feature = {"exon", "gene", "protein_coding", "CDS"}; size_t window_5 = 0; @@ -3405,7 +3405,7 @@ class REGION_MSIGDB : public ::testing::Test gmt << "SET1 DDX11L1" << std::endl; gmt << "SET2 CIT" << std::endl; gmt.close(); - reporter = new Reporter(std::string(path + "LOG")); + reporter = new Reporter(std::string(path + "LOG"), 60, true); std::vector feature = {"exon", "gene", "protein_coding", "CDS"}; size_t window_5 = 0; @@ -3721,7 +3721,7 @@ TEST(REGION_MSIGDB_SPECIAL, CHR_OVER) gtf.close(); gmt << "SET1 DDX11L1" << std::endl; gmt.close(); - Reporter reporter(std::string(path + "LOG")); + Reporter reporter(std::string(path + "LOG"), 60, true); std::vector feature = {"exon", "gene", "protein_coding", "CDS"}; size_t window_5 = 0; @@ -3830,7 +3830,7 @@ TEST(REGION_MSIGDB_SPECIAL, WRONG_MSIG_NAME) gmt << "SET1 DDX11L1" << std::endl; gmt << "SET2 CIT" << std::endl; gmt.close(); - Reporter reporter(std::string(path + "LOG")); + Reporter reporter(std::string(path + "LOG"), 60, true); std::vector feature = {"exon", "gene", "protein_coding", "CDS"}; size_t window_5 = 0; @@ -3862,7 +3862,7 @@ TEST(REGION_SNP_SET, 
INVALID_SNP_SET_NAME) snp_set.open(snp_set_name.c_str()); snp_set << "SNP_1\nSNP_2\nSNP_4\nSNP_5\n"; snp_set.close(); - Reporter reporter(std::string(path + "LOG")); + Reporter reporter(std::string(path + "LOG"), 60, true); std::vector feature = {"exon", "gene", "protein_coding", "CDS"}; size_t window_5 = 0; @@ -3892,7 +3892,7 @@ TEST(REGION_SNP_SET, SNP_FILE_NOT_FOUND) { std::string snp_set_name = path + "404_set"; std::remove(snp_set_name.c_str()); - Reporter reporter(std::string(path + "LOG")); + Reporter reporter(std::string(path + "LOG"), 60, true); std::vector feature = {"exon", "gene", "protein_coding", "CDS"}; std::vector bed_names = {}; @@ -3923,7 +3923,7 @@ TEST(REGION_SNP_SET, DUPLICATED_SET_NAME) snp_set.open(snp_set_name.c_str()); snp_set << "SNP_1\nSNP_2\nSNP_4\nSNP_5\n"; snp_set.close(); - Reporter reporter(std::string(path + "LOG")); + Reporter reporter(std::string(path + "LOG"), 60, true); std::vector feature = {"exon", "gene", "protein_coding", "CDS"}; std::vector bed_names = {}; @@ -3955,7 +3955,7 @@ TEST(REGION_SNP_SET, VERTICAL_SNP_SET) snp_set.open(snp_set_name.c_str()); snp_set << "SNP_1\nSNP_2\nSNP_4\nSNP_5\n"; snp_set.close(); - Reporter reporter(std::string(path + "LOG")); + Reporter reporter(std::string(path + "LOG"), 60, true); std::vector feature = {"exon", "gene", "protein_coding", "CDS"}; std::vector bed_names = {}; @@ -4053,7 +4053,7 @@ TEST(REGION_SNP_SET, MULTI_SNP_SET) snp_set << "SET_4 SNP_86 SNP_478 SNP_155 SNP_743\n"; snp_set << "SET_5 SNP_97 SNP_912 SNP_132 SNP_53\n"; snp_set.close(); - Reporter reporter(std::string(path + "LOG")); + Reporter reporter(std::string(path + "LOG"), 60, true); std::vector feature = {"exon", "gene", "protein_coding", "CDS"}; std::vector bed_names = {}; @@ -4156,7 +4156,7 @@ TEST(REGION_SNP_SET, DUPLICATED_MULTI_SNP_SET_NAME) snp_set << "SET_4 SNP_86 SNP_478 SNP_155 SNP_743\n"; snp_set << "SET_5 SNP_97 SNP_912 SNP_132 SNP_53\n"; snp_set.close(); - Reporter reporter(std::string(path + "LOG")); + 
Reporter reporter(std::string(path + "LOG"), 60, true); std::vector feature = {"exon", "gene", "protein_coding", "CDS"}; std::vector bed_names = {}; @@ -4241,7 +4241,7 @@ TEST(REGION_BACKGROUND, GTF_BACKGROUND) gmt << "SET1 DDX11L1" << std::endl; gmt << "SET2 CIT" << std::endl; gmt.close(); - Reporter reporter(std::string(path + "LOG")); + Reporter reporter(std::string(path + "LOG"), 60, true); std::vector feature = {"exon", "gene", "protein_coding", "CDS"}; size_t window_5 = 0; @@ -4340,7 +4340,7 @@ TEST(REGION_BACKGROUND, GENOME_BACKGROUND) gmt << "SET1 DDX11L1" << std::endl; gmt << "SET2 CIT" << std::endl; gmt.close(); - Reporter reporter(std::string(path + "LOG")); + Reporter reporter(std::string(path + "LOG"), 60, true); std::vector feature = {"exon", "gene", "protein_coding", "CDS"}; size_t window_5 = 0; @@ -4434,7 +4434,7 @@ TEST(REGION_BACKGROUND, BED_BACKGROUND) bed_file.close(); background.append(":bed"); std::vector bed_names = {bed_name}; - Reporter reporter(std::string(path + "LOG")); + Reporter reporter(std::string(path + "LOG"), 60, true); std::vector feature = {"exon", "gene", "protein_coding", "CDS"}; size_t window_5 = 0; @@ -4582,7 +4582,7 @@ TEST(REGION_BACKGROUND, RANGE_BACKGROUND) "14 22104 47572\n"; bed_file.close(); background.append(":range"); - Reporter reporter(std::string(path + "LOG")); + Reporter reporter(std::string(path + "LOG"), 60, true); std::vector feature = {"exon", "gene", "protein_coding", "CDS"}; std::vector bed_names = {bed_name}; @@ -4729,7 +4729,7 @@ TEST(REGION_BACKGROUND, GENE_NAME_BACKGROUND) gmt << "CIT CCTV" << std::endl; gmt.close(); background.append(":gene"); - Reporter reporter(std::string(path + "LOG")); + Reporter reporter(std::string(path + "LOG"), 60, true); std::vector feature = {"exon", "gene", "protein_coding", "CDS"}; std::vector bed_names = {}; @@ -4900,7 +4900,7 @@ TEST(REGION_BACKGROUND, INVALID_FORMAT) bed_file.close(); std::vector bed_names = {bed_name}; - Reporter reporter(std::string(path + "LOG")); 
+ Reporter reporter(std::string(path + "LOG"), 60, true); std::vector feature = {"exon", "gene", "protein_coding", "CDS"}; size_t window_5 = 0; @@ -4973,7 +4973,7 @@ TEST(REGION_BACKGROUND, UNDEFINED_FORMAT) bed_file.close(); background.append(":undefined"); std::vector bed_names = {bed_name}; - Reporter reporter(std::string(path + "LOG")); + Reporter reporter(std::string(path + "LOG"), 60, true); std::vector feature = {"exon", "gene", "protein_coding", "CDS"}; size_t window_5 = 0; @@ -5046,7 +5046,7 @@ TEST(REGION_BACKGROUND, MALFORMED_COLUMN) bed_file.close(); background.append(":bed"); std::vector bed_names = {bed_name}; - Reporter reporter(std::string(path + "LOG")); + Reporter reporter(std::string(path + "LOG"), 60, true); std::vector feature = {"exon", "gene", "protein_coding", "CDS"}; size_t window_5 = 0; @@ -5119,7 +5119,7 @@ TEST(REGION_BACKGROUND, NEGATIVE_END) bed_file.close(); background.append(":bed"); std::vector bed_names = {bed_name}; - Reporter reporter(std::string(path + "LOG")); + Reporter reporter(std::string(path + "LOG"), 60, true); std::vector feature = {"exon", "gene", "protein_coding", "CDS"}; size_t window_5 = 0; @@ -5191,7 +5191,7 @@ TEST(REGION_BACKGROUND, NEGATIVE_START) bed_file.close(); background.append(":bed"); std::vector bed_names = {bed_name}; - Reporter reporter(std::string(path + "LOG")); + Reporter reporter(std::string(path + "LOG"), 60, true); std::vector feature = {"exon", "gene", "protein_coding", "CDS"}; size_t window_5 = 0; @@ -5264,7 +5264,7 @@ TEST(REGION_BACKGROUND, INVALID_END) bed_file.close(); background.append(":bed"); std::vector bed_names = {bed_name}; - Reporter reporter(std::string(path + "LOG")); + Reporter reporter(std::string(path + "LOG"), 60, true); std::vector feature = {"exon", "gene", "protein_coding", "CDS"}; size_t window_5 = 0; @@ -5336,7 +5336,7 @@ TEST(REGION_BACKGROUND, INVALID_START) bed_file.close(); background.append(":bed"); std::vector bed_names = {bed_name}; - Reporter 
reporter(std::string(path + "LOG")); + Reporter reporter(std::string(path + "LOG"), 60, true); std::vector feature = {"exon", "gene", "protein_coding", "CDS"}; size_t window_5 = 0; @@ -5409,7 +5409,7 @@ TEST(REGION_BACKGROUND, SMALLER_END) bed_file.close(); background.append(":bed"); std::vector bed_names = {bed_name}; - Reporter reporter(std::string(path + "LOG")); + Reporter reporter(std::string(path + "LOG"), 60, true); std::vector feature = {"exon", "gene", "protein_coding", "CDS"}; size_t window_5 = 0; @@ -5447,7 +5447,7 @@ TEST(REGION_BACKGROUND, NOT_FOUND) std::remove(background.c_str()); background.append(":bed"); std::vector bed_names = {bed_name}; - Reporter reporter(std::string(path + "LOG")); + Reporter reporter(std::string(path + "LOG"), 60, true); std::vector feature = {"exon", "gene", "protein_coding", "CDS"}; size_t window_5 = 0; @@ -5521,7 +5521,7 @@ TEST(REGION_BACKGROUND, SKIP_CHR) bed_file.close(); background.append(":bed"); std::vector bed_names = {bed_name}; - Reporter reporter(std::string(path + "LOG")); + Reporter reporter(std::string(path + "LOG"), 60, true); std::vector feature = {"exon", "gene", "protein_coding", "CDS"}; size_t window_5 = 0; @@ -5653,7 +5653,7 @@ TEST(REGION_BACKGROUND, BED_BACKGROUND_STRANDED) bed_file.close(); background.append(":bed"); std::vector bed_names = {bed_name}; - Reporter reporter(std::string(path + "LOG")); + Reporter reporter(std::string(path + "LOG"), 60, true); std::vector feature = {"exon", "gene", "protein_coding", "CDS"}; size_t window_5 = 10; @@ -5823,7 +5823,7 @@ TEST(REGION_BACKGROUND, UNSTRANDED_BED_WITH_PAD) bed_file.close(); background.append(":bed"); std::vector bed_names = {bed_name}; - Reporter reporter(std::string(path + "LOG")); + Reporter reporter(std::string(path + "LOG"), 60, true); std::vector feature = {"exon", "gene", "protein_coding", "CDS"}; size_t window_5 = 10; diff --git a/test/src/snp_test.cpp b/test/src/snp_test.cpp index b655d86e..71f39da1 100644 --- a/test/src/snp_test.cpp +++ 
b/test/src/snp_test.cpp @@ -607,7 +607,7 @@ class SNP_REGION : public ::testing::Test "ENSG00000223973 ENSG00000255790 ENSG00000122966 " << std::endl; gmt.close(); - Reporter reporter(std::string(path + "LOG")); + Reporter reporter(std::string(path + "LOG"), 60, true); std::vector feature = {"exon", "gene", "protein_coding", "CDS"}; size_t window_5 = 0;