# config.yml

#
# microbetag parameters
#

# Run as a container.
# If container is true, the IO path will be /mnt.
# If container is false, please provide the input/output path in io_path.
approach:
  container: false
  # Set io_path to null when running with Docker.
  io_path: /home/luna.kuleuven.be/u0156635/github_repos/KU/microbetag/tests/

# OUTPUT
# ------
# Other values used previously: output_dev_dada2_may, output_dev_qiime2
output_directory: kostas

# OTU TABLE
# ---------

# Filename of your OTU table.
# Other tables used previously: dada2_use_case.tsv, qiime2_use_case.tsv,
# otu_table_silva132_partial.tsv, table.from_biom_w_taxonomy.txt
otu_table: konst_test.tsv

# Delimiter used in the OTU table file.
# NOTE(review): written this way the value is the two characters
# backslash + t, not a real tab character (a real tab would need "\t" in
# double quotes). Confirm which form the consumer expects.
otu_table_delim: '\t'

# Taxonomy.
# Set it to "GTDB" in case you have used the GTDB for your bins.
# Set it to "qiime2" or "dada2" in case you have used one of those.
# Set it to "any" in case none of the previous apply.
# REMEMBER! In all cases, you need to provide a 7-level taxonomy as
# described in the "how to".
# Alternative value: qiime2
taxonomy: dada2

# Column name denoting the OTU id; if it contains special characters,
# for example "#", use double quotes.
# qiime2 -> "#OTU ID"   dada2 -> "ASV_id"
# NOTE(review): the dada2 hint is spelled "ASV_id" while the configured
# value is "ASV_ID" - confirm the exact case used in the table.
otu_identifier_column: "ASV_ID"

# Column name denoting the taxonomy assignment of the OTU.
taxonomy_column_name: Taxonomy

# Delimiter in the taxonomy column.
taxonomy_delimeter: ";"

# Character denoting commented lines, if any, in the OTU table.
comments_character: "#"

# [ATTENTION! The dada2 and qiime2 tests highlight the difference]
# Whether the column names are in the last comment line of the table
# (presumably the last commented line before the data rows - the original
# note was cut off here; confirm).
# The FAPROTAX step will break if this is not set properly.
# dada2 -> false   qiime2 -> true
column_names_are_in: false


# EDGE LIST
# ---------

# If a co-occurrence network is already available, please provide its
# edge list: a 2-column, tab-separated file.
# Example: tests/output_dev_qiime2/flashweave/network_detailed_output.edgelist
edge_list: null

# METADATA
# --------
metadata_file: null


# STEPS
# -----
PhenDB: true
FAPROTAX: true

# BugBase can be quite tricky to install in case you are not using the
# container option.
# If you still want to use it, please make sure you have the $BUGBASE_PATH
# environment variable set as in the installation.sh script.
# Further, remember that BugBase only works with GreenGenes IDs; you can
# get those using OTU pickers such as https://github.com/GabeAl/NINJA-OPS
# If no GreenGenes IDs for OTUs or IMG IDs for shotgun data are present,
# microbetag will fail (https://github.com/knights-lab/BugBase/issues/2)
BugBase: true

NetCooperate: true
pathway_complementarity: false


# ------
# TOOLS
# ------

# Flashweave
flashweave_opt:

  # If edge_list is empty, microbetag will invoke flashweave to build a
  # co-occurrence network; fill in its parameters below.
  # If edge_list is not empty, please skip this section.
  algorithmic_parameters:

    # Enable heterogeneous mode for multi-habitat or -protocol data
    # with at least thousands of samples (FlashWeaveHE).
    --heterogeneous: null

    # Enable fine-grained associations (FlashWeave-S, FlashWeaveHE-S);
    # sensitive=false results in the fast modes FlashWeave-F or
    # FlashWeaveHE-F.
    --sensitive: null

    # Maximum size of conditioning sets; high values can strongly increase
    # runtime. max_k=0 results in no conditioning (univariate mode).
    --max_k: null

    # Threshold used to determine statistical significance.
    --alpha: null

    # Convergence threshold, i.e. if conv=0.01 assume convergence if the
    # number of edges increased by only 1% after 100% more runtime
    # (checked in intervals).
    --conv: null

    # Enable feed-forward heuristic.
    --feed_forward: null

    # Maximum number of conditional tests that should be performed on a
    # variable pair before association is assumed.
    --max_tests: null

    # Reliability criterion for statistical tests when sensitive=false.
    --hps: null

    # Perform False Discovery Rate correction (Benjamini-Hochberg method)
    # on pairwise associations.
    --FDR: null

    # Don't compute associations between variables having less reliable
    # samples (i.e. non-zero if heterogeneous=true) than this number.
    # -1: automatically choose a threshold.
    --n_obs_min: null

    # If the feed-forward heuristic is active, determines the interval
    # (seconds) at which neighborhood information is updated.
    --time_limit: null

  general_parameters:

    # Automatically choose and perform data normalization (based on
    # sensitive and heterogeneous).
    --normalize: null

    # Store, for each discarded edge, which variable set led to its
    # exclusion (can be memory intense for large networks).
    --track_rejections: null

    # Print progress information.
    --verbose: null

    # If true, rows of data are variables and columns are samples.
    --transposed: null

    # Precision in bits to use for calculations (16, 32, 64 or 128).
    --prec: null

    # Use a sparse data representation (should be left at true in almost
    # all cases).
    --make_sparse: null


# FAPROTAX optional parameters
faprotax_opt:
  --force: false


# BugBase optional parameters
# [ATTENTION!] If no GreenGenes IDs for OTUs or IMG IDs for shotgun data
# are present, microbetag will fail
# (https://github.com/knights-lab/BugBase/issues/2)
bugbase_opt:

  # Map column header to plot by (which column denotes treatment groups)
  -c: taxonomy

  # Data is shotgun metagenomic data (picked against IMG database)
  -w: null

  # Plot all samples (no stats will be run)
  -a: null

  # Output prediction files only, no plots will be made
  -x: null

  # Specify subset of groups in map column to plot (list, comma separated)
  -g: null

  # Data is of type continuous
  -z: null

  # Use coefficient of variance instead of variance to determine thresholds
  -C: null

  # Centered log-ratio transform the data instead of using relative abundance
  -l: null

  # Taxa level to plot OTU contributions by (number 1-7)
  -t: 3

  # Specify a threshold to use for all traits (number 0-1)
  -T: null

  # Use the KEGG modules instead of default traits
  # (Note: you must specify which modules!)
  -k: null

  # List modules or traits to predict (comma separated list, no spaces)
  -p: null

  # Use a user-defined trait table. Absolute file path must be specified
  -u: null

  # Give mapping file; example file under the tests/ folder
  -m: null


# microbetag needs to link the species-level taxonomy assignments
# to an NCBI Taxonomy id. If Silva is the reference taxonomy database
# used to get the OTU table, microbetag will do that automatically.
# In case silva_db is false, the user needs to have an extra column
# on the OTU table with the NCBI Taxonomy id of the species found.
# Assignments that are not at the species level should be filled
# with "null".
silva_db: true