-
Notifications
You must be signed in to change notification settings - Fork 1
/
config.yml
148 lines (113 loc) · 7.23 KB
/
config.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
#
# microbetag parameters
#
# Execution approach.
# container: if True, microbetag runs as a container and the IO path will be /mnt.
#            If False, please provide the input/output path in io_path.
# io_path:   input/output path; set it to null when running with docker.
# NOTE(review): the io_path below is a machine-specific absolute path - change it for your own setup.
approach:
container: False
io_path: /home/luna.kuleuven.be/u0156635/github_repos/KU/microbetag/tests/ # if docker: null
# OUTPUT
# ------
# Output directory.
# Other values previously used here: output_dev_dada2_may, output_dev_qiime2
output_directory: kostas
# OTU TABLE
# ----------
# Filename of your OTU table.
# Other tables previously used here: dada2_use_case.tsv, qiime2_use_case.tsv,
# otu_table_silva132_partial.tsv, table.from_biom_w_taxonomy.txt
otu_table: konst_test.tsv
# Delimiter used in the OTU table file.
# Single-quoted so the value is explicitly the two literal characters backslash + t
# (identical to the unquoted form); a double-quoted "\t" would instead be a real tab.
# NOTE(review): confirm which of the two the loader expects.
otu_table_delim: '\t'
# Taxonomy.
# Set it to "GTDB" in case you have used the GTDB for your bins.
# Set it to "qiime2" or "dada2" in case you have used one of those.
# Set it to "any" in case none of the previous apply.
# REMEMBER! In all cases, you need to provide a 7-level taxonomy as described in the "how to".
taxonomy: dada2  # alternative: qiime2
# Column name denoting the OTU id; if it contains special characters, for example "#", use double quotes.
# qiime2 -> "#OTU ID", dada2 -> "ASV_id"
# NOTE(review): the dada2 hint says "ASV_id" while the value below is "ASV_ID";
# confirm the case matches the header of your table.
otu_identifier_column: "ASV_ID"
# Column name denoting the taxonomy assignment of the OTU.
taxonomy_column_name: Taxonomy
# Delimiter in the taxonomy column (the key name keeps its original spelling).
taxonomy_delimeter: ";"
# Character denoting commented lines, if any, in the OTU table.
comments_character: "#"
# [ATTENTION! The dada2 and qiime2 tests highlight the difference.]
# Whether the column names are in the last comment line of the OTU table.
# NOTE(review): the original comment ended at "prior to any" - presumably "prior to any data line"; confirm.
# The FAPROTAX step will break if this is not set properly.
column_names_are_in: false  # dada2 -> false, qiime2 -> true
# EDGE LIST
# ---------
# If a co-occurrence network is already available, please provide its edge list:
# a 2-column, tab-separated file.
# Example: tests/output_dev_qiime2/flashweave/network_detailed_output.edgelist
# Explicit null = no edge list provided (same value as leaving the key empty).
edge_list: null
# METADATA
# --------
# Explicit null = no metadata file provided (same value as leaving the key empty).
metadata_file: null
# STEPS
# --------
# One true/false switch per step.
PhenDB: true
FAPROTAX: true
# BugBase can be quite tricky to install if you are not using the container option.
# If you still want to use it, please make sure the $BUGBASE_PATH environment variable
# is set as in the installation.sh script.
# Further, remember that BugBase only works with GreenGenes IDs; you can get those
# using OTU pickers such as https://github.com/GabeAl/NINJA-OPS
# If no GreenGenes IDs for OTUs or IMG IDs for shotgun data are present,
# microbetag will fail (https://github.com/knights-lab/BugBase/issues/2).
BugBase: true
NetCooperate: true
pathway_complementarity: false
# ------
# TOOLS
# ------
# FlashWeave
flashweave_opt:
# If no edge list is provided, microbetag will invoke FlashWeave to build a co-occurrence network; fill in its parameters below.
# If edge_list is not empty, please skip this section.
# Every option below is currently left empty.
algorithmic_parameters:
--heterogeneous: # enable heterogeneous mode for multi-habitat or multi-protocol data
# with at least thousands of samples (FlashWeaveHE)
--sensitive: # enable fine-grained associations (FlashWeave-S, FlashWeaveHE-S);
# sensitive=false results in the fast modes FlashWeave-F or FlashWeaveHE-F
--max_k: # maximum size of conditioning sets; high values can strongly increase
# runtime. max_k=0 results in no conditioning (univariate mode)
--alpha: # threshold used to determine statistical significance
--conv: # convergence threshold, i.e. if conv=0.01 assume convergence
# if the number of edges increased by only 1% after 100% more runtime (checked in intervals)
--feed_forward: # enable feed-forward heuristic
--max_tests: # maximum number of conditional tests that should be performed on a variable pair
# before association is assumed
--hps: # reliability criterion for statistical tests when sensitive=false
--FDR: # perform False Discovery Rate correction (Benjamini-Hochberg method)
# on pairwise associations
--n_obs_min: # don't compute associations between variables having fewer reliable samples
# (i.e. non-zero if heterogeneous=true) than this number.
# -1: automatically choose a threshold.
--time_limit: # if feed-forward heuristic is active, determines the interval (seconds)
# at which neighborhood information is updated
general_parameters:
--normalize: # automatically choose and perform data normalization (based on sensitive and heterogeneous)
--track_rejections: # store, for each discarded edge, which variable set led to its exclusion (can be memory intense for large networks)
--verbose: # print progress information
--transposed: # if true, rows of data are variables and columns are samples
--prec: # precision in bits to use for calculations (16, 32, 64 or 128)
--make_sparse: # use a sparse data representation (should be left at true in almost all cases)
# FAPROTAX optional parameters
# Only the --force flag is listed here; it is currently set to False.
faprotax_opt:
--force: False
# BugBase optional parameters
# [ATTENTION!] If no GreenGenes IDs for OTUs or IMG IDs for shotgun data are present, microbetag will fail (https://github.com/knights-lab/BugBase/issues/2)
# Only -c and -t have a value below; every other flag is left empty.
bugbase_opt:
-c: taxonomy # Map column header to plot by (which column denotes treatment groups)
-w: # Data is shotgun metagenomic data (picked against IMG database)
-a: # Plot all samples (no stats will be run)
-x: # Output prediction files only, no plots will be made
-g: # Specify subset of groups in map column to plot (list, comma separated)
-z: # Data is of type continuous
-C: # Use coefficient of variance instead of variance to determine thresholds
-l: # Centered log-ratio transform the data instead of using relative abundance
-t: 3 # Taxa level to plot OTU contributions by (number 1-7)
-T: # Specify a threshold to use for all traits (number 0-1)
-k: # Use the KEGG modules instead of default traits (Note: you must specify which modules!)
-p: # List modules or traits to predict (comma separated list, no spaces)
-u: # Use a user-defined trait table. Absolute file path must be specified
-m: # Give mapping file; an example file is under the tests/ folder
# microbetag needs to link the species-level taxonomy assignments
# to an NCBI Taxonomy id. If Silva is the reference taxonomy database used to get the OTU table,
# then microbetag will do that automatically.
# In case silva_db is false, the user needs to have an extra column
# in the OTU table with the NCBI Taxonomy id of the species found.
# Assignments that are not at the species level should be filled with "null".
silva_db: true