-
Notifications
You must be signed in to change notification settings - Fork 0
/
config-setup.txt
executable file
·133 lines (101 loc) · 3.97 KB
/
config-setup.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
## =============== ##
## CONFIG FILE ##
## =============== ##
# This file contains the variables you need to change to suit your dataset.
# It mainly consists of file paths and data-specific settings. Some defaults
# are provided for you, but please check all entries.
## --------------- ##
## GLOBALLY NEEDED ##
## --------------- ##
# Full file path to the root of this repository.
# Be sure to surround the path in quotes.
REPO_DIR=""
# Base directory that processed data is sent to.
BASE_DIR=""
# NOTE(review): no description was provided for cell_type; presumably a label
# for the cell type of this dataset -- confirm against the scripts that read it.
cell_type="bulk"
# Full file path to conda.sh, found in your conda installation directory,
# specifically at path/to/conda/etc/profile.d/conda.sh
CONDA_SHELL=""
## -------------- ##
## ONT AND BS-SEQ ##
## -------------- ##
# Bed file locations (surround paths in quotes, as with REPO_DIR)
ONT_bed_file_location=
WGBS_bed_file_location=
oxBS_bed_file_location=
# See README.md for details
CPG_ISLANDS_REFERENCE="${REPO_DIR}/references/cpg_islands_hg19.bed.gz"
# Read depth thresholds used when filtering the input bed files.
# A maximum read depth is included because of the findings in
# [this paper](https://doi.org/10.1186/s13059-024-03207-9).
# If you disagree with that finding, set the maximum to a very large number.
minimum_read_depth=30
maximum_read_depth=10000
# Thresholds for creating 'good reference datasets', which are used to obtain
# the value of p for the binomial distribution (see README.md for more info).
# These are split into thresholds for 5mC (the _m variables) and 5hmC (the _h
# variables), as 5hmC usually has lower read depth and fewer sites at high
# levels of hydroxymethylation (for ONT data).
reference_read_depth_threshold_m=50
reference_percentage_threshold_m=5
reference_read_depth_threshold_h=50
reference_percentage_threshold_h=5
# The default is very strict; loosen it if you want a more lax definition of
# '(hydroxy)methylated'.
binomial_threshold=1.7e-8
# This is the bin size that will be used by ChromHMM. Ensure you use the same
# value when running ChromHMM (the ChromHMM default is also 200bp).
BIN_SIZE=200
# File path to the chromosome sizes for your assembly.
# This file is expected to be of the form "chromosome \t size".
# These files are included in the installation of ChromHMM; not using those
# files might cause some errors.
chromosome_sizes=
# A single value, in quotes:
# m -> methylation, h -> hydroxymethylation
mark="m"
# Specifically for extracting 5hmC: this is the significance level to use with
# confidence intervals. Please see README.md for more information.
# WARNING: Using anything stricter than a 99% confidence interval will
# result in zero hydroxymethylation signal unless you have a very high average
# read depth.
significance_level=0.05
# The threshold used by the beta distribution when determining densely
# methylated bins
beta_threshold=0.001
## --------------------- ##
## CHIP_SEQ BINARIZATION ##
## --------------------- ##
# This will appear as the epigenetic mark name in the resultant binary file
epigenetic_mark_name=
# NOTE(review): no description was provided for input_MACS_file; presumably the
# path to the MACS output file to be binarized -- confirm against the
# binarization script.
input_MACS_file=
## ------------- ##
## SUPPLEMENTARY ##
## ------------- ##
# Bed file you wish to analyse further in the supplementary scripts.
# Defaults to the ONT bed file set in ONT_bed_file_location.
bed_file_location="${ONT_bed_file_location}"
## ......................... ##
## Plotting error rates ##
## ......................... ##
# In some datasets, the maximum value for the read depth of a single site
# can be in the thousands. However, most sites are well below this number, so
# putting a maximum value here helps with the interpretability of plots.
max_N_value=750
## .................. ##
## CpG robustness ##
## .................. ##
# Minimum distance between CpGs that you would consider 'close proximity'
min_distance=0
# Maximum distance between CpGs that you would consider 'close proximity'
max_distance=30
## ============== ##
## PARAMETERS ##
## ============== ##
# Paths derived from BASE_DIR and REPO_DIR. Ideally these don't need to be
# touched, but you can change them if you wish.
# LOG_DIR is quoted consistently with the other derived paths below.
LOG_DIR="${BASE_DIR}/LogFiles"
RSCRIPT_DIR="${REPO_DIR}/Rscripts"
FUNCTIONS_DIR="${REPO_DIR}/functions"
AWK_DIR="${REPO_DIR}/AWKscripts"
BINARY_DIR="${BASE_DIR}/BinaryFiles"
# Change to 1 to keep certain intermediate files and print debug logs
DEBUG_MODE=0