Skip to content

Commit 70a0ec5

Browse files
committed
Update LICENSE
1 parent 3195a37 commit 70a0ec5

7 files changed

Lines changed: 876 additions & 36 deletions

File tree

LICENSE

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
The MIT License (MIT)
2+
13
Copyright (c) 2017 Christof Angermueller
24

35
Permission is hereby granted, free of charge, to any person obtaining a copy

deepcpg/models/cpg.py

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
"""CpG models.
22
3-
Provides models trained observed neighboring methylation states of multiple
3+
Provides models trained with observed neighboring methylation states of multiple
44
cells.
55
"""
66

@@ -18,6 +18,7 @@
1818

1919

2020
class CpgModel(Model):
21+
"""Abstract class of a CpG model."""
2122

2223
def __init__(self, *args, **kwargs):
2324
super(CpgModel, self).__init__(*args, **kwargs)
@@ -34,8 +35,12 @@ def _merge_inputs(self, inputs):
3435
return kl.merge(inputs, mode='concat', concat_axis=2)
3536

3637

37-
class DenseAvg(CpgModel):
38-
"""54000 params"""
38+
class FcAvg(CpgModel):
39+
"""Fully-connected layer followed by global average layer.
40+
41+
Parameters: 54,000
42+
Specification: fc[512]_gap
43+
"""
3944

4045
def _replicate_model(self, input):
4146
w_reg = kr.WeightRegularizer(l1=self.l1_decay, l2=self.l2_decay)
@@ -57,7 +62,11 @@ def __call__(self, inputs):
5762

5863

5964
class RnnL1(CpgModel):
60-
"""810000 parameters"""
65+
"""Bidirectional GRU with one layer.
66+
67+
Parameters: 810,000
68+
Specification: fc[256]_bgru[256]_do
69+
"""
6170

6271
def __init__(self, act_replicate='relu', *args, **kwargs):
6372
super(RnnL1, self).__init__(*args, **kwargs)
@@ -85,7 +94,11 @@ def __call__(self, inputs):
8594

8695

8796
class RnnL2(RnnL1):
88-
"""1112069 params"""
97+
"""Bidirectional GRU with two layers.
98+
99+
Parameters: 1,100,000
100+
Specification: fc[256]_bgru[128]_bgru[256]_do
101+
"""
89102

90103
def __call__(self, inputs):
91104
x = self._merge_inputs(inputs)

deepcpg/models/dna.py

Lines changed: 33 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -27,10 +27,10 @@ def inputs(self, dna_wlen):
2727

2828

2929
class CnnL1h128(DnaModel):
30-
"""CNN with one convolutional and one hidden layer with 128 units.
30+
"""CNN with one convolutional and one fully-connected layer with 128 units.
3131
32-
Specification: conv[128@11]_mp[4]_fc[128]_do[0.0]
33-
Parameters: 4.100.000
32+
Parameters: 4,100,000
33+
Specification: conv[128@11]_mp[4]_fc[128]_do
3434
"""
3535

3636
def __init__(self, nb_hidden=128, *args, **kwargs):
@@ -56,10 +56,10 @@ def __call__(self, inputs):
5656

5757

5858
class CnnL1h256(CnnL1h128):
59-
"""CNN with one convolutional and one hidden layer with 256 units.
59+
"""CNN with one convolutional and one fully-connected layer with 256 units.
6060
61-
Specification: conv[128@11]_mp[4]_fc[256]_do[0.0]
62-
Parameters: 8.100.000
61+
Parameters: 8,100,000
62+
Specification: conv[128@11]_mp[4]_fc[256]_do
6363
"""
6464

6565
def __init__(self, *args, **kwargs):
@@ -68,10 +68,10 @@ def __init__(self, *args, **kwargs):
6868

6969

7070
class CnnL2h128(DnaModel):
71-
"""CNN with two convolutional and one hidden layer with 128 units.
71+
"""CNN with two convolutional and one fully-connected layer with 128 units.
7272
73-
Specification: conv[128@11]_mp[4]_conv[256@3]_mp[2]_fc[128]_do[0.0]
74-
Parameters: 4.100.000
73+
Parameters: 4,100,000
74+
Specification: conv[128@11]_mp[4]_conv[256@3]_mp[2]_fc[128]_do
7575
"""
7676

7777
def __init__(self, nb_hidden=128, *args, **kwargs):
@@ -102,10 +102,10 @@ def __call__(self, inputs):
102102

103103

104104
class CnnL2h256(CnnL2h128):
105-
"""CNN with two convolutional and one hidden layer with 256 units.
105+
"""CNN with two convolutional and one fully-connected layer with 256 units.
106106
107-
Specification: conv[128@11]_mp[4]_conv[256@3]_mp[2]_fc[256]_do[0.0]
108-
Parameters: 8.100.000
107+
Parameters: 8,100,000
108+
Specification: conv[128@11]_mp[4]_conv[256@3]_mp[2]_fc[256]_do
109109
"""
110110

111111
def __init__(self, *args, **kwargs):
@@ -114,11 +114,11 @@ def __init__(self, *args, **kwargs):
114114

115115

116116
class CnnL3h128(DnaModel):
117-
"""CNN with three convolutional and one hidden layer with 128 units.
117+
"""CNN with three convolutional and one fully-connected layer with 128 units.
118118
119+
Parameters: 4,400,000
119120
Specification: conv[128@11]_mp[4]_conv[256@3]_mp[2]_conv[512@3]_mp[2]_
120-
fc[128]_do[0.0]
121-
Parameters: 4.400.000
121+
fc[128]_do
122122
"""
123123

124124
def __init__(self, nb_hidden=128, *args, **kwargs):
@@ -154,11 +154,11 @@ def __call__(self, inputs):
154154

155155

156156
class CnnL3h256(CnnL3h128):
157-
"""CNN with three convolutional and one hidden layer with 256 units.
157+
"""CNN with three convolutional and one fully-connected layer with 256 units.
158158
159+
Parameters: 8,300,000
159160
Specification: conv[128@11]_mp[4]_conv[256@3]_mp[2]_conv[512@3]_mp[2]_
160-
fc[256]_do[0.0]
161-
Parameters: 8.300.000
161+
fc[256]_do
162162
"""
163163

164164
def __init__(self, *args, **kwargs):
@@ -172,8 +172,9 @@ class CnnRnn01(DnaModel):
172172
Convolutional-recurrent model with two convolutional layers followed by a
173173
bidirectional GRU layer.
174174
175-
Specification: conv[128@11]_pool[4]_conv[256@7]_pool[4]_bGRU[256]_do[0.0]
176-
Parameters: 1.100.000"""
175+
Parameters: 1,100,000
176+
Specification: conv[128@11]_pool[4]_conv[256@7]_pool[4]_bgru[256]_do
177+
"""
177178

178179
def __call__(self, inputs):
179180
x = inputs[0]
@@ -196,9 +197,10 @@ def __call__(self, inputs):
196197

197198

198199
class ResNet01(DnaModel):
199-
"""Residual network with 3x2 bottleneck residual units.
200+
"""Residual network with bottleneck residual units.
200201
201-
Parameters: 1.700.000
202+
Parameters: 1,700,000
203+
Specification: conv[128@11]_mp[2]_resb[2x128|2x256|2x512|1x1024]_gap_do
202204
203205
He et al., 'Identity Mappings in Deep Residual Networks.'
204206
"""
@@ -289,9 +291,10 @@ def __call__(self, inputs):
289291

290292

291293
class ResNet02(ResNet01):
292-
"""Residual network with 3x3 bottleneck residual units.
294+
"""Residual network with bottleneck residual units.
293295
294-
Parameters: 2.000.000
296+
Parameters: 2,000,000
297+
Specification: conv[128@11]_mp[2]_resb[3x128|3x256|3x512|1x1024]_gap_do
295298
296299
He et al., 'Identity Mappings in Deep Residual Networks.'
297300
"""
@@ -333,9 +336,10 @@ def __call__(self, inputs):
333336

334337

335338
class ResConv01(ResNet01):
336-
"""Residual network with two convolutional layers in each residual units.
339+
"""Residual network with two convolutional layers in each residual unit.
337340
338-
Parameters: 2.800.000
341+
Parameters: 2,800,000
342+
Specification: conv[128@11]_mp[2]_resc[2x128|1x256|1x256|1x512]_gap_do
339343
340344
He et al., 'Identity Mappings in Deep Residual Networks.'
341345
"""
@@ -420,7 +424,8 @@ class ResAtrous01(DnaModel):
420424
units. Atrous convolutional layers allow to increase the receptive field and
421425
hence better model long-range dependencies.
422426
423-
Parameters: 2.000.000
427+
Parameters: 2,000,000
428+
Specification: conv[128@11]_mp[2]_resa[3x128|3x256|3x512|1x1024]_gap_do
424429
425430
He et al., 'Identity Mappings in Deep Residual Networks.'
426431
Yu and Koltun, 'Multi-Scale Context Aggregation by Dilated Convolutions.'
@@ -518,7 +523,7 @@ def __call__(self, inputs):
518523
def list_models():
519524
models = dict()
520525
for name, value in globals().items():
521-
if inspect.isclass(value) and name.lower().find('model') == 0:
526+
if inspect.isclass(value) and name.lower().find('model') == -1:
522527
models[name] = value
523528
return models
524529

deepcpg/models/joint.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,12 @@
1+
"""Joint models.
2+
3+
Provides models to join the features of the DNA and CpG models.
4+
"""
15
from __future__ import division
26
from __future__ import print_function
37

8+
import inspect
9+
410
from keras import layers as kl
511
from keras import models as km
612
from keras import regularizers as kr
@@ -39,12 +45,21 @@ def _build(self, models, layers=[]):
3945

4046

4147
class JointL0(JointModel):
48+
"""Concatenates inputs without trainable layers.
49+
50+
Parameters: 0
51+
"""
4252

4353
def __call__(self, models):
4454
return self._build(models)
4555

4656

4757
class JointL1h512(JointModel):
58+
"""One fully-connected layer with 512 units.
59+
60+
Parameters: 524,000
61+
Specification: fc[512]
62+
"""
4863

4964
def __init__(self, nb_layer=1, nb_hidden=512, *args, **kwargs):
5065
super(JointL1h512, self).__init__(*args, **kwargs)
@@ -64,18 +79,36 @@ def __call__(self, models):
6479

6580

6681
class JointL2h512(JointL1h512):
82+
"""Two fully-connected layers with 512 units.
83+
84+
Parameters: 786,000
85+
Specification: fc[512]_fc[512]
86+
"""
6787

6888
def __init__(self, *args, **kwargs):
6989
super(JointL2h512, self).__init__(*args, **kwargs)
7090
self.nb_layer = 2
7191

7292

7393
class JointL3h512(JointL1h512):
94+
"""Three fully-connected layers with 512 units.
95+
96+
Parameters: 1,000,000
97+
Specification: fc[512]_fc[512]_fc[512]
98+
"""
7499

75100
def __init__(self, *args, **kwargs):
76101
super(JointL3h512, self).__init__(*args, **kwargs)
77102
self.nb_layer = 3
78103

79104

105+
def list_models():
106+
models = dict()
107+
for name, value in globals().items():
108+
if inspect.isclass(value) and name.lower().find('model') == -1:
109+
models[name] = value
110+
return models
111+
112+
80113
def get(name):
81114
return get_from_module(name, globals())

docs/source/modules.md

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
# Module architectures
2+
3+
DeepCpG consists of a DNA module to recognize features in the DNA sequence, a CpG module to recognize features in the methylation neighborhood of multiple cells, and a joint module to combine the features from the DNA and CpG module.
4+
5+
DeepCpG provides different architectures for the DNA, CpG, and joint module. Architectures differ in the number of layers and neurons, and are hence more or less complex. More complex modules are usually more accurate, but more expensive to train. You can select a certain architecture using the `--dna_module`, `--cpg_module`, and `--joint_module` arguments of `dcpg_train.py`, for example:
6+
7+
```
8+
dcpg_train.py
9+
--dna_module CnnL2h128
10+
--cpg_module RnnL1
11+
--joint_module JointL2h512
12+
```
13+
14+
In the following, these layer specifications will be used:
15+
16+
| Specification | Description |
17+
|---------------|--------------------------------------------------------------------------|
18+
| conv[x@y] | Convolutional layer with x filters of size y |
19+
| mp[x] | Max-pooling layer with size x |
20+
| fc[x] | Fully-connected layer with x units |
21+
| do | Dropout layer |
22+
| bgru[x] | Bidirectional GRU with x units |
23+
| gap | Global average pooling layer |
24+
| resb[x,y,z] | Residual network with three bottleneck residual units of size x, y, z |
25+
| resc[x,y,z] | Residual network with three convolutional residual units of size x, y, z |
26+
| resa[x,y,z] | Residual network with three Atrous residual units of size x, y, z |
27+
28+
29+
## DNA modules
30+
31+
| Name | Parameters | Specification |
32+
|-------------|------------|-------------------------------------------------------------------|
33+
| CnnL1h128 | 4,100,000 | conv[128@11]_mp[4]_fc[128]_do |
34+
| CnnL1h256 | 8,100,000 | conv[128@11]_mp[4]_fc[256]_do |
35+
| CnnL2h128 | 4,100,000 | conv[128@11]_mp[4]_conv[256@3]_mp[2]_fc[128]_do |
36+
| CnnL2h256 | 8,100,000 | conv[128@11]_mp[4]_conv[256@3]_mp[2]_fc[256]_do |
37+
| CnnL3h128 | 4,400,000 | conv[128@11]_mp[4]_conv[256@3]_mp[2]_conv[512@3]_mp[2]_fc[128]_do |
38+
| CnnL3h256 | 8,300,000 | conv[128@11]_mp[4]_conv[256@3]_mp[2]_conv[512@3]_mp[2]_fc[256]_do |
39+
| CnnRnn01 | 1,100,000 | conv[128@11]_pool[4]_conv[256@7]_pool[4]_bgru[256]_do |
40+
| ResNet01 | 1,700,000 | conv[128@11]_mp[2]_resb[2x128|2x256|2x512|1x1024]_gap_do |
41+
| ResNet02 | 2,000,000 | conv[128@11]_mp[2]_resb[3x128|3x256|3x512|1x1024]_gap_do |
42+
| ResConv01 | 2,800,000 | conv[128@11]_mp[2]_resc[2x128|1x256|1x256|1x512]_gap_do |
43+
| ResAtrous01 | 2,000,000 | conv[128@11]_mp[2]_resa[3x128|3x256|3x512|1x1024]_gap_do |
44+
45+
The prefixes `Cnn`, `CnnRnn`, `ResNet`, `ResConv`, and `ResAtrous` denote the class of the DNA module.
46+
47+
Modules starting with `Cnn` are convolutional neural networks (CNNs). DeepCpG CNN architectures consist of a series of convolutional and max-pooling layers, which are followed by one fully-connected layer. Module `CnnLxhy` has `x` convolutional-pooling layers, and one fully-connected layer with `y` units. For example, `CnnL2h128` has two convolutional layers, and one fully-connected layer with 128 units. `CnnL3h256` has three convolutional layers and one fully-connected layer with 256 units. `CnnL1h128` is the fastest module, but modules with more layers and neurons usually perform better. In my experiments, `CnnL2h128` provided a good trade-off between performance and runtime, which I recommend as default.
48+
49+
`CnnRnn01` is a [convolutional-recurrent neural network](http://nar.oxfordjournals.org/content/44/11/e107). It consists of two convolutional-pooling layers, which are followed by a bidirectional recurrent neural network (RNN) with one layer and gated recurrent units (GRUs). `CnnRnn01` is slower than `Cnn` architectures and did not perform better in my experiments.
50+
51+
Modules starting with `ResNet` are [residual neural networks](https://arxiv.org/abs/1603.05027). ResNets are very deep networks with skip connections to improve the gradient flow and to allow learning how many layers to use. A residual network consists of multiple residual blocks, and each residual block consists of multiple residual units. Residual units have a bottleneck architecture with three convolutional layers to speed up computations. `ResNet01` and `ResNet02` have three residual blocks with two and three residual units, respectively. ResNets are slower than CNNs, but can perform better on large datasets.
52+
53+
Modules starting with `ResConv` are ResNets with modified residual units that have two convolutional layers instead of a bottleneck architecture. `ResConv` modules performed worse than `ResNet` modules in my experiments.
54+
55+
Modules starting with `ResAtrous` are ResNets with modified residual units that use [Atrous convolutional layers](http://arxiv.org/abs/1511.07122) instead of normal convolutional layers. Atrous convolutional layers have dilated filters, i.e. filters with 'holes', which allow scanning wider regions in the input sequence and thereby better capturing distant patterns in the DNA sequence. However, `ResAtrous` modules performed worse than `ResNet` modules in my experiments.
56+
57+
58+
## CpG modules
59+
60+
| Name | Parameters | Specification |
61+
|-------|------------|--------------------------------|
62+
| FcAvg | 54,000 | fc[512]_gap |
63+
| RnnL1 | 810,000 | fc[256]_bgru[256]_do |
64+
| RnnL2 | 1,100,000 | fc[256]_bgru[128]_bgru[256]_do |
65+
66+
`FcAvg` is a lightweight module with only 54,000 parameters, which first transforms observed neighboring CpG sites of all cells independently, and then averages the transformed features across cells. `FcAvg` is very fast, but performs worse than RNN modules.
67+
68+
`Rnn` modules consist of bidirectional recurrent neural networks (RNNs) with gated recurrent units (GRUs) to summarize the methylation neighborhood of cells in a more clever way than averaging. `RnnL1` consists of one fully-connected layer with 256 units to transform the methylation neighborhood of each cell independently, and one bidirectional GRU with 2x256 units to summarize the transformed methylation neighborhood of cells. `RnnL2` has two instead of one GRU layer. `RnnL1` is faster and performed as well as `RnnL2` in my experiments.
69+
70+
71+
## Joint modules
72+
73+
| Name | Parameters | Specification |
74+
|-------------|------------|----------------------------------------|
75+
| JointL0 | 0 | |
76+
| JointL1h512 | 524,000 | fc[512] |
77+
| JointL2h512 | 786,000 | fc[512]_fc[512] |
78+
| JointL3h512 | 1,000,000 | fc[512]_fc[512]_fc[512] |
79+
80+
Joint modules join the features from the DNA and CpG module. `JointL0` simply concatenates the features and has no learnable parameters (ultra fast). `JointLXh512` has `X` fully-connected layers with 512 neurons. Modules with more layers usually perform better, at the cost of a higher runtime. I recommend using `JointL2h512` or `JointL3h512`.

0 commit comments

Comments
 (0)