@@ -74,20 +74,20 @@ def _unfold(a: np.ndarray, window: int, axis: int):
7474
7575
7676def _get_hydrogen_atom_position (coord : np .ndarray ) -> np .ndarray :
77- """Fills in hydrogen atoms positions if they are abscent , under the
77+ """Fills in hydrogen atoms positions if they are absent , under the
7878 assumption that C-N-H and H-N-CA angles are perfect 120 degrees,
7979 and N-H bond length is 1.01 A.
8080
8181 Parameters
8282 ----------
8383 coord : np.ndarray
84- input coordinates in Angstrom, shape (n_atoms , 4, 3),
84+ input coordinates in Angstrom, shape (n_residues , 4, 3),
8585 where second axes corresponds to (N, CA, C, O) atom coordinates
8686
8787 Returns
8888 -------
8989 np.ndarray
90- coordinates of additional hydrogens, shape (n_atoms -1, 3)
90+ coordinates of additional hydrogens, shape (n_residues -1, 3)
9191
9292 .. versionadded:: 2.8.0
9393 """
@@ -118,6 +118,7 @@ def _get_hydrogen_atom_position(coord: np.ndarray) -> np.ndarray:
118118
119119def get_hbond_map (
120120 coord : np .ndarray ,
121+ donor_mask : np .ndarray = None ,
121122 cutoff : float = DEFAULT_CUTOFF ,
122123 margin : float = DEFAULT_MARGIN ,
123124 return_e : bool = False ,
@@ -128,8 +129,15 @@ def get_hbond_map(
128129 ----------
129130 coord : np.ndarray
130131 input coordinates in either (n, 4, 3) or (n, 5, 3) shape
131- (without or with hydrogens). If hydrogens are not present, then
132- ideal positions (see :func:_get_hydrogen_atom_positions) are used.
132+ (without or with hydrogens respectively), where ``n`` is number of residues.
133+ If hydrogens are not present, then ideal positions (see :func:_get_hydrogen_atom_positions)
134+ are used.
135+ donor_mask : np.array
136+ Mask out any hydrogens that should not be considered (in particular HN
137+ in PRO). If ``None`` then all H will be used (behavior up to 2.10.0).
138+
139+ .. versionadded:: 2.10.0
140+
133141 cutoff : float, optional
134142 cutoff, by default DEFAULT_CUTOFF
135143 margin : float, optional
@@ -144,8 +152,12 @@ def get_hbond_map(
144152
145153
146154 .. versionadded:: 2.8.0
155+
156+ .. versionchanged:: 2.10.0
157+ Support masking of hydrogen donors via `donor_mask` (especially needed
158+ for ignoring HN on proline residues). Backport of PRO fix from pydssp 0.9.1.
147159 """
148- n_atoms , n_atom_types , _ = coord .shape
160+ n_residues , n_atom_types , _xyz = coord .shape
149161 assert n_atom_types in (
150162 4 ,
151163 5 ,
@@ -161,13 +173,13 @@ def get_hbond_map(
161173 "Number of atoms should be 4 (N,CA,C,O) or 5 (N,CA,C,O,H)"
162174 )
163175 # after this:
164- # h.shape == (n_atoms , 3)
165- # coord.shape == (n_atoms , 4, 3)
176+ # h.shape == (n_residues , 3)
177+ # coord.shape == (n_residues , 4, 3)
166178
167179 # distance matrix
168180 n_1 , c_0 , o_0 = coord [1 :, 0 ], coord [0 :- 1 , 2 ], coord [0 :- 1 , 3 ]
169181
170- n = n_atoms - 1
182+ n = n_residues - 1
171183 cmap = np .tile (c_0 , (n , 1 , 1 ))
172184 omap = np .tile (o_0 , (n , 1 , 1 ))
173185 nmap = np .tile (n_1 , (1 , 1 , n )).reshape (n , n , 3 )
@@ -191,18 +203,32 @@ def get_hbond_map(
191203 return e
192204
193205 # mask for local pairs (i,i), (i,i+1), (i,i+2)
194- local_mask = ~ np .eye (n_atoms , dtype = bool )
195- local_mask *= ~ np .diag (np .ones (n_atoms - 1 , dtype = bool ), k = - 1 )
196- local_mask *= ~ np .diag (np .ones (n_atoms - 2 , dtype = bool ), k = - 2 )
206+ local_mask = ~ np .eye (n_residues , dtype = bool )
207+ local_mask *= ~ np .diag (np .ones (n_residues - 1 , dtype = bool ), k = - 1 )
208+ local_mask *= ~ np .diag (np .ones (n_residues - 2 , dtype = bool ), k = - 2 )
209+ # mask for donor H absence (Proline)
210+ donor_mask = (
211+ np .array (donor_mask ).astype (float )
212+ if donor_mask is not None
213+ else np .ones (n_residues , dtype = float )
214+ )
215+ donor_mask = np .tile (donor_mask [:, np .newaxis ], (1 , n_residues ))
197216 # hydrogen bond map (continuous value extension of original definition)
198217 hbond_map = np .clip (cutoff - margin - e , a_min = - margin , a_max = margin )
199218 hbond_map = (np .sin (hbond_map / margin * np .pi / 2 ) + 1.0 ) / 2
200- hbond_map = hbond_map * local_mask
219+
220+ assert hbond_map .shape == local_mask .shape == donor_mask .shape
221+
222+ hbond_map *= local_mask
223+ hbond_map *= donor_mask
201224
202225 return hbond_map
203226
204227
205- def assign (coord : np .ndarray ) -> np .ndarray :
228+ def assign (
229+ coord : np .ndarray ,
230+ donor_mask : np .ndarray = None ,
231+ ) -> np .ndarray :
206232 """Assigns secondary structure for a given coordinate array,
207233 either with or without assigned hydrogens
208234
@@ -214,6 +240,12 @@ def assign(coord: np.ndarray) -> np.ndarray:
214240 (N, CA, C, O) atoms coordinates (if k=4), or (N, CA, C, O, H) coordinates
215241 (when k=5).
216242
243+ donor_mask : np.array
244+ Mask out any hydrogens that should not be considered (in particular HN
245+ in PRO). If ``None`` then all H will be used (behavior up to 2.9.0).
246+
247+ .. versionadded:: 2.10.0
248+
217249 Returns
218250 -------
219251 np.ndarray
@@ -222,9 +254,13 @@ def assign(coord: np.ndarray) -> np.ndarray:
222254
223255
224256 .. versionadded:: 2.8.0
257+
258+ .. versionchanged:: 2.10.0
259+ Support masking of donors.
260+
225261 """
226262 # get hydrogen bond map
227- hbmap = get_hbond_map (coord )
263+ hbmap = get_hbond_map (coord , donor_mask = donor_mask )
228264 hbmap = np .swapaxes (hbmap , - 1 , - 2 ) # convert into "i:C=O, j:N-H" form
229265
230266 # identify turn 3, 4, 5
0 commit comments