Skip to content
This repository has been archived by the owner on Sep 11, 2023. It is now read-only.

Commit

Permalink
Merge pull request #1252 from clonker/patches
Browse files Browse the repository at this point in the history
[coordinates] Disable random access optimization in patches
  • Loading branch information
marscher authored Feb 17, 2018
2 parents 78dae90 + 317dbce commit 62bab56
Show file tree
Hide file tree
Showing 4 changed files with 85 additions and 12 deletions.
4 changes: 2 additions & 2 deletions doc/source/CHANGELOG.rst
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
Changelog
=========

2.5.1 (02-14-2018)
2.5.1 (02-17-2018)
------------------

Quick fix release to repair chunking in the coordinates package.

**Fixes**:

- msm: fixed a bug in ImpliedTimescales that occurred when an estimation failed for a given lag time. #1248
- coordinates: fixed handling of default chunksize. #1247, #1251
- coordinates: fixed handling of default chunksize. #1247, #1251, #1252
- base: updated pybind to 2.2.2. #1249


Expand Down
75 changes: 72 additions & 3 deletions pyemma/coordinates/tests/test_coordinates_iterator.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
import tempfile
import unittest
import shutil
import os
from glob import glob
import numpy as np

from pyemma.coordinates.data import DataInMemory
from pyemma.util.contexts import settings
from pyemma.util.files import TemporaryDirectory
import os
from glob import glob



class TestCoordinatesIterator(unittest.TestCase):
Expand All @@ -15,6 +16,12 @@ class TestCoordinatesIterator(unittest.TestCase):
def setUpClass(cls):
cls.d = [np.random.random((100, 3)) for _ in range(3)]

def setUp(self):
    """Create a private scratch directory and store its path in ``self.tempdir``.

    ``tempfile.mkdtemp`` replaces the deprecated ``tempfile.mktemp``: the
    latter only returns a file name, so another process could claim that
    path before the test uses it. ``mkdtemp`` creates the directory
    atomically, readable and writable only by the current user.
    """
    self.tempdir = tempfile.mkdtemp()

def tearDown(self):
    """Remove whatever lives at ``self.tempdir`` once a test has finished."""
    scratch_path = self.tempdir
    # Best-effort cleanup: errors (e.g. the path was never created) are ignored.
    shutil.rmtree(scratch_path, ignore_errors=True)

def test_current_trajindex(self):
r = DataInMemory(self.d)
expected_itraj = 0
Expand Down Expand Up @@ -273,5 +280,67 @@ def test_invalid_data_in_input_inf(self):
for itraj, X in it:
pass

def test_lagged_iterator(self):
    """Check lagged iteration over generated trajectories for several chunk sizes.

    For every chunk setting, each yielded ``(X, Y)`` pair must have equal
    shapes, contain ``n_frames - tau`` frames, and match the un-lagged
    and lagged slices of the reference coordinates of that trajectory.
    """
    import pyemma.coordinates as coor
    from pyemma.coordinates.tests.util import create_traj, get_top

    n_frames = 4720
    tau = 1000
    n_files = 15
    n_expected = n_frames - tau

    top = get_top()
    files = []
    reference = []
    for _ in range(n_files):
        generated = create_traj(top=top, length=n_frames)
        files.append(generated[0])
        # Flatten the coordinates to one row of 9 values per frame.
        reference.append(generated[1].reshape(-1, 9))

    reader = coor.source(files, top=top, chunksize=5000)

    chunk_sizes = (None, 0, n_frames, n_frames + 1, n_frames + 1000)
    for chunk in chunk_sizes:
        it = reader.iterator(lag=tau, chunk=chunk, return_trajindex=True)
        with it:
            for itraj, X, Y in it:
                ref = reference[itraj]
                np.testing.assert_equal(X.shape, Y.shape)
                np.testing.assert_equal(X.shape[0], n_expected)
                np.testing.assert_array_almost_equal(X, ref[:n_expected])
                np.testing.assert_array_almost_equal(Y, ref[tau:])

def test_lagged_iterator_optimized(self):
    """Check strided, lagged iteration with a tiny ``iterload.MEMORY_CUTOFF``.

    The stride is one above ``iterload.MAX_STRIDE_SWITCH_TO_RA`` and the
    memory cutoff is lowered to 8 while iterating (presumably to steer
    ``iterload`` towards its random-access path -- confirm against
    ``patches.py``). Chunks are compared against the strided reference
    data, tracking a running frame offset per trajectory. The original
    cutoff is always restored afterwards.
    """
    import pyemma.coordinates as coor
    from pyemma.coordinates.tests.util import create_traj, get_top
    from pyemma.coordinates.util.patches import iterload

    n_frames = 4720
    tau = 20
    n_files = 15
    stride = iterload.MAX_STRIDE_SWITCH_TO_RA + 1

    top = get_top()
    files = []
    expected = []
    expected_lagged = []
    for _ in range(n_files):
        generated = create_traj(top=top, length=n_frames)
        flat = generated[1].reshape(-1, 9)
        files.append(generated[0])
        expected.append(flat[::stride])
        expected_lagged.append(flat[tau::stride])

    reader = coor.source(files, stride=stride, top=top, chunksize=5000)

    saved_cutoff = iterload.MEMORY_CUTOFF
    try:
        iterload.MEMORY_CUTOFF = 8
        it = reader.iterator(stride=stride, lag=tau, chunk=5000, return_trajindex=True)
        with it:
            active_traj = 0
            offset = 0
            for itraj, X, Y in it:
                # A new trajectory index restarts the frame offset.
                if itraj != active_traj:
                    active_traj = itraj
                    offset = 0
                np.testing.assert_equal(X.shape, Y.shape)
                stop = offset + len(X)
                np.testing.assert_array_almost_equal(X, expected[itraj][offset:stop])
                np.testing.assert_array_almost_equal(Y, expected_lagged[itraj][offset:stop])
                offset = stop
    finally:
        # Undo the class-level monkey patch even if an assertion failed.
        iterload.MEMORY_CUTOFF = saved_cutoff


if __name__ == '__main__':
unittest.main()
11 changes: 6 additions & 5 deletions pyemma/coordinates/tests/test_random_access_stride.py
Original file line number Diff line number Diff line change
Expand Up @@ -461,6 +461,8 @@ def test_fragmented_reader_random_access1(self):

def test_RA_high_stride(self):
""" ensure we use a random access pattern for high strides chunksize combinations to avoid memory issues."""
from pyemma.coordinates.util.patches import iterload

n=int(1e5)
n_bytes = 3*3*8*n # ~8Mb
savable_formats_mdtra_18 = (
Expand All @@ -475,24 +477,23 @@ def test_RA_high_stride(self):
r = coor.source(traj, top=get_top())
it = r.iterator(stride=1000, chunk=100000)
next(it)
assert it._mditer.is_ra_iter
assert iterload._DEACTIVATE_RANDOM_ACCESS_OPTIMIZATION or it._mditer.is_ra_iter

out_ra = r.get_output(stride=1000, chunk=10000)
it = r.iterator(stride=1)
next(it)
assert not it._mditer.is_ra_iter
assert iterload._DEACTIVATE_RANDOM_ACCESS_OPTIMIZATION or not it._mditer.is_ra_iter
out = r.get_output(stride=1000)
np.testing.assert_equal(out_ra, out)

# check max stride exceeding
from pyemma.coordinates.util.patches import iterload
it = r.iterator(stride=iterload.MAX_STRIDE_SWITCH_TO_RA+1)
next(it)
assert it._mditer.is_ra_iter
assert iterload._DEACTIVATE_RANDOM_ACCESS_OPTIMIZATION or it._mditer.is_ra_iter

it = r.iterator(stride=iterload.MAX_STRIDE_SWITCH_TO_RA)
next(it)
assert not it._mditer.is_ra_iter
assert iterload._DEACTIVATE_RANDOM_ACCESS_OPTIMIZATION or not it._mditer.is_ra_iter

if __name__ == '__main__':
unittest.main()
7 changes: 5 additions & 2 deletions pyemma/coordinates/util/patches.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,8 @@ class iterload(object):
MEMORY_CUTOFF = int(128 * 1024**2) # 128 MB
MAX_STRIDE_SWITCH_TO_RA = 20

_DEACTIVATE_RANDOM_ACCESS_OPTIMIZATION = True

def __init__(self, filename, trajlen, chunk=1000, **kwargs):
"""An iterator over a trajectory from one or more files on disk, in fragments
Expand Down Expand Up @@ -148,9 +150,10 @@ def __init__(self, filename, trajlen, chunk=1000, **kwargs):
else:
n_atoms = self._topology.n_atoms

if (self.is_ra_iter or
# temporarily(?) disable RA mode, test_lagged_iterator_optimized fails otherwise
if self.is_ra_iter or (not self._DEACTIVATE_RANDOM_ACCESS_OPTIMIZATION and (self.is_ra_iter or
self._stride > iterload.MAX_STRIDE_SWITCH_TO_RA or
(8 * self._chunksize * self._stride * n_atoms > iterload.MEMORY_CUTOFF)):
(8 * self._chunksize * self._stride * n_atoms > iterload.MEMORY_CUTOFF))):
self._mode = 'random_access'
self._f = (lambda x:
md_open(x, n_atoms=self._topology.n_atoms)
Expand Down

0 comments on commit 62bab56

Please sign in to comment.