Implement exact tps baseline

plainerman · plainerman · commit 673b6d9c95c5 · 2024-08-06T21:07:40.000+02:00
diff --git a/tps/second_order.py b/tps/second_order.py
@@ -2,6 +2,8 @@
 import jax.numpy as jnp
 from tqdm import tqdm
 
+from utils.plot import human_format
+
 MAX_STEPS = 2_000
 MAX_ABS_VALUE = 5
 
@@ -153,7 +155,8 @@ def two_way_shooting(system, trajectory, _previous_velocities, fixed_length, _dt
     return False, new_trajectory, new_velocities
 
 
-def mcmc_shooting(system, proposal, initial_trajectory, num_paths, dt, key, fixed_length=0, warmup=50, stored=None):
+def mcmc_shooting(system, proposal, initial_trajectory, num_paths, dt, key, fixed_length=0, warmup=50, stored=None,
+                  max_force_evaluations=10 ** 10):
     # pick an initial trajectory
     trajectories = [initial_trajectory]
     velocities = []
@@ -177,6 +180,7 @@ def mcmc_shooting(system, proposal, initial_trajectory, num_paths, dt, key, fixe
     num_tries = 0
     num_force_evaluations = 0
     num_metropolis_rejected = 0
+    total_num_force_evaluations = sum(statistics['num_force_evaluations'])
     try:
         with tqdm(total=num_paths + warmup, initial=len(trajectories) - 1,
                   desc='warming up' if warmup > 0 else '') as pbar:
@@ -197,6 +201,9 @@ def mcmc_shooting(system, proposal, initial_trajectory, num_paths, dt, key, fixe
                                                                      trajectories) > 1 else None,
                                                                  fixed_length, dt, ikey)
                 num_force_evaluations += len(new_trajectory) - 1
+                total_num_force_evaluations += len(new_trajectory) - 1
+
+                pbar.set_postfix({'total_force_evaluations': human_format(total_num_force_evaluations)})
 
                 if not found:
                     continue
@@ -218,6 +225,10 @@ def mcmc_shooting(system, proposal, initial_trajectory, num_paths, dt, key, fixe
                     pbar.update(1)
                 else:
                     num_metropolis_rejected += 1
+
+                if total_num_force_evaluations > max_force_evaluations:
+                    print('Max force evaluations reached, stopping early')
+                    break
     except KeyboardInterrupt:
         print('SIGINT received, stopping early')
         # Fix in case we stop when adding a trajectory
diff --git a/tps_baseline.py b/tps_baseline.py
@@ -20,14 +20,14 @@
 
 from utils.angles import phi_psi_from_mdtraj
 from utils.animation import save_trajectory, to_md_traj
-from utils.plot import show_or_save_fig
+from utils.plot import show_or_save_fig, human_format
 from utils.rmsd import kabsch_align, kabsch_rmsd
 
 from argparse import ArgumentParser
 
 parser = ArgumentParser()
 parser.add_argument('--mechanism', type=str, choices=['one-way-shooting', 'two-way-shooting'], required=True)
-parser.add_argument('--states', type=str, default='phi-psi', choices=['phi-psi', 'rmsd'])
+parser.add_argument('--states', type=str, default='phi-psi', choices=['phi-psi', 'rmsd', 'exact'])
 parser.add_argument('--fixed_length', type=int, default=0)
 parser.add_argument('--warmup', type=int, default=0)
 parser.add_argument('--num_paths', type=int, required=True)
@@ -39,23 +39,6 @@
                     help='Ensure that the initial path connects A with B by prepending A and appending B.')
 
 
-def human_format(num):
-    """https://stackoverflow.com/a/45846841/4417954"""
-    num = float('{:.3g}'.format(num))
-    if num >= 1:
-        magnitude = 0
-        while abs(num) >= 1000:
-            magnitude += 1
-            num /= 1000.0
-        return '{}{}'.format('{:f}'.format(num).rstrip('0').rstrip('.'), ['', 'K', 'M', 'B', 'T'][magnitude])
-    else:
-        magnitude = 0
-        while abs(num) < 1:
-            magnitude += 1
-            num *= 1000.0
-        return '{}{}'.format('{:f}'.format(num).rstrip('0').rstrip('.'), ['', 'm', 'µ', 'n', 'p', 'f'][magnitude])
-
-
 dt_as_unit = unit.Quantity(value=1, unit=unit.femtosecond)
 dt_in_ps = dt_as_unit.value_in_unit(unit.picosecond)
 dt = dt_as_unit.value_in_unit(unit.second)
@@ -101,6 +84,8 @@ def step_n(step, _x, _v, n, _key):
         savedir += f'-{args.fixed_length}steps'
     if args.states == 'rmsd':
         savedir += '-rmsd'
+    elif args.states == 'exact':
+        savedir += '-exact'
 
     os.makedirs(savedir, exist_ok=True)
 
@@ -118,6 +103,7 @@ def U_padded(x):
         x = x_empty.at[:x.shape[0], :].set(x.reshape(-1, 66))
         return system.U(x)[:orig_length]
 
+
     @jax.jit
     def step(_x, _key):
         """Perform one step of forward euler"""
@@ -197,6 +183,19 @@ def langevin_log_path_likelihood(path_and_velocities):
         state_B = jax.jit(
             lambda s: is_within(phis_psis(s.reshape(-1, 22, 3)).reshape(-1, 2), phis_psis(system.B.reshape(-1, 22, 3)),
                                 radius))
+    elif args.states == 'exact':
+        from scipy.stats import chi2
+        percentile = 0.99
+        noise_scale = 1e-4
+        threshold = jnp.sqrt(chi2.ppf(percentile, system.A.shape[0]) * noise_scale)
+        print(threshold)
+        def kabsch_l2(A, B):
+            """Norm (``jnp.linalg.norm`` default) of the difference of the two arrays from ``kabsch_align(A, B)``."""
+            aligned_a, aligned_b = kabsch_align(A, B)
+            return jnp.linalg.norm(aligned_a - aligned_b)
+
+        state_A = jax.jit(jax.vmap(lambda s: kabsch_l2(system.A.reshape(22, 3), s.reshape(22, 3)) <= threshold))
+        state_B = jax.jit(jax.vmap(lambda s: kabsch_l2(system.B.reshape(22, 3), s.reshape(22, 3)) <= threshold))
     else:
         raise ValueError(f"Unknown states {args.states}")
 
@@ -216,9 +215,10 @@ def langevin_log_path_likelihood(path_and_velocities):
     save_trajectory(system.mdtraj_topology, jnp.array(initial_trajectory), f'{savedir}/initial_trajectory.pdb')
 
     if args.resume:
-        paths = [[x for x in p.astype(np.float32)] for p in np.load(f'{savedir}/paths.npy', allow_pickle=True)]
+        print('Loading stored data.')
+        paths = [[x for x in p.astype(np.float32)] for p in tqdm(np.load(f'{savedir}/paths.npy', allow_pickle=True))]
         velocities = [[v for v in p.astype(np.float32)] for p in
-                      np.load(f'{savedir}/velocities.npy', allow_pickle=True)]
+                      tqdm(np.load(f'{savedir}/velocities.npy', allow_pickle=True))]
         with open(f'{savedir}/stats.json', 'r') as fp:
             statistics = json.load(fp)
 
@@ -227,6 +227,8 @@ def langevin_log_path_likelihood(path_and_velocities):
             'velocities': velocities,
             'statistics': statistics
         }
+
+        print('Loaded', len(paths), 'paths.')
     else:
         if os.path.exists(f'{savedir}/paths.npy') and not args.override:
             print(f"The target directory is not empty.\n"
@@ -235,8 +237,8 @@ def langevin_log_path_likelihood(path_and_velocities):
 
         stored = None
 
-    assert ((tps_config.start_state(system.A) and tps_config.target_state(system.B))
-            or (tps_config.start_state(system.B) and tps_config.target_state(system.A))), \
+    assert ((tps_config.start_state(system.A.reshape(1, -1)) and tps_config.target_state(system.B.reshape(1, -1)))
+            or (tps_config.start_state(system.B.reshape(1, -1)) and tps_config.target_state(system.A.reshape(1, -1)))), \
         'A and B are not in the correct states. Please check your settings.'
 
     if args.mechanism == 'one-way-shooting':
@@ -258,14 +260,19 @@ def langevin_log_path_likelihood(path_and_velocities):
                                                            fixed_length=args.fixed_length,
                                                            stored=stored)
         # paths = tps2.unguided_md(tps_config, B, 1, key)
-        paths = [jnp.array(p) for p in paths]
-        velocities = [jnp.array(p) for p in velocities]
-        # store paths
-        np.save(f'{savedir}/paths.npy', np.array(paths, dtype=object), allow_pickle=True)
-        np.save(f'{savedir}/velocities.npy', np.array(velocities, dtype=object), allow_pickle=True)
-        # save statistics, which is a dictionary
-        with open(f'{savedir}/stats.json', 'w') as fp:
-            json.dump(statistics, fp)
+        print('Converting paths to jax.numpy arrays.')
+        paths = [jnp.array(p) for p in tqdm(paths)]
+        velocities = [jnp.array(p) for p in tqdm(velocities)]
+
+        if not args.resume:
+            # If we are resuming, everything is already stored
+            print('Storing paths ...')
+            np.save(f'{savedir}/paths.npy', np.array(paths, dtype=object), allow_pickle=True)
+            print('Storing velocities ...')
+            np.save(f'{savedir}/velocities.npy', np.array(velocities, dtype=object), allow_pickle=True)
+            # save statistics, which is a dictionary
+            with open(f'{savedir}/stats.json', 'w') as fp:
+                json.dump(statistics, fp)
     except Exception as e:
         print(traceback.format_exc())
         breakpoint()
@@ -280,8 +287,11 @@ def langevin_log_path_likelihood(path_and_velocities):
     if args.fixed_length == 0:
         print([len(p) for p in paths])
         plt.hist([len(p) for p in paths], bins=jnp.sqrt(len(paths)).astype(int).item())
-        plt.savefig(f'{savedir}/lengths.png', bbox_inches='tight')
-        plt.show()
+        show_or_save_fig(savedir, 'lengths', 'png')
+
+    max_energy = [jnp.max(U_padded(path)) for path in tqdm(paths)]
+    max_energy = np.array(max_energy)
+    np.save(f'{savedir}/max_energy.npy', max_energy)
 
     plt.title(f"{human_format(len(paths))} paths @ {temp} K, dt = {human_format(dt)}s")
     system.plot(trajectories=paths, alpha=0.7)
diff --git a/utils/plot.py b/utils/plot.py
@@ -12,6 +12,23 @@
 from flax.training.train_state import TrainState
 
 
+def human_format(num):
+    """Format ``num`` with 3 significant digits and a magnitude suffix (1234 -> '1.23K', 0.005 -> '5m').
+
+    Adapted from https://stackoverflow.com/a/45846841/4417954; sign is ignored when picking the suffix and
+    magnitudes beyond 'T' / below 'f' keep that last suffix."""
+    num = float('{:.3g}'.format(num))
+    if num == 0 or num != num or abs(num) == float('inf'):
+        return '{:f}'.format(num).rstrip('0').rstrip('.')  # 0, nan and inf can never be scaled into range
+    large = abs(num) >= 1
+    suffixes = ['', 'K', 'M', 'B', 'T'] if large else ['', 'm', 'µ', 'n', 'p', 'f']
+    magnitude = 0
+    while magnitude < len(suffixes) - 1 and (abs(num) >= 1000 if large else abs(num) < 1):
+        magnitude += 1
+        num = num / 1000.0 if large else num * 1000.0
+    return '{}{}'.format('{:f}'.format(num).rstrip('0').rstrip('.'), suffixes[magnitude])
+
+
 def log_scale(log_plot: bool, x: bool, y: bool):
     if log_plot:
         if x: