diff --git a/docs/faq.rst b/docs/faq.rst index 3badf038..d62eb1b1 100644 --- a/docs/faq.rst +++ b/docs/faq.rst @@ -27,10 +27,10 @@ imaged planets. A detailed description of how τ is related to other quantities Our goal with the default prior is to have an isotropic distribution of the orbital plane. To obtain this, we use inclination and position angle of the ascending node (PAN) to -define the orbital plane. They actually coorespond to the two angles in a spherical coordinate system: -inclinaion is the polar angle and PAN is the azimuthal angle. Becuase of this choice of coordinates, +define the orbital plane. They actually correspond to the two angles in a spherical coordinate system: +inclination is the polar angle and PAN is the azimuthal angle. Because of this choice of coordinates, there are less orbital configurations near the poles (when inclination is near 0 or 180), so we use -a sine prior to downweigh those areas to give us an isotropic distribution. +a sine prior to downweight those areas to give us an isotropic distribution. A more detailed discussion of this is here: .. toctree:: diff --git a/orbitize/driver.py b/orbitize/driver.py index f92b2cdf..ebcbdd01 100644 --- a/orbitize/driver.py +++ b/orbitize/driver.py @@ -16,7 +16,9 @@ class Driver(object): input_data: Either a relative path to data file or astropy.table.Table object in the orbitize format. See ``orbitize.read_input`` sampler_str (str): algorithm to use for orbit computation. "MCMC" for - Markov Chain Monte Carlo, "OFTI" for Orbits for the Impatient + Markov Chain Monte Carlo, "OFTI" for Orbits for the Impatient, + "NestedSampler" for nested sampling with Dynesty, or "MultiNest" + for nested sampling with (Py)MultiNest. num_secondary_bodies (int): number of secondary bodies in the system. Should be at least 1. stellar_or_system_mass (float): mass of the primary star (if fitting for diff --git a/orbitize/results.py b/orbitize/results.py index cecacaff..12500e24 100644 --- a/orbitize/results.py +++ b/orbitize/results.py @@ -45,6 +45,8 @@ def __init__( self.lnlike = lnlike self.curr_pos = curr_pos self.version_number = version_number + self.ln_evidence = None + self.ln_evidence_err = None if self.system is not None: self.tau_ref_epoch = self.system.tau_ref_epoch @@ -114,6 +116,12 @@ def save_results(self, filename): hf.attrs['sampler_name'] = self.sampler_name hf.attrs['version_number'] = self.version_number + if self.ln_evidence is not None: + hf.attrs['ln_evidence'] = self.ln_evidence + + if self.ln_evidence_err is not None: + hf.attrs['ln_evidence_err'] = self.ln_evidence_err + # Now add post and lnlike from the results object as datasets hf.create_dataset('post', data=self.post) # hf.create_dataset('data', data=self.data) @@ -147,19 +155,18 @@ def load_results(self, filename, append=False): hf = h5py.File(filename, 'r') # Opens file for reading # Load up each dataset from hdf5 file sampler_name = str(hf.attrs['sampler_name']) - try: + if 'version_number' in hf.attrs: version_number = str(hf.attrs['version_number']) - except KeyError: + else: version_number = "<= 1.13" post = np.array(hf.get('post')) lnlike = np.array(hf.get('lnlike')) - - try: + if 'num_secondary_bodies' in hf.attrs: num_secondary_bodies = int(hf.attrs['num_secondary_bodies']) - except KeyError: + else: # old, has to be single planet fit - num_secondary_bodies = 1 + num_secondary_bodies = 1 try: data_table = table.Table(np.array(hf.get('data'))) diff --git a/orbitize/sampler.py b/orbitize/sampler.py index 5ee50f21..5fdfba0c 100644 --- a/orbitize/sampler.py +++ b/orbitize/sampler.py @@ -3,6 +3,7 @@ import astropy.constants as consts import abc import time +import warnings from astropy.time import Time import dynesty @@ -1314,13 +1315,35 @@ def check_prior_support(self): class NestedSampler(Sampler): """ - Implements nested sampling using Dynesty package. + Implements nested sampling using the Dynesty package. + + Args: + system (system.System): system.System object + chi2_type (str, optional): either "standard", or "log" + like (str): name of likelihood function in ``lnlike.py`` + custom_lnlike (func): ability to include an addition custom likelihood + function in the fit. The function looks like + ``clnlikes = custon_lnlike(params)`` where ``params`` is a RxM array + of fitting parameters, where R is the number of orbital paramters + (can be passed in system.compute_model()), and M is the number of + orbits we need model predictions for. It returns ``clnlikes`` + which is an array of length M, or it can be a single float if M = 1. Thea McKenna, Sarah Blunt, & Lea Hirsch 2024 """ - def __init__(self, system): - super(NestedSampler, self).__init__(system) + def __init__(self, + system, + chi2_type="standard", + like="chi2_lnlike", + custom_lnlike=None, + ): + super(NestedSampler, self).__init__( + system, + like=like, + chi2_type=chi2_type, + custom_lnlike=custom_lnlike, + ) # create an empty results object self.results = orbitize.results.Results( @@ -1346,14 +1369,16 @@ def ptform(self, u): """ utform = np.zeros(len(u)) for i in range(len(u)): - try: + if hasattr(self.system.sys_priors[i], 'transform_samples'): utform[i] = self.system.sys_priors[i].transform_samples(u[i]) - except AttributeError: # prior is a fixed number + else: + # prior is a fixed number utform[i] = self.system.sys_priors[i] return utform def run_sampler( self, + nlive=500, static=False, bound="multi", pfrac=1.0, @@ -1364,6 +1389,11 @@ def run_sampler( """Runs the nested sampler from the Dynesty package. Args: + nlive (int): Number of live points. A larger numbers results + in a more finely sampled posterior and a more accurate + evidence, but also a larger number of iterations is + required to converge (default: 500). The value is only + used by the static sampler (i.e. with static=True). static (bool): True if using static nested sampling, False if using dynamic. bound (str): Method used to approximately bound the prior @@ -1391,10 +1421,9 @@ def run_sampler( """ mp.set_start_method(start_method, force=True) - if static and pfrac != 1.0: - raise ValueError( - """The static nested sampler does not take alternate values for pfrac.""" - ) + + if not static: + run_nested_kwargs['wt_kwargs'] = {"pfrac": pfrac} if num_threads > 1: with dynesty.pool.Pool(num_threads, self._logl, self.ptform) as pool: @@ -1406,8 +1435,8 @@ def run_sampler( pool=pool, bound=bound, bootstrap=False, + nlive=nlive, ) - self.dynesty_sampler.run_nested(**run_nested_kwargs) else: self.dynesty_sampler = dynesty.DynamicNestedSampler( pool.loglike, @@ -1416,10 +1445,10 @@ def run_sampler( pool=pool, bound=bound, bootstrap=False, + nlive=None, ) - self.dynesty_sampler.run_nested( - wt_kwargs={"pfrac": pfrac}, **run_nested_kwargs - ) + self.dynesty_sampler.run_nested(**run_nested_kwargs) + else: if static: self.dynesty_sampler = dynesty.NestedSampler( @@ -1427,23 +1456,223 @@ def run_sampler( self.ptform, len(self.system.sys_priors), bound=bound, + nlive=nlive, ) - self.dynesty_sampler.run_nested(**run_nested_kwargs) else: self.dynesty_sampler = dynesty.DynamicNestedSampler( self._logl, self.ptform, len(self.system.sys_priors), bound=bound, + nlive=None, ) - self.dynesty_sampler.run_nested( - wt_kwargs={"pfrac": pfrac}, **run_nested_kwargs - ) + self.dynesty_sampler.run_nested(**run_nested_kwargs) self.results.add_samples( self.dynesty_sampler.results["samples"], self.dynesty_sampler.results["logl"], ) - num_iter = self.dynesty_sampler.results["niter"] - return self.dynesty_sampler.results["samples"], num_iter + self.results.ln_evidence = self.dynesty_sampler.results["logz"][-1] + self.results.ln_evidence_err = self.dynesty_sampler.results["logzerr"][-1] + + return self.dynesty_sampler.results["samples"], self.dynesty_sampler.results["niter"] + + +class MultiNest(Sampler): + """ + Implements nested sampling using the (Py)MultiNest package. In order + to use this sampler, MultiNest should be `manually compiled + `_. + The sampler supports multiprocessing with MPI, which requires the + installation of mpi4py (e.g. as "pip install mpi4py"). + + Args: + system (system.System): system.System object + chi2_type (str, optional): either "standard", or "log" + like (str): name of likelihood function in ``lnlike.py`` + custom_lnlike (func): ability to include an addition custom likelihood + function in the fit. The function looks like + ``clnlikes = custon_lnlike(params)`` where ``params`` is a RxM array + of fitting parameters, where R is the number of orbital paramters + (can be passed in system.compute_model()), and M is the number of + orbits we need model predictions for. It returns ``clnlikes`` + which is an array of length M, or it can be a single float if M = 1. + + Tomas Stolker 2024 + """ + + def __init__(self, + system, + chi2_type="standard", + like="chi2_lnlike", + custom_lnlike=None, + ): + super(MultiNest, self).__init__( + system, + like=like, + chi2_type=chi2_type, + custom_lnlike=custom_lnlike, + ) + + # create an empty results object + self.results = orbitize.results.Results( + self.system, + sampler_name=self.__class__.__name__, + post=None, + lnlike=None, + version_number=orbitize.__version__, + ) + + def run_sampler( + self, + n_live_points=500, + output_basename='./multinest', + hdf5_file=None, + multinest_kwargs=None, + ): + """Runs the nested sampler from the (Py)MultiNest package. + + Args: + n_live_points (int): Number of live points. A larger numbers results + in a more finely sampled posterior and a more accurate + evidence, but also a larger number of iterations is + required to converge (default: 500). + output_basename (str): Basename for the MultiNest output. + Can be a folder and/or prefix for the filenames. Any + (sub)folder should first be manually created. + hdf5_file (str): HDF5 filename in which the results are stored. + Setting the argument will store the results by calling the + save_results method of the Results objects. This parameter + was implemented because calling save_results separately + after the sampling has finished, may cause an error when + using MPI because only one process should write the results. + The results are not stored if the argument is set to None. + multinest_kwargs (dict): dictionary of keywords that will be + passed to pymultinest.run(). + + Returns: + numpy.array of float: posterior samples + """ + + # Import here because it will otherwise give a warning + # if the compiled MultiNest library is not found + # when importing orbitize + import pymultinest + + # Number of parameters to fit + n_parameters = len(self.system.labels) + + # Create empty dictionary if needed + + if multinest_kwargs is None: + multinest_kwargs = {} + + # Add the resume parameter + + if "resume" not in multinest_kwargs: + multinest_kwargs["resume"] = False + + # Check multinest_kwargs keywords + + if "n_live_points" in multinest_kwargs: + warnings.warn( + "Please specify the number of live points " + "as argument of 'n_live_points' instead " + "of using 'multinest_kwargs'." + ) + + del multinest_kwargs["n_live_points"] + + if "outputfiles_basename" in multinest_kwargs: + warnings.warn( + "Please use the 'output_basename' " + "parameter instead of setting the " + "value of 'outputfiles_basename' " + "in 'multinest_kwargs'." + ) + + del multinest_kwargs["outputfiles_basename"] + + def _logprior_multinest(param_cube, n_dim, n_param): + """ + Parameters + ---------- + param_cube : LP_c_double + Unit cube. + n_dim : int + Number of dimensions. This parameter is mandatory. + n_param : int + Number of parameters. This parameter is mandatory. + + Returns + ------- + LP_c_double + Parameter cube. + """ + + for i in range(n_param): + if hasattr(self.system.sys_priors[i], 'transform_samples'): + param_cube[i] = self.system.sys_priors[i].transform_samples(param_cube[i]) + else: + # The prior is a fixed number + param_cube[i] = self.system.sys_priors[i] + + return param_cube + + def _loglike_multinest(param_cube, n_dim, n_param): + """ + Parameters + ---------- + param_cube : LP_c_double + Parameter cube. + n_dim : int + Number of dimensions. This parameter is mandatory. + n_param : int + Number of parameters. This parameter is mandatory. + + Returns + ------- + float + Log-likelihood. + """ + + # Convert LP_c_double to np.float64 + param_array = np.zeros(n_param) + for i in range(n_param): + param_array[i] = param_cube[i] + + return self._logl(param_array) + + pymultinest.run( + _loglike_multinest, + _logprior_multinest, + n_parameters, + outputfiles_basename=output_basename, + n_live_points=n_live_points, + **multinest_kwargs, + ) + + analyzer = pymultinest.analyse.Analyzer( + n_parameters, outputfiles_basename=output_basename) + + sampling_stats = analyzer.get_stats() + post_samples = analyzer.get_equal_weighted_posterior() + + # The log-likelihood is stored in the last column + self.results.add_samples(post_samples[:, :-1], post_samples[:, -1]) + self.results.ln_evidence = sampling_stats["nested sampling global log-evidence"] + self.results.ln_evidence_err = sampling_stats["nested sampling global log-evidence error"] + + # Get the MPI rank of the process + try: + from mpi4py import MPI + mpi_rank = MPI.COMM_WORLD.Get_rank() + except ModuleNotFoundError: + mpi_rank = 0 + + if hdf5_file is not None and mpi_rank == 0: + # Only a single process should write to the HDF5 file + self.results.save_results(filename=hdf5_file) + + return post_samples[:, :-1] diff --git a/requirements.txt b/requirements.txt index ddc47100..3af429eb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -16,3 +16,4 @@ sphinx docutils<0.17 rebound dynesty +pymultinest diff --git a/tests/test_nested_sampler.py b/tests/test_nested_sampler.py index a6130c35..a00fb8ca 100644 --- a/tests/test_nested_sampler.py +++ b/tests/test_nested_sampler.py @@ -1,5 +1,5 @@ """ -Tests the NestedSampler class by fixing all parameters except for eccentricity. +Tests the NestedSampler and MultiNest classes by fixing all parameters except for eccentricity. """ from orbitize import system, sampler @@ -40,25 +40,56 @@ def test_nested_sampler(): # run both static & dynamic nested samplers mysampler = sampler.NestedSampler(sys) - _ = mysampler.run_sampler(bound="multi", pfrac=0.95, static=False, num_threads=8) + _ = mysampler.run_sampler(bound="multi", pfrac=0.95, static=False, num_threads=8, run_nested_kwargs={}) print("Finished first run!") dynamic_eccentricities = mysampler.results.post[:, lab["ecc1"]] assert np.median(dynamic_eccentricities) == pytest.approx(ecc, abs=0.1) - _ = mysampler.run_sampler(bound="multi", static=True, num_threads=8) + _ = mysampler.run_sampler(bound="multi", static=True, num_threads=8, run_nested_kwargs={}) print("Finished second run!") static_eccentricities = mysampler.results.post[:, lab["ecc1"]] assert np.median(static_eccentricities) == pytest.approx(ecc, abs=0.1) - # check that the static sampler raises an error when user tries to set pfrac - # for static sampler - try: - mysampler.run_sampler(pfrac=0.1, static=True) - except ValueError: - pass + +def test_multinest(): + # generate data + mtot = 1.2 # total system mass [M_sol] + plx = 60.0 # parallax [mas] + orbit_frac = 95 + data_table, sma = generate_synthetic_data( + orbit_frac, + mtot, + plx, + num_obs=30, + ) + + # assumed ecc value + ecc = 0.5 + + # initialize orbitize `System` object + sys = system.System(1, data_table, mtot, plx) + lab = sys.param_idx + + ecc = 0.5 # eccentricity + + # set all parameters except eccentricity to fixed values (same as used to generate data) + sys.sys_priors[lab["inc1"]] = np.pi / 4 + sys.sys_priors[lab["sma1"]] = sma + sys.sys_priors[lab["aop1"]] = np.pi / 4 + sys.sys_priors[lab["pan1"]] = np.pi / 4 + sys.sys_priors[lab["tau1"]] = 0.8 + sys.sys_priors[lab["plx"]] = plx + sys.sys_priors[lab["mtot"]] = mtot + + # running the actual sampler is not possible without compiling MultiNest + mysampler = sampler.MultiNest(sys) + assert hasattr(mysampler, "run_sampler") + assert hasattr(mysampler, "results") + assert hasattr(mysampler, "system") if __name__ == "__main__": test_nested_sampler() + test_multinest()