Added support for nested sampling with Dynesty in FitModel
tomasstolker committed Feb 16, 2024
1 parent 3f07949 commit 404d9ef
Showing 4 changed files with 447 additions and 64 deletions.
65 changes: 31 additions & 34 deletions docs/tutorials/fitting_model_spectra.ipynb
@@ -89,7 +89,6 @@
"Configuration settings:\n",
" - Database: /Users/tomasstolker/applications/species/docs/tutorials/species_database.hdf5\n",
" - Data folder: /Users/tomasstolker/applications/species/docs/tutorials/data\n",
" - Interpolation method: linear\n",
" - Magnitude of Vega: 0.03\n",
"Creating species_database.hdf5... [DONE]\n",
"Creating data folder... [DONE]\n",
@@ -101,7 +100,7 @@
{
"data": {
"text/plain": [
"<species.core.species_init.SpeciesInit at 0x10ebfaa50>"
"<species.core.species_init.SpeciesInit at 0x197597ad0>"
]
},
"execution_count": 3,
@@ -128,7 +127,7 @@
{
"data": {
"text/plain": [
"('betapicb_gpi_h.dat', <http.client.HTTPMessage at 0x194ca0fd0>)"
"('betapicb_gpi_h.dat', <http.client.HTTPMessage at 0x1972d68d0>)"
]
},
"execution_count": 4,
@@ -205,7 +204,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
"100%|████████████████████████████████████████| 240M/240M [00:00<00:00, 142GB/s]\n",
"100%|████████████████████████████████████████| 240M/240M [00:00<00:00, 442GB/s]\n",
"SHA256 hash of downloaded file: ba71a5e4d3d399a6f8ae249590c2e174e90ec2b55e712d350dad8ca1ae83a907\n",
"Use this value as the 'known_hash' argument of 'pooch.retrieve' to ensure that the file hasn't changed if it is downloaded again in the future.\n"
]
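
The `known_hash` hint in the log above comes from the `pooch` download helper. As a minimal sketch of pinning a download against that hash (the URL and target path here are hypothetical; the hash is the one reported above):

```python
import pooch

# Verify the download against the previously reported SHA256 hash;
# a mismatch raises an error instead of silently using a changed file.
file_path = pooch.retrieve(
    url="https://example.org/grids/model_grid.tgz",  # hypothetical URL
    known_hash="ba71a5e4d3d399a6f8ae249590c2e174e90ec2b55e712d350dad8ca1ae83a907",
    fname="model_grid.tgz",
    path="./data",
    progressbar=True,
)
```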
@@ -311,7 +310,7 @@
"output_type": "stream",
"text": [
"Downloading data from 'https://archive.stsci.edu/hlsps/reference-atlases/cdbs/current_calspec/alpha_lyr_stis_011.fits' to file '/Users/tomasstolker/applications/species/docs/tutorials/data/alpha_lyr_stis_011.fits'.\n",
"100%|███████████████████████████████████████| 288k/288k [00:00<00:00, 92.5MB/s]"
"100%|███████████████████████████████████████| 288k/288k [00:00<00:00, 177MB/s]"
]
},
{
@@ -325,7 +324,9 @@
"name": "stderr",
"output_type": "stream",
"text": [
"\n"
"\n",
"/Users/tomasstolker/applications/species/species/data/database.py:1355: UserWarning: Found 33 fluxes with NaN in the data of GPI_YJHK. Removing the spectral fluxes that contain a NaN.\n",
" warnings.warn(\n"
]
},
{
@@ -396,18 +397,10 @@
" - Database tag: GRAVITY\n",
" - Filename: ./data/companion_data/BetaPictorisb_2018-09-22.fits\n",
" - Data shape: (237, 237)\n",
" - Spectral resolution:\n",
" - Resolution:\n",
" - GPI_YJHK: 40.0\n",
" - GRAVITY: 500.0\n"
]
-},
-{
-"name": "stderr",
-"output_type": "stream",
-"text": [
-"/Users/tomasstolker/applications/species/species/data/database.py:1355: UserWarning: Found 33 fluxes with NaN in the data of GPI_YJHK. Removing the spectral fluxes that contain a NaN.\n",
-" warnings.warn(\n"
-]
}
],
"source": [
@@ -488,7 +481,7 @@
" - Database tag: GRAVITY\n",
" - Filename: BetaPictorisb_2018-09-22.fits\n",
" - Data shape: (237, 237)\n",
" - Spectral resolution:\n",
" - Resolution:\n",
" - GRAVITY: 500.0\n",
" - GPI_Y: 40.0\n",
" - GPI_J: 40.0\n",
@@ -633,7 +626,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"We are now ready to sample the posterior distribution by either using [MultiNest](https://johannesbuchner.github.io/PyMultiNest/index.html) or [UltraNest](https://johannesbuchner.github.io/UltraNest/index.html) with the [run_multinest](https://species.readthedocs.io/en/latest/species.fit.html#species.fit.fit_model.FitModel.run_multinest) and [run_ultranest](https://species.readthedocs.io/en/latest/species.fit.html#species.fit.fit_model.FitModel.run_ultranest) methods of `FitModel`. Both are nested sampling algorithms which are powerful in sampling multi-modal distributions and will estimate the marginalized likelihood (i.e. *model evidence*), which enables pair-wise model comparison through the Bayes factor."
"We are now ready to sample the posterior distribution by either using [MultiNest](https://johannesbuchner.github.io/PyMultiNest/index.html), [UltraNest](https://johannesbuchner.github.io/UltraNest/index.html), or [Dynesty](https://dynesty.readthedocs.io/en/latest/index.html) with the [run_multinest](https://species.readthedocs.io/en/latest/species.fit.html#species.fit.fit_model.FitModel.run_multinest), [run_ultranest](https://species.readthedocs.io/en/latest/species.fit.html#species.fit.fit_model.FitModel.run_ultranest), and [run_dynesty](https://species.readthedocs.io/en/latest/species.fit.html#species.fit.fit_model.FitModel.run_dynesty) methods of `FitModel`. Both are nested sampling algorithms which are powerful in sampling multi-modal distributions and will estimate the marginalized likelihood (i.e. *model evidence*), which enables pair-wise model comparison through the Bayes factor."
]
},
{
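
As a rough sketch of how this step is invoked (the `FitModel` arguments are assumptions for illustration; the sampler settings mirror those echoed in the MultiNest output further below, and the exact `run_dynesty` signature should be checked against the API documentation linked above):

```python
from species.fit.fit_model import FitModel

# Set up the fit for the object and model grid that were added
# to the database earlier in the tutorial (assumed names)
fit = FitModel(object_name="beta Pic b", model="drift-phoenix")

# Nested sampling with MultiNest: 500 live points, posterior samples
# stored under the 'betapic' database tag
fit.run_multinest(tag="betapic", n_live_points=500, resume=False, output="multinest/")

# The new Dynesty backend is presumably called analogously
# (assumed arguments, mirroring run_multinest):
# fit.run_dynesty(tag="betapic", n_live_points=500, output="dynesty/")
```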
@@ -647,7 +640,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"To speed up the computation, it is possible to run the nested sampling in parallel (e.g. with `mpirun`) to benefit from the multiprocessing support by `UltraNest` and `MultiNest`. In that case it is important that any functions of `species` that will write to the [Database](https://species.readthedocs.io/en/latest/species.data.html#species.data.database.Database) will be commented out since simultaneous writing to the HDF5 database by different processes is not possible. It is therefore recommended to first add all the required data to the database and then only run [SpeciesInit](https://species.readthedocs.io/en/latest/species.core.html#species.core.species_init.SpeciesInit), [FitModel](https://species.readthedocs.io/en/latest/species.fit.html#species.fit.fit_model.FitModel), and the sampler ([run_multinest](https://species.readthedocs.io/en/latest/species.fit.html#species.fit.fit_model.FitModel.run_multinest) or [run_ultranest](https://species.readthedocs.io/en/latest/species.fit.html#species.fit.fit_model.FitModel.run_ultranest)) in parallel with MPI."
"To speed up the computation, it is possible to run the nested sampling in parallel (e.g. with `mpirun`) to benefit from the multiprocessing support by `MultiNest`, `UltraNest`, and `Dynesty`. In that case it is important that any functions of `species` that will write to the [Database](https://species.readthedocs.io/en/latest/species.data.html#species.data.database.Database) will be commented out since simultaneous writing to the HDF5 database by different processes is not possible. It is therefore recommended to first add all the required data to the database and then only run [SpeciesInit](https://species.readthedocs.io/en/latest/species.core.html#species.core.species_init.SpeciesInit), [FitModel](https://species.readthedocs.io/en/latest/species.fit.html#species.fit.fit_model.FitModel), and the sampler ([run_multinest](https://species.readthedocs.io/en/latest/species.fit.html#species.fit.fit_model.FitModel.run_multinest), [run_ultranest](https://species.readthedocs.io/en/latest/species.fit.html#species.fit.fit_model.FitModel.run_ultranest), [run_dynesty](https://species.readthedocs.io/en/latest/species.fit.html#species.fit.fit_model.FitModel.run_dynesty)) in parallel with MPI."
]
},
{
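
A sketch of the recommended MPI workflow (the script name, process count, and `FitModel` arguments are illustrative): populate the database in a serial run first, then keep only the initialization, `FitModel`, and the sampler call in the script launched with MPI.

```python
# fit_betapic.py -- launch with, e.g.: mpirun -np 4 python fit_betapic.py
from species.core.species_init import SpeciesInit
from species.fit.fit_model import FitModel

SpeciesInit()

# Database writes (add_object, add_model, ...) were already done in a
# serial run; only read-only setup and the sampler run under MPI here.
fit = FitModel(object_name="beta Pic b", model="drift-phoenix")
fit.run_multinest(tag="betapic", n_live_points=500, output="multinest/")
```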
@@ -664,6 +657,10 @@
"Nested sampling with MultiNest\n",
"------------------------------\n",
"\n",
"Database tag: betapic\n",
"Number of live points: 500\n",
"Resume previous fit: False\n",
"Output folder: multinest/\n",
" *****************************************************\n",
" MultiNest v3.10\n",
" Copyright Farhan Feroz & Mike Hobson\n",
@@ -672,21 +669,21 @@
" no. of live points = 500\n",
" dimensionality = 6\n",
" *****************************************************\n",
" ln(ev)= 16478.369649839929 +/- 0.19072622217532367 \n",
" analysing data from multinest/.txt\n",
" Total Likelihood Evaluations: 49040\n",
" analysing data from multinest/.txt ln(ev)= 16478.154973650067 +/- 0.19161049564628616 \n",
" Total Likelihood Evaluations: 46185\n",
" Sampling finished. Exiting MultiNest\n",
"\n",
"Nested sampling global log-evidence: 16478.37 +/- 0.19\n",
"Nested importance sampling global log-evidence: 16476.34 +/- 0.01\n",
"\n",
"Nested sampling global log-evidence: 16478.15 +/- 0.19\n",
"Nested importance sampling global log-evidence: 16476.48 +/- 0.09\n",
"\n",
"Sample with the highest probability:\n",
" - Log-likelihood = 16499.09\n",
" - teff = 1710.52\n",
" - logg = 3.85\n",
" - teff = 1709.63\n",
" - logg = 3.84\n",
" - feh = 0.12\n",
" - radius = 1.48\n",
" - parallax = 50.97\n",
" - radius = 1.49\n",
" - parallax = 50.88\n",
" - scaling_GPI_H = 1.11\n",
"\n",
"---------------------\n",
@@ -695,15 +692,15 @@
"\n",
"Database tag: betapic\n",
"Sampler: multinest\n",
"Array shape: (2757, 6)\n",
"Array shape: (2755, 6)\n",
"\n",
"Integrated autocorrelation time:\n",
" - teff: 1.05\n",
" - logg: 1.30\n",
" - feh: 1.24\n",
" - radius: 1.13\n",
" - parallax: 1.34\n",
" - scaling_GPI_H: 1.20\n"
" - teff: 1.27\n",
" - logg: 1.28\n",
" - feh: 1.21\n",
" - radius: 1.24\n",
" - parallax: 1.30\n",
" - scaling_GPI_H: 1.14\n"
]
}
],
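
Since nested sampling returns the log-evidence (16478.15 ± 0.19 in the output above), pairwise model comparison reduces to a difference of two such values. A sketch, where the second value is a made-up placeholder for a fit with an alternative model grid:

```python
import numpy as np

ln_z_a = 16478.15  # log-evidence from the output above
ln_z_b = 16471.90  # hypothetical log-evidence of an alternative model

# Bayes factor of model A over model B: ln(B) = ln(Z_A) - ln(Z_B)
ln_bayes = ln_z_a - ln_z_b
print(f"ln(B) = {ln_bayes:.2f}, B = {np.exp(ln_bayes):.1e}")
```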
6 changes: 3 additions & 3 deletions species/data/database.py
@@ -1826,7 +1826,7 @@ def add_samples(

print(f"Database tag: {tag}")
print(f"Sampler: {sampler}")
print(f"Array shape: {samples.shape}")
print(f"Samples shape: {samples.shape}")

if spec_labels is None:
spec_labels = []
@@ -1905,7 +1905,7 @@
for i, item in enumerate(modelpar):
auto_corr = integrated_time(samples[:, i], quiet=True)[0]

-if np.allclose(samples[:, i], np.mean(samples[:, i])):
+if np.allclose(samples[:, i], np.mean(samples[:, i]), atol=0.0):
print(f" - {item}: fixed")
else:
print(f" - {item}: {auto_corr:.2f}")
@@ -2792,7 +2792,7 @@ def get_samples(

print(f"Database tag: {tag}")
print(f"Random samples: {random}")
print(f"Array shape: {samples.shape}")
print(f"Samples shape: {samples.shape}")

attributes = {}
for item in dset.attrs: