diff --git a/.gitattributes b/.gitattributes index 44095a9..93d5c58 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1 +1,5 @@ incense/_version.py export-subst + +*.ipynb diff=jupyternotebook + +*.ipynb merge=jupyternotebook diff --git a/demo.ipynb b/demo.ipynb index 24db4b7..8a0f70f 100755 --- a/demo.ipynb +++ b/demo.ipynb @@ -170,7 +170,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Using mongoDB queries und can also request experiments in certain time ranges." + "Using mongoDB queries you can also request experiments in certain time ranges." ] }, { @@ -194,6 +194,33 @@ "loader.find(query)" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "For quickly viewing the newest experiments you can use the `find_latest` method. This is especially useful when *debugging* machine learning experiments. Caching is explicitly disabled for this method." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Experiment(id=3, name=example)" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "loader.find_latest()" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -204,7 +231,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ @@ -221,7 +248,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 11, "metadata": {}, "outputs": [ { @@ -309,7 +336,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -318,7 +345,7 @@ "'COMPLETED'" ] }, - "execution_count": 11, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -329,7 +356,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 13, "metadata": {}, "outputs": [ { @@ -338,7 +365,7 @@ "datetime.datetime(2019, 4, 11, 19, 23, 23, 890000)" ] }, - "execution_count": 12, 
+ "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -349,7 +376,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 14, "metadata": {}, "outputs": [ { @@ -358,7 +385,7 @@ "0.9315000176429749" ] }, - "execution_count": 13, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -369,7 +396,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 15, "metadata": {}, "outputs": [ { @@ -398,7 +425,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 16, "metadata": {}, "outputs": [ { @@ -407,7 +434,7 @@ "pmap({'seed': 0, 'optimizer': 'sgd', 'epochs': 3})" ] }, - "execution_count": 15, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -425,7 +452,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 17, "metadata": {}, "outputs": [ { @@ -434,7 +461,7 @@ "3" ] }, - "execution_count": 16, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -447,12 +474,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Alternatitvely, the classic dictionary access notation can still be used. This is useful, if the the keys of the data model are not valid python identifiers." + "Alternatively, the classic dictionary access notation can still be used. This is useful if the keys of the data model are not valid Python identifiers." 
] }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 18, "metadata": {}, "outputs": [ { @@ -461,7 +488,7 @@ "False" ] }, - "execution_count": 17, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } @@ -486,7 +513,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 19, "metadata": {}, "outputs": [ { @@ -501,7 +528,7 @@ " 'model.hdf5': Artifact(name=model.hdf5)}" ] }, - "execution_count": 18, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -514,12 +541,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "PNG artifacts will be shown as figures by default." + "PNG artifacts will be shown as images by default." ] }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 20, "metadata": {}, "outputs": [ { @@ -529,7 +556,7 @@ "" ] }, - "execution_count": 19, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -540,7 +567,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 21, "metadata": {}, "outputs": [], "source": [ @@ -556,7 +583,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 22, "metadata": {}, "outputs": [ { @@ -623,7 +650,7 @@ "4 4 4" ] }, - "execution_count": 21, + "execution_count": 22, "metadata": {}, "output_type": "execute_result" } @@ -634,7 +661,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 23, "metadata": {}, "outputs": [ { @@ -709,7 +736,7 @@ "4 4 4" ] }, - "execution_count": 22, + "execution_count": 23, "metadata": {}, "output_type": "execute_result" } @@ -727,7 +754,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 24, "metadata": {}, "outputs": [ { @@ -743,7 +770,7 @@ "" ] }, - "execution_count": 23, + "execution_count": 24, "metadata": {}, "output_type": "execute_result" } @@ -761,7 +788,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 25, "metadata": {}, "outputs": [ { 
@@ -828,7 +855,7 @@ "4 4 4" ] }, - "execution_count": 24, + "execution_count": 25, "metadata": {}, "output_type": "execute_result" } @@ -849,12 +876,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "`.metrics` works similiar to `.artifacts`, but maps from metrics names to `pandas.Series`. Therefore, metrics can easily be plotted." + "`.metrics` works similarly to `.artifacts`, but maps from metric names to `pandas.Series`. Therefore, metrics can easily be plotted." ] }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 26, "metadata": {}, "outputs": [ { @@ -863,7 +890,7 @@ "dict_keys(['training_loss', 'training_acc', 'test_loss', 'test_acc'])" ] }, - "execution_count": 25, + "execution_count": 26, "metadata": {}, "output_type": "execute_result" } @@ -874,16 +901,16 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 27, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 26, + "execution_count": 27, "metadata": {}, "output_type": "execute_result" }, @@ -906,16 +933,16 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 28, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 27, + "execution_count": 28, "metadata": {}, "output_type": "execute_result" }, @@ -943,12 +970,12 @@ "metadata": {}, "source": [ "## Projecting onto DataFrames\n", - "Often you want to pull experiment attributes and metrics into a dataframe. Either just to get and overview or do a custom analysis. You can easily transform a `QuerySet` of experiments by calling `project` on it. Pass a list of dot separated paths that point to some value in the experiement model to the `on` parameter. By default the columns will be named as the last element in the path." + "Often you want to pull experiment attributes and metrics into a dataframe. Either just to get an overview or do a custom analysis. 
You can easily transform a `QuerySet` of experiments by calling `project` on it. Pass a list of dot separated paths that point to some value in the experiment model to the `on` parameter. By default the columns will be named as the last element in the path." ] }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 29, "metadata": {}, "outputs": [ { @@ -1014,7 +1041,7 @@ "3 example adam 1" ] }, - "execution_count": 28, + "execution_count": 29, "metadata": {}, "output_type": "execute_result" } @@ -1033,7 +1060,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 30, "metadata": {}, "outputs": [ { @@ -1099,7 +1126,7 @@ "3 example adam 0.218707" ] }, - "execution_count": 29, + "execution_count": 30, "metadata": {}, "output_type": "execute_result" } @@ -1118,7 +1145,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 31, "metadata": {}, "outputs": [], "source": [ @@ -1134,7 +1161,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 32, "metadata": {}, "outputs": [], "source": [ @@ -1150,12 +1177,12 @@ "metadata": {}, "source": [ "## Utils\n", - "The `utils` module contains realted functionality, that might be useful during the manual interpretation of experiments." + "The `utils` module contains related functionality, that might be useful during the manual interpretation of experiments." ] }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 33, "metadata": {}, "outputs": [], "source": [ @@ -1171,7 +1198,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 34, "metadata": {}, "outputs": [ { @@ -1180,7 +1207,7 @@ "{'epochs'}" ] }, - "execution_count": 33, + "execution_count": 34, "metadata": {}, "output_type": "execute_result" } @@ -1195,12 +1222,12 @@ "metadata": {}, "source": [ "## Deleting experiments\n", - "It is possible to completely delete experiments including their associated metrics and artifacts. 
Per default the method will ask for confirmation, so we do not accidentially delete our experiments. This can be skipped by passing `confirmed=True`." + "It is possible to completely delete experiments including their associated metrics and artifacts. Per default the method will ask for confirmation, so we do not accidentally delete our experiments. This can be skipped by passing `confirmed=True`." ] }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 35, "metadata": {}, "outputs": [ { @@ -1209,6 +1236,19 @@ "text": [ "Are you sure you want to delete Experiment(id=2, name=example)? [y/N] N\n" ] + }, + { + "ename": "StdinNotImplementedError", + "evalue": "raw_input was called, but this frontend does not support input requests.", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mStdinNotImplementedError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0mexp\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mloader\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfind_by_id\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mexp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdelete\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m~/projects/incense/incense/experiment.py\u001b[0m in \u001b[0;36mdelete\u001b[0;34m(self, confirmed)\u001b[0m\n\u001b[1;32m 76\u001b[0m \"\"\"\n\u001b[1;32m 77\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mconfirmed\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 78\u001b[0;31m \u001b[0mconfirmed\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0minput\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf\"Are you sure you want to delete {self}? [y/N]\"\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m\"y\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 79\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mconfirmed\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 80\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_delete\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/.miniconda/envs/incense-dev/lib/python3.6/site-packages/ipykernel/kernelbase.py\u001b[0m in \u001b[0;36mraw_input\u001b[0;34m(self, prompt)\u001b[0m\n\u001b[1;32m 846\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_allow_stdin\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 847\u001b[0m raise StdinNotImplementedError(\n\u001b[0;32m--> 848\u001b[0;31m \u001b[0;34m\"raw_input was called, but this frontend does not support input requests.\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 849\u001b[0m )\n\u001b[1;32m 850\u001b[0m return self._input_request(str(prompt),\n", + "\u001b[0;31mStdinNotImplementedError\u001b[0m: raw_input was called, but this frontend does not support input requests." 
+ ] } ], "source": [ diff --git a/incense/experiment_loader.py b/incense/experiment_loader.py index 3954149..b455425 100755 --- a/incense/experiment_loader.py +++ b/incense/experiment_loader.py @@ -4,6 +4,7 @@ from typing import * import gridfs +import pymongo from pymongo import MongoClient from .experiment import Experiment @@ -86,8 +87,7 @@ def find_by_config_key(self, key: str, value: Union[str, numbers.Real, tuple]) - """ key = f"config.{key}" cursor = self._search_collection(key, value) - experiments = [self._make_experiment(experiment) for experiment in cursor] - return QuerySet(experiments) + return self._read_from_cursor(cursor) @lru_cache(maxsize=MAX_CACHE_SIZE) def find_by_key(self, key: str, value: Union[str, numbers.Real]) -> QuerySet: @@ -106,8 +106,7 @@ def find_by_key(self, key: str, value: Union[str, numbers.Real]) -> QuerySet: The matched experiments. """ cursor = self._search_collection(key, value) - experiments = [self._make_experiment(experiment) for experiment in cursor] - return QuerySet(experiments) + return self._read_from_cursor(cursor) @lru_cache(maxsize=MAX_CACHE_SIZE) def find_all(self) -> QuerySet: @@ -120,6 +119,25 @@ def find_all(self) -> QuerySet: cursor = self._runs.find() return QuerySet([self._make_experiment(experiment) for experiment in cursor]) + def find_latest(self, n: int = 1, attr: str = "start_time") -> Union[Experiment, QuerySet]: + """Find the most recent experiments. + + Caching is disabled for this method. + + Args: + n: The number of latest experiments to retrieve. + attr: The attribute to determine which experiments are the most recent ones. + + Returns: + Either the latest experiment or the set of latest experiments in case more than one were requested. 
+ """ + cursor = self._runs.find().sort(attr, pymongo.DESCENDING).limit(n) + experiments = [self._make_experiment(experiment) for experiment in cursor] + if len(experiments) == 1: + return experiments[0] + else: + return QuerySet(experiments) + def find(self, query: dict) -> QuerySet: """Find experiments based on a mongo query. @@ -130,6 +148,9 @@ def find(self, query: dict) -> QuerySet: The matched experiments. """ cursor = self._runs.find(query) + return self._read_from_cursor(cursor) + + def _read_from_cursor(self, cursor) -> QuerySet: experiments = [self._make_experiment(experiment) for experiment in cursor] return QuerySet(experiments) diff --git a/tests/conftest.py b/tests/conftest.py index 1a12913..3796e89 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -11,7 +11,7 @@ def loader(): @pytest.fixture -def mongo_observer(): +def delete_mongo_observer(): observer = MongoObserver.create(url=None, db_name="incense_delete_test") return observer @@ -20,3 +20,15 @@ def mongo_observer(): def delete_db_loader(): loader = ExperimentLoader(mongo_uri=None, db_name="incense_delete_test") return loader + + +@pytest.fixture +def recent_mongo_observer(): + observer = MongoObserver.create(url=None, db_name="incense_recent_test") + return observer + + +@pytest.fixture +def recent_db_loader(): + loader = ExperimentLoader(mongo_uri=None, db_name="incense_recent_test") + return loader diff --git a/tests/test_experiment.py b/tests/test_experiment.py index bb65353..863611d 100644 --- a/tests/test_experiment.py +++ b/tests/test_experiment.py @@ -55,10 +55,10 @@ def test_to_dict(loader): assert x == y -def test_delete(delete_db_loader, mongo_observer): +def test_delete(delete_db_loader, delete_mongo_observer): # Add experiment to db. 
ex = Experiment("to be deleted") - ex.observers.append(mongo_observer) + ex.observers.append(delete_mongo_observer) ex.add_config({"value": 1}) def run(value, _run): diff --git a/tests/test_experiment_loader.py b/tests/test_experiment_loader.py index e138aa5..77768ea 100644 --- a/tests/test_experiment_loader.py +++ b/tests/test_experiment_loader.py @@ -1,5 +1,6 @@ # -*- coding: future_fstrings -*- from pytest import raises +from sacred import Experiment as SacredExperiment from incense.experiment import Experiment @@ -45,6 +46,61 @@ def test_find_all(loader): assert len(exps) == 3 +def test_find_latest__with_newly_added_experiments(recent_db_loader, recent_mongo_observer): + ex = SacredExperiment("most recent") + ex.observers.append(recent_mongo_observer) + ex.add_config({"value": 1}) + + def run(value, _run): + return value + + ex.main(run) + ex.run() + + exp = recent_db_loader.find_latest() + assert exp.config.value == 1 + + ex = SacredExperiment("new most recent") + ex.observers.append(recent_mongo_observer) + ex.add_config({"value": 2}) + + def run(value, _run): + return value + + ex.main(run) + ex.run() + + exp = recent_db_loader.find_latest() + assert exp.config.value == 2 + + +def test_find_latest__for_multiple_with_newly_added_experiments(recent_db_loader, recent_mongo_observer): + ex = SacredExperiment("most recent") + ex.observers.append(recent_mongo_observer) + ex.add_config({"value": 1}) + + def run(value, _run): + return value + + ex.main(run) + ex.run() + + ex = SacredExperiment("new most recent") + ex.observers.append(recent_mongo_observer) + ex.add_config({"value": 2}) + + def run(value, _run): + return value + + ex.main(run) + ex.run() + + exps = recent_db_loader.find_latest(2) + + assert exps[0].config.value == 2 + assert exps[1].config.value == 1 + + def test_find(loader): exps = loader.find({"$and": [{"config.optimizer": "sgd"}, {"config.epochs": 3}]}) assert len(exps) == 1