From b84cc8e04f046d933c6ede33dc68c8d8a1b59d13 Mon Sep 17 00:00:00 2001 From: hatsy Date: Sun, 9 Apr 2017 12:36:09 -0700 Subject: [PATCH 1/9] Ported plotting from plotly to bokeh passing only json from backend to frontend --- .gitignore | 1 + cesium_app/handlers/plot_features.py | 22 ++----- cesium_app/models.py | 12 ++++ cesium_app/plot.py | 98 ++++++++++++++++------------ public/index.html | 11 ++++ public/scripts/Plot.jsx | 25 +++++-- public/scripts/custom-plotly.js | 13 ---- requirements.txt | 1 + 8 files changed, 106 insertions(+), 77 deletions(-) delete mode 100644 public/scripts/custom-plotly.js diff --git a/.gitignore b/.gitignore index e218d72..3d2a628 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,4 @@ ghostdriver.log *.swo __pycache__/ node_modules/ +*.pyc diff --git a/cesium_app/handlers/plot_features.py b/cesium_app/handlers/plot_features.py index 13f0c19..dbfe3b6 100644 --- a/cesium_app/handlers/plot_features.py +++ b/cesium_app/handlers/plot_features.py @@ -4,20 +4,8 @@ class PlotFeaturesHandler(BaseHandler): - def _get_featureset(self, featureset_id): - try: - f = Featureset.get(Featureset.id == featureset_id) - except Featureset.DoesNotExist: - raise AccessError('No such feature set') - - if not f.is_owned_by(self.get_username()): - raise AccessError('No such feature set') - - return f - - def get(self, featureset_id=None): - fset = self._get_featureset(featureset_id) - features_to_plot = sorted(fset.features_list)[0:4] - data, layout = plot.feature_scatterplot(fset.file.uri, features_to_plot) - - self.success({'data': data, 'layout': layout}) + def get(self, featureset_id): + fset = Featureset.get_if_owned(featureset_id, self.get_username()) + features_to_plot = sorted(fset.features_list)[0:4] # TODO from form + docs_json, render_items = plot.feature_scatterplot(fset.file.uri, features_to_plot) + self.success({'docs_json': docs_json, 'render_items': render_items}) diff --git a/cesium_app/models.py b/cesium_app/models.py index 09851de..b73433e 100644 --- a/cesium_app/models.py +++ b/cesium_app/models.py @@ -158,6 +158,18 @@ class Featureset(BaseModel): def is_owned_by(self, username): return self.project.is_owned_by(username) + @staticmethod + def get_if_owned(fset_id, username): + try: + f = Featureset.get(Featureset.id == fset_id) + except Featureset.DoesNotExist: + raise AccessError('No such feature set') + + if not f.is_owned_by(username): + raise AccessError('No such feature set') + + return f + class Model(BaseModel): """ORM model of the Model table""" diff --git a/cesium_app/plot.py b/cesium_app/plot.py index cb1cad3..2ff58b8 100644 --- a/cesium_app/plot.py +++ b/cesium_app/plot.py @@ -1,12 +1,12 @@ +from itertools import cycle, islice import numpy as np -import pandas as pd -from sklearn.metrics import confusion_matrix -import plotly -import plotly.offline as py -from plotly.tools import FigureFactory as FF - from cesium import featurize -from .config import cfg +from bokeh.plotting import figure +from bokeh.layouts import gridplot +from bokeh.palettes import PuBu as palette +from bokeh.core.json_encoder import serialize_json +from bokeh.document import Document +from bokeh.util.serialization import make_id def feature_scatterplot(fset_path, features_to_plot): @@ -21,42 +21,56 @@ def feature_scatterplot(fset_path, features_to_plot): Returns ------- - (fig.data, fig.layout) - Returns (fig.data, fig.layout) where `fig` is an instance of - `plotly.tools.FigureFactory`. + (str, str) + Returns (script, div) tags for the desired plot as output by + `bokeh.embed.components`. """ fset, data = featurize.load_featureset(fset_path) - fset = fset[features_to_plot] + X = fset[features_to_plot] + if 'target' in fset and fset.target.values.dtype != np.float: + y = fset.target.values + labels = np.unique(y) + else: + y = [None] * len(X) + labels = [None] - if 'label' in data: - fset['label'] = data['label'] - index = 'label' + if len(labels) in palette: + colors = palette[len(labels)] else: - index = None - - # TODO replace 'trace {i}' with class labels - fig = FF.create_scatterplotmatrix(fset, diag='box', index=index, - height=800, width=800) - - py.plot(fig, auto_open=False, output_type='div') - - return fig.data, fig.layout - - -#def prediction_heatmap(pred_path): -# with xr.open_dataset(pred_path) as pset: -# pred_df = pd.DataFrame(pset.prediction.values, index=pset.name, -# columns=pset.class_label.values) -# pred_labels = pred_df.idxmax(axis=1) -# C = confusion_matrix(pset.label, pred_labels) -# row_sums = C.sum(axis=1) -# C = C / row_sums[:, np.newaxis] -# fig = FF.create_annotated_heatmap(C, x=[str(el) for el in -# pset.class_label.values], -# y=[str(el) for el in -# pset.class_label.values], -# colorscale='Viridis') -# -# py.plot(fig, auto_open=False, output_type='div') -# -# return fig.data, fig.layout + all_colors = sorted(palette.items(), key=lambda x: x[0], + reverse=True)[0][1] + colors = list(islice(cycle(all_colors), len(labels))) + + plots = np.array([[figure(width=300, height=200) + for j in range(len(features_to_plot))] + for i in range(len(features_to_plot))]) + for (i, j), p in np.ndenumerate(plots): + for l, c in zip(labels, colors): + if l is not None: + inds = np.where(y == l)[0] + else: + inds = np.arange(len(X)) + p.circle(X.values[inds, i], X.values[inds, j], color=c, + legend=(l if (i == j and l is not None) else None)) + p.legend.location = 'bottom_right' + p.legend.label_text_font_size = '6pt' + p.legend.spacing = 0 + p.legend.padding = 0 + p.xaxis.axis_label = features_to_plot[i] + p.yaxis.axis_label = features_to_plot[j] + + plot = gridplot(plots.tolist(), ncol=len(features_to_plot), mergetools=True) + + # Convert plot to json objects necessary for rendering with bokeh on the + # frontend + render_items = [{'docid':plot._id, 'elementid':make_id()}] + + doc = Document() + doc.add_root(plot) + docs_json_inner = doc.to_json() + docs_json = {render_items[0]['docid']:docs_json_inner} + + docs_json = serialize_json(docs_json) + render_items = serialize_json(render_items) + + return docs_json, render_items diff --git a/public/index.html b/public/index.html index 7ccd2ad..e85fe7f 100644 --- a/public/index.html +++ b/public/index.html @@ -5,6 +5,17 @@ Cesium + + + + + + + diff --git a/public/scripts/Plot.jsx b/public/scripts/Plot.jsx index 0eb77ad..3890907 100644 --- a/public/scripts/Plot.jsx +++ b/public/scripts/Plot.jsx @@ -1,8 +1,22 @@ import React, { Component } from 'react'; import { connect } from 'react-redux'; -import Plotly from './custom-plotly'; import { showNotification } from './Notifications'; +function bokeh_render_plot(node, docs_json, render_items) { + // Create bokeh div element + var bokeh_div = document.createElement("div"); + var inner_div = document.createElement("div"); + bokeh_div.setAttribute("class", "bk-root" ); + inner_div.setAttribute("class", "bk-plotdiv"); + inner_div.setAttribute("id", render_items[0].elementid); + bokeh_div.appendChild(inner_div); + node.appendChild(bokeh_div); + + // Generate plot + Bokeh.safely(function() { + Bokeh.embed.embed_items(docs_json, render_items); + }); +} class Plot extends Component { constructor(props) { @@ -32,16 +46,17 @@ class Plot extends Component { if (!plotData) { return Please wait while we load your plotting data...; } - - let { data, layout } = plotData; + var docs_json = JSON.parse(plotData.docs_json); + var render_items = JSON.parse(plotData.render_items); return ( plotData &&
{ - node && Plotly.plot(node, data, layout); - }} + node && bokeh_render_plot(node, docs_json, render_items) + } + } /> ); } diff --git a/public/scripts/custom-plotly.js b/public/scripts/custom-plotly.js deleted file mode 100644 index 1883840..0000000 --- a/public/scripts/custom-plotly.js +++ /dev/null @@ -1,13 +0,0 @@ -// in custom-plotly.js -import Plotly from 'plotly.js/lib/core'; - -// extra module, for example -import Choropleth from 'plotly.js/lib/choropleth'; - - -// Load in the trace types for pie, and choropleth -Plotly.register([ - Choropleth -]); - -export default Plotly; diff --git a/requirements.txt b/requirements.txt index da54396..617d62b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,3 +12,4 @@ distributed>=1.14.3 selenium pytest joblib>=0.11 +bokeh==0.12.5 From b57a8658bcb17111e1d9e11c603c9e7bccd4af75 Mon Sep 17 00:00:00 2001 From: hatsy Date: Thu, 13 Apr 2017 15:31:09 -0700 Subject: [PATCH 2/9] Changed frontend test to look for bokeh plot instead of plotly --- cesium_app/tests/frontend/test_features.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cesium_app/tests/frontend/test_features.py b/cesium_app/tests/frontend/test_features.py index 06acea2..5869f2a 100644 --- a/cesium_app/tests/frontend/test_features.py +++ b/cesium_app/tests/frontend/test_features.py @@ -167,7 +167,7 @@ def test_plot_features(driver): driver.find_element_by_xpath("//b[contains(text(),'Please wait while we load your plotting data...')]") driver.implicitly_wait(3) - driver.find_element_by_css_selector("[class=svg-container]") + driver.find_element_by_css_selector("[class=bk-plotdiv]") def test_delete_featureset(driver): From f21429e28332e4245163b94bbd0043f7916b7fe8 Mon Sep 17 00:00:00 2001 From: hatsy Date: Thu, 13 Apr 2017 15:33:12 -0700 Subject: [PATCH 3/9] Pep8 changes --- cesium_app/handlers/plot_features.py | 3 ++- cesium_app/plot.py | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/cesium_app/handlers/plot_features.py b/cesium_app/handlers/plot_features.py index dbfe3b6..29d9ab8 100644 --- a/cesium_app/handlers/plot_features.py +++ b/cesium_app/handlers/plot_features.py @@ -7,5 +7,6 @@ class PlotFeaturesHandler(BaseHandler): def get(self, featureset_id): fset = Featureset.get_if_owned(featureset_id, self.get_username()) features_to_plot = sorted(fset.features_list)[0:4] # TODO from form - docs_json, render_items = plot.feature_scatterplot(fset.file.uri, features_to_plot) + docs_json, render_items = plot.feature_scatterplot(fset.file.uri, + features_to_plot) self.success({'docs_json': docs_json, 'render_items': render_items}) diff --git a/cesium_app/plot.py b/cesium_app/plot.py index 2ff58b8..d42e657 100644 --- a/cesium_app/plot.py +++ b/cesium_app/plot.py @@ -63,12 +63,12 @@ def feature_scatterplot(fset_path, features_to_plot): # Convert plot to json objects necessary for rendering with bokeh on the # frontend - render_items = [{'docid':plot._id, 'elementid':make_id()}] + render_items = [{'docid': plot._id, 'elementid': make_id()}] doc = Document() doc.add_root(plot) docs_json_inner = doc.to_json() - docs_json = {render_items[0]['docid']:docs_json_inner} + docs_json = {render_items[0]['docid']: docs_json_inner} docs_json = serialize_json(docs_json) render_items = serialize_json(render_items) From 40d370495e3d9c339efca2c1ffa87592ed906ced Mon Sep 17 00:00:00 2001 From: hatsy Date: Fri, 14 Apr 2017 15:19:30 -0700 Subject: [PATCH 4/9] Updated bokeh plot to reflect original plotly plot --- cesium_app/plot.py | 51 ++++++++++++++++------------------------------ 1 file changed, 17 insertions(+), 34 deletions(-) diff --git a/cesium_app/plot.py b/cesium_app/plot.py index d42e657..916d7ec 100644 --- a/cesium_app/plot.py +++ b/cesium_app/plot.py @@ -1,9 +1,9 @@ -from itertools import cycle, islice +from itertools import cycle import numpy as np from cesium import featurize from bokeh.plotting import figure from bokeh.layouts import gridplot -from bokeh.palettes import PuBu as palette +from bokeh.palettes import Viridis as palette from bokeh.core.json_encoder import serialize_json from bokeh.document import Document from bokeh.util.serialization import make_id @@ -22,44 +22,27 @@ def feature_scatterplot(fset_path, features_to_plot): Returns ------- (str, str) - Returns (script, div) tags for the desired plot as output by - `bokeh.embed.components`. + Returns (docs_json, render_items) json for the desired plot. """ fset, data = featurize.load_featureset(fset_path) - X = fset[features_to_plot] - if 'target' in fset and fset.target.values.dtype != np.float: - y = fset.target.values - labels = np.unique(y) - else: - y = [None] * len(X) - labels = [None] - - if len(labels) in palette: - colors = palette[len(labels)] - else: - all_colors = sorted(palette.items(), key=lambda x: x[0], - reverse=True)[0][1] - colors = list(islice(cycle(all_colors), len(labels))) - + fset = fset[features_to_plot] + colors = cycle(palette[5]) plots = np.array([[figure(width=300, height=200) for j in range(len(features_to_plot))] for i in range(len(features_to_plot))]) - for (i, j), p in np.ndenumerate(plots): - for l, c in zip(labels, colors): - if l is not None: - inds = np.where(y == l)[0] - else: - inds = np.arange(len(X)) - p.circle(X.values[inds, i], X.values[inds, j], color=c, - legend=(l if (i == j and l is not None) else None)) - p.legend.location = 'bottom_right' - p.legend.label_text_font_size = '6pt' - p.legend.spacing = 0 - p.legend.padding = 0 - p.xaxis.axis_label = features_to_plot[i] - p.yaxis.axis_label = features_to_plot[j] - plot = gridplot(plots.tolist(), ncol=len(features_to_plot), mergetools=True) + for (j, i), p in np.ndenumerate(plots): + if (j == i == 0): + p.title.text = "Scatterplot matrix" + p.circle(fset.values[:,i], fset.values[:,j], color=next(colors)) + p.xaxis.minor_tick_line_color = None + p.yaxis.minor_tick_line_color = None + p.ygrid[0].ticker.desired_num_ticks = 2 + p.xgrid[0].ticker.desired_num_ticks = 4 + p.outline_line_color = None + p.axis.visible = None + + plot = gridplot(plots.tolist(), ncol=len(features_to_plot), mergetools=True, responsive=True, title="Test") # Convert plot to json objects necessary for rendering with bokeh on the # frontend From f00be41b688b03c82fddc21b8fdfc2a936790887 Mon Sep 17 00:00:00 2001 From: hatsy Date: Fri, 14 Apr 2017 15:22:01 -0700 Subject: [PATCH 5/9] Removed plotly requirement --- requirements.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 617d62b..4691b75 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,7 +6,6 @@ pyyaml tornado pyzmq pyjwt -plotly>=2.0.5 simplejson distributed>=1.14.3 selenium From 5258f1b12b158ef5c68d01b52f632d50c43bf37b Mon Sep 17 00:00:00 2001 From: hatsy Date: Wed, 19 Apr 2017 16:49:27 -0700 Subject: [PATCH 6/9] Changed bokehjs imports from cdn to node module direct import --- package.json | 2 +- public/index.html | 11 ----------- public/scripts/Plot.jsx | 2 ++ 3 files changed, 3 insertions(+), 12 deletions(-) diff --git a/package.json b/package.json index d3709b4..a457cc3 100644 --- a/package.json +++ b/package.json @@ -6,13 +6,13 @@ "test": "eslint -c .eslintrc --ext .jsx,.js public/scripts/ && make test" }, "dependencies": { + "bokehjs": "^0.12.5", "bootstrap": "^3.3.7", "bootstrap-css": "^3.0.0", "css-loader": "^0.26.2", "exports-loader": "^0.6.4", "imports-loader": "^0.7.1", "jquery": "^3.1.1", - "plotly.js": "^1.23.1", "react": "^15.1.0", "react-dom": "^15.1.0", "react-redux": "^5.0.3", diff --git a/public/index.html b/public/index.html index e85fe7f..7ccd2ad 100644 --- a/public/index.html +++ b/public/index.html @@ -5,17 +5,6 @@ Cesium - - - - - - - diff --git a/public/scripts/Plot.jsx b/public/scripts/Plot.jsx index 3890907..eca56f3 100644 --- a/public/scripts/Plot.jsx +++ b/public/scripts/Plot.jsx @@ -1,6 +1,8 @@ import React, { Component } from 'react'; import { connect } from 'react-redux'; import { showNotification } from './Notifications'; +import "../../node_modules/bokehjs/build/js/bokeh.js"; +import "../../node_modules/bokehjs/build/css/bokeh.css"; function bokeh_render_plot(node, docs_json, render_items) { // Create bokeh div element From bfa40cc65f69dc9d10268f822a23ac06b206ccb7 Mon Sep 17 00:00:00 2001 From: Brett Naul Date: Thu, 20 Apr 2017 10:27:42 -0700 Subject: [PATCH 7/9] Replace install_deps.py with pip install -r --- Makefile | 2 +- tools/install_deps.py | 37 ------------------------------------- 2 files changed, 1 insertion(+), 38 deletions(-) delete mode 100755 tools/install_deps.py diff --git a/Makefile b/Makefile index 9026da0..d27f093 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ webpack = ./node_modules/.bin/webpack dependencies: - @./tools/silent_monitor.py ./tools/install_deps.py requirements.txt + @./tools/silent_monitor.py pip install -r requirements.txt @./tools/silent_monitor.py ./tools/check_js_deps.sh db_init: diff --git a/tools/install_deps.py b/tools/install_deps.py deleted file mode 100755 index ba7b2aa..0000000 --- a/tools/install_deps.py +++ /dev/null @@ -1,37 +0,0 @@ -#!/usr/bin/env python - -import sys -import re -import subprocess - - -pkg_import = {'pyyaml': 'yaml', - 'pyzmq': 'zmq', - 'pyjwt': 'jwt'} - - -if len(sys.argv) != 2: - print("Usage: install_deps.py requirements.dev.txt") - sys.exit(1) - -req_file = sys.argv[1] - -with open(req_file) as f: - for dep in f: - dep = dep.strip() - if not dep: - continue - - if '-e' in dep: - dep = dep.split('#egg=')[-1] # use the egg name - else: - dep = re.split('[^\w\-]+', dep)[0] # discard version info - - try: - __import__(pkg_import.get(dep, dep)) - except ImportError: - print(("Development dependency '{}' unfulfilled. " - "Installing requirements.").format(dep)) - p = subprocess.call("pip install -r {}".format(req_file).split()) - sys.exit(p) -print('Dependencies from {} verified.'.format(req_file)) From 5c82d3f9811156cd00c2615bbc4da1afcaf14538 Mon Sep 17 00:00:00 2001 From: Brett Naul Date: Thu, 20 Apr 2017 10:51:32 -0700 Subject: [PATCH 8/9] Add typescript dependency for bokehjs --- package.json | 1 + 1 file changed, 1 insertion(+) diff --git a/package.json b/package.json index a457cc3..9c70c72 100644 --- a/package.json +++ b/package.json @@ -23,6 +23,7 @@ "redux-logger": "^2.8.1", "redux-thunk": "^2.2.0", "style-loader": "^0.13.2", + "typescript": "^2.2.2", "webpack": "^2.2.1", "webpack-dev-server": "^2.4.1", "whatwg-fetch": "^2.0.2" From 6a5f968e91e7c62d8a06a9ae675430e85dc01a5a Mon Sep 17 00:00:00 2001 From: Brett Naul Date: Fri, 21 Apr 2017 11:20:43 -0700 Subject: [PATCH 9/9] Fix prediction downloads for probabilistic classifiers --- cesium_app/handlers/prediction.py | 4 ++-- cesium_app/tests/frontend/test_predict.py | 21 +++++++++++++++++++++ 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/cesium_app/handlers/prediction.py b/cesium_app/handlers/prediction.py index aa20b07..5807c3f 100644 --- a/cesium_app/handlers/prediction.py +++ b/cesium_app/handlers/prediction.py @@ -141,8 +141,8 @@ def get(self, prediction_id=None, action=None): 'label': data['labels'], 'prediction': data['preds']}, columns=['ts_name', 'label', 'prediction']) - if data.get('pred_probs'): - result['probability'] = np.max(data['pred_probs'], axis=1) + if len(data.get('pred_probs', [])) > 0: + result['probability'] = data['pred_probs'].max(axis=1).values self.set_header("Content-Type", 'text/csv; charset="utf-8"') self.set_header("Content-Disposition", "attachment; " "filename=cesium_prediction_results.csv") diff --git a/cesium_app/tests/frontend/test_predict.py b/cesium_app/tests/frontend/test_predict.py index 05f3f3e..a2bb3b9 100644 --- a/cesium_app/tests/frontend/test_predict.py +++ b/cesium_app/tests/frontend/test_predict.py @@ -7,6 +7,7 @@ from os.path import join as pjoin import numpy as np import numpy.testing as npt +import pandas as pd from cesium_app.config import cfg import json import requests @@ -185,6 +186,26 @@ def test_download_prediction_csv_class(driver): os.remove('/tmp/cesium_prediction_results.csv') +def test_download_prediction_csv_class_prob(driver): + driver.get('/') + with create_test_project() as p, create_test_dataset(p) as ds,\ + create_test_featureset(p) as fs,\ + create_test_model(fs, model_type='RandomForestClassifier') as m,\ + create_test_prediction(ds, m): + _click_download(p.id, driver) + assert os.path.exists('/tmp/cesium_prediction_results.csv') + try: + result = pd.read_csv('/tmp/cesium_prediction_results.csv') + npt.assert_array_equal(result.ts_name, np.arange(5)) + npt.assert_array_equal(result.label, ['Mira', 'Classical_Cepheid', + 'Mira', 'Classical_Cepheid', + 'Mira']) + npt.assert_array_equal(result.label, result.prediction) + assert (result.probability >= 0.0).all() + finally: + os.remove('/tmp/cesium_prediction_results.csv') + + def test_download_prediction_csv_regr(driver): driver.get('/') with create_test_project() as p, create_test_dataset(p, label_type='regr') as ds,\