Skip to content

Commit

Permalink
implement computation of statistics and fix a few regressions/bugs
Browse files Browse the repository at this point in the history
  • Loading branch information
paulmueller committed Nov 22, 2019
1 parent 63849f6 commit 77e2260
Show file tree
Hide file tree
Showing 21 changed files with 426 additions and 155 deletions.
6 changes: 5 additions & 1 deletion CHANGELOG
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
2.0.0b1
2.0.0a25
- feat: implement data export
- feat: implement simple computation of basic statistics
- fix: pipeline modified plain dataset filter
- fix: allow to remove all datasets or filters
- fix: failed to load some datasets b/c of zero-valued meta data
- fix: toggle buttons in Block Matrix not functional
- enh: update font sizes
- docs: minor update
2.0.0a24
- implemented polygon filters
Expand Down
14 changes: 7 additions & 7 deletions docs/sec_interface.rst
Original file line number Diff line number Diff line change
Expand Up @@ -42,14 +42,14 @@ your data analysis from Shape-Out 1 to Shape-Out 2:
could be possible to convert sessions (including the corresponding
.tdms files), but the effort in doing so would probably exceed the
effort required to just rebuild a clean analysis session in Shape-Out 2.
- Shape-Out 2 currently does not provide a linear mixed effects models
(LMM) analysis. The reason behind that is quite pragmatic: LMM analysis
in Shape-Out 1 is done using
- Shape-Out 2 does not provide linear mixed-effects model
(LMM) analysis. LMM analysis in Shape-Out 1 is done using
`R/lme4 <https://cran.r-project.org/web/packages/lme4/>`_ and thus
requires a full R distribution shipped with Shape-Out 1. While this
blows up the download and installation size, it is also not clear
whether it would work just like that on macOS. However, if many users
need this feature, then we can think of a workaround.
requires a full R distribution shipped with Shape-Out 1. This
blows up the installation size and makes it more difficult to deploy.
Furthermore (and we are not saying that LMM Analysis is "bad") we are
also looking into other methods for determining statistical significance
which might be more intuitive to understand.


Basic usage
Expand Down
79 changes: 0 additions & 79 deletions docs/sec_qg_mixed_effects.rst

This file was deleted.

13 changes: 8 additions & 5 deletions shapeout2/gui/analysis/ana_meta.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,11 +109,14 @@ def format_config_key_value(section, key, value):
tip = ""
# Value formatting
if dctype == float: # pretty-print floats
# determine number of decimals
dec = int(np.ceil(np.log10(1/np.abs(value))))
if dec < 0:
dec = 0
string = ("{:." + "{}".format(dec + 2) + "f}").format(value)
if value == 0:
string = "0.0"
else:
# determine number of decimals
dec = int(np.ceil(np.log10(1/np.abs(value))))
if dec < 0:
dec = 0
string = ("{:." + "{}".format(dec + 2) + "f}").format(value)
else:
string = str(value)

Expand Down
1 change: 1 addition & 0 deletions shapeout2/gui/compute/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .comp_stats import ComputeStatistics # noqa: F401
176 changes: 176 additions & 0 deletions shapeout2/gui/compute/comp_stats.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
import codecs
import numbers
import pathlib
import pkg_resources
import time

import dclab
from PyQt5 import uic, QtWidgets

from ...pipeline import Pipeline
from ..._version import version

# Alphabetically sorted names of the statistical methods offered in the
# dialog. "%-gated" is excluded, because it does not make sense in
# combination with a Pipeline.
STAT_METHODS = list(dclab.statistics.Statistics.available_methods.keys())
STAT_METHODS.remove("%-gated")
STAT_METHODS.sort()


class ComputeStatistics(QtWidgets.QDialog):
    """Dialog for computing basic statistics and exporting them to .tsv

    The statistics are either computed for the datasets of the current
    pipeline (combo box index 0) or for all .rtdc files found recursively
    in a user-selected directory (combo box index 1).
    """

    def __init__(self, parent, pipeline, *args, **kwargs):
        """Set up the user interface and populate the list widgets

        Parameters
        ----------
        parent: QWidget
            Parent widget of the dialog
        pipeline: Pipeline
            Analysis pipeline whose datasets and features are used
            when the statistics are computed "from pipeline"
        """
        super(ComputeStatistics, self).__init__(parent, *args, **kwargs)
        path_ui = pkg_resources.resource_filename(
            "shapeout2.gui.compute", "comp_stats.ui")
        uic.loadUi(path_ui, self)
        # for external statistics
        self.path = None
        # set pipeline
        self.pipeline = pipeline
        # Always have a defined feature list, even before the first
        # call to `update_feature_list`.
        self.features = []
        # Signals
        self.pushButton_path.clicked.connect(self.on_browse)
        self.comboBox.currentIndexChanged.connect(self.on_combobox)
        # Populate statistics methods (all checked by default;
        # 2 corresponds to Qt.Checked)
        self.listWidget_stats.clear()
        for meth in STAT_METHODS:
            wid = QtWidgets.QListWidgetItem(meth)
            wid.setCheckState(2)
            self.listWidget_stats.addItem(wid)
        # initialize rest
        if len(self.pipeline.slots) == 0:
            # nothing in the pipeline -> compute statistics from a path
            self.comboBox.setCurrentIndex(1)
        else:
            self.comboBox.setCurrentIndex(0)
        self.on_combobox()  # computes self.features

    def done(self, r):
        """Close the dialog; export the statistics first if accepted

        If the dialog was accepted (`r` is truthy) and the export did
        not succeed (e.g. the user aborted the file dialog), then the
        dialog stays open.
        """
        if r:
            success = self.export_statistics()
        else:
            success = True
        if success:
            super(ComputeStatistics, self).done(r)

    def export_statistics(self):
        """Export statistics to .tsv

        Returns
        -------
        success: bool
            True if a file was written; False if there was nothing
            to export or if the user aborted the file dialog.
        """
        # get features (2 corresponds to Qt.Checked)
        features = []
        for ii in range(self.listWidget_features.count()):
            if self.listWidget_features.item(ii).checkState() == 2:
                features.append(self.features[ii])
        # get methods
        methods = []
        for ii in range(self.listWidget_stats.count()):
            if self.listWidget_stats.item(ii).checkState() == 2:
                methods.append(STAT_METHODS[ii])

        use_pipeline = self.comboBox.currentIndex() == 0
        if not use_pipeline and not self.path:
            # `pathlib.Path(None)` would raise a TypeError
            QtWidgets.QMessageBox.warning(
                self, "No directory selected",
                "Please select a directory containing .rtdc files.")
            return False

        prog = QtWidgets.QProgressDialog("Computing statistics...", "Abort", 1,
                                         1, self)
        prog.setMinimumDuration(0)
        time.sleep(0.01)
        prog.setValue(0)
        # compute statistics
        header_cols = None  # column names; stays None if there is no data
        values = []
        if use_pipeline:
            # from pipeline
            datasets = self.pipeline.get_datasets()
            prog.setMaximum(len(datasets))
            for ii, ds in enumerate(datasets):
                h, v = dclab.statistics.get_statistics(ds,
                                                       methods=methods,
                                                       features=features)
                header_cols = ["Path", "Slot", "Name"] + h
                values.append(["{}".format(ds.path), ii, ds.title] + v)
                if prog.wasCanceled():
                    # export what has been computed so far
                    break
                prog.setValue(ii + 1)
                QtWidgets.QApplication.processEvents()
        else:
            # from path
            path = pathlib.Path(self.path)
            files = sorted(path.rglob("*.rtdc"))
            prog.setMaximum(len(files))
            for ii, pp in enumerate(files):
                ds = dclab.new_dataset(pp)
                h, v = dclab.statistics.get_statistics(ds,
                                                       methods=methods,
                                                       features=features)
                header_cols = ["Path", "Name"] + h
                values.append(["{}".format(ds.path), ds.title] + v)
                if prog.wasCanceled():
                    # same behavior as for the pipeline
                    break
                prog.setValue(ii + 1)
                QtWidgets.QApplication.processEvents()

        if header_cols is None:
            # No dataset was processed (empty pipeline or no .rtdc files
            # in the directory); there is no table header to write.
            prog.reset()
            QtWidgets.QMessageBox.warning(
                self, "Nothing to export",
                "No datasets were found for computing statistics.")
            return False

        path, _ = QtWidgets.QFileDialog.getSaveFileName(
            self, 'Save statistics', '', 'tab-separated values (*.tsv)')
        if not path:
            # Abort export
            return False
        elif not path.endswith(".tsv"):
            path += ".tsv"

        # Header
        header = ["Statistics Output",
                  "Shape-Out {}".format(version),
                  "",
                  "\t".join(header_cols),
                  ]
        # Data
        data = []
        for v in values:
            line = []
            for vi in v:
                if (isinstance(vi, numbers.Real)
                        and not isinstance(vi, numbers.Integral)):
                    # floating point values in scientific notation
                    line.append("{:.5e}".format(vi))
                else:
                    line.append("{}".format(vi))
            data.append("\t".join(line))
        # "utf-8-sig" writes the UTF-8 BOM at the beginning of the file;
        # newline="" makes sure the explicit "\r\n" is written verbatim.
        with open(path, "w", encoding="utf-8-sig", newline="") as fd:
            for line in header:
                fd.write("# " + line + "\r\n")
            for line in data:
                fd.write(line + "\r\n")
        return True  # True means success

    def on_browse(self):
        """Let the user choose the directory that is searched for data

        If the user aborts the directory dialog, the path is reset and
        the combo box is switched back to the pipeline (index 0).
        """
        out = QtWidgets.QFileDialog.getExistingDirectory(self,
                                                         'Export directory')
        if out:
            self.path = out
            self.lineEdit_path.setText(self.path)
            self.comboBox.setCurrentIndex(1)
        else:
            self.path = None
            self.comboBox.setCurrentIndex(0)

    def on_combobox(self):
        """Show/hide the path widget and update the list of features"""
        if self.comboBox.currentIndex() == 1:
            self.widget_path.show()
            if self.path is None:
                self.on_browse()
            if self.path:
                self.update_feature_list(use_pipeline=False)
            # else, on_combobox is triggered again
        else:
            self.widget_path.hide()
            self.update_feature_list(use_pipeline=True)

    def update_feature_list(self, use_pipeline=True):
        """Update `self.features` and the feature list widget

        Parameters
        ----------
        use_pipeline: bool
            If True, use the scalar features of the current pipeline;
            otherwise, use the scalar features returned by an empty
            pipeline.
        """
        if use_pipeline:
            self.features = self.pipeline.get_features(scalar=True,
                                                       union=True,
                                                       label_sort=True)
        else:
            # This is just a cheap way of getting a label-sorted list
            # of all scalar features.
            empty_pipeline = Pipeline()
            self.features = empty_pipeline.get_features(scalar=True,
                                                        label_sort=True)

        self.listWidget_features.clear()
        for feat in self.features:
            wid = QtWidgets.QListWidgetItem(dclab.dfn.feature_name2label[feat])
            # features are unchecked by default (0 is Qt.Unchecked)
            wid.setCheckState(0)
            self.listWidget_features.addItem(wid)
Loading

0 comments on commit 77e2260

Please sign in to comment.