-
-
Notifications
You must be signed in to change notification settings - Fork 27
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
WIP: n-dimensional support #20
Changes from all commits
759fa3d
013e9f4
24ef479
c6a8670
2fcf0d3
36dd05c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,7 +5,17 @@ | |
|
||
import re | ||
|
||
__all__ = ['chunkSlice', 'dsetChunk', 'dsetContentDict', 'dsetDict', 'groupDict', 'uriJoin', 'uriName'] | ||
from tornado.web import HTTPError | ||
|
||
__all__ = [ | ||
'chunkSlice', | ||
'dsetChunk', | ||
'dsetContentDict', | ||
'dsetDict', | ||
'groupDict', | ||
'uriJoin', | ||
'uriName' | ||
] | ||
|
||
## chunk handling | ||
def chunkSlice(chunk, s): | ||
|
@@ -14,12 +24,12 @@ def chunkSlice(chunk, s): | |
|
||
return slice(s.start*chunk, s.stop*chunk, s.step) | ||
|
||
def dsetChunk(dset, row, col): | ||
return dset[slice(*row), slice(*col)].tolist() | ||
|
||
def dsetChunk(dset, select): | ||
slices = _getHyperslabSlices(dset.shape, select) | ||
return dset[slices].tolist() | ||
|
||
## create dicts to be converted to json | ||
def dsetContentDict(dset, row=None, col=None): | ||
def dsetContentDict(dset, select=None): | ||
return dict([ | ||
# metadata | ||
('attrs', dict(*dset.attrs.items())), | ||
|
@@ -28,7 +38,7 @@ def dsetContentDict(dset, row=None, col=None): | |
('shape', dset.shape), | ||
|
||
# actual data | ||
('data', dsetChunk(dset, row, col) if row and col else None) | ||
('data', dsetChunk(dset, select) if select else None) | ||
]) | ||
|
||
def dsetDict(name, uri, content=None): | ||
|
@@ -55,3 +65,88 @@ def uriJoin(*parts): | |
|
||
def uriName(uri): | ||
return uri.split('/')[-1] | ||
|
||
|
||
def _getHyperslabSlices(dsetshape, select): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. One of my big design goals in how I put the frontend, API, and backend together was to avoid the need for any complex parsing function. They're never worth the trouble, there's almost always an edge case or three that you miss, and maintenance is a pain. Instead, what I'd really like to do is make sure that the frontend is structured such that the slices it asks for don't require anything other than some simple splitting, if even that. I was even thinking that it might be good to split up the slice input box into n segments given a nD dataset (or it might be really annoying; I need to try that one out). The job of the API and backend is then to not lose the information that gets handed to them (by, eg concating all the info on all the slices into a single string). If you do ever really want a parsing function like this, I'd strongly recommend against rolling your own. Instead, you should search for prior art, or even better, come up with something that works off of There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Agreed. I think sending a list of slices in a post body as you've proposed is a cleaner approach. The idea for the |
||
""" | ||
Parse SELECT query param and return tuple of Python slice objects | ||
|
||
:param dsetshape: The shape of the dataset as returned by dset.shape | ||
:param select: The SELECT query param should be in the form: | ||
[<dim1>, <dim2>, ... , <dimn>] | ||
For each dimension, valid formats are: | ||
single integer: n | ||
start and end: n:m | ||
start, end, and stride: n:m:s | ||
:type select: str | ||
|
||
:returns: tuple of Python slices based on the SELECT query param | ||
""" | ||
|
||
if select is None: | ||
# Default: return entire dataset | ||
return tuple(slice(0, extent) for extent in dsetshape) | ||
|
||
if not select.startswith('['): | ||
msg = "Bad Request: selection query missing start bracket" | ||
raise HTTPError(400, reason=msg) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's good practice/encapsulation for a function like this to raise semantic errors (in this case, probably a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes that makes sense, good call! |
||
if not select.endswith(']'): | ||
msg = "Bad Request: selection query missing end bracket" | ||
raise HTTPError(400, reason=msg) | ||
|
||
# strip brackets | ||
select = select[1:-1] | ||
|
||
select_array = select.split(',') | ||
if len(select_array) > len(dsetshape): | ||
msg = "Bad Request: number of selected dimensions exceeds the rank of the dataset" | ||
raise HTTPError(400, reason=msg) | ||
|
||
slices = [] | ||
for dim, dim_slice in enumerate(select_array): | ||
extent = dsetshape[dim] | ||
|
||
# default slice values | ||
start = 0 | ||
stop = extent | ||
step = 1 | ||
if dim_slice.find(':') < 0: | ||
# just a number - append to SLICES, and continue to next iteration | ||
try: | ||
slices.append(int(dim_slice)) | ||
continue | ||
except ValueError: | ||
msg = "Bad Request: invalid selection parameter (can't convert to int) for dimension: " + str(dim) | ||
raise HTTPError(400, reason=msg) | ||
stop = start + 1 | ||
elif dim_slice == ':': | ||
# select everything (default) | ||
pass | ||
else: | ||
fields = dim_slice.split(":") | ||
if len(fields) > 3: | ||
msg = "Bad Request: Too many ':' seperators for dimension: " + str(dim) | ||
raise HTTPError(400, reason=msg) | ||
try: | ||
if fields[0]: | ||
start = int(fields[0]) | ||
if fields[1]: | ||
stop = int(fields[1]) | ||
if len(fields) > 2 and fields[2]: | ||
step = int(fields[2]) | ||
except ValueError: | ||
msg = "Bad Request: invalid selection parameter (can't convert to int) for dimension: " + str(dim) | ||
raise HTTPError(400, reason=msg) | ||
|
||
if start < 0 or start > extent: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Here's a good example of why you generally shouldn't write your own parsing function. If we're claiming to support Numpy-like syntax, then both negative indices and indices out of bounds should be treated as valid in slices. |
||
msg = "Bad Request: Invalid selection start parameter for dimension: " + str(dim) | ||
raise HTTPError(400, reason=msg) | ||
if stop > extent: | ||
msg = "Bad Request: Invalid selection stop parameter for dimension: " + str(dim) | ||
raise HTTPError(400, reason=msg) | ||
if step <= 0: | ||
msg = "Bad Request: invalid selection step parameter for dimension: " + str(dim) | ||
raise HTTPError(400, reason=msg) | ||
slices.append(slice(start, stop, step)) | ||
|
||
return tuple(slices) |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -40,7 +40,8 @@ | |
], | ||
install_requires=[ | ||
'h5py', | ||
'notebook' | ||
'notebook', | ||
'simplejson' | ||
] | ||
) | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -73,12 +73,12 @@ export function hdfDataRequest( | |
parameters: IContentsParameters, | ||
settings: ServerConnection.ISettings | ||
): Promise<number[][]> { | ||
// require the uri, row, and col query parameters | ||
const { fpath, uri, row, col } = parameters; | ||
// require the uri query parameter, select is optional | ||
const { fpath, uri, ...select } = parameters; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Oops. I'm familiar with spread syntax -- not sure why I have it there! |
||
|
||
const fullUrl = | ||
URLExt.join(settings.baseUrl, "hdf", "data", fpath).split("?")[0] + | ||
URLExt.objectToQueryString({ uri, row, col }); | ||
URLExt.objectToQueryString({ uri, ...select }); | ||
|
||
return ServerConnection.makeRequest(fullUrl, {}, settings).then(response => { | ||
if (response.status !== 200) { | ||
|
@@ -105,14 +105,13 @@ export interface IContentsParameters { | |
uri: string; | ||
|
||
/** | ||
* Row slice. Up to 3 integers, same syntax as for Python `slice` built-in. | ||
* String representing an array of slices which determine a | ||
* hyperslab selection of an n-dimensional HDF5 dataset. | ||
* Syntax and semantics matches that of h5py's Dataset indexing. | ||
* E.g, ?select=[0:3, 1:5, :, 9] | ||
* See jupyterlab_hdf/util.py for full details. | ||
*/ | ||
row?: number[]; | ||
|
||
/** | ||
* Column slice. Up to 3 integers, same syntax as for Python `slice` built-in. | ||
*/ | ||
col?: number[]; | ||
select?: string; | ||
} | ||
|
||
/** | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The
NaN
handling issue was a good catch.Ultimately I want to try the plan that @kgryte sketched out in #22, but that can be a future PR
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I trust that the reviver approach is probably the best option for handling this on the front end, but is there a reason to avoid handling it (or an option to) on the server?