@@ -14,7 +14,7 @@ default_obsdim(A::AbstractArray) = ndims(A)
14
14
default_obsdim (tup:: Tuple ) = map (default_obsdim, tup)
15
15
16
16
"""
17
- getobs(data, idx; obsdim = default_obsdim(data))
17
+ getobs(data, idx, obsdim = default_obsdim(data))
18
18
19
19
Return the observations corresponding to the observation-index `idx`.
20
20
Note that `idx` can be of type `Int` or `AbstractVector`.
@@ -24,7 +24,7 @@ The returned observation(s) should be in the form intended to
24
24
be passed as-is to some learning algorithm. There is no strict
25
25
interface requirement on what this "actual data" must look like.
26
26
Every author behind some custom data container can make this
27
- decision himself/herself . We do, however, expect it to be consistent
27
+ decision themselves. We do, however, expect it to be consistent
28
28
for `idx` being an integer, as well as `idx` being an abstract
29
29
vector, respectively.
30
30
@@ -33,10 +33,11 @@ to indicate which dimension of `data` denotes the observations.
33
33
See [`default_obsdim`](@ref) for defining a default dimension.
34
34
"""
35
35
function getobs end
36
- getobs (data, idx; obsdim) = getobs (data, idx)
36
+ getobs (data, idx) = data[idx]
37
+ getobs (data, idx, obsdim) = getobs (data, idx)
37
38
38
39
"""
39
- getobs!(buffer, data, idx; obsdim = default_obsdim(obsdim))
40
+ getobs!(buffer, data, idx, obsdim = default_obsdim(data))
40
41
41
42
In-place version of `getobs(data, idx, obsdim)`. If this method
42
43
is defined for the type of `data`, then `buffer` should be used
@@ -54,7 +55,7 @@ to indicate which dimension of `data` denotes the observations.
54
55
See [`default_obsdim`](@ref) for defining a default dimension.
55
56
"""
56
57
function getobs! end
57
- getobs! (buffer, data, idx; obsdim = default_obsdim (data)) = getobs (data, idx; obsdim = obsdim)
58
+ getobs! (buffer, data, idx, obsdim = default_obsdim (data)) = getobs (data, idx, obsdim)
58
59
59
60
# --------------------------------------------------------------------
60
61
@@ -94,95 +95,14 @@ function targets end
94
95
95
96
# --------------------------------------------------------------------
96
97
97
- """
98
- abstract DataView{TElem, TData} <: AbstractVector{TElem}
99
-
100
- Baseclass for all vector-like views of some data structure.
101
- This allow for example to see some design matrix as a vector of
102
- individual observation-vectors instead of one matrix.
103
-
104
- see `MLDataPattern.ObsView` and `MLDataPattern.BatchView` for examples.
105
- """
106
- abstract type DataView{TElem, TData} <: AbstractVector{TElem} end
107
-
108
- """
109
- abstract AbstractObsView{TElem, TData} <: DataView{TElem, TData}
110
-
111
- Baseclass for all vector-like views of some data structure,
112
- that views it as some form or vector of observations.
113
-
114
- see `MLDataPattern.ObsView` for a concrete example.
115
- """
116
- abstract type AbstractObsView{TElem, TData} <: DataView{TElem, TData} end
117
-
118
- """
119
- abstract AbstractBatchView{TElem, TData} <: DataView{TElem, TData}
120
-
121
- Baseclass for all vector-like views of some data structure,
122
- that views it as some form or vector of equally sized batches.
123
-
124
- see `MLDataPattern.BatchView` for a concrete example.
125
- """
126
- abstract type AbstractBatchView{TElem, TData} <: DataView{TElem, TData} end
127
-
128
- # --------------------------------------------------------------------
129
-
130
- """
131
- abstract DataIterator{TElem,TData}
132
-
133
- Baseclass for all types that iterate over a `data` source
134
- in some manner. The total number of observations may or may
135
- not be known or defined and in general there is no contract that
136
- `getobs` or `nobs` has to be supported by the type of `data`.
137
- Furthermore, `length` should be used to query how many elements
138
- the iterator can provide, while `nobs` may return the underlying
139
- true amount of observations available (if known).
140
-
141
- see `MLDataPattern.RandomObs`, `MLDataPattern.RandomBatches`
142
- """
143
- abstract type DataIterator{TElem,TData} end
144
-
145
- """
146
- abstract ObsIterator{TElem,TData} <: DataIterator{TElem,TData}
147
-
148
- Baseclass for all types that iterate over some data source
149
- one observation at a time.
150
-
151
- ```julia
152
- using MLDataPattern
153
- @assert typeof(RandomObs(X)) <: ObsIterator
154
-
155
- for x in RandomObs(X)
156
- # ...
157
- end
158
- ```
98
+ abstract type AbstractDataContainer end
159
99
160
- see `MLDataPattern.RandomObs`
161
- """
162
- abstract type ObsIterator{TElem,TData} <: DataIterator{TElem,TData} end
163
-
164
- """
165
- abstract BatchIterator{TElem,TData} <: DataIterator{TElem,TData}
166
-
167
- Baseclass for all types that iterate over of some data source one
168
- batch at a time.
169
-
170
- ```julia
171
- @assert typeof(RandomBatches(X, size=10)) <: BatchIterator
172
-
173
- for x in RandomBatches(X, size=10)
174
- @assert nobs(x) == 10
175
- # ...
176
- end
177
- ```
178
-
179
- see `MLDataPattern.RandomBatches`
180
- """
181
- abstract type BatchIterator{TElem,TData} <: DataIterator{TElem,TData} end
100
+ Base. getindex (x:: AbstractDataContainer , i) = getobs (x, i, default_obsdim (x))
101
+ # Iteration protocol for data containers: yield observation number `state`
+ # together with the next state, and return `nothing` once `state` exceeds the
+ # number of observations so that `for obs in x` terminates instead of
+ # requesting observations past the end.
+ # NOTE(review): assumes `nobs(x)` is defined for the container type — confirm.
+ Base.iterate(x::AbstractDataContainer, state = 1) =
+     state > nobs(x) ? nothing : (getobs(x, state, default_obsdim(x)), state + 1)
182
102
183
103
# --------------------------------------------------------------------
184
104
185
- # just for dispatch for those who care to
186
- const AbstractDataIterator{E,T} = Union{DataIterator{E,T}, DataView{E,T}}
187
- const AbstractObsIterator{E,T} = Union{ObsIterator{E,T}, AbstractObsView{E,T}}
188
- const AbstractBatchIterator{E,T} = Union{BatchIterator{E,T},AbstractBatchView{E,T}}
105
+ # Might need this distinction later
106
+ # e.g. shuffleobs can be anywhere in pipeline but
107
+ # eachbatch is usually at the end
108
+ abstract type AbstractDataIterator <: AbstractDataContainer end
0 commit comments