#!/usr/bin/env python
import sys
from copy import copy, deepcopy
import datetime
import databank_util as util
#--------------------------------------------------------------------
# Method to get the primary name for a particular metadata
# item when we do NOT already have an instantiated metadata
# object.
# The caller must specify the type of metadata and a string
# that they think will be a match. This routine will try to
# create a metadata object for that type of metadata and then
# map the string that was passed in to the set of valid
# names. If that is all successful, the primary name for that
# kind of metadata is returned.
# For example:
# (meta='kind', name='runoff') -> 'run'
# (meta='units', name='centimeters') -> 'cm'
# (meta='location', name='ogoki') -> 'og'
# (meta='kind', name='length') -> 'na' (no match)
# (meta='kind', name='meters') -> 'na' (no match)
# (meta='interval', name='sup') -> 'na' (no match)
#--------------------------------------------------------------------
def getPrimaryName(meta=None, name=None):
if not meta:
raise Exception('Missing "meta=" for getPrimaryName()')
if not name:
raise Exception('Missing "name=" for getPrimaryName()')
if not isinstance(meta, str):
raise Exception('Invalid "meta=" for getPrimaryName()')
if not isinstance(name, str):
raise Exception('Invalid "name=" for getPrimaryName()')
obj = None
try:
        if meta.lower() == 'kind':
            obj = DataKind(name)
        elif meta.lower() == 'units':
            obj = DataUnits(name)
        elif meta.lower() == 'interval':
            obj = DataInterval(name)
        elif meta.lower() == 'location':
            obj = DataLocation(name)
if obj:
return obj.primaryName()
else:
return 'na'
except:
return 'na'
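#--------------------------------------------------------------------
# Illustrative usage sketch (comments only, nothing executed on import).
# The expected results are taken from the metadata tuples defined below:
#
#   >>> getPrimaryName(meta='kind', name='runoff')
#   'run'
#   >>> getPrimaryName(meta='units', name='centimeters')
#   'cm'
#   >>> getPrimaryName(meta='interval', name='quarter-monthly')
#   'qm'
#   >>> getPrimaryName(meta='kind', name='bogus')
#   'na'
#--------------------------------------------------------------------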
#-----------------------------------------------------------------------------
# Define the classes used to store metadata "things" like kind, units, etc.
#--------------------------------------------------------------------------
# BaseMeta is the base class and all of the actual functionality is
# provided by methods of BaseMeta, but the subclasses provide the
# specific character strings for the metadata items. Two tuples are
# defined for each kind of metadata. The _inputStrings tuple provides
# various lookup strings that can be searched for a match in order to
# identify specifically which item is being referenced. The _outputStrings
# tuple is intended for use when writing output to a file.
#
# When editing these two tuples, remember that they are a matched set.
# The rows must correspond; i.e., whatever quantity is referenced in row 3
# of _inputStrings must also be the quantity referenced by row 3 of
# _outputStrings.
#
# For the _inputStrings tuple, the very first entry should be a short
# (2 or 3 characters) reference name. This is the name that will be used
# when building dictionary lookup keys. It will also be used as the
# assigned metadata value for DataSeries objects.
# The rest of the tuple is really just freeform. There is no prescribed
# minimum or maximum number of entries. I suggest that the set of input
# strings be fairly large and complete in order to accommodate all of the
# reasonable possibilities. Keep in mind that all comparisons will be
# done in lowercase. So please keep all input strings 100% lowercase.
#
# Conversely, the _outputStrings tuple should be kept minimal. For now
# I am defining only a short and long entry. The entries should be
# specified with the desired case, because they will be used exactly
# as specified. Make sure that an equivalent for each output string is
# included in the input string tuple so that if/when an output file is
# later used as an input file, the name read from the file will be a
# match when comparing input strings.
#---------------------------------------------------------------------------
#
# BaseMeta should never be used directly. Please always use one of the
# derived subclasses.
#-----------------------------------------------------------------------------
class BaseMeta():
    def __init__(self, initval):
        if initval and isinstance(initval, str):
            self.myValue = self.intValueFromString(initval)
        else:
            self.myValue = 0
#---------------------------------------------------
def className(self):
return self.__class__.__name__
#---------------------------------------------------
# Method to get the primary name for a particular entry
# in a subclass when we have an instantiated object.
# The primary name is the first entry in that
# value's _inputStrings tuple.
# If unable to find any match, returns 'na'.
#
def primaryName(self):
try:
return self.inputName(0)
except:
return 'na'
#---------------------------------------------------
# For inputName() and outputName(), first collapse the index
# into the valid range, then access the tuple entry.
#
def inputName(self, column=0):
try:
the_tuple = type(self)._inputStrings[self.myValue]
maxcol = len(the_tuple) - 1
c = max(0, min(maxcol, column))
return the_tuple[c]
except:
return 'n/a'
#---------------------------------------------------
def outputName(self, column):
try:
the_tuple = type(self)._outputStrings[self.myValue]
maxcol = len(the_tuple) - 1
c = max(0, min(maxcol, column))
return the_tuple[c]
except:
return 'n/a'
#---------------------------------------------------
def outputNameShort(self):
try:
the_tuple = type(self)._outputStrings[self.myValue]
return the_tuple[0]
except:
return 'n/a'
#---------------------------------------------------
def outputNameLong(self):
try:
the_tuple = type(self)._outputStrings[self.myValue]
maxcol = len(the_tuple) - 1
return the_tuple[maxcol]
except:
return 'n/a'
#---------------------------------------------------
# Method to convert a string into the index for that
# metadata value by finding the matching entry in _inputStrings.
# If unable to find any match, returns 0 (undefined).
#
    def intValueFromString(self, string):
        s = string.lower().strip()
        for i in range(len(self._inputStrings)):
            try:
                for t in self._inputStrings[i]:
                    if t.lower().strip() == s:
                        return i
            except:
                pass
        return 0
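#---------------------------------------------------
# Illustrative lookup sketch using the DataKind subclass defined
# below (expected values taken from its tuples; nothing executed):
#
#   >>> k = DataKind('precip')     # matches row 1 of _inputStrings
#   >>> k.primaryName()            # first entry of that row
#   'prc'
#   >>> k.outputNameShort()
#   'Precip'
#   >>> k.outputNameLong()
#   'Precipitation'
#---------------------------------------------------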
#--------------------------------------------------------------------------------
class DataKind(BaseMeta):
    """The DataKind class contains the defined data kinds (a.k.a. types)
    and their associated string literals.
    """
    _inputStrings = (
('na', 'n/a', 'undef', 'undefined'),
('prc', 'prec', 'precip', 'precipitation'),
('run', 'runf', 'runoff'),
('evp', 'evap', 'evaporation'),
('nbs', 'net_basin_supply', 'net basin supply'),
('mlv', 'meanlev', 'mean_level', 'mean level', 'mean water level'),
('blv', 'bomlev', 'beginning_of_month_level',
'beginning of month level', 'beginning-of-month water level'),
('elv', 'eomlev', 'end_of_month_level', 'end of month level',
'end-of-month water level'),
('flw', 'flow', 'channel flow'),
('con', 'cons', 'consumptive_use', 'consuse', 'consumptive use'),
('icw', 'iceweed', 'ice_weed_retardation', 'ice & weed retardation')
)
_outputStrings = (
('undef', 'undefined'),
('Precip', 'Precipitation'),
('Runoff', 'Runoff'),
('Evap', 'Evaporation'),
('NBS', 'Net Basin Supply'),
('MeanLev', 'Mean Water Level'),
('BOMLev', 'Beginning-Of-Month Water Level'),
('EOMLev', 'End-Of-Month Water Level'),
('Flow', 'Channel Flow'),
('ConsUse', 'Consumptive Use'),
('IceWeed', 'Ice & Weed Retardation')
)
#--------------------------------------------------------------------------------
class DataUnits(BaseMeta):
    """The DataUnits class contains the defined data units and their
    associated string literals.
    """
    _inputStrings = (
('na', 'n/a', 'undef', 'undefined'),
('mm', 'millimeter', 'millimeters'),
('cm', 'centimeter', 'centimeters'),
('m', 'meter', 'meters'),
('km', 'kilometer', 'kilometers'),
('in', 'inch', 'inches'),
('ft', 'foot', 'feet'),
('yd', 'yard', 'yards'),
('mi', 'mile', 'miles'),
('mm2', 'sqmm', 'sq_mm', 'square_mm',
'square_millimeters', 'square millimeters'),
('cm2', 'sqcm', 'sq_cm', 'square_cm',
'square_centimeters', 'square centimeters'),
('m2', 'sqm', 'sq_m', 'square_m',
'square_meters', 'square meters'),
('km2', 'sqkm', 'sq_km', 'square_km',
'square_kilometers', 'square kilometers'),
('in2', 'sqin', 'sq_in', 'square_in',
'square_inches', 'square inches'),
('ft2', 'sqft', 'sq_ft', 'square_ft',
'square_feet', 'square feet'),
('yd2', 'sqyd', 'sq_yd', 'square_yd',
'square_yards', 'square yards'),
('mi2', 'sqmi', 'sq_mi', 'square_mi',
'square_miles', 'square miles'),
('mm3', 'cu_mm', 'cubic_mm',
'cubic_millimeters', 'cubic millimeters'),
('cm3', 'cu_cm', 'cubic_cm',
'cubic_centimeters', 'cubic centimeters'),
('m3', 'cu_m', 'cubic_m',
'cubic_meters', 'cubic meters'),
('km3', 'cu_km', 'cubic_km',
'cubic_kilometers', 'cubic kilometers'),
('in3', 'cu_in', 'cubic_in',
'cubic_inches', 'cubic inches'),
('ft3', 'cu_ft', 'cubic_ft',
'cubic_feet', 'cubic feet'),
('yd3', 'cu_yd', 'cubic_yd',
'cubic_yards', 'cubic yards'),
('mi3', 'cu_mi', 'cubic_mi',
'cubic_miles', 'cubic miles'),
('cms', 'm3s', 'cu_ms', 'cubic_ms',
'cubic_meters_per_second', 'cubic meters per second'),
('10cms', '10m3s', '10m3/s', '10*cms', 'tens_cms',
'tens_of_cubic_meters_per_second',
'ten cubic meters per second'),
('cfs', 'ft3s', 'cu_fts', 'cubic_fts',
'cubic_feet_per_second', 'cubic feet per second'),
('tcfs', 'thousand_cfs',
'thousand_cubic_feet_per_second',
'thousand cubic feet per second'),
)
_outputStrings = (
('undef', 'undefined'),
('mm', 'Millimeters'),
('cm', 'Centimeters'),
('m', 'Meters'),
('km', 'Kilometers'),
('in', 'Inches'),
('ft', 'Feet'),
('yd', 'Yards'),
('mi', 'Miles'),
('mm2', 'Square Millimeters'),
('cm2', 'Square Centimeters'),
('m2', 'Square Meters'),
('km2', 'Square Kilometers'),
('in2', 'Square Inches'),
('ft2', 'Square Feet'),
('yd2', 'Square Yards'),
('mi2', 'Square Miles'),
('mm3', 'Cubic Millimeters'),
('cm3', 'Cubic Centimeters'),
('m3', 'Cubic Meters'),
('km3', 'Cubic Kilometers'),
('in3', 'Cubic Inches'),
('ft3', 'Cubic Feet'),
('yd3', 'Cubic Yards'),
('mi3', 'Cubic Miles'),
('cms', 'Cubic Meters per Second'),
('10cms', 'Ten Cubic Meters per Second'),
('cfs', 'Cubic Feet per Second'),
('tcfs', 'Thousand Cubic Feet per Second'),
)
#--------------------------------------------------------------------------------
class DataInterval(BaseMeta):
    """The DataInterval class contains the defined time intervals and
    their associated string literals.
    """
    _inputStrings = (
('na', 'n/a', 'undef', 'undefined'),
('dy', 'day', 'dly', 'daily'),
('wk', 'week', 'wkly', 'weekly'),
('qm', 'qmon', 'qtrm', 'qtrmon', 'qtrmonth', 'qtrmonthly',
'qtr-month', 'qtr-monthly', 'quartermon', 'quartermonth',
'quartermonthly', 'quarter-month', 'quarter-monthly'),
('mn', 'mon', 'month', 'monthly'),
('yr', 'year', 'yearly', 'annual')
)
_outputStrings = (
('undef', 'undefined'),
('dy', 'Daily'),
('wk', 'Weekly'),
('qmon', 'Qtr-monthly'),
('mn', 'Monthly'),
('yr', 'Annual')
)
#--------------------------------------------------------------------------------
class DataLocation(BaseMeta):
    """The DataLocation class contains the defined locations and their
    associated string literals.
    """
    _inputStrings = (
('na', 'n/a', 'undef', 'undefined'),
('su', 'sup', 'lksup', 'lakesup', 'superior', 'lake superior'),
('mi', 'mic', 'lkmic', 'lakemic', 'michigan', 'lake michigan'),
('hu', 'hur', 'lkhur', 'lakehur', 'huron', 'lake huron'),
('sc', 'stc', 'lkstc', 'lakestc', 'stclair', 'lake stclair'),
('er', 'eri', 'lkeri', 'lakeeri', 'erie', 'lake erie'),
('on', 'ont', 'lkont', 'lakeont', 'ontario', 'lake ontario'),
('mh', 'mih', 'mhu', 'mhn', 'lakemhu', 'lakemhn', 'michhur',
'michuron', 'mich-huron', 'lake mich-huron',
'michigan-huron', 'lake michigan-huron'),
('og', 'ogoki', 'odiv', 'ogokidiv', 'ogoki diversion'),
('ll', 'longlac', 'long lac', 'longlacdiv',
'longlac diversion', 'long lac diversion'),
('oll', 'olldiv', 'ogoki-longlac', 'ogoki-longlac diversion'),
('chi', 'chidiv', 'chicago', 'chicago diversion'),
('wel', 'weldiv', 'welland', 'welland diversion'),
('smr', 'stmriv', 'stmarys', 'stmarys river', 'st. marys river'),
('scr', 'stcriv', 'stcriver', 'stclair river', 'st. clair river'),
('det', 'detriv', 'detriver', 'detroit', 'detroit river'),
('nia', 'niariv', 'niariver', 'niagara', 'niagara river'),
('stl', 'stlriv', 'stlriver', 'stlawrence', 'stlawrence river',
'st. lawrence river')
)
#
# *** NOTE ***
# Lake St. Clair and the St. Clair River introduce an issue for us.
# The name "St. Clair" could potentially refer to either one.
# So what do I do? I have chosen to define StClair as one of the
# output strings for both. That way it can be used, in context, for
# either one. But I am leaving it as a valid input string for only
# the lake, because we cannot have duplications in that set.
# Users of this class will need to be cognizant of that situation.
#
_outputStrings = (
('undef', 'undefined'),
('su', 'Superior', 'Lake Superior'),
('mi', 'Michigan', 'Lake Michigan'),
('hu', 'Huron', 'Lake Huron'),
('sc', 'StClair', 'Lake StClair'),
('er', 'Erie', 'Lake Erie'),
('on', 'Ontario', 'Lake Ontario'),
('mh', 'Michigan-Huron', 'Lake Michigan-Huron'),
('og', 'Ogoki', 'Ogoki Diversion'),
('ll', 'LongLac', 'LongLac Diversion'),
('oll', 'Ogoki-LongLac', 'Ogoki-LongLac Diversion'),
('chi', 'Chicago', 'Chicago Diversion'),
('wel', 'Welland', 'Welland Diversion'),
('smr', 'StMarys', 'St. Marys River'),
('scr', 'StClair', 'St. Clair River'),
('det', 'Detroit', 'Detroit River'),
('nia', 'Niagara', 'Niagara River'),
('stl', 'StLawrence', 'St. Lawrence River')
)
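#--------------------------------------------------------------------------------
# Quick sketch of the column handling for a location (expected values
# taken from the tuples above; nothing executed):
#
#   >>> loc = DataLocation('lake superior')
#   >>> loc.inputName(0)     # primary (lookup-key) name
#   'su'
#   >>> loc.outputName(99)   # out-of-range column collapses to the last entry
#   'Lake Superior'
#--------------------------------------------------------------------------------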
#--------------------------------------------------------------------------------
# Define the DataSeries class that stores a single timeseries of data along with
# its metadata.
#--------------------------------------------------------------------------------
class DataSeries(object):
"""The dataseries class contains a single series of data.
Metadata may be left undefined ('na') at the time one of these objects is
created. Obviously, though, once data is assigned, we will need the
metadata values to be valid.
The start and end dates are always defined as a particular DAY, even for
the larger timesteps. For the larger timesteps, the start date is the
first day of the first period and the enddate is the last day of the last
period. For example:
Suppose we have weekly data for the seven "regulation weeks"
(Friday-Thursday) of January 6, 2017 through February 23, 2017, then the
start and end dates are exactly those days.
If we have quarter-monthly data for the first 3 quarters of March, 2017,
then startdate is 3/1/2017 and enddate is 3/23/2017.
If we have a dataset with monthly data for April 2010 through November 2016,
then startdate is 4/1/2010 and enddate is 11/30/2016.
Annual start/end dates are always January 1 and December 31 of the respective
years.
"""
def __init__(self, kind=None, units=None, intvl=None, loc=None,
first=None, last=None, values=None):
self.dataKind = 'na'
self.dataUnits = 'na'
self.dataInterval = 'na'
self.dataLocation = 'na'
self.startDate = util.MISSING_DATE
self.endDate = util.MISSING_DATE
self.dataVals = []
#
# Handle metadata initialization
# They are all specified with text strings.
#
if kind:
try:
self.dataKind = getPrimaryName(meta='kind', name=kind)
except:
raise Exception('Invalid kind specifier in DataSeries init')
else:
self.dataKind = 'na'
if units:
try:
self.dataUnits = getPrimaryName(meta='units', name=units)
except:
raise Exception('Invalid units specifier in DataSeries init')
else:
self.dataUnits = 'na'
        if intvl:
            try:
                self.dataInterval = getPrimaryName(meta='interval', name=intvl)
            except:
                raise Exception('Invalid interval specifier in DataSeries init')
        else:
            self.dataInterval = 'na'
        if loc:
            try:
                self.dataLocation = getPrimaryName(meta='location', name=loc)
            except:
                raise Exception('Invalid location specifier in DataSeries init')
        else:
            self.dataLocation = 'na'
if first:
self.startDate = util.date_from_entry(first)
else:
self.startDate = util.MISSING_DATE
if last:
self.endDate = util.date_from_entry(last)
else:
self.endDate = util.MISSING_DATE
if values:
self.dataVals = values
else:
self.dataVals = []
#
# Future enhancement: compute the required number of values
# based on the specified start/end dates and interval. Then
# compare that to the number of entries in the values list.
#
#---------------------------------------------------------------------
def printSummary(self):
print('Summary of DataSeries...')
print(' kind = ', DataKind(self.dataKind).outputNameLong())
print(' units = ', DataUnits(self.dataUnits).outputNameLong())
print(' intvl = ', DataInterval(self.dataInterval).outputNameLong())
print(' loc = ', DataLocation(self.dataLocation).outputNameLong())
print(' start date = ', str(self.startDate))
print(' end date = ', str(self.endDate))
print(' data values = ', self.dataVals)
#---------------------------------------------------------------------
def printOneLineSummary(self):
print(
' kind = ', DataKind(self.dataKind).outputNameLong(), ';',
' units = ', DataUnits(self.dataUnits).outputNameLong(), ';',
' intvl = ', DataInterval(self.dataInterval).outputNameLong(), ';',
' loc = ', DataLocation(self.dataLocation).outputNameLong(), ';',
' dates = ', str(self.startDate),
' to ', str(self.endDate)
)
#---------------------------------------------------------------------
    def getOneLineSummary(self):
        # Concatenate (not comma-separate) the pieces, so a single
        # string is returned rather than a tuple.
        return (
            'kind=' + DataKind(self.dataKind).outputNameLong() + '; ' +
            'units=' + DataUnits(self.dataUnits).outputNameLong() + '; ' +
            'intvl=' + DataInterval(self.dataInterval).outputNameLong() + '; ' +
            'loc=' + DataLocation(self.dataLocation).outputNameLong() + '; ' +
            'dates=' + str(self.startDate) + ' to ' + str(self.endDate)
        )
#---------------------------------------------------------------------
def add_data(self, newData):
"""Add a continuous timeseries of data to the stored data.
On success, it returns True.
If there is a problem, it returns False.
"""
#
# Compare the metadata values. Reminder -- these are the
# 2- or 3-character strings, all lowercase.
#
if newData.dataKind != self.dataKind:
print('Error. Mismatched data kinds in DataSeries.add_data')
raise TypeError('Invalid attempt to add data to a DataSeries')
if newData.dataUnits != self.dataUnits:
print('Error. Mismatched data units in DataSeries.add_data')
raise TypeError('Invalid attempt to add data to a DataSeries')
if newData.dataInterval != self.dataInterval:
print('Error. Mismatched intervals in DataSeries.add_data')
raise TypeError('Invalid attempt to add data to a DataSeries')
if newData.dataLocation != self.dataLocation:
print('Error. Mismatched locations in DataSeries.add_data')
raise TypeError('Invalid attempt to add data to a DataSeries')
#
# Compare dates
#
if newData.startDate == util.MISSING_DATE:
print("Missing start date specification in call to add_data().")
return False
if newData.endDate == util.MISSING_DATE:
print("Missing end date specification in call to add_data().")
return False
#
# Call the appropriate merge routine
#
if newData.dataInterval == getPrimaryName(meta='interval', name='dly'):
try:
self.mrg_daily_data(newData)
except:
raise Exception('Error attempting to merge daily data.')
elif newData.dataInterval == getPrimaryName(meta='interval', name='monthly'):
try:
self.mrg_monthly_data(newData)
except:
raise Exception('Error attempting to merge monthly data.')
else:
print('Unable to merge data because interval is invalid.')
return False
return True
#---------------------------------------------------------------------
def mrg_daily_data(self, newData):
"""Merge an update set of continuous daily data to the stored data.
NOT intended to be called by user code, but rather for use by other
class methods.
It overwrites any old values in the period of the new
data, but preserves existing values outside that period.
The dates and data list are modified in place.
On success, it returns True.
If there is a problem, it returns False.
Note that metadata (kind, units, interval) are NOT verified here.
They are assumed to have been verified prior to calling this function.
"""
if newData.startDate == util.MISSING_DATE:
print("Missing start date specification in call to mrg_daily_data().")
return False
if newData.endDate == util.MISSING_DATE:
print("Missing end date specification in call to merge_daily_data().")
return False
#
# Determine the new date extents.
# Just save these values for now
#
mrgStart = min(self.startDate, newData.startDate)
mrgEnd = max(self.endDate, newData.endDate)
#
# create a template list of the new size, filled with missing data values
#
ndays = mrgEnd.toordinal() - mrgStart.toordinal() + 1
mrgData = [util.MISSING_REAL] * ndays
#
# replace the appropriate slice of mrgData with the old data
# Don't forget that the end index is always 1 past the desired end
#
i = self.startDate.toordinal() - mrgStart.toordinal()
j = self.endDate.toordinal() - mrgStart.toordinal() + 1
m = 0
n = self.endDate.toordinal() - self.startDate.toordinal() + 1
mrgData[i:j] = self.dataVals[m:n]
#
# replace the appropriate slice of mrgData with the new data
# Don't forget that the end index is always 1 past the desired end
#
i = newData.startDate.toordinal() - mrgStart.toordinal()
j = newData.endDate.toordinal() - mrgStart.toordinal() + 1
m = 0
n = newData.endDate.toordinal() - newData.startDate.toordinal() + 1
mrgData[i:j] = newData.dataVals[m:n]
#
# replace/overwrite old object values with the new ones
#
self.startDate = mrgStart
self.endDate = mrgEnd
self.dataVals = mrgData
return True
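    #---------------------------------------------------------------------
    # Worked example of the slice arithmetic above (illustrative only):
    # if the stored series runs Jan 1-10 and newData runs Jan 5-15, then
    # mrgStart = Jan 1, mrgEnd = Jan 15, and ndays = 15. The old data
    # fills mrgData[0:10]; the new data then overwrites mrgData[4:15],
    # so Jan 1-4 keeps the old values and Jan 5-15 gets the new ones.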
#---------------------------------------------------------------------
    def mrg_monthly_data(self, newData):
        """Merge an updated set of continuous monthly data into the stored data.
NOT intended to be called by user code, but rather for use by other
class methods.
It overwrites any old values in the period of the new
data, but preserves existing values outside that period.
The dates and data list are modified in place.
On success, it returns True.
If there is a problem, it returns False.
Note that metadata (kind, units, interval) are NOT verified here.
They are assumed to have been verified prior to calling this function.
"""
        if newData.startDate == util.MISSING_DATE:
            print("Missing start date specification in call to mrg_monthly_data().")
            return False
        if newData.endDate == util.MISSING_DATE:
            print("Missing end date specification in call to mrg_monthly_data().")
            return False
        if newData.startDate.day != 1:
            print("Invalid start date for monthly data. Must be day 1 of the month.")
            return False
        # last_day_of_month() is assumed to be one of the date helpers in
        # databank_util, alongside date_from_entry().
        d = util.last_day_of_month(newData.endDate)
        if newData.endDate.day != d.day:
            print("Invalid end date for monthly data. Must be last day of the month.")
            return False
#
# Determine the new date extents.
# Just save these values for now
#
mrgStart = min(self.startDate, newData.startDate)
mrgEnd = max(self.endDate, newData.endDate)
#
# create a template list of the new size, filled with missing data values
#
mons = ((mrgEnd.year - mrgStart.year)*12
+ (mrgEnd.month - mrgStart.month) + 1)
mrgData = [util.MISSING_REAL] * mons
#
# replace the appropriate slice of mrgData with the old data
# Don't forget that the end index is always 1 past the desired end
#
sd = self.startDate
ed = self.endDate
i = (sd.year - mrgStart.year)*12 + (sd.month - mrgStart.month)
        j = (ed.year - mrgStart.year)*12 + (ed.month - mrgStart.month) + 1
m = 0
n = (ed.year - sd.year)*12 + (ed.month - sd.month) + 1
mrgData[i:j] = self.dataVals[m:n]
#
# replace the appropriate slice of mrgData with the new data
# Don't forget that the end index is always 1 past the desired end
#
sd = newData.startDate
ed = newData.endDate
i = (sd.year - mrgStart.year)*12 + (sd.month - mrgStart.month)
        j = (ed.year - mrgStart.year)*12 + (ed.month - mrgStart.month) + 1
m = 0
n = (ed.year - sd.year)*12 + (ed.month - sd.month) + 1
mrgData[i:j] = newData.dataVals[m:n]
#
# replace/overwrite old object values with the new ones
#
        self.startDate = mrgStart
        self.endDate = mrgEnd
self.dataVals = mrgData
return True
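    #---------------------------------------------------------------------
    # Worked example of the monthly index arithmetic (illustrative only):
    # if the stored series covers Apr 2010 - Nov 2016 and newData covers
    # Jan 2016 - Mar 2017, then mrgStart = Apr 2010, mrgEnd = Mar 2017,
    # and mons = (2017-2010)*12 + (3-4) + 1 = 84. The old data fills
    # mrgData[0:80]; the new data then overwrites mrgData[69:84].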
#--------------------------------------------------------------------------------
# Define the DataVault class that stores a bunch of DataSeries objects and will
# be used as the repository for GLRRM data.
#
# When a dataset is stored into the vault, the data values will be "normalized"
# to a specific set of units. When data is withdrawn, the values will be
# converted into whatever was requested. When this conversion requires a lake
# area to be used (e.g. when converting from cms -> mm) we will use the
# coordinated lake areas by default. The caller will have the option to specify
# an alternate area.
# *** MODELERS BEWARE ***
# If you use this functionality to specify an alternative lake area, please be
# certain that you are consistent, and ALWAYS use it. As you can imagine, if
# you were to store values converted via coordinated areas (no area specified),
# then withdraw them using an alternative area, then store them again using
# the coordinated area (no area specified) -- You will end up changing the
# data values in the vault in a way you don't intend.
#
# Normalized units are:
# precipitation cubic meters per second
# runoff cubic meters per second
# evaporation cubic meters per second
# net basin supply cubic meters per second
# mean level meters
# bom level meters
# eom level meters
# flow cubic meters per second
# consumptive use cubic meters per second
# ice-weed ret cubic meters per second
#
# Note that lake levels are stored using a generic elevation without any
# consideration of IGLD55, IGLD85, etc. For now, it is the responsibility
# of the model to account for any IGLD conversion issues.
#--------------------------------------------------------------------------------
class DataVault(object):
#--------------------------------
# Specify the coordinated lake surface areas, in sq meters
# To be very clear, these are the lake areas only, and do not
# include upstream channels.
# Also, Lake Huron is the entire Lake Huron area, including Georgian Bay.
#--------------------------------
_coordLakeArea = (
('su', 8.21e10),
('mi', 5.78e10),
('hu', 5.96e10),
('sc', 1.11e09),
('er', 2.57e10),
('on', 1.90e10),
('mh', 1.17e11),
)
def getLakeArea(self, loc=None):
if not loc: return None
try:
s = DataLocation(loc).primaryName().lower()
            for t in self._coordLakeArea:
if t[0].lower() == s:
return t[1]
return None
except:
return None
#--------------------------------
# Specify the normalized units to use for each kind
#--------------------------------
_normalizedUnits = (
('prc', 'cms'),
('run', 'cms'),
('evp', 'cms'),
('nbs', 'cms'),
('mlv', 'm'),
('blv', 'm'),
('elv', 'm'),
('flw', 'cms'),
('con', 'cms'),
('icw', 'cms')
)
    def getNormalizedUnits(self, kind=None):
        if not (kind and isinstance(kind, str)):
            return 'na'
        s = kind.lower()
        try:
            for t in self._normalizedUnits:
                if t[0].lower() == s:
                    return t[1]
            return 'na'
        except:
            return 'na'
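    #--------------------------------
    # Usage sketch for the two lookup helpers (expected values taken
    # from the tables above; nothing executed):
    #
    #   >>> v = DataVault()
    #   >>> v.getLakeArea('superior')         # coordinated area, sq meters
    #   82100000000.0
    #   >>> v.getNormalizedUnits(kind='run')  # runoff is normalized to cms
    #   'cms'
    #--------------------------------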
#--------------------------------------------------------
def __init__(self):
self.vault = {} # the dictionary object
#-------------------------------------------------------------------
# Construct a lookup key for our dictionary from EITHER:
# 1) The metadata in a DataSeries object, if ds is provided.
# OR
# 2) The specific kind, interval, location specified.
# If ds is given, kind, intvl, loc will be ignored.
#
# Returns a text string that looks like this:
# kind_intvl_loc
# where:
# kind = the character string defined as the first entry in the
# _inputStrings tuple for this data kind. e.g. For runoff data,
# the _inputStrings tuple is ('run', 'runf', 'runoff'). We will
# use 'run' as the kind string. Typically, this will be a
# 3-character string, but it is not required. We will just use
# that very first entry in the tuple. Keep in mind that this
# implicitly assumes the entry is unique, which is required
# anyway.
# intvl = the character string defined as the first entry in the
# _inputStrings tuple for this data interval. Same
# story as the data kind.
# loc = the character string defined as the first entry in the
# _inputStrings tuple for this location. Same story as kind/intvl.
#
# For example, if the function call looks like:
# myVault._construct_vault_key(kind='runoff', intvl='daily', loc='ont')
# then the kind/intvl/loc values will turn into 'run'/'dy'/'on', and the
# resulting lookup key will be 'run_dy_on'.
#
#-------------------------------------------------------------------
@classmethod
def _construct_vault_key(thisclass, ds=None, kind=None,
intvl=None, loc=None):
if ds:
kstr = ds.dataKind
istr = ds.dataInterval
lstr = ds.dataLocation
return kstr + '_' + istr + '_' + lstr
#
# If all 3 items are specified, e.g.
# _construct_vault_key(kind='nbs', intvl='daily', loc='erie')
# First create temporary metadata objects for each, then
# get the correct lookup name for each.
#
if kind and intvl and loc:
kobj = DataKind(kind)
iobj = DataInterval(intvl)
lobj = DataLocation(loc)
kstr = kobj.primaryName()
istr = iobj.primaryName()
lstr = lobj.primaryName()
return kstr + '_' + istr + '_' + lstr
raise ValueError('Missing DataSeries information in _construct_vault_key')
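    #-------------------------------------------------------------------
    # Key-construction sketch (matches the example in the comment above;
    # nothing executed):
    #
    #   >>> DataVault._construct_vault_key(kind='runoff', intvl='daily',
    #   ...                                loc='ont')
    #   'run_dy_on'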
#-------------------------------------------------------------------
def printVault(self):
for key in self.vault:
print('key=', key, ':', self.vault[key].getOneLineSummary())
#-------------------------------------------------------------------
# The deposit() function is how a user adds data to the vault.
# ds is a DataSeries object.
#-------------------------------------------------------------------
def deposit(self, ds, lake_area=None):
try:
key = type(self)._construct_vault_key(ds)
except:
raise Exception('databank.deposit: error getting the key')
#
# If user did not specify a lake area, then assign a value (if needed)
#
if not lake_area:
lake_area = self.getLakeArea(ds.dataLocation)
#
# Create a "normalized" DataSeries object such that the data
# units and values conform to the prescribed data units
# for storage in the vault.
#
tempvals = copy(ds.dataVals) # default is to use data as-is
normstr = self.getNormalizedUnits(kind=ds.dataKind)
        #
        # If needed, convert the data values to the normalized units.
        # (The duplicated units check and dead "tempvals = None" are gone;
        # a single try/except reports the failure.)
        #
        try:
            if ds.dataUnits != normstr:
                oldstr = ds.dataUnits
                oldvals = copy(ds.dataVals)
                if (oldstr in util.linear_units) and normstr == 'm':
                    tempvals = util.convertValues(values=oldvals,
                        oldunits=oldstr, newunits=normstr)
                elif (oldstr in util.rate_units) and normstr == 'cms':
                    tempvals = util.convertValues(values=oldvals,
                        oldunits=oldstr, newunits=normstr)
                elif oldstr in util.areal_units:
                    raise Exception('Error: datavault unable to store '
                                    + 'areal datasets.')
                elif normstr == 'm':
                    tempvals = util.convertValues(values=oldvals,
                        oldunits=oldstr, newunits=normstr,
                        area=lake_area, first=ds.startDate,
                        last=ds.endDate)
                elif normstr == 'cms':
                    tempvals = util.convertValues(values=oldvals,
                        oldunits=oldstr, newunits=normstr,
                        area=lake_area, first=ds.startDate,
                        last=ds.endDate)
                else:
                    print('ds.dataUnits=', ds.dataUnits)
                    print('normstr=', normstr)
                    raise Exception('Unhandled data units conversion.')
        except:
            raise Exception('Unable to create dataset for the datavault.')
#
# Create a temporary dataset that contains the data to be added,
# in the correct units.
#
tds = DataSeries(kind=ds.dataKind, units=normstr, loc=ds.dataLocation,
intvl=ds.dataInterval, first=ds.startDate,
last=ds.endDate, values=tempvals)
#
# Do we already have a data series like this?
# If so, we will merge them.
#
try:
#
# Get the old data set. Note that this line will fail
# with an exception if there is no matching old dataset.
# That will cause us to drop into the except block and skip the
# rest of the try block. Seems a little ugly to me, but I
# guess that is the "python way" to do it? I don't see any
# search functionality that works "cleanly".
#
old = self.vault[key]
#
# Getting to here means we have an old dataset. Proceed with
# checking and merging.
#
# Verify that the data sets have matching metadata.
# This should actually never be an issue, but verifying is good.
#
            # Compare against tds, the freshly normalized series
            # (the original referenced an undefined name "new").
            if old.dataKind != tds.dataKind:
                raise ValueError('Data kind mismatch')
            if old.dataInterval != tds.dataInterval:
                raise ValueError('Data interval mismatch')
            if old.dataLocation != tds.dataLocation:
                raise ValueError('Data location mismatch')
#
# Merge the two DataSeries objects
#
try:
old.add_data(tds)
except:
                raise Exception('Error merging the new data into the old.')
except:
#
# No old dataset, so just add this new one to the vault.
#
self.vault.update({key:tds})
#---------------------------------------------------------------
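    #---------------------------------------------------------------
    # Deposit sketch (illustrative only; assumes the ds object from
    # the DataSeries construction example earlier in this file):
    #
    #   >>> vault = DataVault()
    #   >>> vault.deposit(ds)    # normalized and stored under 'run_mn_su'
    #   >>> vault.printVault()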