Skip to content

Commit 2af6f4c

Browse files
committed
gitignore header replacer
Added .gitignore file. Added dri_header_replacer.py, which updates the header of existing large files. Updated sensor_ucnrs_dri_puller.py to deal with errors in DRI headers.
1 parent c402028 commit 2af6f4c

File tree

3 files changed

+95
-11
lines changed

3 files changed

+95
-11
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
odm.config

dri_header_replacer.py

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
#!/home/collin/pyv/bin/python
# -*- coding: utf-8 -*-
################################################################################
# name: dri_header_replacer.py
# author: collin bode, email: [email protected]
# date: 2016-08-07
#
# purpose: Replace the header on a .dat file for all files that meet filter.
#
# NOTE: the shebang must be the very first line of the file to take effect;
# the coding declaration is valid on line 1 or 2 (PEP 263).
################################################################################
import os
import sys

# WRCC has codes for each UC weather station
stations = ['ucac', 'ucbo', 'ucab', 'ucja',
            'ucbm', 'ucde', 'ucbu', 'ucca', 'ucel', 'ucha',
            'ucjp', 'ucmc', 'ucmo', 'ucrm', 'ucsc', 'ucse',
            'ucsh', 'ucsr', 'ucgr', 'ucyl', 'hipk', 'whpt',
            'sagh', 'croo', 'wmtn', 'barc']

# Directory holding the station .dat files; the replacement headers live in
# a 'dri_headers' subdirectory.  Uncomment the server path for cron use.
# path = '/data/sensor/UCNRS/'
path = '/Users/cbode/Documents/GoogleDrive/UCNRS_WeatherStations/DatFiles_DRI/'


def replace_header(basepath, station):
    """Rewrite line 2 (the fieldname row) of <station>_dri.dat.

    The existing file is first renamed to <station>_dri.dat.temp, which is
    left in place afterwards as a backup.  A fresh .dat file is then written
    with line 2 replaced by the fieldname line (second line) of
    dri_headers/<station>_dri.header; every other line is copied unchanged.

    Parameters:
        basepath -- directory containing the .dat files and dri_headers/
        station  -- WRCC station code, e.g. 'ucac'

    Returns:
        The number of rows written to the rebuilt .dat file.

    Raises:
        ValueError -- if line 2 of the .dat file does not begin with
            '"TIMESTAMP"' (i.e. it is not a LoggerNet fieldname row).
            The original .dat file is restored before the error propagates,
            so no data is lost.
    """
    fpath = os.path.join(basepath, station + '_dri.dat')
    temppath = fpath + '.temp'
    hpath = os.path.join(basepath, 'dri_headers', station + '_dri.header')

    # The fieldname row is the second line of the header file
    # (trailing newline included, so it can be written out verbatim).
    with open(hpath, 'r') as fhead:
        fieldnames = fhead.readlines()[1]

    # Move the existing file aside; it doubles as a backup on success.
    os.rename(fpath, temppath)

    i = 0
    try:
        with open(temppath, 'r') as fin, open(fpath, 'w') as fout:
            for row in fin:
                i += 1
                if i == 2:
                    # Sanity check: only ever replace a genuine fieldname row.
                    if row.split(',')[0] != '"TIMESTAMP"':
                        raise ValueError(
                            '%s row %d is not a fieldname row: %s'
                            % (station, i, row))
                    print(station, i, " OLD ROW: ", row)
                    print(" NEW ROW: ", fieldnames)
                    fout.write(fieldnames)
                else:
                    fout.write(row)
    except ValueError:
        # Put the original file back so the station data is intact.
        os.replace(temppath, fpath)
        raise
    return i


def main():
    """Loop through all the stations and replace each .dat header row."""
    for station in stations:
        print(station)
        try:
            nrows = replace_header(path, station)
        except ValueError:
            # Match original behavior: abort the whole run on a bad row.
            sys.exit("BAD ROW! Exiting. DAT")
        print('DONE with ', station, nrows)


if __name__ == '__main__':
    main()

sensor_ucnrs_dri_puller.py

Lines changed: 25 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -53,14 +53,14 @@
5353
import os
5454

5555
# Boolean controls for script. Cron job mode is false, false, true.
56-
booFirstRun = True # True = Download all data available from 1990 until now
56+
booFirstRun = False # True = Download all data available from 1990 until now
5757
# DRI controlled sites only can download 30 days,
5858
# unless you have 'secret' password.
5959
# False(default) = just download the last 24 hours
6060
# booWriteHeader will automatically be set to True.
61-
booWriteHeader = True # True = Get Long Header parse into LoggerNet header.
61+
booWriteHeader = True # True = Get Long Header parse into LoggerNet header.
6262
# False(default) = No header, just data.
63-
booDownloadData = True # True(default). False will only download headers.
63+
booDownloadData = False # True(default). False will only download headers.
6464

6565
# WRCC DRI Website
6666
website = 'http://www.wrcc.dri.edu/cgi-bin/wea_list2.pl'
@@ -77,12 +77,12 @@
7777

7878
# Loop through all the stations, webscrape, and parse
7979
for station in stations:
80-
print(station)
80+
#print(station)
8181

8282
# Define path and station filename
8383
path = '/data/sensor/UCNRS/'
8484
#path = '/Users/cbode/Documents/GoogleDrive/UCNRS_WeatherStations/DatFiles_DRI/'
85-
ftdirpath = path+'/dri_time/'
85+
ftdirpath = path+'dri_time/'
8686

8787
# Check for existance of the time files directory, if not create
8888
if(os.path.exists(ftdirpath) == False):
@@ -110,7 +110,7 @@
110110
try:
111111
ft = open(ftpath,'r') # open .time file and get last datetime pulled
112112
dtstring = (ft.read()).strip()
113-
print(station+' last date: '+dtstring)
113+
#print(station+' last date: '+dtstring)
114114
time_start_o = dt.datetime.strptime(dtstring,"%Y-%m-%d %H:%M:%S")
115115
time_start = time_start_o - dt.timedelta(days=1) # add a day for safety
116116
ft.close()
@@ -172,6 +172,7 @@
172172
row3 = '"TS","RN"'
173173
row4 = '"",""'
174174
j = 0
175+
t = 0 # how many duplicates of Min TC 10m are there?
175176
booWriteHeader = False
176177
for row in received_data:
177178
#print(row)
@@ -183,7 +184,9 @@
183184
booWriteHeader = True
184185
# DRI long headers are sentence descriptions
185186
# This segment shortens them to field names with no unusual characters
186-
fieldname = fields[0].strip()
187+
fieldname = fields[0][1:-1]
188+
fieldname = fieldname.replace(':','')
189+
fieldname = fieldname.strip()
187190
fieldname = fieldname.replace('"','')
188191
fieldname = fieldname.replace(' ','_')
189192
fieldname = fieldname.replace('_in.','_Inches')
@@ -200,11 +203,21 @@
200203
fieldname = fieldname.replace('Minimum','Min')
201204
fieldname = fieldname.replace('Temperature','Temp')
202205
fieldname = fieldname.replace('temperature','Temp')
203-
fieldname = fieldname.replace('Average','Ave')
206+
fieldname = fieldname.replace('Ave_','Avg_')
207+
fieldname = fieldname.replace('Average','Avg')
204208
fieldname = fieldname.replace('Miscellaneous','Misc')
205209
fieldname = fieldname.replace('Identification','ID')
206210
fieldname = fieldname.replace('Standard_Deviation','Std Dev')
207211
fieldname = fieldname.replace('Standard_Deveation','Std Dev')
212+
fieldname = fieldname.replace('_mag/arcsec2','_mag')
213+
fieldname = fieldname.replace('/','')
214+
fieldname = fieldname.replace('\\','')
215+
216+
# Fix Thermocouple duplicate
217+
if(fieldname == 'Min_Temp_Thermocouple_10_m'):
218+
t += 1
219+
if(t == 2):
220+
fieldname = fieldname.replace('Min','Avg')
208221
# Field units are inserted for posterity, but not used by loader
209222
fieldunits = fields[1].strip()
210223
fieldunits = (fieldunits[1:len(fieldunits)-1]).strip()
@@ -240,7 +253,7 @@
240253
# Merge date and time into TIMESTAMP
241254
timestamp = 'GEORGE'
242255
if(booDownloadData == True):
243-
print('____Data next____')
256+
#print('____Data next____')
244257
for row in received_data:
245258
if(len(row) > 0):
246259
fields = row.split(",")
@@ -257,7 +270,8 @@
257270
if(ts > time_start_o):
258271
fout.write(newrow)
259272
else:
260-
print(station+' redundant timestamp:',timestamp,' < ',time_start_o)
273+
pass
274+
#print(station+' redundant timestamp:',timestamp,' < ',time_start_o)
261275
#print(newrow)
262276
#else:
263277
# print('BAD HTML! ')
@@ -266,7 +280,7 @@
266280
ft = open(ftpath,'w')
267281
ft.write(timestamp+"\n")
268282
ft.close()
269-
print(station+" downloaded and writen to file")
283+
#print(station+" downloaded and writen to file")
270284
else:
271285
print('WARNING! '+station+' did not have any values to download.')
272286
# Finish up with station

0 commit comments

Comments
 (0)