Skip to content

Commit 2af6f4c

Browse files
committed
gitignore header replacer
Added .gitignore file. Added dri_header_replacer.py, which updates the header of existing large files. Updated sensor_ucnrs_dri_puller.py to deal with errors in DRI headers.
1 parent c402028 commit 2af6f4c

File tree

3 files changed

+95
-11
lines changed

3 files changed

+95
-11
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
odm.config

dri_header_replacer.py

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
#!/home/collin/pyv/bin/python
# -*- coding: utf-8 -*-
################################################################################
# name: dri_header_replacer.py
# author: collin bode, email: [email protected]
# date: 2016-08-07
#
# purpose: Replace the header on a .dat file for all files that meet filter.
#
# NOTE: the shebang must be the very first line of the file to take effect;
# the coding declaration is valid on line 1 or 2 (PEP 263).
################################################################################
import os
import sys

# WRCC has codes for each UC weather station
stations = ['ucac', 'ucbo', 'ucab', 'ucja',
            'ucbm', 'ucde', 'ucbu', 'ucca', 'ucel', 'ucha',
            'ucjp', 'ucmc', 'ucmo', 'ucrm', 'ucsc', 'ucse',
            'ucsh', 'ucsr', 'ucgr', 'ucyl', 'hipk', 'whpt',
            'sagh', 'croo', 'wmtn', 'barc']

# Directory holding the station .dat files; the replacement headers live in
# a 'dri_headers' subdirectory.  Uncomment the server path for cron use.
# path = '/data/sensor/UCNRS/'
path = '/Users/cbode/Documents/GoogleDrive/UCNRS_WeatherStations/DatFiles_DRI/'


def replace_header(basepath, station):
    """Rewrite line 2 (the fieldname row) of <station>_dri.dat.

    The existing file is first renamed to <station>_dri.dat.temp, which is
    left in place afterwards as a backup.  A fresh .dat file is then written
    with line 2 replaced by the fieldname line (second line) of
    dri_headers/<station>_dri.header; every other line is copied unchanged.

    Parameters:
        basepath -- directory containing the .dat files and dri_headers/
        station  -- WRCC station code, e.g. 'ucac'

    Returns:
        The number of rows written to the rebuilt .dat file.

    Raises:
        ValueError -- if line 2 of the .dat file does not begin with
            '"TIMESTAMP"' (i.e. it is not a LoggerNet fieldname row).
            The original .dat file is restored before the error propagates,
            so no data is lost.
    """
    fpath = os.path.join(basepath, station + '_dri.dat')
    temppath = fpath + '.temp'
    hpath = os.path.join(basepath, 'dri_headers', station + '_dri.header')

    # The fieldname row is the second line of the header file
    # (trailing newline included, so it can be written out verbatim).
    with open(hpath, 'r') as fhead:
        fieldnames = fhead.readlines()[1]

    # Move the existing file aside; it doubles as a backup on success.
    os.rename(fpath, temppath)

    i = 0
    try:
        with open(temppath, 'r') as fin, open(fpath, 'w') as fout:
            for row in fin:
                i += 1
                if i == 2:
                    # Sanity check: only ever replace a genuine fieldname row.
                    if row.split(',')[0] != '"TIMESTAMP"':
                        raise ValueError(
                            '%s row %d is not a fieldname row: %s'
                            % (station, i, row))
                    print(station, i, " OLD ROW: ", row)
                    print(" NEW ROW: ", fieldnames)
                    fout.write(fieldnames)
                else:
                    fout.write(row)
    except ValueError:
        # Put the original file back so the station data is intact.
        os.replace(temppath, fpath)
        raise
    return i


def main():
    """Loop through all the stations and replace each .dat header row."""
    for station in stations:
        print(station)
        try:
            nrows = replace_header(path, station)
        except ValueError:
            # Match original behavior: abort the whole run on a bad row.
            sys.exit("BAD ROW! Exiting. DAT")
        print('DONE with ', station, nrows)


if __name__ == '__main__':
    main()

sensor_ucnrs_dri_puller.py

Lines changed: 25 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -53,14 +53,14 @@
5353
import os
5454

5555
# Boolean controls for script. Cron job mode is false, false, true.
56-
booFirstRun = True # True = Download all data available from 1990 until now
56+
booFirstRun = False # True = Download all data available from 1990 until now
5757
# DRI controlled sites only can download 30 days,
5858
# unless you have 'secret' password.
5959
# False(default) = just download the last 24 hours
6060
# booWriteHeader will automatically be set to True.
61-
booWriteHeader = True # True = Get Long Header parse into LoggerNet header.
61+
booWriteHeader = True # True = Get Long Header parse into LoggerNet header.
6262
# False(default) = No header, just data.
63-
booDownloadData = True # True(default). False will only download headers.
63+
booDownloadData = False # True(default). False will only download headers.
6464

6565
# WRCC DRI Website
6666
website = 'http://www.wrcc.dri.edu/cgi-bin/wea_list2.pl'
@@ -77,12 +77,12 @@
7777

7878
# Loop through all the stations, webscrape, and parse
7979
for station in stations:
80-
print(station)
80+
#print(station)
8181

8282
# Define path and station filename
8383
path = '/data/sensor/UCNRS/'
8484
#path = '/Users/cbode/Documents/GoogleDrive/UCNRS_WeatherStations/DatFiles_DRI/'
85-
ftdirpath = path+'/dri_time/'
85+
ftdirpath = path+'dri_time/'
8686

8787
# Check for existance of the time files directory, if not create
8888
if(os.path.exists(ftdirpath) == False):
@@ -110,7 +110,7 @@
110110
try:
111111
ft = open(ftpath,'r') # open .time file and get last datetime pulled
112112
dtstring = (ft.read()).strip()
113-
print(station+' last date: '+dtstring)
113+
#print(station+' last date: '+dtstring)
114114
time_start_o = dt.datetime.strptime(dtstring,"%Y-%m-%d %H:%M:%S")
115115
time_start = time_start_o - dt.timedelta(days=1) # add a day for safety
116116
ft.close()
@@ -172,6 +172,7 @@
172172
row3 = '"TS","RN"'
173173
row4 = '"",""'
174174
j = 0
175+
t = 0 # how many duplicates of Min TC 10m are there?
175176
booWriteHeader = False
176177
for row in received_data:
177178
#print(row)
@@ -183,7 +184,9 @@
183184
booWriteHeader = True
184185
# DRI long headers are sentence descriptions
185186
# This segment shortens them to field names with no unusual characters
186-
fieldname = fields[0].strip()
187+
fieldname = fields[0][1:-1]
188+
fieldname = fieldname.replace(':','')
189+
fieldname = fieldname.strip()
187190
fieldname = fieldname.replace('"','')
188191
fieldname = fieldname.replace(' ','_')
189192
fieldname = fieldname.replace('_in.','_Inches')
@@ -200,11 +203,21 @@
200203
fieldname = fieldname.replace('Minimum','Min')
201204
fieldname = fieldname.replace('Temperature','Temp')
202205
fieldname = fieldname.replace('temperature','Temp')
203-
fieldname = fieldname.replace('Average','Ave')
206+
fieldname = fieldname.replace('Ave_','Avg_')
207+
fieldname = fieldname.replace('Average','Avg')
204208
fieldname = fieldname.replace('Miscellaneous','Misc')
205209
fieldname = fieldname.replace('Identification','ID')
206210
fieldname = fieldname.replace('Standard_Deviation','Std Dev')
207211
fieldname = fieldname.replace('Standard_Deveation','Std Dev')
212+
fieldname = fieldname.replace('_mag/arcsec2','_mag')
213+
fieldname = fieldname.replace('/','')
214+
fieldname = fieldname.replace('\\','')
215+
216+
# Fix Thermocouple duplicate
217+
if(fieldname == 'Min_Temp_Thermocouple_10_m'):
218+
t += 1
219+
if(t == 2):
220+
fieldname = fieldname.replace('Min','Avg')
208221
# Field units are inserted for posterity, but not used by loader
209222
fieldunits = fields[1].strip()
210223
fieldunits = (fieldunits[1:len(fieldunits)-1]).strip()
@@ -240,7 +253,7 @@
240253
# Merge date and time into TIMESTAMP
241254
timestamp = 'GEORGE'
242255
if(booDownloadData == True):
243-
print('____Data next____')
256+
#print('____Data next____')
244257
for row in received_data:
245258
if(len(row) > 0):
246259
fields = row.split(",")
@@ -257,7 +270,8 @@
257270
if(ts > time_start_o):
258271
fout.write(newrow)
259272
else:
260-
print(station+' redundant timestamp:',timestamp,' < ',time_start_o)
273+
pass
274+
#print(station+' redundant timestamp:',timestamp,' < ',time_start_o)
261275
#print(newrow)
262276
#else:
263277
# print('BAD HTML! ')
@@ -266,7 +280,7 @@
266280
ft = open(ftpath,'w')
267281
ft.write(timestamp+"\n")
268282
ft.close()
269-
print(station+" downloaded and writen to file")
283+
#print(station+" downloaded and writen to file")
270284
else:
271285
print('WARNING! '+station+' did not have any values to download.')
272286
# Finish up with station

0 commit comments

Comments
 (0)