1
- package java . com .oilfield .logix .crawler ;
1
+ package com .oilfield .logix .crawler ;
2
2
3
+ import java .io .File ;
3
4
import java .io .FileReader ;
5
+ import java .io .FileWriter ;
4
6
import java .io .IOException ;
5
7
import java .net .URI ;
6
8
import java .time .LocalDate ;
7
9
import java .util .ArrayList ;
8
10
import java .util .List ;
11
+ import java .util .Optional ;
9
12
10
13
import javax .ws .rs .core .UriBuilder ;
11
14
import javax .xml .parsers .ParserConfigurationException ;
21
24
import org .jsoup .select .Elements ;
22
25
import org .xml .sax .SAXException ;
23
26
24
- import au .com .bytecode .opencsv .CSVReader ;
25
-
26
- import com .oilfield .logix .crawler .Config ;
27
- import com .oilfield .logix .crawler .Well ;
28
27
import com .oilfield .logix .crawler .Well .Form ;
29
28
29
+ import au .com .bytecode .opencsv .CSVReader ;
30
+ import au .com .bytecode .opencsv .CSVWriter ;
31
+
30
32
/**
31
33
* Main class that runs the uber jar
32
34
*
@@ -39,36 +41,39 @@ public class MainClass {
39
41
.build ();
40
42
private static Config config ;
41
43
private static List <Well > wells ;
42
- private static String oldFilePath ;
44
+ private static String oldWellsFilePath ;
45
+ private static String oldFormsFilePath ;
43
46
private static String siteUri ;
44
47
45
- public static void main (String [] args ) throws IOException , InterruptedException {
48
+ public static void main (String [] args ) throws IOException , InterruptedException , ParserConfigurationException , SAXException {
46
49
47
50
config = new Config (args [0 ],args [1 ],args [2 ]);
48
- oldFilePath = args [3 ];
49
- siteUri = args [4 ];
50
-
51
-
51
+ oldWellsFilePath = args [3 ];
52
+ oldFormsFilePath = args [4 ];
53
+ siteUri = args [5 ];
52
54
55
+ populateOldWells ();
56
+ populateNewWells ();
57
+ writeCSV ();
53
58
}
54
59
55
60
56
61
public static void populateOldWells () throws IOException {
57
- CSVReader csvReader = new CSVReader (new FileReader (oldFilePath ));
62
+ CSVReader csvReader = new CSVReader (new FileReader (oldWellsFilePath ));
58
63
List <String []> csvLines = csvReader .readAll ();
59
64
for (String [] line : csvLines ) {
60
65
wells .add (new Well (Integer .valueOf (line [0 ]), line [1 ], line [2 ], line [3 ], line [4 ],
61
66
line [5 ], line [6 ], line [7 ], LocalDate .parse (line [8 ]), LocalDate .parse (line [9 ]), LocalDate
62
67
.parse (line [10 ]), line [11 ], line [12 ], line [13 ], line [14 ], line [15 ], line [16 ]));
63
68
}
64
69
65
- csvReader = new CSVReader (new FileReader (oldFilePath ));
70
+ csvReader = new CSVReader (new FileReader (oldFormsFilePath ));
66
71
csvLines = csvReader .readAll ();
67
72
68
73
for (String [] line : csvLines ) {
69
74
for (Well well : wells ) {
70
75
if (Integer .valueOf (line [0 ]).equals (well .getId ())) {
71
- well .getForms ().add (new Form (line [1 ], LocalDate .parse (line [2 ]), LocalDate .parse (line [3 ])));
76
+ well .getForms ().add (new Form (line [1 ], LocalDate .parse (line [2 ]), line [ 3 ]. equals ( "null" ) ? Optional . empty () : Optional . of ( LocalDate .parse (line [3 ]) )));
72
77
}
73
78
}
74
79
}
@@ -88,32 +93,77 @@ public static void populateNewWells() throws ParserConfigurationException, SAXEx
88
93
HttpResponse response = httpClient .execute (httpGet );
89
94
String responseString = IOUtils .toString (response .getEntity ().getContent (), "UTF-8" );
90
95
Document document = Jsoup .parse (responseString );
91
- Elements elements = document .body ().getElementsByAttributeValue ("class" , "GroupBox1" );
96
+
97
+ Elements elements = document .body ().getElementsByAttributeValue ("class" , "DataGrid" );
92
98
Element el = elements .first ();
93
- elements = el .getElementsByTag ("strong" );
94
- if (wells .contains (id )) {
99
+ elements = el .getElementsByTag ("tr" );
100
+ List <Form > forms = new ArrayList <>();
101
+ for (Element e : elements ) {
102
+ Elements elements1 = e .getElementsByTag ("td" );
103
+ new Form (elements1 .get (0 ).text (), LocalDate .now (), elements1 .get (1 ).text ()
104
+ .equals ("Certified" ) ? Optional .of (LocalDate .now ()) : Optional .empty ());
105
+ }
95
106
107
+ elements = document .body ().getElementsByAttributeValue ("class" , "GroupBox1" );
108
+ el = elements .first ();
109
+ elements = el .getElementsByTag ("strong" );
110
+ Well oldWell = null ;
111
+ for (Well well : wells ) {
112
+ if (well .getId () == id ) {
113
+ oldWell = well ;
114
+ }
115
+ }
116
+ if (oldWell != null ) {
117
+ for (Form newForm : forms ) {
118
+ boolean found = false ;
119
+ for (Form form : oldWell .getForms ()) {
120
+ if (newForm .equals (form )) {
121
+ found = true ;
122
+ if (!form .isCertified () && newForm .isCertified ()) {
123
+ form .setCertification (Optional .of (LocalDate .now ()));
124
+ }
125
+ }
126
+ if (!found ) {
127
+ oldWell .getForms ().add (form );
128
+ }
129
+ }
130
+ }
96
131
} else {
97
- wells . add ( new Well (Integer .valueOf (elements .get (0 ).text ()), elements .get (1 ).text (), elements .get (
132
+ Well newWell = new Well (Integer .valueOf (elements .get (0 ).text ()), elements .get (1 ).text (), elements .get (
98
133
2 ).text (), elements .get (3 ).text (), elements .get (4 ).text (), elements .get (5 )
99
134
.text (), elements .get (6 ).text (), elements .get (7 ).text (),
100
135
LocalDate .parse (elements .get (8 ).text ()),
101
136
LocalDate .parse (elements .get (9 ).text ()), LocalDate .parse (elements .get (10 )
102
137
.text ()), elements .get (11 ).text (), elements .get (12 ).text (), elements
103
138
.get (13 ).text (), elements .get (14 ).text (), elements .get (15 ).text (),
104
- elements .get (16 ).text ()));
139
+ elements .get (16 ).text ());
140
+ newWell .setForms (forms );
141
+ wells .add (newWell );
105
142
}
106
143
107
144
}
108
145
}
109
146
147
+ public static void writeCSV () throws IOException {
148
+ CSVWriter csvWellWriter = new CSVWriter (new FileWriter (new File ("newWells.csv" )));
149
+ CSVWriter csvFormsWriter = new CSVWriter (new FileWriter (new File ("newForms.csv" )));
150
+ csvWellWriter .writeNext ("id,operaterName,fieldName,leaseName,rrcGasId,rrcDistrictNo,wellNumber,apiNo,submissionDate,approvalDate,completionDate,completionType,wellType,county,drillingPermitNumber,wellBoreProfile,fieldNumber" .split ("," ));
151
+ csvFormsWriter .writeNext ("type,creation,certification" .split ("," ));
152
+ for (Well well : wells ) {
153
+ csvWellWriter .writeNext (well .asCsvEntry ());
154
+ for (Form form : well .getForms ()) {
155
+ csvFormsWriter .writeNext (form .asCsvEntry ());
156
+ }
157
+ }
158
+ }
159
+
110
160
public static List <Integer > getIdList ()
111
161
throws IOException , ParserConfigurationException , SAXException {
112
162
113
163
URI listingUri = UriBuilder .fromUri (siteUri )
114
164
.path ("publicSearchAction.do" )
115
165
.queryParam ("searchArgs.paramValue" , "|0=" + config .getBeginDate () + "|1=" + config .getEndDate () + "|2=" + config .getDistrict ())
116
- .queryParam ("pager.pageSize" , "100000" )
166
+ .queryParam ("pager.pageSize" , Integer . MAX_VALUE )
117
167
.queryParam ("formData.methodHndlr.inputValue" ,"search" )
118
168
.build ();
119
169
HttpGet httpGet = new HttpGet ();
@@ -128,7 +178,13 @@ public static List<Integer> getIdList()
128
178
Elements elements = document .body ().getElementsByAttributeValue ("class" , "DataGrid" );
129
179
Element el = elements .first ();
130
180
elements = el .getElementsByTag ("a" );
131
- elements .forEach (element -> ids .add (Integer .valueOf (element .text ())));
181
+ elements .forEach (element -> {
182
+ try {
183
+ ids .add (Integer .valueOf (element .text ().trim ()));
184
+ } catch (NumberFormatException e ) {
185
+
186
+ }
187
+ });
132
188
133
189
return ids ;
134
190
0 commit comments