Skip to content

Commit 514f5db

Browse files
author
Jordan Sanderson
committed
Added assembly/fixed target files being in Git/further work on main runner class
1 parent ed4ac47 commit 514f5db

File tree

8 files changed

+149
-28
lines changed

8 files changed

+149
-28
lines changed

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
.metadata
2-
target
2+
/target
3+
target/
34
.MF
45
.svn
56
.settings

pom.xml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,7 @@
139139
<archive>
140140
<manifest>
141141
<addClasspath>true</addClasspath>
142-
<mainClass>com.oilfield.logix.MainClass</mainClass>
142+
<mainClass>com.oilfield.logix.crawler.MainClass</mainClass>
143143
</manifest>
144144
</archive>
145145
</configuration>
@@ -148,8 +148,8 @@
148148
<groupId>org.apache.maven.plugins</groupId>
149149
<artifactId>maven-assembly-plugin</artifactId>
150150
<configuration>
151-
<descriptor>src/main/etc/test-assembly.xml</descriptor>
152-
<finalName>mop-test-assembly-${project.version}</finalName>
151+
<descriptor>src/main/etc/assembly.xml</descriptor>
152+
<finalName>crawler-assembly-${project.version}</finalName>
153153
</configuration>
154154
<executions>
155155
<execution>

src/main/etc/assembly.xml

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
<assembly
    xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.2"
    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.2 http://maven.apache.org/xsd/assembly-1.1.2.xsd">
  <!-- Distribution layout: /jars (project jar + runtime deps), /properties (config), / (launch scripts). -->
  <id>assembly</id>
  <formats>
    <format>tar.gz</format>
  </formats>
  <!-- Entries are placed at the archive root rather than under a versioned top-level directory. -->
  <includeBaseDirectory>false</includeBaseDirectory>
  <dependencySets>
    <dependencySet>
      <!-- The project artifact and all transitive runtime dependencies go into /jars. -->
      <scope>runtime</scope>
      <useTransitiveDependencies>true</useTransitiveDependencies>
      <useProjectArtifact>true</useProjectArtifact>
      <outputDirectory>/jars</outputDirectory>
      <fileMode>544</fileMode>
    </dependencySet>
  </dependencySets>
  <fileSets>
    <fileSet>
      <!-- Configuration files copied from the compiled resources. -->
      <outputDirectory>/properties</outputDirectory>
      <directory>${project.build.directory}/classes</directory>
      <includes>
        <include>**/*.properties</include>
        <include>**/*.json</include>
      </includes>
    </fileSet>
    <fileSet>
      <!-- Launch scripts at the archive root. -->
      <outputDirectory>/</outputDirectory>
      <directory>${project.build.directory}/classes</directory>
      <!-- Was 777: scripts need to be executable, not world-writable. -->
      <fileMode>0755</fileMode>
      <includes>
        <include>**/*.sh</include>
      </includes>
    </fileSet>
  </fileSets>
</assembly>

src/main/java/com/oilfield/logix/crawler/MainClass.java

Lines changed: 77 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,14 @@
1-
package java.com.oilfield.logix.crawler;
1+
package com.oilfield.logix.crawler;
22

3+
import java.io.File;
34
import java.io.FileReader;
5+
import java.io.FileWriter;
46
import java.io.IOException;
57
import java.net.URI;
68
import java.time.LocalDate;
79
import java.util.ArrayList;
810
import java.util.List;
11+
import java.util.Optional;
912

1013
import javax.ws.rs.core.UriBuilder;
1114
import javax.xml.parsers.ParserConfigurationException;
@@ -21,12 +24,11 @@
2124
import org.jsoup.select.Elements;
2225
import org.xml.sax.SAXException;
2326

24-
import au.com.bytecode.opencsv.CSVReader;
25-
26-
import com.oilfield.logix.crawler.Config;
27-
import com.oilfield.logix.crawler.Well;
2827
import com.oilfield.logix.crawler.Well.Form;
2928

29+
import au.com.bytecode.opencsv.CSVReader;
30+
import au.com.bytecode.opencsv.CSVWriter;
31+
3032
/**
3133
* Main class that runs the uber jar
3234
*
@@ -39,36 +41,39 @@ public class MainClass {
3941
.build();
4042
private static Config config;
4143
private static List<Well> wells;
42-
private static String oldFilePath;
44+
private static String oldWellsFilePath;
45+
private static String oldFormsFilePath;
4346
private static String siteUri;
4447

45-
public static void main(String[] args) throws IOException, InterruptedException {
48+
public static void main(String[] args) throws IOException, InterruptedException, ParserConfigurationException, SAXException {
4649

4750
config = new Config(args[0],args[1],args[2]);
48-
oldFilePath = args[3];
49-
siteUri = args[4];
50-
51-
51+
oldWellsFilePath = args[3];
52+
oldFormsFilePath = args[4];
53+
siteUri = args[5];
5254

55+
populateOldWells();
56+
populateNewWells();
57+
writeCSV();
5358
}
5459

5560

5661
public static void populateOldWells() throws IOException {
57-
CSVReader csvReader = new CSVReader(new FileReader(oldFilePath));
62+
CSVReader csvReader = new CSVReader(new FileReader(oldWellsFilePath));
5863
List<String[]> csvLines = csvReader.readAll();
5964
for(String[] line : csvLines) {
6065
wells.add(new Well(Integer.valueOf(line[0]), line[1], line[2], line[3], line[4],
6166
line[5], line[6], line[7], LocalDate.parse(line[8]), LocalDate.parse(line[9]), LocalDate
6267
.parse(line[10]), line[11], line[12], line[13], line[14], line[15], line[16]));
6368
}
6469

65-
csvReader = new CSVReader(new FileReader(oldFilePath));
70+
csvReader = new CSVReader(new FileReader(oldFormsFilePath));
6671
csvLines = csvReader.readAll();
6772

6873
for(String[] line : csvLines) {
6974
for(Well well : wells) {
7075
if(Integer.valueOf(line[0]).equals(well.getId())) {
71-
well.getForms().add(new Form(line[1], LocalDate.parse(line[2]), LocalDate.parse(line[3])));
76+
well.getForms().add(new Form(line[1], LocalDate.parse(line[2]), line[3].equals("null") ? Optional.empty() : Optional.of(LocalDate.parse(line[3]))));
7277
}
7378
}
7479
}
@@ -88,32 +93,77 @@ public static void populateNewWells() throws ParserConfigurationException, SAXEx
8893
HttpResponse response = httpClient.execute(httpGet);
8994
String responseString = IOUtils.toString(response.getEntity().getContent(), "UTF-8");
9095
Document document = Jsoup.parse(responseString);
91-
Elements elements = document.body().getElementsByAttributeValue("class", "GroupBox1");
96+
97+
Elements elements = document.body().getElementsByAttributeValue("class", "DataGrid");
9298
Element el = elements.first();
93-
elements = el.getElementsByTag("strong");
94-
if (wells.contains(id)) {
99+
elements = el.getElementsByTag("tr");
100+
List<Form> forms = new ArrayList<>();
101+
for(Element e : elements) {
102+
Elements elements1 = e.getElementsByTag("td");
103+
new Form(elements1.get(0).text(), LocalDate.now(), elements1.get(1).text()
104+
.equals("Certified") ? Optional.of(LocalDate.now()) : Optional.empty());
105+
}
95106

107+
elements = document.body().getElementsByAttributeValue("class", "GroupBox1");
108+
el = elements.first();
109+
elements = el.getElementsByTag("strong");
110+
Well oldWell = null;
111+
for(Well well : wells) {
112+
if(well.getId() == id) {
113+
oldWell = well;
114+
}
115+
}
116+
if (oldWell != null) {
117+
for (Form newForm : forms) {
118+
boolean found = false;
119+
for (Form form : oldWell.getForms()) {
120+
if (newForm.equals(form)) {
121+
found = true;
122+
if (!form.isCertified() && newForm.isCertified()) {
123+
form.setCertification(Optional.of(LocalDate.now()));
124+
}
125+
}
126+
if(!found) {
127+
oldWell.getForms().add(form);
128+
}
129+
}
130+
}
96131
} else {
97-
wells.add(new Well(Integer.valueOf(elements.get(0).text()), elements.get(1).text(), elements.get(
132+
Well newWell = new Well(Integer.valueOf(elements.get(0).text()), elements.get(1).text(), elements.get(
98133
2).text(), elements.get(3).text(), elements.get(4).text(), elements.get(5)
99134
.text(), elements.get(6).text(), elements.get(7).text(),
100135
LocalDate.parse(elements.get(8).text()),
101136
LocalDate.parse(elements.get(9).text()), LocalDate.parse(elements.get(10)
102137
.text()), elements.get(11).text(), elements.get(12).text(), elements
103138
.get(13).text(), elements.get(14).text(), elements.get(15).text(),
104-
elements.get(16).text()));
139+
elements.get(16).text());
140+
newWell.setForms(forms);
141+
wells.add(newWell);
105142
}
106143

107144
}
108145
}
109146

147+
public static void writeCSV() throws IOException {
148+
CSVWriter csvWellWriter = new CSVWriter(new FileWriter(new File("newWells.csv")));
149+
CSVWriter csvFormsWriter = new CSVWriter(new FileWriter(new File("newForms.csv")));
150+
csvWellWriter.writeNext("id,operaterName,fieldName,leaseName,rrcGasId,rrcDistrictNo,wellNumber,apiNo,submissionDate,approvalDate,completionDate,completionType,wellType,county,drillingPermitNumber,wellBoreProfile,fieldNumber" .split(","));
151+
csvFormsWriter.writeNext("type,creation,certification".split(","));
152+
for(Well well : wells) {
153+
csvWellWriter.writeNext(well.asCsvEntry());
154+
for(Form form : well.getForms()) {
155+
csvFormsWriter.writeNext(form.asCsvEntry());
156+
}
157+
}
158+
}
159+
110160
public static List<Integer> getIdList()
111161
throws IOException, ParserConfigurationException, SAXException {
112162

113163
URI listingUri = UriBuilder.fromUri(siteUri)
114164
.path("publicSearchAction.do")
115165
.queryParam("searchArgs.paramValue", "|0=" + config.getBeginDate() + "|1=" + config.getEndDate() + "|2=" + config.getDistrict())
116-
.queryParam("pager.pageSize", "100000")
166+
.queryParam("pager.pageSize", Integer.MAX_VALUE)
117167
.queryParam("formData.methodHndlr.inputValue","search")
118168
.build();
119169
HttpGet httpGet = new HttpGet();
@@ -128,7 +178,13 @@ public static List<Integer> getIdList()
128178
Elements elements = document.body().getElementsByAttributeValue("class", "DataGrid");
129179
Element el = elements.first();
130180
elements = el.getElementsByTag("a");
131-
elements.forEach(element -> ids.add(Integer.valueOf(element.text())));
181+
elements.forEach(element -> {
182+
try {
183+
ids.add(Integer.valueOf(element.text().trim()));
184+
} catch (NumberFormatException e) {
185+
186+
}
187+
});
132188

133189
return ids;
134190

src/main/java/com/oilfield/logix/crawler/Well.java

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -119,8 +119,25 @@ public String getFieldNumber() {
119119
}
120120

121121

122+
public String[] asCsvEntry() {
123+
String[] entry = {String.valueOf(id),operaterName, fieldName, leaseName, rrcGasId, rrcDistrictNo, wellNumber, apiNo,
124+
submissionDate.toString(), approvalDate.toString(), completionDate.toString(),
125+
completionType, wellType, county, drillingPermitNumber, wellBoreProfile, fieldNumber
126+
};
127+
return entry;
128+
}
129+
122130
public static class Form {
123131
private String type;
132+
133+
/**
 * Replaces this form's creation date.
 *
 * @param creation the new creation date
 */
public void setCreation(LocalDate creation) {
134+
this.creation = creation;
135+
}
136+
137+
/**
 * Replaces this form's certification date.
 *
 * @param certification the certification date, or an empty Optional when the form is
 *        not certified
 */
public void setCertification(Optional<LocalDate> certification) {
138+
this.certification = certification;
139+
}
140+
124141
private LocalDate creation;
125142
private Optional<LocalDate> certification;
126143

@@ -130,10 +147,10 @@ public Form(String type, LocalDate creation) {
130147
this.certification = Optional.empty();
131148
}
132149

133-
public Form(String type, LocalDate creation, LocalDate certification) {
150+
public Form(String type, LocalDate creation, Optional<LocalDate> certification) {
134151
this.type = type;
135152
this.creation = creation;
136-
this.certification = Optional.of(certification);
153+
this.certification = certification;
137154
}
138155

139156
public boolean isCertified() {
@@ -147,5 +164,14 @@ public String getType() {
147164
public LocalDate getCreation() {
148165
return creation;
149166
}
167+
168+
/**
 * Returns this form's certification date.
 *
 * @return the stored certification Optional (empty when no certification date is set)
 */
public Optional<LocalDate> getCertification() {
169+
return certification;
170+
}
171+
172+
public String[] asCsvEntry() {
173+
String[] entry = {type,creation.toString(),certification.isPresent() ? "null" : certification.get().toString()};
174+
return entry;
175+
}
150176
}
151177
}

src/main/resources/run.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
#!/usr/bin/env bash
# Launches the crawler jar from ./jars with a 64 MB initial and 256 MB maximum heap.
# Arguments, in the order MainClass.main reads them:
#   1-3  values passed to Config (presumably begin date, end date, district; confirm against Config)
#   4    path to the existing wells CSV
#   5    path to the existing forms CSV
#   6    base URI of the site to crawl
# NOTE(review): ${project.artifactId} and ${project.version} look like Maven placeholders,
# presumably substituted by resource filtering at build time; verify in pom.xml.
# All paths are relative, so the script must be run from the unpacked archive root.
2+
java -Xms64m -Xmx256m -jar ./jars/${project.artifactId}-${project.version}.jar 1/1/2015 1/31/2015 02 ./wells.csv ./forms.csv http://webapps.rrc.state.tx.us/CMPL/

target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst

Whitespace-only changes.

target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst

Lines changed: 0 additions & 1 deletion
This file was deleted.

0 commit comments

Comments
 (0)