Add some more tests #33

Open · wants to merge 11 commits into base branch rdbms-sink
3 changes: 3 additions & 0 deletions .travis.yml
@@ -24,6 +24,9 @@ script: |
mvn clean verify --settings maven-settings.xml
fi

after_success:
- bash <(curl -s https://codecov.io/bash)
Member:

This is running arbitrary code off a remote URL. This raises a red flag for me, especially on a repo that contains signing keys. Can we copy the script into our repo so we know what we're running?

Contributor Author:

The script at the URL is pretty large and may change over time. I think this request is a bit excessive: by the same logic we would also have to examine and vendor every single Maven dependency in our repo, since that code could also echo environment variables or send them to a remote server.

Member:

True enough, but Maven artifacts are signed. This is taking arbitrary code from a URL with no verification, and anyone can stick a proxy in between you and the server. It's always debatable how paranoid one needs to be, but the recent trend of piping the contents of URLs into bash is a real security hole.

Member:

Note that it's also unlikely we'd ever know if someone got the keys, which then compromises the signing system of Maven. The security problem is contagious.

Member:

Ah... but it's an HTTPS URL, so there's a certificate for a particular domain involved. Maybe it's not so insecure then...


# Secure envs are OSSRH_JIRA_USERNAME, OSSRH_JIRA_PASSWORD, GPG_KEY_NAME, GPG_PASSPHRASE
env:
global:
36 changes: 35 additions & 1 deletion pom.xml
@@ -135,6 +135,26 @@
</execution>
</executions>
</plugin>
<!-- This plugin generates code coverage reports during the test phase of maven. -->
<plugin>
<groupId>org.jacoco</groupId>
<artifactId>jacoco-maven-plugin</artifactId>
<version>0.7.9</version>
<executions>
<execution>
<goals>
<goal>prepare-agent</goal>
</goals>
</execution>
<execution>
<id>report</id>
<phase>test</phase>
<goals>
<goal>report</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>

@@ -143,10 +163,18 @@
</repositories>

<dependencies>
<!-- Rest Assured is a library that makes testing web APIs easy. -->
<dependency>
<groupId>io.rest-assured</groupId>
<artifactId>rest-assured</artifactId>
<version>3.0.3</version>
<scope>test</scope>
</dependency>
<!-- JUnit is a Java testing framework. -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>3.8.1</version>
<version>4.12</version>
<scope>test</scope>
</dependency>
<!-- Trove provides optimized map/set collections for native types (int, long...) -->
Expand All @@ -155,6 +183,12 @@
<artifactId>trove4j</artifactId>
<version>3.0.3</version>
</dependency>
<!-- Hamcrest is an assertion library that prints pretty messages when assertions fail -->
<dependency>
<groupId>org.hamcrest</groupId>
<artifactId>java-hamcrest</artifactId>
<version>2.0.0.0</version>
</dependency>
<dependency>
<groupId>com.beust</groupId>
<artifactId>jcommander</artifactId>
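As a quick illustration of what the dependency changes above buy (the JUnit 4 upgrade plus Hamcrest matchers), here is a minimal, hypothetical sketch; the class and values are invented for illustration and are not part of this PR:

```java
import org.junit.Test;

import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.is;

public class AssertionStyleTest {

    @Test
    public void hamcrestReportsExpectedAndActualOnFailure() {
        int wayCount = 2976;
        // On failure, Hamcrest prints both sides, e.g. "Expected: is <2976> but: was <2975>",
        // rather than the bare assertion failure JUnit 3-style asserts produce.
        assertThat(wayCount, is(2976));
    }
}
```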
22 changes: 6 additions & 16 deletions src/main/java/com/conveyal/osmlib/PostgresOSMSource.java
@@ -1,26 +1,16 @@
package com.conveyal.osmlib;

import ch.qos.logback.core.db.ConnectionSource;
import ch.qos.logback.core.recovery.ResilientFileOutputStream;
import com.google.common.primitives.Longs;
import gnu.trove.iterator.TLongIterator;
import gnu.trove.list.TLongList;
import gnu.trove.list.array.TLongArrayList;
import gnu.trove.set.TLongSet;
import gnu.trove.set.hash.TLongHashSet;
import org.mapdb.Fun;
import org.mapdb.Fun.Tuple3;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.sql.DataSource;
import java.awt.font.FontRenderContext;
import java.io.IOException;
import java.sql.*;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.util.Arrays;
import java.util.NavigableSet;
import java.util.Set;
import java.util.stream.Collectors;

/**
* An OSM source that pulls OSM entities out of a Postgres database, within a geographic window.
Expand Down Expand Up @@ -94,7 +84,7 @@ private void processNodes () throws Exception {
// You can see the difference between the two, that the nodes extend outside the bounding box in version A.
// Adding the distinct keyword here lets the DB server do the filtering, but may be problematic on larger extracts.
final String sqlA = "select node_id, lat, lon, tags from" +
"(select unnest(nodes) as node_id from ways " +
"(select unnest(node_ids) as node_id from ways " +
"where rep_lat > ? and rep_lat < ? and rep_lon > ? and rep_lon < ? and tags like '%highway=%')" +
"included_nodes join nodes using (node_id)";
PreparedStatement preparedStatement = connection.prepareStatement(sqlA);
Expand Down Expand Up @@ -124,7 +114,7 @@ private void processNodes () throws Exception {
}

private void processWays () throws Exception {
final String sql = "select way_id, tags, nodes " +
final String sql = "select way_id, tags, node_ids " +
"from ways " +
"where rep_lat > ? and rep_lat < ? and rep_lon > ? and rep_lon < ? " +
"and tags like '%highway=%'";
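For context on the bounding-box comment in processNodes above: a hedged sketch of where the distinct keyword it mentions would go. This variant is illustrative only, not the query this PR ships, and the trade-off is as the comment says: the server deduplicates node IDs, which may be costly on larger extracts.

```java
// Illustrative only: let the DB server deduplicate node IDs with "distinct",
// as discussed in the code comment above. Not the exact query used in this PR.
final String sqlWithDistinct = "select node_id, lat, lon, tags from " +
        "(select distinct unnest(node_ids) as node_id from ways " +
        "where rep_lat > ? and rep_lat < ? and rep_lon > ? and rep_lon < ? and tags like '%highway=%') " +
        "included_nodes join nodes using (node_id)";
```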
18 changes: 10 additions & 8 deletions src/main/java/com/conveyal/osmlib/PostgresSink.java
@@ -6,14 +6,12 @@
import org.slf4j.LoggerFactory;

import java.io.*;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.*;
import java.sql.*;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.Arrays;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collector;
import java.util.stream.Collectors;

/**
Expand Down Expand Up @@ -57,7 +55,7 @@ public void writeBegin() throws IOException {
// We use column names node_id, way_id, relation_id instead of just id to facilitate joins.
statement.execute("create table updates (time timestamp, replication_epoch bigint, operation varchar)");
statement.execute("create table nodes (node_id bigint, lat float(9), lon float(9), tags varchar)");
statement.execute("create table ways (way_id bigint, tags varchar, nodes bigint array)");
statement.execute("create table ways (way_id bigint, tags varchar, node_ids bigint array)");
Member:

This is a good idea - I almost made this change myself a while back. However it does mean we're introducing a schema change in a PR that's ostensibly about adding tests. Ideally this would be done in a separate commit or even a separate PR.

Contributor Author:

The main reason I changed this was that I was trying to debug that other update statement, and the old column name was confusing me.

statement.execute("create table relations (relation_id bigint, tags varchar)");
statement.execute("create table relation_members (relation_id bigint, type varchar, member_id bigint, role varchar)");
connection.commit();
Expand Down Expand Up @@ -246,7 +244,11 @@ public void writeEnd() throws IOException {
// statement.execute("create index on nodes(lat, lon)");
LOG.info("Assigning representative coordinates to ways...");
statement.execute("alter table ways add column rep_lat float(9), add column rep_lon float(9)");
statement.execute("update ways set (rep_lat, rep_lon) = (select lat, lon from nodes where nodes.node_id = nodes[array_length(nodes, 1)/2])");
Member:

Is this the change that took you a long time to figure out? I still don't know why it was a problem. Can you add a code comment explaining why the subquery is necessary?

Contributor Author:

Yep, this is the problem statement. I don't know why it was a problem on Travis either, but I'll add a comment.

Member:

I was having a hard time understanding what you meant by "problem statement". I just finally realized you probably mean "the statement that was causing the problem", i.e. "the problematic statement" not "the statement of a problem".

Member:

How did you fix the statement if you don't know why it was failing?

Contributor Author:

I just refactored it to use a subquery and then it worked. ¯\_(ツ)_/¯

// use a subquery because a previous statement without a subquery was failing on travis for unknown reasons
statement.execute("update ways " +
"set rep_lat=subq.lat, rep_lon=subq.lon " +
"from (select lat, lon, node_id FROM nodes) as subq " +
"where subq.node_id = ways.node_ids[array_length(ways.node_ids, 1)/2]");
LOG.info("Indexing representative coordinates of ways...");
statement.execute("create index on ways(rep_lat, rep_lon)");
connection.commit();
29 changes: 15 additions & 14 deletions src/main/java/com/conveyal/osmlib/VanillaExtract.java
@@ -6,16 +6,11 @@
import org.apache.commons.dbcp2.PoolingDataSource;
import org.apache.commons.pool2.impl.GenericObjectPool;
import org.glassfish.grizzly.http.Method;
import org.glassfish.grizzly.http.server.HttpHandler;
import org.glassfish.grizzly.http.server.HttpServer;
import org.glassfish.grizzly.http.server.NetworkListener;
import org.glassfish.grizzly.http.server.Request;
import org.glassfish.grizzly.http.server.Response;
import org.glassfish.grizzly.http.server.*;
import org.glassfish.grizzly.http.util.HttpStatus;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.sql.ConnectionPoolDataSource;
import javax.sql.DataSource;
import java.io.IOException;
import java.io.OutputStream;
Expand Down Expand Up @@ -60,7 +55,19 @@ public static void main(String[] args) {
//
// Thread updateThread = Updater.spawnUpdateThread(osm);

DataSource dataSource = createDataSource(args[0], null, null);
HttpServer httpServer = startServer(args[0]);
if (httpServer.isStarted()) {
try {
Thread.currentThread().join();
} catch (InterruptedException ie) {
LOG.info("Interrupted, shutting down.");
}
}
httpServer.shutdown();
}

public static HttpServer startServer(String jdbcUrl) {
DataSource dataSource = createDataSource(jdbcUrl, null, null);

LOG.info("Starting VEX HTTP server on port {} of interface {}", PORT, BIND_ADDRESS);
HttpServer httpServer = new HttpServer();
@@ -70,17 +77,12 @@
httpServer.getServerConfiguration().addHttpHandler(new VexHttpHandler(dataSource), "/*");
try {
httpServer.start();
LOG.info("VEX server running.");
Thread.currentThread().join();
// updateThread.interrupt();
} catch (BindException be) {
LOG.error("Cannot bind to port {}. Is it already in use?", PORT);
} catch (IOException ioe) {
LOG.error("IO exception while starting server.");
} catch (InterruptedException ie) {
LOG.info("Interrupted, shutting down.");
}
httpServer.shutdown();
return httpServer;
}

// Planet files are named planet-150504.osm.pbf (YYMMDD format)
Expand Down Expand Up @@ -113,7 +115,6 @@ public void service(Request request, Response response) throws Exception {
response.setContentType("application/osm");
String uri = request.getDecodedRequestURI();
int suffixIndex = uri.lastIndexOf('.');
String fileType = uri.substring(suffixIndex);
OutputStream outStream = response.getOutputStream();
try {
String[] coords = uri.substring(1, suffixIndex).split("[,;]");
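Pulling startServer out of main makes the server testable, which is presumably why the Rest Assured dependency was added. A minimal sketch of how a test might exercise it; the port number, coordinate order, and JDBC URL here are assumptions, not taken from this diff:

```java
import io.restassured.RestAssured;
import org.glassfish.grizzly.http.server.HttpServer;
import org.junit.Test;

import static org.junit.Assert.assertTrue;

public class VanillaExtractSmokeTest {

    @Test
    public void serverAnswersAnExtractRequest() {
        // Hypothetical JDBC URL; a real test would point at a database already loaded by PostgresSink.
        HttpServer server = VanillaExtract.startServer("jdbc:postgresql://localhost/osm_test");
        try {
            assertTrue(server.isStarted());
            RestAssured.port = 9001; // assumption: use whatever VanillaExtract.PORT actually is
            // The handler parses "/<coords>.<suffix>" and splits coords on [,;]; the
            // min-lat,min-lon,max-lat,max-lon order below is an assumption.
            RestAssured.get("/44.78,-68.82,44.84,-68.74.pbf").then().statusCode(200);
        } finally {
            server.shutdownNow();
        }
    }
}
```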
75 changes: 75 additions & 0 deletions src/test/java/com/conveyal/osmlib/PostgresTest.java
@@ -0,0 +1,75 @@
package com.conveyal.osmlib;

import org.junit.Assert;
import org.junit.Test;

import java.sql.*;

import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.is;

/**
* Tests that verify osm data can be saved and queried from postgres
*/
public class PostgresTest {

/**
* Test that this library can load data into postgres and that the count of records matches expectations
*/
@Test
public void canLoadFromFileIntoDatabase() throws Exception {
String newDBName = TestUtils.generateNewDB();
if (newDBName == null) {
throw new Exception("failed to generate test db");
}
try {
String jdbcUrl = "jdbc:postgresql://localhost/" + newDBName;
String[] args = {
"./src/test/resources/bangor_maine.osm.pbf",
jdbcUrl
};

// perform file to postgres load
PostgresSink.main(args);

// verify that data was loaded into postgres
try {
Connection connection = DriverManager.getConnection(jdbcUrl);
TableTestCase[] tables = {
new TableTestCase("nodes", 35747),
new TableTestCase("relation_members", 435),
new TableTestCase("relations", 34),
new TableTestCase("updates", 0),
new TableTestCase("ways", 2976)
};

for (TableTestCase table : tables) {
PreparedStatement preparedStatement = connection.prepareStatement("Select count(*) from " + table.tableName);
preparedStatement.execute();
ResultSet resultSet = preparedStatement.getResultSet();
resultSet.next();
int numNodes = resultSet.getInt(1);
assertThat(numNodes, is(table.expectedNumNodes));
}
} catch (SQLException e) {
e.printStackTrace();
Assert.fail();
}
} finally {
TestUtils.dropDB(newDBName);
}
}
}

/**
* Helper class to iterate through when testing whether the proper amount of records got loaded into a particular table.
*/
class TableTestCase {
String tableName;
int expectedNumNodes;

public TableTestCase(String tableName, int expectedNumNodes) {
this.tableName = tableName;
this.expectedNumNodes = expectedNumNodes;
}
}