Skip to content

Commit

Permalink
Implement incremental caching of Web service responses
Browse files Browse the repository at this point in the history
Cached Web service responses are now updated in place instead of being
replaced. The difference between the previously cached response and the new
one is saved in the unified diff format, so that the change can be patched
manually or even automatically in case of faulty data.

Further documentation can be found in issue [#21](#21).
  • Loading branch information
re1 committed Jul 22, 2020
1 parent da102ac commit b57825a
Show file tree
Hide file tree
Showing 7 changed files with 148 additions and 64 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@ public class TblWebserviceCache implements Serializable {
@Column(name = "query", nullable = false)
private String query;
@Lob
@Size(max = 2147483647)
@Column(name = "response")
private String response;
@Basic(optional = false)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
package org.jacq.common.model.jpa.openup;

import javax.persistence.*;
import javax.validation.constraints.NotNull;
import java.util.Objects;

/**
 * JPA entity mapped to the {@code tbl_webservice_cache_diffs} table of the {@code openup} schema.
 * <p>
 * Each row holds one textual diff together with the id of the Web service cache row it refers to
 * and a timestamp. NOTE(review): the unit of {@link #getTimestamp()} is not fixed by this class;
 * presumably it matches the timestamp of the referenced cache row - confirm against the caller.
 *
 * @author re1
 */
@Entity
@Table(name = "tbl_webservice_cache_diffs", schema = "openup")
public class TblWebserviceCacheDiffs {

    /** Primary key, generated by the database (identity column). */
    @Id
    @GeneratedValue(strategy = GenerationType.IDENTITY)
    @Basic(optional = false)
    @Column(name = "id", nullable = false)
    private int id;

    /** Id of the Web service cache row this diff refers to (plain column, no mapped relation). */
    @Basic(optional = false)
    @NotNull
    @Column(name = "tbl_webservice_cache_id", nullable = false)
    private int tblWebserviceCacheId;

    /** Diff text, stored as a large object. */
    @Basic
    @Lob
    @NotNull
    @Column(name = "diff", nullable = false, length = -1)
    private String diff;

    /** Timestamp associated with this diff. */
    @Basic(optional = false)
    @NotNull
    @Column(name = "timestamp", nullable = false)
    private long timestamp;

    /**
     * @return the primary key of this diff
     */
    public int getId() {
        return this.id;
    }

    /**
     * @param id the primary key to set
     */
    public void setId(int id) {
        this.id = id;
    }

    /**
     * @return the id of the Web service cache row this diff refers to
     */
    public int getTblWebserviceCacheId() {
        return this.tblWebserviceCacheId;
    }

    /**
     * @param tblWebserviceCacheId the id of the Web service cache row this diff refers to
     */
    public void setTblWebserviceCacheId(int tblWebserviceCacheId) {
        this.tblWebserviceCacheId = tblWebserviceCacheId;
    }

    /**
     * @return the stored diff text
     */
    public String getDiff() {
        return this.diff;
    }

    /**
     * @param diff the diff text to store
     */
    public void setDiff(String diff) {
        this.diff = diff;
    }

    /**
     * @return the timestamp associated with this diff
     */
    public long getTimestamp() {
        return this.timestamp;
    }

    /**
     * @param timestamp the timestamp to associate with this diff
     */
    public void setTimestamp(long timestamp) {
        this.timestamp = timestamp;
    }

    /**
     * Two diffs are equal when they are of exactly the same class and all four fields match.
     *
     * @param o the object to compare with
     * @return {@code true} if {@code o} is an equal diff entity, else {@code false}
     */
    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || getClass() != o.getClass()) {
            return false;
        }
        final TblWebserviceCacheDiffs other = (TblWebserviceCacheDiffs) o;
        if (id != other.id) {
            return false;
        }
        if (tblWebserviceCacheId != other.tblWebserviceCacheId) {
            return false;
        }
        if (timestamp != other.timestamp) {
            return false;
        }
        return Objects.equals(diff, other.diff);
    }

    /**
     * Hash over all four fields, consistent with {@link #equals(Object)}.
     *
     * @return the hash code of this diff
     */
    @Override
    public int hashCode() {
        // same 31-based fold, in the same field order, as Objects.hash(id, tblWebserviceCacheId, diff, timestamp)
        int result = 1;
        result = 31 * result + Integer.hashCode(id);
        result = 31 * result + Integer.hashCode(tblWebserviceCacheId);
        result = 31 * result + Objects.hashCode(diff);
        result = 31 * result + Long.hashCode(timestamp);
        return result;
    }
}
5 changes: 5 additions & 0 deletions jacq-names/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,11 @@
<artifactId>pherialize</artifactId>
<version>1.2.1</version>
</dependency>
<dependency>
<groupId>io.github.java-diff-utils</groupId>
<artifactId>java-diff-utils</artifactId>
<version>4.5</version>
</dependency>
</dependencies>

<build>
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
package org.jacq.service.names.sources;

import com.github.difflib.DiffUtils;
import com.github.difflib.UnifiedDiffUtils;
import com.github.difflib.algorithm.DiffException;
import com.github.difflib.patch.Patch;
import de.ailis.pherialize.Pherialize;
import org.jacq.common.model.jpa.openup.TblWebserviceCache;
import org.jacq.common.model.jpa.openup.TblWebserviceCacheDiffs;
import org.jacq.common.model.names.NameParserResponse;

import javax.persistence.EntityManager;
Expand All @@ -10,7 +15,10 @@
import javax.transaction.Transactional;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.logging.Logger;

/**
* Abstract base class for all Common Names Web service sources using the Web service cache
Expand All @@ -20,6 +28,8 @@
@Transactional
public abstract class CachedWebServiceSource implements CommonNamesSource {

private static final Logger LOGGER = Logger.getLogger(CachedWebServiceSource.class.getName());

@PersistenceContext(unitName = "openup")
protected EntityManager em;

Expand Down Expand Up @@ -60,68 +70,25 @@ private String phpSha1(String s) {
* @param query Query to look for
* @return null if no valid response was found, else the response
*/
private String getCachedResponse(String query) {
private TblWebserviceCache getWebServiceCache(String query) {
// PHP serialize queries and create a SHA1 hash for a quicker comparison with existing queries
query = phpSha1(Pherialize.serialize(query));

// build SQL lookup query for this service and query
String lookupQuery = "SELECT row FROM TblWebserviceCache row WHERE row.serviceId = :serviceId AND row.query = :query AND row.timestamp >= :timeout ORDER BY row.timestamp DESC";
String lookupQuery = "SELECT row FROM TblWebserviceCache row WHERE row.serviceId = :serviceId AND row.query = :query ORDER BY row.timestamp DESC";
TypedQuery<TblWebserviceCache> sourceQuery =
em.createQuery(lookupQuery, TblWebserviceCache.class)
// find the most recent cached response for this service and query
.setParameter("serviceId", serviceId)
// find cached entry for this query
.setParameter("query", query)
// make sure it's recent enough
.setParameter("timeout", System.currentTimeMillis() / 1000L - timeout)
// only the most recent entry
.setMaxResults(1);
// get SQL lookup query results
// TODO: Check if getSingleResult is the better option here
List<TblWebserviceCache> sourceQueryResults = sourceQuery.getResultList();
// check for valid cache entry
if (sourceQueryResults.isEmpty()) return null;
// deserialize PHP serialized response for existing queries
return Pherialize.unserialize(sourceQueryResults.get(0).getResponse()).toString();
}

/**
* Stores a response in the cache or updates its timestamp if it already exists.
*
* @param query Query to cache this response for
* @param response Response to cache
*/
private void setCachedResponse(String query, String response) {
// PHP serialize queries and create a SHA1 hash for a quicker comparison with existing queries
query = phpSha1(Pherialize.serialize(query));
response = Pherialize.serialize(response);
// lookup existing timed out cached responses for this query and service
String lookupQuery = "SELECT row FROM TblWebserviceCache row WHERE row.serviceId = :serviceId AND row.query = :query AND row.response = :response";
TypedQuery<TblWebserviceCache> sourceQuery =
em.createQuery(lookupQuery, TblWebserviceCache.class)
.setParameter("serviceId", this.serviceId)
// find cached entry for this query
.setParameter("query", query)
// check if response is the same
.setParameter("response", response)
// only the most recent entry
.setMaxResults(1);

List<TblWebserviceCache> sourceQueryResults = sourceQuery.getResultList();

if (sourceQueryResults.isEmpty()) {
// if the response does not exist create a new one
TblWebserviceCache webserviceCache = new TblWebserviceCache();
webserviceCache.setServiceId(this.serviceId);
webserviceCache.setQuery(query);
webserviceCache.setResponse(response);
webserviceCache.setTimestamp(System.currentTimeMillis() / 1000L);
em.persist(webserviceCache);
} else {
// if the response already exists update its timestamp
TblWebserviceCache webServiceCache = sourceQueryResults.get(0);
webServiceCache.setTimestamp(System.currentTimeMillis() / 1000L);
em.persist(webServiceCache);
}
return sourceQueryResults.get(0);
}

/**
Expand All @@ -131,20 +98,52 @@ private void setCachedResponse(String query, String response) {
* @return response string for the given query
*/
public String getResponse(NameParserResponse query) {
// get cached response if possible
String response = getCachedResponse(query.getScientificName());
if (response == null) {
response = getWebServiceResponse(query);
// check if there was a webservice response and cache it or get the last timed out cached response
if (response == null) {
setTimeout(0); // timeout of zero means a cached response is always valid
response = getCachedResponse(query.getScientificName());
} else {
setCachedResponse(query.getScientificName(), response);
String cachedResponse = null;
// get cached response
TblWebserviceCache webServiceCache = getWebServiceCache(query.getScientificName());
// if a cached response is found, unserialize it and return it right away unless it has timed out
if (webServiceCache != null) {
// existing responses are cached in PHP serialized format
cachedResponse = Pherialize.unserialize(webServiceCache.getResponse()).toString();
if (System.currentTimeMillis() / 1000L - webServiceCache.getTimestamp() < timeout) return cachedResponse;
}
// get the Web service response if no cached response has been returned
String webServiceResponse = getWebServiceResponse(query);
// return null or timed out cached response if there was no Web service response
if (webServiceResponse == null) return cachedResponse;
// if no cache exists for this Web service response create a new one
if (cachedResponse == null) {
webServiceCache = new TblWebserviceCache();
webServiceCache.setServiceId(this.serviceId);
// PHP serialize queries and create a SHA1 hash for a quicker comparison with existing queries
webServiceCache.setQuery(phpSha1(Pherialize.serialize(query.getScientificName())));
webServiceCache.setResponse(Pherialize.serialize(webServiceResponse));
} else {
// update the cached response if both a Web Service and cached response exist and persist the difference
if (!webServiceResponse.equals(cachedResponse)) {
try {
// calculate difference between cached response and Web service response
Patch<String> diff = DiffUtils.diffInline(cachedResponse, webServiceResponse);
// create a unified diff for this patch
List<String> unifiedDiff = UnifiedDiffUtils.generateUnifiedDiff(null, null, Collections.singletonList(cachedResponse), diff, 0);
// persist differences for the cached response and keep its timestamp
TblWebserviceCacheDiffs cacheDiff = new TblWebserviceCacheDiffs();
cacheDiff.setDiff(Pherialize.serialize(unifiedDiff));
cacheDiff.setTblWebserviceCacheId(webServiceCache.getId());
cacheDiff.setTimestamp(webServiceCache.getTimestamp());
em.persist(cacheDiff);
} catch (DiffException e) {
// Exception handling will likely be removed as it was stated unnecessary in this
// <a href="https://github.com/java-diff-utils/java-diff-utils/issues/70">java-diff-utils issue</a>.
e.printStackTrace();
}
}
}

return response;
// update timestamp and persist changes to the Web service cache
webServiceCache.setTimestamp(System.currentTimeMillis() / 1000L);
em.persist(webServiceCache);
// return Web service response after updating the cache
return webServiceResponse;
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ public ArrayList<CommonName> query(NameParserResponse query) {

// get cached response if possible
String response = getResponse(query);
if (response == null || response.isEmpty()) return results;

// check if result is valid JSON
try (StringReader stringReader = new StringReader(response)) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,11 @@ public void init() {
*/
@Override
public ArrayList<CommonName> query(NameParserResponse query) {
String response = getResponse(query);

ArrayList<CommonName> results = new ArrayList<>();

String response = getResponse(query);
if (response == null || response.isEmpty()) return results;

try {
// iterate over response arrays
MixedArray responseArray = Pherialize.unserialize(response).toArray();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ public ArrayList<CommonName> query(NameParserResponse query) {
ArrayList<CommonName> results = new ArrayList<>();
// get Web service response
String response = getResponse(query);
// return if no vernaculars were found for this GUID
// return empty result list if response is null or empty
if (response == null || response.isEmpty()) return results;
// check if response is valid JSON
try (StringReader stringReader = new StringReader(response)) {
Expand All @@ -81,7 +81,7 @@ public ArrayList<CommonName> query(NameParserResponse query) {
}
} catch (JsonParsingException e) {
// response is not valid JSON
LOGGER.log(Level.WARNING, "Response string is not valid JSON", e);
LOGGER.log(Level.WARNING, "Response string (" + response + ") is not valid JSON", e);
} catch (JsonException e) {
// JSON object could not be created due to an i/o error
LOGGER.log(Level.WARNING, "JSON object could not be created due to an i/o error", e);
Expand Down

0 comments on commit b57825a

Please sign in to comment.