Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Jbrowse2 service #258

Open
wants to merge 12 commits into
base: master
Choose a base branch
from
Open
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import org.apidb.apicommon.service.services.comments.AttachmentsService;
import org.apidb.apicommon.service.services.comments.UserCommentsService;
import org.apidb.apicommon.service.services.dataPlotter.ProfileSetService;
import org.apidb.apicommon.service.services.jbrowse.JBrowse2Service;
import org.apidb.apicommon.service.services.jbrowse.JBrowseService;
import org.apidb.apicommon.service.services.jbrowse.JBrowseUserDatasetsService;
import org.eupathdb.common.service.EuPathServiceApplication;
Expand Down Expand Up @@ -46,6 +47,7 @@ public Set<Class<?>> getClasses() {
.add(UserCommentsService.class)
.add(TranscriptToggleService.class)
.add(JBrowseService.class)
.add(JBrowse2Service.class)
.add(JBrowseUserDatasetsService.class)
.add(ProfileSetService.class)
.add(OrganismMetricsService.class)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,257 @@
package org.apidb.apicommon.service.services.jbrowse;

import java.io.*;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;

import javax.sql.DataSource;
import javax.ws.rs.*;
import javax.ws.rs.core.MediaType;
import javax.ws.rs.core.Response;
import javax.ws.rs.BadRequestException;

import org.apache.log4j.Logger;
import org.gusdb.fgputil.db.runner.SQLRunner;
import org.gusdb.fgputil.db.runner.SQLRunnerException;
import org.gusdb.wdk.model.WdkException;
import org.gusdb.wdk.model.WdkModelException;
import org.gusdb.wdk.model.WdkRuntimeException;
import org.gusdb.wdk.service.service.AbstractWdkService;
import org.json.JSONArray;
import org.json.JSONObject;

@Path("/jbrowse2")
public class JBrowse2Service extends AbstractWdkService {

private static final Logger LOG = Logger.getLogger(JBrowse2Service.class);

private static final String VDI_CONTROL_SCHEMA_KEY ="VDI_CONTROL_SCHEMA";
private static final String VDI_DATASET_SCHEMA_KEY ="VDI_DATASETS_SCHEMA";
private static final String WEB_SVC_DIR_KEY ="WEBSERVICEMIRROR";

private static final String USER_DATASETS_DIR = "./userDatasetsData"; // hard-coded mount point in the jbrowse2 service
/*
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you use Javadoc here (two **s), our API may eventually pick it up. Doesn't now because we are using hard-coded RAML. :(

Get config for a single organism. Assumes JSON will easily fit in memory.
*/
@GET
@Path("orgview/{publicOrganismAbbrev}/config.json")
@Produces(MediaType.APPLICATION_JSON)
public Response getJbrowseSingleOrgTracks(@PathParam("publicOrganismAbbrev") String publicOrganismAbbrev,
@QueryParam("trackSets") String trackSetsString) throws IOException, WdkException {

String errMsg = "Must provide a comma delimited list of tracks in a 'trackSets' query param";
if (trackSetsString == null) throw new BadRequestException(errMsg);
List<String> trackSetsList = Arrays.asList(trackSetsString.split(","));
if (trackSetsList.isEmpty()) throw new BadRequestException(errMsg);

// get static json config, for this organism and set of tracks
String staticConfigJsonString = getStaticConfigJsonString(publicOrganismAbbrev, trackSetsString);
JSONObject staticConfigJson = new JSONObject(staticConfigJsonString);

// get similar from user datasets
JSONArray udTracks = getUserDatasetTracks(publicOrganismAbbrev, trackSetsList);

// merge UD tracks into static
staticConfigJson.getJSONArray("tracks").putAll(udTracks);

// send response
String jsonString = staticConfigJson.toString();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A little worried about the size of this. toString() means doubling the size of the completed file before returning. Wondering if we should be using Jackson to stream as Ellie suggested. Though that can probably wait until we know this is giving us what we want.

return Response.ok(jsonString, MediaType.APPLICATION_JSON).build();
}

/**
 * Builds the static JBrowse2 config JSON for one organism by calling out to
 * the external (perl) jbrowse2Config program located under GUS_HOME/bin.
 *
 * @param publicOrganismAbbrev organism abbreviation, passed as --orgAbbrev
 * @param trackSetsString comma-delimited track set names, passed unparsed as --trackSets
 * @return the string produced by stringFromCommand for the assembled command
 * @throws IOException propagated from stringFromCommand
 */
String getStaticConfigJsonString(String publicOrganismAbbrev, String trackSetsString) throws IOException {

  String executable = getWdkModel().getGusHome() + "/bin/jbrowse2Config";
  String project = getWdkModel().getProjectId();
  String build = getWdkModel().getBuildNumber();
  String webSvcDir = getWdkModel().getProperties().get(WEB_SVC_DIR_KEY);

  // Arrays.asList accepts null entries, so a missing value is still passed
  // along as-is inside the command list rather than rejected here.
  List<String> command = new ArrayList<>(Arrays.asList(
      executable,
      "--orgAbbrev", publicOrganismAbbrev,
      "--projectId", project,
      "--buildNumber", build,
      "--webSvcDir", webSvcDir,
      "--trackSets", trackSetsString));

  return stringFromCommand(command);
}

/**
 * Collects the JBrowse2 track configs contributed by user datasets for the
 * given organism, restricted to the requested track sets.
 *
 * The returned array is flat: each element is one track JSON object, so the
 * caller can merge it into the static config's "tracks" array with putAll.
 *
 * @param publicOrganismAbbrev organism abbreviation the datasets must reference
 * @param trackSetList names of the requested track sets
 * @return array of user dataset track objects; empty if no supported track set was requested
 * @throws WdkModelException propagated from getRnaSeqUdTracks
 */
JSONArray getUserDatasetTracks(String publicOrganismAbbrev, List<String> trackSetList) throws WdkModelException {
  String buildNumber = getWdkModel().getBuildNumber();
  String projectId = getWdkModel().getProjectId();
  Long userId = getRequestingUser().getUserId();
  String vdiDatasetsSchema = getWdkModel().getProperties().get(VDI_DATASET_SCHEMA_KEY);
  String vdiControlSchema = getWdkModel().getProperties().get(VDI_CONTROL_SCHEMA_KEY);

  String udDataPathString = String.join("/", USER_DATASETS_DIR, vdiDatasetsSchema, "build-" + buildNumber, projectId);
  JSONArray udTracks = new JSONArray();

  // for now we only have rnaseq UD tracks
  if (trackSetList.contains("rnaseq")) {
    // putAll appends each track individually; put(JSONArray) would instead
    // nest the whole array as a single element of udTracks
    udTracks.putAll(getRnaSeqUdTracks(publicOrganismAbbrev, projectId, vdiControlSchema,
        udDataPathString, userId));
  }
  return udTracks;
}

JSONArray getRnaSeqUdTracks(String publicOrganismAbbrev, String projectId, String vdiControlSchema,
String udDataPathString, Long userId) throws WdkModelException {

DataSource appDs = getWdkModel().getAppDb().getDataSource();
String sql = "select distinct user_dataset_id, name " +
"from " + vdiControlSchema + ".AvailableUserDatasets aud, " +
vdiControlSchema + ".dataset_dependency dd " +
"where project_id = '" + projectId + "' " +
"and (type = 'rnaseq' or type = 'bigwigfiles') " +
"and ((is_public = 1 and is_owner = 1) or user_id = " + userId + ") " +
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this work? Seems like you would want table refs in these columns. Also, is sharing covered in AvailableUserDatasets? is_public is our community datasets indicator?

"and dd.dataset_id = aud.user_dataset_id " +
"and dd.identifier = '" + publicOrganismAbbrev + "'";
try {
return new SQLRunner(appDs, sql).executeQuery(rs -> {
JSONArray rnaSeqUdTracks = new JSONArray();
while (rs.next()) {
String datasetId = rs.getString(1);
String name = rs.getString(2);
JSONObject track = createBigwigTrackJson(datasetId, name, publicOrganismAbbrev);
List<String> fileNames = getBigwigFileNames(udDataPathString + "/" +datasetId);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So, we can't return to the web client the full path to these files. We're returning relative URLs. Right now there's a script in service-jbrowse2 that does this transform using sed (depends on regex matching per line), but we should probably use jq so John can send the JSON in a single line. He mentioned this in one of our meetings. But wonder if we could be doing that here? We can at least set the correct value up front for user datasets.

for (String fileName : fileNames) {
track.getJSONObject("adapter")
.getJSONArray("subadapters")
.put(createBigwigSubadapterJson(datasetId, fileName, udDataPathString));
}
}
return rnaSeqUdTracks;
});
}
catch (SQLRunnerException e) {
throw new WdkModelException("Unable to query VDI tables for RNA seq datasets. " + e.getMessage(), e.getCause());
}
}

// list the names of the bigwig (.bw) files found directly inside a user dataset directory
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't need to advertise this. :)

/**
 * Returns the names (not full paths) of the regular files ending in ".bw"
 * that sit directly inside the given directory. Subdirectories are not
 * searched.
 *
 * @param directoryPath path of the directory to inspect
 * @return the matching file names; empty if there are none or the listing is unavailable
 * @throws SQLRunnerException if directoryPath is not a directory
 */
public static List<String> getBigwigFileNames(String directoryPath) throws SQLRunnerException {
  File udDir = new File(directoryPath);
  if (!udDir.isDirectory()) {
    throw new SQLRunnerException("User Dataset directory not found for path: " + directoryPath);
  }

  List<String> names = new ArrayList<>();

  // listFiles returns null when the directory cannot be listed; treat that as "no files"
  File[] bigwigs = udDir.listFiles(entry -> entry.isFile() && entry.getName().endsWith(".bw"));
  if (bigwigs == null) {
    return names;
  }

  for (File bigwig : bigwigs) {
    names.add(bigwig.getName());
  }
  return names;
}

/*
MULTI BIGWIG TRACK EXAMPLE
{
"assemblyNames": [
"ORG_ABBREV"
],
"trackId": "VDI_ID",
"name": "VDI_NAME",
"displays": [
{
"displayId": "wiggle_ApiCommonModel::Model::JBrowseTrackConfig::MultiBigWigTrackConfig::XY=HASH(0x2249320)",
"maxScore": 1000,
"minScore": 1,
"defaultRendering": "multirowxy",
"type": "MultiLinearWiggleDisplay",
"scaleType": "log"
}
],
"adapter": {
"subadapters": [
{
"color": "grey",
"name": "FILE_NAME",
"type": "BigWigAdapter",
"bigWigLocation": {
"locationType": "UriLocation",
"uri": "USER_DATASET_PATH/VDI_ID/FILE_NAME"
}
}
]
}
}
*/
/**
 * Creates the JSON config for one multi-bigwig track with a single
 * MultiLinearWiggleDisplay (log scale, scores bounded to 1..1000) and an
 * adapter whose "subadapters" array starts out empty. The caller is expected
 * to append one subadapter per bigwig file.
 *
 * @param vdiId user dataset ID, used as the "trackId"
 * @param vdiName user dataset name, used as the track "name"
 * @param organismAbbrev organism abbreviation, the sole entry in "assemblyNames"
 * @return the new track JSON object
 */
JSONObject createBigwigTrackJson(String vdiId, String vdiName, String organismAbbrev) {
  return new JSONObject()
      .put("assemblyNames", new JSONArray().put(organismAbbrev))
      .put("trackId", vdiId)
      .put("name", vdiName)
      .put("displays", new JSONArray()
          .put(new JSONObject()
              // NOTE(review): hard-coded ID, including what looks like a perl HASH address — confirm it is intended
              .put("displayId", "wiggle_ApiCommonModel::Model::JBrowseTrackConfig::MultiBigWigTrackConfig::XY=HASH(0x2249320)")
              // lower bound; was previously written under "maxScore" and then overwritten by the next put
              .put("minScore", 1)
              .put("maxScore", 1000)
              .put("defaultRendering", "multirowxy")
              .put("type", "MultiLinearWiggleDisplay")
              .put("scaleType", "log")
          )
      )
      .put("adapter", new JSONObject()
          .put("subadapters", new JSONArray())
      );
}

JSONObject createBigwigSubadapterJson(String vdiId, String fileName, String userDatasetsFilePath) {
JSONObject subAdapter = new JSONObject();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You forgot to add subAdapter anywhere. A nice feature of org.json is the ability to chain the calls together, so you can avoid misses like this. It also mirrors the structure of the resulting JSON, increasing readability. See below:

    JSONObject createBigwigTrackJson(String vdiId, String vdiName, String fileName, String organismAbbrev, String userDatasetsFilePath) {
      return new JSONObject()
          .put("assemblyNames", new JSONArray().put(organismAbbrev))
          .put("trackId", vdiId)
          .put("name", vdiName)
          .put("displays", new JSONArray()
              .put(new JSONObject()
                  .put("displayId", "wiggle_ApiCommonModel::Model::JBrowseTrackConfig::MultiBigWigTrackConfig::XY=HASH(0x2249320)")
                  .put("maxScore", 1)
                  .put("maxScore", 1000)
                  .put("defaultRendering", "multirowxy")
                  .put("type", "MultiLinearWiggleDisplay")
                  .put("scaleType", "log")))
          .put("adapter", new JSONObject()
              .put("subadapters", new JSONArray()
                  .put(new JSONObject()
                      .put("color1", "grey")
                      .put("name", fileName)
                      .put("type", "BigWigAdapter")
                      .put("bigWigLocation", new JSONObject()
                          .put("locationType", "UriLocation")
                          .put("uri", String.join("/", userDatasetsFilePath, vdiId, fileName))))));
    }

subAdapter.put("color1", "grey");
subAdapter.put("name", fileName);
subAdapter.put("type", "BigWigAdapter");
JSONObject location = new JSONObject().put("locationType", "UriLocation");
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm.. maybe this is handling the filepath -> relative URL comment I made earlier. Should maybe add comments to talk about what is returned for the "location" of this track?

location.put("uri", String.join("/", userDatasetsFilePath, vdiId, fileName));
subAdapter.put("bigWigLocation", location);
return subAdapter;
}

String stringFromCommand(List<String> command) throws IOException {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Need to wait for the process to complete. Also I think the output collection could be simpler. Something like this:

    String stringFromCommand(List<String> command) throws IOException {
      try {
        Process p = processFromCommand(command);

        ByteArrayOutputStream stringBuffer = new ByteArrayOutputStream();
        p.getInputStream().transferTo(stringBuffer);
        String errors = stringBuffer.toString();

        stringBuffer.reset();
        p.getInputStream().transferTo(stringBuffer);

        if (p.waitFor() != 0) {
          throw new RuntimeException("Subprocess from [" + String.join(" ", command) + "] returned non-zero.  Errors:\n" + errors);
        }

        return stringBuffer.toString();
      }
      catch (InterruptedException e) {
        throw new RuntimeException("Subprocess from [" + String.join(" ", command) + "] was interrupted befor it could complete.");
      }
    }

LOG.debug("Running command: " + String.join(" ", command));
try {
Process p = processFromCommand(command);

ByteArrayOutputStream stringBuffer = new ByteArrayOutputStream();
p.getErrorStream().transferTo(stringBuffer);
String errors = stringBuffer.toString();

stringBuffer.reset();
p.getInputStream().transferTo(stringBuffer);

if (p.waitFor() != 0) {
throw new RuntimeException("Subprocess from [" + String.join(" ", command) + "] returned non-zero. Errors:\n" + errors);
}

return stringBuffer.toString();
}
catch (InterruptedException e) {
throw new RuntimeException("Subprocess from [" + String.join(" ", command) + "] was interrupted befor it could complete.");
}
}
Process processFromCommand (List<String> command) throws IOException {
for (int i = 0; i < command.size(); i++) {
if (command.get(i) == null)
throw new WdkRuntimeException(
"Command part at index " + i + " is null. Could be due to unchecked user input.");
}
ProcessBuilder pb = new ProcessBuilder(command);
Map<String, String> env = pb.environment();
env.put("GUS_HOME", getWdkModel().getGusHome());
pb.redirectErrorStream(true);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe better to not merge the streams and instead collect error output into a String and check if non-empty. Don't know the characteristics of the subprocess. I see you're not checking return value either, so how do you know if it bombed? Would be nice to log the subprocess output in the case of runtime error- Cristina will definitely ask for this later.

return pb.start();
}
}