Skip to content

Commit

Permalink
Update to latest backbone component versions
Browse files Browse the repository at this point in the history
  • Loading branch information
qqndrew committed Oct 30, 2023
1 parent 18cd154 commit e967016
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 23 deletions.
6 changes: 3 additions & 3 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

<groupId>org.ohnlp.medxn</groupId>
<artifactId>medxn</artifactId>
<version>1.0.5</version>
<version>1.0.6</version>
<description>The MedXN medication Information extraction pipeline</description>

<repositories>
Expand Down Expand Up @@ -104,12 +104,12 @@
<dependency>
<groupId>org.ohnlp.medtagger</groupId>
<artifactId>medtagger</artifactId>
<version>1.0.46</version>
<version>1.0.72</version>
</dependency>
<dependency>
<groupId>org.ohnlp.backbone</groupId>
<artifactId>api</artifactId>
<version>1.0.6</version>
<version>3.0.24</version>
<scope>provided</scope>
</dependency>
</dependencies>
Expand Down
41 changes: 33 additions & 8 deletions src/main/java/org/ohnlp/medxn/backbone/MedXNBackboneTransform.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.beam.sdk.schemas.Schema;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.ParDo;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.Row;
import org.apache.uima.UIMAFramework;
Expand All @@ -23,6 +24,10 @@
import org.apache.uima.util.CasCreationUtils;
import org.apache.uima.util.InvalidXMLException;
import org.ohnlp.backbone.api.Transform;
import org.ohnlp.backbone.api.annotations.ComponentDescription;
import org.ohnlp.backbone.api.annotations.ConfigurationProperty;
import org.ohnlp.backbone.api.components.OneToOneTransform;
import org.ohnlp.backbone.api.config.InputColumn;
import org.ohnlp.backbone.api.exceptions.ComponentInitializationException;
import org.ohnlp.medxn.type.Drug;
import org.ohnlp.medxn.type.MedAttr;
Expand All @@ -40,31 +45,54 @@
* Given an input row representing a document, duplicates row contents and adds an nlp_output_json column for each
* drug mention in the input text.
*/
public class MedXNBackboneTransform extends Transform {
@ComponentDescription(
name = "MedXN",
desc = "Extracts Drug Mentions and Associated Information from Text using MedXN"
)
public class MedXNBackboneTransform extends OneToOneTransform {

@ConfigurationProperty(
path = "input",
desc = "Column to use as input"
)
private InputColumn inputField;

private Schema schema;

@Override
public void initFromConfig(JsonNode jsonNode) throws ComponentInitializationException {
// No Configurable Initialization
public Schema calculateOutputSchema(Schema schema) {
    // Start from a mutable copy of the incoming columns, then append the column that carries the JSON output.
    final List<Schema.Field> outputFields = new LinkedList<>(schema.getFields());
    final Schema.Field jsonColumn = Schema.Field.of("nlp_output_json", Schema.FieldType.STRING);
    outputFields.add(jsonColumn);

    // Keep the computed schema on the transform as well as returning it to the caller.
    final Schema outputSchema = Schema.of(outputFields.toArray(new Schema.Field[0]));
    this.schema = outputSchema;
    return outputSchema;
}

@Override
public PCollection<Row> expand(PCollection<Row> input) {
return null;
return input.apply("Run MedXN", ParDo.of(new MedXNPipelineFunction(inputField.getSourceColumnName(), schema)));
}

@Override
public void init() throws ComponentInitializationException {
// This method body is empty: no initialization work is performed here.
}

private static class MedXNPipelineFunction extends DoFn<Row, Row> {

private static ThreadLocal<SimpleDateFormat> sdf = ThreadLocal.withInitial(() -> new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssXXX"));

private final String textField;
private final Schema schema;

// UIMA components are not serializable, and thus must be initialized per-executor via the @Setup annotation
private transient AnalysisEngine aae;
private transient ResourceManager resMgr;
private transient CAS cas;
private transient ObjectMapper om;

private MedXNPipelineFunction(String textField) {
private MedXNPipelineFunction(String textField, Schema schema) {
this.textField = textField;
this.schema = schema;
}

@Setup
Expand All @@ -81,9 +109,6 @@ public void init() throws IOException, InvalidXMLException, ResourceInitializati
@ProcessElement
public void processElement(@Element Row input, OutputReceiver<Row> output) {
// First create the output row schema
List<Schema.Field> fields = new LinkedList<>(input.getSchema().getFields());
fields.add(Schema.Field.of("nlp_output_json", Schema.FieldType.STRING));
Schema schema = Schema.of(fields.toArray(new Schema.Field[0]));
String text = input.getString(this.textField);
cas.reset();
cas.setDocumentText(text);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import org.apache.beam.sdk.values.Row;
import org.joda.time.Instant;
import org.ohnlp.backbone.api.Transform;
import org.ohnlp.backbone.api.components.OneToOneTransform;
import org.ohnlp.backbone.api.exceptions.ComponentInitializationException;

import java.io.BufferedReader;
Expand All @@ -31,12 +32,24 @@
* <b>Important:</b> Requires that the OHDSI vocabulary load query first be run and its output loaded into the backbone
* resources folder as ohdsi_rxnorm_map.csv. Please refer to the documentation for further details
*/
public class MedXNOutputToOHDSIFormatTransform extends Transform {
public class MedXNOutputToOHDSIFormatTransform extends OneToOneTransform {
private static ThreadLocal<SimpleDateFormat> sdf = ThreadLocal.withInitial(() -> new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssXXX"));
private Schema schema;


@Override
public void initFromConfig(JsonNode config) throws ComponentInitializationException {
public Schema calculateOutputSchema(Schema input) {
    // Output columns: every input column, followed by the seven columns appended below (order matters).
    final List<Schema.Field> outputFields = new LinkedList<>(input.getFields());

    final Schema.FieldType int32Type = Schema.FieldType.INT32;
    final Schema.FieldType stringType = Schema.FieldType.STRING;

    outputFields.add(Schema.Field.of("section_concept_id", int32Type));
    outputFields.add(Schema.Field.of("lexical_variant", stringType));
    outputFields.add(Schema.Field.of("snippet", stringType));
    outputFields.add(Schema.Field.of("note_nlp_concept_id", int32Type));
    outputFields.add(Schema.Field.of("note_nlp_source_concept_id", int32Type));
    outputFields.add(Schema.Field.of("nlp_datetime", Schema.FieldType.DATETIME));
    outputFields.add(Schema.Field.of("term_modifiers", stringType));

    // Remember the computed schema on this transform as well as returning it.
    final Schema outputSchema = Schema.of(outputFields.toArray(new Schema.Field[0]));
    this.schema = outputSchema;
    return outputSchema;
}

@Override
Expand Down Expand Up @@ -65,16 +78,7 @@ public void init() {

@ProcessElement
public void processElement(@Element Row input, OutputReceiver<Row> output) throws JsonProcessingException, ParseException {
// First transform row schemas
List<Schema.Field> fields = new LinkedList<>(input.getSchema().getFields());
fields.add(Schema.Field.of("section_concept_id", Schema.FieldType.INT32));
fields.add(Schema.Field.of("lexical_variant", Schema.FieldType.STRING));
fields.add(Schema.Field.of("snippet", Schema.FieldType.STRING));
fields.add(Schema.Field.of("note_nlp_concept_id", Schema.FieldType.INT32));
fields.add(Schema.Field.of("note_nlp_source_concept_id", Schema.FieldType.INT32));
fields.add(Schema.Field.of("nlp_datetime", Schema.FieldType.DATETIME));
fields.add(Schema.Field.of("term_modifiers", Schema.FieldType.STRING));
Schema schema = Schema.of(fields.toArray(new Schema.Field[0]));


MedXNDrugBean bean = om.readValue(input.getString("nlp_output_json"), MedXNDrugBean.class);

Expand All @@ -94,4 +98,8 @@ public void processElement(@Element Row input, OutputReceiver<Row> output) throw
}));
}

@Override
public void init() throws ComponentInitializationException {
// This method body is empty: no initialization work is performed here.
}
}

0 comments on commit e967016

Please sign in to comment.