Skip to content
This repository has been archived by the owner on Dec 18, 2023. It is now read-only.

Adjust transmart-batch to load high-dim data in the new way #122

Open
wants to merge 5 commits into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -26,16 +26,25 @@ class ClinicalFactsRowSet {
Date endDate
Integer instanceNum
TrialVisit trialVisit
String markerType

boolean highDimModifierFlag

Date importDate = new Date()

final List<ClinicalFact> clinicalFacts = []

void addValue(ConceptNode concept, XtrialNode xtrialNode, String value) {
clinicalFacts << new ClinicalFact(
void addValue(ConceptNode concept, XtrialNode xtrialNode, String value, String modifierCode = null,
boolean isHighDimModifier = false) {
highDimModifierFlag = isHighDimModifier
ClinicalFact fact = new ClinicalFact(
concept: concept,
xtrialNode: xtrialNode,
value: value,)
if (modifierCode) {
fact.modifierCode = modifierCode
}
clinicalFacts << fact
if (concept.ontologyNode) {
// Add entry with modifier to indicate the original variable name
// used for an observation.
Expand All @@ -53,6 +62,9 @@ class ClinicalFactsRowSet {
XtrialNode xtrialNode

/**
 * Resolves the concept type of this fact.
 *
 * A fact without a modifier code takes the type of its concept. A fact
 * with a modifier code is NUMERICAL when highDimModifierFlag is set and
 * CATEGORICAL otherwise.
 */
ConceptType getType() {
    if (!modifierCode) {
        return concept.type
    }
    // NOTE(review): highDimModifierFlag is reassigned on every addValue call,
    // so it may not describe this particular fact -- confirm.
    highDimModifierFlag ? ConceptType.NUMERICAL : ConceptType.CATEGORICAL
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,18 @@ package org.transmartproject.batch.facts
import org.springframework.batch.item.support.AbstractItemCountingItemStreamItemReader
import org.springframework.beans.factory.annotation.Autowired
import org.springframework.beans.factory.annotation.Value
import org.springframework.jdbc.core.namedparam.NamedParameterJdbcTemplate
import org.springframework.stereotype.Component
import org.transmartproject.batch.beans.JobScopeInterfaced
import org.transmartproject.batch.clinical.db.objects.Tables
import org.transmartproject.batch.concept.ConceptNode
import org.transmartproject.batch.highdim.assays.MappingFileRow
import org.transmartproject.batch.highdim.assays.MappingFileRowToConceptMapper
import org.transmartproject.batch.highdim.assays.MappingsFileRowStore
import org.transmartproject.batch.highdim.platform.Platform
import org.transmartproject.batch.patient.PatientSet
import org.transmartproject.batch.secureobject.Study
import org.transmartproject.batch.trialvisit.TrialVisit

/**
* Generates the dummy facts for high dimensional data.
Expand All @@ -27,11 +32,20 @@ class DummyFactGenerator extends AbstractItemCountingItemStreamItemReader<Clinic
@Autowired
private PatientSet patientSet

@Autowired
Study study

@Autowired
private MappingFileRowToConceptMapper mapper

private Collection<List<MappingFileRow>> subjectConceptMappingFileRows

@Value("#{jobExecutionContext['platformObject']}")
private Platform platform

@Autowired
private NamedParameterJdbcTemplate jdbcTemplate

@Override
protected void jumpToItem(int itemIndex) throws Exception {
currentItemCount = itemIndex
Expand All @@ -50,15 +64,39 @@ class DummyFactGenerator extends AbstractItemCountingItemStreamItemReader<Clinic
ConceptNode concept = mapper[row]
assert concept != null

TrialVisit trialVisit = getTrialVisit()

ClinicalFactsRowSet rowSet = new ClinicalFactsRowSet(
studyId: studyId,
patient: patientSet[row.subjectId])

rowSet.addValue(concept, null, concept.name)
patient: patientSet[row.subjectId],
trialVisit: trialVisit
)

def sample_id = row.sampleCd
if (sample_id) {
// add text value with subject sample mapping
rowSet.addValue(concept, null, sample_id)

List<Map> result = getAssayIds(sample_id)

if (result.empty) {
return
}
// add modifier for each assay_id
String modifier = 'TRANSMART:HIGHDIM:' + platform.markerType.toUpperCase()
for (int i=0; i<result.size(); i++){
rowSet.instanceNum = i+1
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This does not look right: instance_id should be unique for each row. Maybe pass it to the addValue method?

rowSet.addValue(concept, null, result[i].assay_id.toString(), modifier, true)
}

} else {
rowSet.addValue(concept, null, concept.name)
}

rowSet
}


@Override
protected void doOpen() throws Exception {
subjectConceptMappingFileRows = mappingsFileRowStore
Expand All @@ -72,4 +110,31 @@ class DummyFactGenerator extends AbstractItemCountingItemStreamItemReader<Clinic

@Override
protected void doClose() throws Exception {
    // Intentionally a no-op.
}

/**
 * Queries Tables.SUBJ_SAMPLE_MAP for the assay ids registered for a sample.
 *
 * The lookup is restricted to rows whose gpl_id equals platform.id, whose
 * sample_cd equals the given sample code and whose trial_name equals studyId.
 *
 * @param sample_id sample code to look up (bound to :sample_cd)
 * @return the rows returned by queryForList, each carrying an assay_id column
 */
private List<Map<String, Object>> getAssayIds(String sample_id) {
List result = jdbcTemplate.queryForList """
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What about making the result of the query the main data source, instead of reading from subjectConceptMappingFileRows?

SELECT assay_id
FROM $Tables.SUBJ_SAMPLE_MAP
WHERE gpl_id = :gpl_id
AND sample_cd = :sample_cd
AND trial_name =:trial_name
""", [gpl_id: platform.id, sample_cd: sample_id, trial_name: studyId]
result
}

TrialVisit getTrialVisit() {
def trialVisit = jdbcTemplate.queryForList """
SELECT *
FROM $Tables.TRIAL_VISIT_DIMENSION
WHERE study_num = :study_num
""", [study_num: study.studyNum]

if (trialVisit) {
def t = trialVisit?.first()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it work for the data people to bind each row to the first trial visit of the study? If so, you should do it once per job.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wasn't sure how to get a trial visit there, or what to do if there is more than one visit or none (for now there is a function that adds a default visit in that case).

return new TrialVisit(
id: t.trial_visit_num
)
}
null
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,9 @@ class ObservationFactStepsConfig implements StepBuildingConfigurationTrait {
}

@Bean
Step insertPseudoFacts(ItemStreamReader<ClinicalFactsRowSet> dummyFactGenerator,
ObservationFactTableWriter observationFactTableWriter) {
steps.get('writePseudoFactsStep')
Step insertHighDimFacts(ItemStreamReader<ClinicalFactsRowSet> dummyFactGenerator,
ewelinagr marked this conversation as resolved.
Show resolved Hide resolved
ObservationFactTableWriter observationFactTableWriter) {
steps.get('writeHighDimFactsStep')
.chunk(WRITE_ASSAY_CHUNK_SIZE)
.reader(dummyFactGenerator)
.writer(observationFactTableWriter)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ abstract class AbstractTypicalHdDataJobConfig {
@Resource
Step validatePatientIntersection
@Resource
Step insertPseudoFacts
Step insertHighDimFacts

@Resource
Step deleteObservationFacts
Expand Down Expand Up @@ -114,9 +114,9 @@ abstract class AbstractTypicalHdDataJobConfig {
//.next(deleteObservationFacts)

.next(insertConcepts)
.next(insertPseudoFacts)
.next(insertConceptCounts)
.next(insertAssays)
.next(insertHighDimFacts)
.next(partitionDataTable)
.next(secondPass)
.build()
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
package org.transmartproject.batch.highdim.mrna.data

import org.hamcrest.Matchers
import org.junit.AfterClass
import org.junit.ClassRule
import org.junit.Test
import org.junit.rules.RuleChain
import org.junit.rules.TestRule
import org.junit.runner.RunWith
import org.springframework.test.context.ContextConfiguration
import org.springframework.test.context.junit4.SpringJUnit4ClassRunner
import org.transmartproject.batch.beans.GenericFunctionalTestConfiguration
import org.transmartproject.batch.beans.PersistentContext
import org.transmartproject.batch.clinical.db.objects.Tables
import org.transmartproject.batch.junit.JobRunningTestTrait
import org.transmartproject.batch.junit.RunJobRule
import org.transmartproject.batch.support.TableLists

import static org.hamcrest.MatcherAssert.assertThat
import static org.hamcrest.Matchers.*
import static org.transmartproject.batch.matchers.AcceptAnyNumberIsCloseTo.castingCloseTo
import static org.transmartproject.batch.matchers.IsInteger.isIntegerNumber

/**
* Tests the new way of loading mRNA data, with the observations/modifiers that high-dimensional data need in 17.1.
*/
@RunWith(SpringJUnit4ClassRunner)
@ContextConfiguration(classes = GenericFunctionalTestConfiguration)
class MrnaDataFactRowsTests implements JobRunningTestTrait {

private final static String STUDY_ID = 'NANONLY'
private final static String PLATFORM_ID = 'GENE-EXPRESSION-1'

@ClassRule
public final static TestRule RUN_JOB_RULES = new RuleChain([
new RunJobRule(STUDY_ID, 'expression'),
new RunJobRule(PLATFORM_ID, 'mrna_annotation'),
new RunJobRule("${STUDY_ID}", 'clinical'),
])

// needed by the trait
public final static TestRule RUN_JOB_RULE =
RUN_JOB_RULES.rulesStartingWithInnerMost[0]

/**
 * Truncates the clinical tables, the mRNA tables and
 * ts_batch.batch_job_instance once all tests in this class have run.
 */
@AfterClass
static void cleanDatabase() {
    def tablesToTruncate = TableLists.CLINICAL_TABLES + TableLists.MRNA_TABLES + 'ts_batch.batch_job_instance'
    PersistentContext.truncator.truncate(tablesToTruncate)
}

/**
 * Checks that 45 rows of the observation fact table carry the study id
 * as their sourcesystem_cd.
 */
@Test
void testNumberOfObservationFacts() {
    def observationCount = rowCounter.count(
            Tables.OBSERVATION_FACT,
            'sourcesystem_cd = :sourcesystem_cd',
            sourcesystem_cd: STUDY_ID)
    assert observationCount == 45
}

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be nice to add a test for the column values of the HD observation rows.

}
Loading