-
Notifications
You must be signed in to change notification settings - Fork 13
Adjust transmart-batch to load high-dim data in the new way #122
base: dev
Are you sure you want to change the base?
Changes from all commits
db76b3e
3b14c28
e781fd0
2df0c5a
a0ddd03
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,13 +3,18 @@ package org.transmartproject.batch.facts | |
import org.springframework.batch.item.support.AbstractItemCountingItemStreamItemReader | ||
import org.springframework.beans.factory.annotation.Autowired | ||
import org.springframework.beans.factory.annotation.Value | ||
import org.springframework.jdbc.core.namedparam.NamedParameterJdbcTemplate | ||
import org.springframework.stereotype.Component | ||
import org.transmartproject.batch.beans.JobScopeInterfaced | ||
import org.transmartproject.batch.clinical.db.objects.Tables | ||
import org.transmartproject.batch.concept.ConceptNode | ||
import org.transmartproject.batch.highdim.assays.MappingFileRow | ||
import org.transmartproject.batch.highdim.assays.MappingFileRowToConceptMapper | ||
import org.transmartproject.batch.highdim.assays.MappingsFileRowStore | ||
import org.transmartproject.batch.highdim.platform.Platform | ||
import org.transmartproject.batch.patient.PatientSet | ||
import org.transmartproject.batch.secureobject.Study | ||
import org.transmartproject.batch.trialvisit.TrialVisit | ||
|
||
/** | ||
* Generates the dummy facts for high dimensional data. | ||
|
@@ -27,11 +32,20 @@ class DummyFactGenerator extends AbstractItemCountingItemStreamItemReader<Clinic | |
@Autowired | ||
private PatientSet patientSet | ||
|
||
@Autowired | ||
Study study | ||
|
||
@Autowired | ||
private MappingFileRowToConceptMapper mapper | ||
|
||
private Collection<List<MappingFileRow>> subjectConceptMappingFileRows | ||
|
||
@Value("#{jobExecutionContext['platformObject']}") | ||
private Platform platform | ||
|
||
@Autowired | ||
private NamedParameterJdbcTemplate jdbcTemplate | ||
|
||
@Override | ||
protected void jumpToItem(int itemIndex) throws Exception { | ||
currentItemCount = itemIndex | ||
|
@@ -50,15 +64,39 @@ class DummyFactGenerator extends AbstractItemCountingItemStreamItemReader<Clinic | |
ConceptNode concept = mapper[row] | ||
assert concept != null | ||
|
||
TrialVisit trialVisit = getTrialVisit() | ||
|
||
ClinicalFactsRowSet rowSet = new ClinicalFactsRowSet( | ||
studyId: studyId, | ||
patient: patientSet[row.subjectId]) | ||
|
||
rowSet.addValue(concept, null, concept.name) | ||
patient: patientSet[row.subjectId], | ||
trialVisit: trialVisit | ||
) | ||
|
||
def sample_id = row.sampleCd | ||
if (sample_id) { | ||
// add text value with subject sample mapping | ||
rowSet.addValue(concept, null, sample_id) | ||
|
||
List<Map> result = getAssayIds(sample_id) | ||
|
||
if (result.empty) { | ||
return | ||
} | ||
// add modifier for each assay_id | ||
String modifier = 'TRANSMART:HIGHDIM:' + platform.markerType.toUpperCase() | ||
for (int i=0; i<result.size(); i++){ | ||
rowSet.instanceNum = i+1 | ||
rowSet.addValue(concept, null, result[i].assay_id.toString(), modifier, true) | ||
} | ||
|
||
} else { | ||
rowSet.addValue(concept, null, concept.name) | ||
} | ||
|
||
rowSet | ||
} | ||
|
||
|
||
@Override | ||
protected void doOpen() throws Exception { | ||
subjectConceptMappingFileRows = mappingsFileRowStore | ||
|
@@ -72,4 +110,31 @@ class DummyFactGenerator extends AbstractItemCountingItemStreamItemReader<Clinic | |
|
||
// No-op: the body is empty, so this reader does nothing when it is closed. | ||
@Override | ||
protected void doClose() throws Exception {} | ||
|
||
// Queries $Tables.SUBJ_SAMPLE_MAP for the assay_id values whose gpl_id is platform.id, | ||
// whose sample_cd is the given sample_id and whose trial_name is studyId. | ||
// Returns the raw query rows (one map per row, selecting only assay_id). | ||
// The caller checks `.empty` on the result, so no match presumably yields an empty list - confirm. | ||
private List<Map<String, Object>> getAssayIds(String sample_id) { | ||
List result = jdbcTemplate.queryForList """ | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What about making the result of the query the main data source, instead of reading from |
||
SELECT assay_id | ||
FROM $Tables.SUBJ_SAMPLE_MAP | ||
WHERE gpl_id = :gpl_id | ||
AND sample_cd = :sample_cd | ||
AND trial_name =:trial_name | ||
""", [gpl_id: platform.id, sample_cd: sample_id, trial_name: studyId] | ||
result | ||
} | ||
|
||
// Selects every row of $Tables.TRIAL_VISIT_DIMENSION whose study_num equals study.studyNum | ||
// and wraps the trial_visit_num of the first returned row in a new TrialVisit. | ||
// Returns null when the query yields no rows. | ||
// NOTE(review): each call issues its own query and the SELECT has no ORDER BY, so which | ||
// row comes "first" is presumably decided by the database - confirm, and consider | ||
// resolving the visit once per job as suggested in the review thread below. | ||
TrialVisit getTrialVisit() { | ||
def trialVisit = jdbcTemplate.queryForList """ | ||
SELECT * | ||
FROM $Tables.TRIAL_VISIT_DIMENSION | ||
WHERE study_num = :study_num | ||
""", [study_num: study.studyNum] | ||
|
||
// Groovy truth: a list is truthy only when it is non-null and non-empty. | ||
if (trialVisit) { | ||
// NOTE(review): inside this branch the list is already known to be non-empty, so `?.` looks redundant. | ||
def t = trialVisit?.first() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Would it work for the data people to bind row with a first trial visit from the study? If so you should do it once per job. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I wasn't sure how to get a trial visit there and what to do, if there is more than one visit or none (for now there is a function to add a default visit then) |
||
return new TrialVisit( | ||
id: t.trial_visit_num | ||
) | ||
} | ||
// No trial visit found for this study. | ||
null | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
package org.transmartproject.batch.highdim.mrna.data | ||
|
||
import org.hamcrest.Matchers | ||
import org.junit.AfterClass | ||
import org.junit.ClassRule | ||
import org.junit.Test | ||
import org.junit.rules.RuleChain | ||
import org.junit.rules.TestRule | ||
import org.junit.runner.RunWith | ||
import org.springframework.test.context.ContextConfiguration | ||
import org.springframework.test.context.junit4.SpringJUnit4ClassRunner | ||
import org.transmartproject.batch.beans.GenericFunctionalTestConfiguration | ||
import org.transmartproject.batch.beans.PersistentContext | ||
import org.transmartproject.batch.clinical.db.objects.Tables | ||
import org.transmartproject.batch.junit.JobRunningTestTrait | ||
import org.transmartproject.batch.junit.RunJobRule | ||
import org.transmartproject.batch.support.TableLists | ||
|
||
import static org.hamcrest.MatcherAssert.assertThat | ||
import static org.hamcrest.Matchers.* | ||
import static org.transmartproject.batch.matchers.AcceptAnyNumberIsCloseTo.castingCloseTo | ||
import static org.transmartproject.batch.matchers.IsInteger.isIntegerNumber | ||
|
||
/** | ||
* Functional test for the new way of loading mRNA data, i.e. with the | ||
* observations/modifiers that high dimensional data need in 17.1. | ||
* It counts the rows stored in Tables.OBSERVATION_FACT for the test study | ||
* once the job rules declared in RUN_JOB_RULES have been applied. | ||
*/ | ||
@RunWith(SpringJUnit4ClassRunner) | ||
@ContextConfiguration(classes = GenericFunctionalTestConfiguration) | ||
class MrnaDataFactRowsTests implements JobRunningTestTrait { | ||
|
||
// Identifiers passed to the job rules below; STUDY_ID is also the sourcesystem_cd filter in the test. | ||
private final static String STUDY_ID = 'NANONLY' | ||
private final static String PLATFORM_ID = 'GENE-EXPRESSION-1' | ||
|
||
// Class-level rule chain holding three job runs: expression, mrna_annotation and clinical. | ||
@ClassRule | ||
public final static TestRule RUN_JOB_RULES = new RuleChain([ | ||
new RunJobRule(STUDY_ID, 'expression'), | ||
new RunJobRule(PLATFORM_ID, 'mrna_annotation'), | ||
// NOTE(review): "${STUDY_ID}" is a GString; the plain STUDY_ID used above looks equivalent - confirm. | ||
new RunJobRule("${STUDY_ID}", 'clinical'), | ||
]) | ||
|
||
// needed by the trait | ||
public final static TestRule RUN_JOB_RULE = | ||
RUN_JOB_RULES.rulesStartingWithInnerMost[0] | ||
|
||
// Runs once after all tests: truncates the clinical and mRNA table lists plus ts_batch.batch_job_instance. | ||
@AfterClass | ||
static void cleanDatabase() { | ||
PersistentContext.truncator. | ||
truncate(TableLists.CLINICAL_TABLES + TableLists.MRNA_TABLES + 'ts_batch.batch_job_instance') | ||
} | ||
|
||
// Expects exactly 45 rows in Tables.OBSERVATION_FACT whose sourcesystem_cd equals STUDY_ID. | ||
@Test | ||
void testNumberOfObservationFacts() { | ||
def count = rowCounter.count Tables.OBSERVATION_FACT, | ||
'sourcesystem_cd = :sourcesystem_cd', | ||
sourcesystem_cd: STUDY_ID | ||
assert count == 45 | ||
} | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. it would be nice to add a test for the column values of the HD observation rows. |
||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It does not look right: `instance_id` should be unique for each row. Maybe pass it to the `addValue` method?