Skip to content

Commit

Permalink
Merge branch 'develop'
Browse files Browse the repository at this point in the history
  • Loading branch information
djcomlab committed Mar 2, 2017
2 parents e412a1d + 477d65c commit e89e09d
Show file tree
Hide file tree
Showing 5 changed files with 67 additions and 28 deletions.
61 changes: 36 additions & 25 deletions isatools/isatab.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from itertools import tee
import pandas as pd
from progressbar import ProgressBar, SimpleProgress, Bar, ETA
import io


logging.basicConfig(format='%(asctime)s %(levelname)s: %(message)s', level=logging.INFO)
Expand Down Expand Up @@ -771,14 +772,14 @@ def write_assay_table_files(inv_obj, output_dir):
columns[i] = col[col.rindex(".") + 1:]
elif "Parameter Value[" in col:
columns[i] = col[col.rindex(".") + 1:]
elif "Protocol REF" in col:
columns[i] = "Protocol REF"
elif col.endswith("Date"):
columns[i] = "Date"
elif col.endswith("Performer"):
columns[i] = "Performer"
elif "Comment[" in col:
columns[i] = col[col.rindex(".") + 1:]
elif "Protocol REF" in col:
columns[i] = "Protocol REF"

print("Rendered {} paths".format(len(DF.index)))
if len(DF.index) > 1:
Expand Down Expand Up @@ -886,28 +887,37 @@ def _build_section_df(f):
df = df.reindex(df.index.drop(0)) # Reindex the DataFrame
return df

memf = io.StringIO()
while True:
line = fp.readline()
if not line:
break
if not line.lstrip().startswith('#'):
memf.write(line)
memf.seek(0)

df_dict = dict()

# Read in investigation file into DataFrames first
df_dict['ontology_sources'] = _build_section_df(_read_tab_section(
f=fp,
f=memf,
sec_key='ONTOLOGY SOURCE REFERENCE',
next_sec_key='INVESTIGATION'
))
# assert({'Term Source Name', 'Term Source File', 'Term Source Version', 'Term Source Description'}
# .issubset(set(ontology_sources_df.columns.values))) # Check required labels are present
df_dict['investigation'] = _build_section_df(_read_tab_section(
f=fp,
df_dict['investigation'] = _build_section_df(_read_tab_section(
f=memf,
sec_key='INVESTIGATION',
next_sec_key='INVESTIGATION PUBLICATIONS'
))
df_dict['i_publications'] = _build_section_df(_read_tab_section(
f=fp,
df_dict['i_publications'] = _build_section_df(_read_tab_section(
f=memf,
sec_key='INVESTIGATION PUBLICATIONS',
next_sec_key='INVESTIGATION CONTACTS'
))
df_dict['i_contacts'] = _build_section_df(_read_tab_section(
f=fp,
df_dict['i_contacts'] = _build_section_df(_read_tab_section(
f=memf,
sec_key='INVESTIGATION CONTACTS',
next_sec_key='STUDY'
))
Expand All @@ -918,39 +928,39 @@ def _build_section_df(f):
df_dict['s_assays'] = list()
df_dict['s_protocols'] = list()
df_dict['s_contacts'] = list()
while _peek(fp): # Iterate through STUDY blocks until end of file
while _peek(memf): # Iterate through STUDY blocks until end of file
df_dict['studies'].append(_build_section_df(_read_tab_section(
f=fp,
f=memf,
sec_key='STUDY',
next_sec_key='STUDY DESIGN DESCRIPTORS'
)))
df_dict['s_design_descriptors'] .append(_build_section_df(_read_tab_section(
f=fp,
f=memf,
sec_key='STUDY DESIGN DESCRIPTORS',
next_sec_key='STUDY PUBLICATIONS'
)))
df_dict['s_publications'].append(_build_section_df(_read_tab_section(
f=fp,
f=memf,
sec_key='STUDY PUBLICATIONS',
next_sec_key='STUDY FACTORS'
)))
df_dict['s_factors'].append(_build_section_df(_read_tab_section(
f=fp,
f=memf,
sec_key='STUDY FACTORS',
next_sec_key='STUDY ASSAYS'
)))
df_dict['s_assays'].append(_build_section_df(_read_tab_section(
f=fp,
f=memf,
sec_key='STUDY ASSAYS',
next_sec_key='STUDY PROTOCOLS'
)))
df_dict['s_protocols'].append(_build_section_df(_read_tab_section(
f=fp,
f=memf,
sec_key='STUDY PROTOCOLS',
next_sec_key='STUDY CONTACTS'
)))
df_dict['s_contacts'].append(_build_section_df(_read_tab_section(
f=fp,
f=memf,
sec_key='STUDY CONTACTS',
next_sec_key='STUDY'
)))
Expand Down Expand Up @@ -2824,14 +2834,15 @@ def get_contacts(section_df):

ontology_source_map = dict(map(lambda x: (x.name, x), investigation.ontology_source_references))

row = df_dict['investigation'].iloc[0]
investigation.identifier = row['Investigation Identifier']
investigation.title = row['Investigation Title']
investigation.description = row['Investigation Description']
investigation.submission_date = row['Investigation Submission Date']
investigation.public_release_date = row['Investigation Public Release Date']
investigation.publications = get_publications(df_dict['i_publications'])
investigation.contacts = get_contacts(df_dict['i_contacts'])
if len(df_dict['investigation'].index) > 0:
row = df_dict['investigation'].iloc[0]
investigation.identifier = row['Investigation Identifier']
investigation.title = row['Investigation Title']
investigation.description = row['Investigation Description']
investigation.submission_date = row['Investigation Submission Date']
investigation.public_release_date = row['Investigation Public Release Date']
investigation.publications = get_publications(df_dict['i_publications'])
investigation.contacts = get_contacts(df_dict['i_contacts'])

for i in range(0, len(df_dict['studies'])):
row = df_dict['studies'][i].iloc[0]
Expand Down
2 changes: 1 addition & 1 deletion isatools/sampletab.py
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,7 @@ def load(FP):
samples[sample.name] = sample

study.materials['sources'] = list(sources.values())
study.materials['samples'] = [x for x in list(samples.values()) if x not in list(sources.values())]
study.materials['samples'] = [x for x in set(samples.values()) if x.name not in [y.name for y in list(sources.values())]]
study.process_sequence = list(processes.values())

return investigation
6 changes: 6 additions & 0 deletions tests/test_isatab.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,12 @@ def setUp(self):
def tearDown(self):
shutil.rmtree(self._tmp_dir)

def test_isatab_load_sdata201414_isa1(self):
with open(os.path.join(self._tab_data_dir, 'sdata201414-isa1', 'i_Investigation.txt')) as fp:
ISA = isatab.load(fp)
self.assertListEqual([s.filename for s in ISA.studies], ['s_chambers.txt']) # 1 study in i_investigation.txt
self.assertListEqual([a.filename for a in ISA.studies[0].assays], ['a_chambers.txt']) # 1 assays in s_chambers.txt

def test_isatab_load_bii_i_1(self):
with open(os.path.join(self._tab_data_dir, 'BII-I-1', 'i_investigation.txt')) as fp:
ISA = isatab.load(fp)
Expand Down
22 changes: 22 additions & 0 deletions tests/test_sampletab.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import unittest
from tests import utils
from isatools import sampletab
from isatools import isatab
import os


class UnitSampleTabLoad(unittest.TestCase):
    """Unit tests for loading SampleTab files into the ISA model via sampletab.load."""

    def setUp(self):
        # Directory containing the SampleTab fixture files used by these tests.
        self._sampletab_data_dir = utils.SAMPLETAB_DATA_DIR

    def tearDown(self):
        pass  # no temporary resources are created, so nothing to clean up

    def test_sampletab_load_test1(self):
        """Loading test1.txt should yield one study with one source and one sample."""
        sampletab_path = os.path.join(self._sampletab_data_dir, 'test1.txt')
        with open(sampletab_path) as sampletab_fp:
            investigation = sampletab.load(sampletab_fp)
        studies = investigation.studies
        self.assertEqual(len(studies), 1)
        materials = studies[0].materials
        self.assertEqual(len(materials['sources']), 1)
        self.assertEqual(len(materials['samples']), 1)
4 changes: 2 additions & 2 deletions tests/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@

DATA_DIR = os.path.join(os.path.dirname(__file__), 'data')

SAMPLE_DATA_DIR = os.path.join(os.path.dirname(__file__), '..', 'isatools', 'sampledata')

JSON_DATA_DIR = os.path.join(DATA_DIR, 'json')
UNIT_JSON_DATA_DIR = os.path.join(JSON_DATA_DIR, 'unit')

Expand All @@ -17,6 +15,8 @@

MZML_DATA_DIR = os.path.join(DATA_DIR, 'mzml')

SAMPLETAB_DATA_DIR = os.path.join(DATA_DIR, 'sampletab')

CONFIGS_DATA_DIR = os.path.join(DATA_DIR, 'configs')
XML_CONFIGS_DATA_DIR = os.path.join(CONFIGS_DATA_DIR, 'xml')
DEFAULT2015_XML_CONFIGS_DATA_DIR = os.path.join(XML_CONFIGS_DATA_DIR, 'isaconfig-default_v2015-07-02')
Expand Down

0 comments on commit e89e09d

Please sign in to comment.