Skip to content

Commit

Permalink
Merge branch 'develop'
Browse files Browse the repository at this point in the history
  • Loading branch information
djcomlab committed Mar 2, 2017
2 parents e412a1d + 477d65c commit e89e09d
Show file tree
Hide file tree
Showing 5 changed files with 67 additions and 28 deletions.
61 changes: 36 additions & 25 deletions isatools/isatab.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from itertools import tee
import pandas as pd
from progressbar import ProgressBar, SimpleProgress, Bar, ETA
import io


logging.basicConfig(format='%(asctime)s %(levelname)s: %(message)s', level=logging.INFO)
Expand Down Expand Up @@ -771,14 +772,14 @@ def write_assay_table_files(inv_obj, output_dir):
columns[i] = col[col.rindex(".") + 1:]
elif "Parameter Value[" in col:
columns[i] = col[col.rindex(".") + 1:]
elif "Protocol REF" in col:
columns[i] = "Protocol REF"
elif col.endswith("Date"):
columns[i] = "Date"
elif col.endswith("Performer"):
columns[i] = "Performer"
elif "Comment[" in col:
columns[i] = col[col.rindex(".") + 1:]
elif "Protocol REF" in col:
columns[i] = "Protocol REF"

print("Rendered {} paths".format(len(DF.index)))
if len(DF.index) > 1:
Expand Down Expand Up @@ -886,28 +887,37 @@ def _build_section_df(f):
df = df.reindex(df.index.drop(0)) # Reindex the DataFrame
return df

memf = io.StringIO()
while True:
line = fp.readline()
if not line:
break
if not line.lstrip().startswith('#'):
memf.write(line)
memf.seek(0)

df_dict = dict()

# Read in investigation file into DataFrames first
df_dict['ontology_sources'] = _build_section_df(_read_tab_section(
f=fp,
f=memf,
sec_key='ONTOLOGY SOURCE REFERENCE',
next_sec_key='INVESTIGATION'
))
# assert({'Term Source Name', 'Term Source File', 'Term Source Version', 'Term Source Description'}
# .issubset(set(ontology_sources_df.columns.values))) # Check required labels are present
df_dict['investigation'] = _build_section_df(_read_tab_section(
f=fp,
df_dict['investigation'] = _build_section_df(_read_tab_section(
f=memf,
sec_key='INVESTIGATION',
next_sec_key='INVESTIGATION PUBLICATIONS'
))
df_dict['i_publications'] = _build_section_df(_read_tab_section(
f=fp,
df_dict['i_publications'] = _build_section_df(_read_tab_section(
f=memf,
sec_key='INVESTIGATION PUBLICATIONS',
next_sec_key='INVESTIGATION CONTACTS'
))
df_dict['i_contacts'] = _build_section_df(_read_tab_section(
f=fp,
df_dict['i_contacts'] = _build_section_df(_read_tab_section(
f=memf,
sec_key='INVESTIGATION CONTACTS',
next_sec_key='STUDY'
))
Expand All @@ -918,39 +928,39 @@ def _build_section_df(f):
df_dict['s_assays'] = list()
df_dict['s_protocols'] = list()
df_dict['s_contacts'] = list()
while _peek(fp): # Iterate through STUDY blocks until end of file
while _peek(memf): # Iterate through STUDY blocks until end of file
df_dict['studies'].append(_build_section_df(_read_tab_section(
f=fp,
f=memf,
sec_key='STUDY',
next_sec_key='STUDY DESIGN DESCRIPTORS'
)))
df_dict['s_design_descriptors'] .append(_build_section_df(_read_tab_section(
f=fp,
f=memf,
sec_key='STUDY DESIGN DESCRIPTORS',
next_sec_key='STUDY PUBLICATIONS'
)))
df_dict['s_publications'].append(_build_section_df(_read_tab_section(
f=fp,
f=memf,
sec_key='STUDY PUBLICATIONS',
next_sec_key='STUDY FACTORS'
)))
df_dict['s_factors'].append(_build_section_df(_read_tab_section(
f=fp,
f=memf,
sec_key='STUDY FACTORS',
next_sec_key='STUDY ASSAYS'
)))
df_dict['s_assays'].append(_build_section_df(_read_tab_section(
f=fp,
f=memf,
sec_key='STUDY ASSAYS',
next_sec_key='STUDY PROTOCOLS'
)))
df_dict['s_protocols'].append(_build_section_df(_read_tab_section(
f=fp,
f=memf,
sec_key='STUDY PROTOCOLS',
next_sec_key='STUDY CONTACTS'
)))
df_dict['s_contacts'].append(_build_section_df(_read_tab_section(
f=fp,
f=memf,
sec_key='STUDY CONTACTS',
next_sec_key='STUDY'
)))
Expand Down Expand Up @@ -2824,14 +2834,15 @@ def get_contacts(section_df):

ontology_source_map = dict(map(lambda x: (x.name, x), investigation.ontology_source_references))

row = df_dict['investigation'].iloc[0]
investigation.identifier = row['Investigation Identifier']
investigation.title = row['Investigation Title']
investigation.description = row['Investigation Description']
investigation.submission_date = row['Investigation Submission Date']
investigation.public_release_date = row['Investigation Public Release Date']
investigation.publications = get_publications(df_dict['i_publications'])
investigation.contacts = get_contacts(df_dict['i_contacts'])
if len(df_dict['investigation'].index) > 0:
row = df_dict['investigation'].iloc[0]
investigation.identifier = row['Investigation Identifier']
investigation.title = row['Investigation Title']
investigation.description = row['Investigation Description']
investigation.submission_date = row['Investigation Submission Date']
investigation.public_release_date = row['Investigation Public Release Date']
investigation.publications = get_publications(df_dict['i_publications'])
investigation.contacts = get_contacts(df_dict['i_contacts'])

for i in range(0, len(df_dict['studies'])):
row = df_dict['studies'][i].iloc[0]
Expand Down
2 changes: 1 addition & 1 deletion isatools/sampletab.py
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,7 @@ def load(FP):
samples[sample.name] = sample

study.materials['sources'] = list(sources.values())
study.materials['samples'] = [x for x in list(samples.values()) if x not in list(sources.values())]
study.materials['samples'] = [x for x in set(samples.values()) if x.name not in [y.name for y in list(sources.values())]]
study.process_sequence = list(processes.values())

return investigation
6 changes: 6 additions & 0 deletions tests/test_isatab.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,12 @@ def setUp(self):
def tearDown(self):
shutil.rmtree(self._tmp_dir)

def test_isatab_load_sdata201414_isa1(self):
with open(os.path.join(self._tab_data_dir, 'sdata201414-isa1', 'i_Investigation.txt')) as fp:
ISA = isatab.load(fp)
self.assertListEqual([s.filename for s in ISA.studies], ['s_chambers.txt']) # 1 study in i_investigation.txt
self.assertListEqual([a.filename for a in ISA.studies[0].assays], ['a_chambers.txt']) # 1 assays in s_chambers.txt

def test_isatab_load_bii_i_1(self):
with open(os.path.join(self._tab_data_dir, 'BII-I-1', 'i_investigation.txt')) as fp:
ISA = isatab.load(fp)
Expand Down
22 changes: 22 additions & 0 deletions tests/test_sampletab.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import unittest
from tests import utils
from isatools import sampletab
from isatools import isatab
import os


class UnitSampleTabLoad(unittest.TestCase):
    """Unit tests for loading SampleTab files into the ISA model via sampletab.load."""

    def setUp(self):
        # Directory containing the SampleTab fixture files used by these tests.
        self._sampletab_data_dir = utils.SAMPLETAB_DATA_DIR

    def tearDown(self):
        pass  # no temporary resources are created, so nothing to clean up

    def test_sampletab_load_test1(self):
        """Loading test1.txt should yield one study with one source and one sample."""
        sampletab_path = os.path.join(self._sampletab_data_dir, 'test1.txt')
        with open(sampletab_path) as sampletab_fp:
            investigation = sampletab.load(sampletab_fp)
        studies = investigation.studies
        self.assertEqual(len(studies), 1)
        materials = studies[0].materials
        self.assertEqual(len(materials['sources']), 1)
        self.assertEqual(len(materials['samples']), 1)
4 changes: 2 additions & 2 deletions tests/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@

DATA_DIR = os.path.join(os.path.dirname(__file__), 'data')

SAMPLE_DATA_DIR = os.path.join(os.path.dirname(__file__), '..', 'isatools', 'sampledata')

JSON_DATA_DIR = os.path.join(DATA_DIR, 'json')
UNIT_JSON_DATA_DIR = os.path.join(JSON_DATA_DIR, 'unit')

Expand All @@ -17,6 +15,8 @@

MZML_DATA_DIR = os.path.join(DATA_DIR, 'mzml')

SAMPLETAB_DATA_DIR = os.path.join(DATA_DIR, 'sampletab')

CONFIGS_DATA_DIR = os.path.join(DATA_DIR, 'configs')
XML_CONFIGS_DATA_DIR = os.path.join(CONFIGS_DATA_DIR, 'xml')
DEFAULT2015_XML_CONFIGS_DATA_DIR = os.path.join(XML_CONFIGS_DATA_DIR, 'isaconfig-default_v2015-07-02')
Expand Down

0 comments on commit e89e09d

Please sign in to comment.