Merge branch 'develop'
djcomlab committed Mar 23, 2017
2 parents 423bb8d + a671a56 commit 4794312
Showing 7 changed files with 57 additions and 11 deletions.
14 changes: 14 additions & 0 deletions isatools/convert/isatab2magetab.py
@@ -0,0 +1,14 @@
+from isatools import isatab, magetab
+import logging
+
+logging.basicConfig(format='%(asctime)s %(levelname)s: %(message)s', level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+
+def convert(source_inv_fp, output_path):
+    """ Converter for ISA-Tab to MAGE-TAB.
+    :param source_inv_fp: File descriptor of input investigation file
+    :param output_path: Path to directory to write output MAGE-TAB files to
+    """
+    ISA = isatab.load(source_inv_fp)
+    magetab.dump(ISA, output_path)
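
For context, a minimal usage sketch of the new converter, mirroring the call in the new test module added below (the BII-I-1 investigation path is illustrative and assumes a local copy of the ISAdatasets test data):

import os
from isatools.convert import isatab2magetab

with open(os.path.join('BII-I-1', 'i_investigation.txt')) as inv_fp:
    isatab2magetab.convert(inv_fp, 'out')  # MAGE-TAB files are written into the 'out' directory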
8 changes: 4 additions & 4 deletions isatools/io/mtbls.py
@@ -129,7 +129,7 @@ def slice_data_files(dir, factor_selection=None):
     # first collect matching samples
     for table_file in glob.iglob(os.path.join(dir, '[a|s]_*')):
         logger.info("Loading {}".format(table_file))
-        with open(table_file) as fp:
+        with open(table_file, encoding='utf-8') as fp:
             df = isatab.load_table(fp)
             if factor_selection is None:
                 matches = df['Sample Name'].items()
@@ -164,7 +164,7 @@ def slice_data_files(dir, factor_selection=None):
     for result in results:
         sample_name = result['sample']
         for table_file in glob.iglob(os.path.join(dir, 'a_*')):
-            with open(table_file) as fp:
+            with open(table_file, encoding='utf-8') as fp:
                 df = isatab.load_table(fp)
                 data_files = list()
                 table_headers = list(df.columns.values)
@@ -191,7 +191,7 @@ def get_factor_names(mtbls_study_id):
     from isatools import isatab
     factors = set()
     for table_file in glob.iglob(os.path.join(tmp_dir, '[a|s]_*')):
-        with open(os.path.join(tmp_dir, table_file)) as fp:
+        with open(os.path.join(tmp_dir, table_file), encoding='utf-8') as fp:
             df = isatab.load_table(fp)
             factors_headers = [header for header in list(df.columns.values) if _RX_FACTOR_VALUE.match(header)]
             for header in factors_headers:
@@ -214,7 +214,7 @@ def get_factor_values(mtbls_study_id, factor_name):
     from isatools import isatab
     fvs = set()
     for table_file in glob.iglob(os.path.join(tmp_dir, '[a|s]_*')):
-        with open(os.path.join(tmp_dir, table_file)) as fp:
+        with open(os.path.join(tmp_dir, table_file), encoding='utf-8') as fp:
             df = isatab.load_table(fp)
             if 'Factor Value[{}]'.format(factor_name) in list(df.columns.values):
                 for indx, match in df['Factor Value[{}]'.format(factor_name)].items():
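
The recurring change in this file (and in isatab.py below) is passing encoding='utf-8' explicitly to open(). A self-contained sketch of why that matters; the s_demo.txt file name is made up for illustration:

import locale

print(locale.getpreferredencoding(False))  # default codec open() uses when encoding= is omitted

# Write a small ISA-Tab-like file containing a non-ASCII character, then read it back.
# Without the explicit encoding, reading this on e.g. a cp1252 locale could raise
# UnicodeDecodeError or silently load garbled text.
with open('s_demo.txt', 'w', encoding='utf-8') as fp:
    fp.write('Sample Name\tCharacteristics[organism]\nsample-µ1\tHomo sapiens\n')
with open('s_demo.txt', encoding='utf-8') as fp:  # explicit encoding is deterministic on every platform
    print(fp.read())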
2 changes: 1 addition & 1 deletion isatools/isatab.py
@@ -3057,7 +3057,7 @@ def pairwise(iterable):

 def read_tfile(tfile_path, index_col=None):
 
-    with open(tfile_path) as tfile_fp:
+    with open(tfile_path, encoding='utf-8') as tfile_fp:
         reader = csv.reader(tfile_fp, delimiter='\t')
         header = list(next(reader))
         tfile_fp.seek(0)
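
read_tfile reads the header row first and then rewinds the handle so the full parser can re-read the file from the top. A small self-contained illustration of that peek-and-rewind pattern (the sample table is made up):

import csv
import io

fp = io.StringIO('Sample Name\tFactor Value[dose]\ns1\thigh\n')  # stand-in for a real tab file
header = list(next(csv.reader(fp, delimiter='\t')))
fp.seek(0)  # rewind so the next reader sees the header row again
print(header)  # ['Sample Name', 'Factor Value[dose]']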
11 changes: 8 additions & 3 deletions isatools/magetab.py
@@ -295,9 +295,14 @@ def write_sdrf_table_file(inv_obj, output_path):
     tmp = tempfile.mkdtemp()
     isatab.write_study_table_files(inv_obj=inv_obj, output_dir=tmp)
     isatab.write_assay_table_files(inv_obj=inv_obj, output_dir=tmp)
-    isatab.merge_study_with_assay_tables(os.path.join(tmp, inv_obj.studies[0].filename),
-                                         os.path.join(tmp, inv_obj.studies[0].assays[0].filename),
-                                         os.path.join(output_path, "sdrf.txt"))
+    for study in inv_obj.studies:
+        for assay in [x for x in study.assays if x.technology_type.term == "DNA microarray"]:
+            sdrf_filename = assay.filename[:assay.filename.rindex('.')]
+            print("Writing {}".format(sdrf_filename))
+            isatab.merge_study_with_assay_tables(os.path.join(tmp, study.filename),
+                                                 os.path.join(tmp, assay.filename),
+                                                 os.path.join(output_path, "{}.sdrf.txt"
+                                                              .format(sdrf_filename)))
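
The rewritten block writes one SDRF file per DNA microarray assay, named after the assay file, instead of a single hard-coded sdrf.txt. A quick sketch of the naming scheme with a hypothetical assay filename:

assay_filename = 'a_transcriptome.txt'  # hypothetical
sdrf_filename = assay_filename[:assay_filename.rindex('.')]  # strip the .txt extension
print('{}.sdrf.txt'.format(sdrf_filename))  # -> a_transcriptome.sdrf.txt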


def dump(inv_obj, output_path):
2 changes: 1 addition & 1 deletion setup.py
@@ -4,7 +4,7 @@

 setup(
     name='isatools',
-    version='0.7.1',
+    version='0.7.2',
     packages=['isatools', 'isatools.convert', 'isatools.io', 'isatools.model'],
     package_data={'isatools': ['schemas/cedar/*.json',
                                'schemas/isa_model_version_1_0_schemas/core/*.json',
2 changes: 0 additions & 2 deletions tests/test_isatab.py
@@ -251,8 +251,6 @@ def test_isatab_load_issue200(self):
         self.assertEqual(len(ISA.studies[0].assays[0].materials['other_material']), 7)
         self.assertEqual(len(ISA.studies[0].assays[0].data_files), 2)
         self.assertEqual(len(ISA.studies[0].assays[0].process_sequence), 11)
-        print(isatab.dumps(ISA))
-        # isatab.dump(ISA, '/Users/dj/PycharmProjects/isa-api/tests/data/tmp')
 
     def test_isatab_load_issue201(self):
         with open(os.path.join(self._tab_data_dir, 'sdata201411-isa1', 'i_Investigation.txt')) as fp:
29 changes: 29 additions & 0 deletions tests/test_isatab2magetab.py
@@ -0,0 +1,29 @@
+import unittest
+import os
+import shutil
+from isatools.convert import isatab2magetab
+from tests import utils
+import tempfile
+
+
+def setUpModule():
+    if not os.path.exists(utils.DATA_DIR):
+        raise FileNotFoundError("Could not find test data directory in {0}. Ensure you have cloned the ISAdatasets "
+                                "repository using "
+                                "git clone -b tests --single-branch git@github.com:ISA-tools/ISAdatasets {0}"
+                                .format(utils.DATA_DIR))
+
+
+class TestIsaTab2MageTab(unittest.TestCase):
+
+    def setUp(self):
+        self._json_data_dir = utils.JSON_DATA_DIR
+        self._tab_data_dir = utils.TAB_DATA_DIR
+        self._tmp_dir = tempfile.mkdtemp()
+
+    def tearDown(self):
+        shutil.rmtree(self._tmp_dir)
+
+    def test_isatab2magetab_convert_bii_i_1(self):
+        with open(os.path.join(self._tab_data_dir, 'BII-I-1', 'i_investigation.txt')) as inv_fp:
+            isatab2magetab.convert(inv_fp, self._tmp_dir)
