diff --git a/.travis.yml b/.travis.yml
index 31678c6..9225791 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -16,6 +16,7 @@ install:
 script:
         - pytest -vv dryadic/tests/test_mtypes.py
         - pytest -vv dryadic/tests/test_mcombs.py
+        - pytest -vv dryadic/tests/test_mtrees.py
         - python dryadic/tests/test_cohorts.py
         - python dryadic/tests/test_pipes.py
         - python dryadic/tests/test_kbtl.py
diff --git a/README.md b/README.md
index 193491a..aa26505 100644
--- a/README.md
+++ b/README.md
@@ -1,14 +1,13 @@
-# Dryad
-
-Dryad is a Python module for representing and predicting -omic features.
-
-The following submodules are available in Dryad:
-
-### `dryadic.features` ###
-Loading -omic datasets and representing them in formats accessible to machine
-learning methods.
-
-### `dryadic.learning` ###
-Algorithms and pipelines for using -omic datasets to predict -omic and
-phenotypic features.
+# Dryad #
+`dryad` is a Python module for making predictions using -omic data. It
+consists of two main parts: `dryadic.features`, containing tools for loading
+-omic datasets and representing them in formats accessible to machine learning
+methods, and `dryadic.learning`, which incorporates these tools into pipelines
+designed to facilitate drawing biological insights from prediction models
+trained on -omic data.
+
+## Installation ##
+The easiest way to install `dryad` is with `pip`, from the TestPyPI index:
+
+```pip install -i https://test.pypi.org/simple/ dryad==0.4.2```
 
diff --git a/dryadic/features/cohorts/mut.py b/dryadic/features/cohorts/mut.py
index d122188..a148758 100644
--- a/dryadic/features/cohorts/mut.py
+++ b/dryadic/features/cohorts/mut.py
@@ -18,7 +18,7 @@ class BaseMutationCohort(PresenceCohort, UniCohort):
             hierarchical representations of mutation data. Default is to
             initialize with one tree that only sorts mutations by gene.
 
-        mut_genes (set or list-like), optional
+        mut_genes (set or list-like of :obj:`str`), optional
             Set of genes whose mutation data should be considered. Recommended
             for reducing the size of large mutation datasets, as default is to
             use all mutations present in `mut_df`.
diff --git a/dryadic/features/cohorts/utils.py b/dryadic/features/cohorts/utils.py
index 0d07401..39a673d 100644
--- a/dryadic/features/cohorts/utils.py
+++ b/dryadic/features/cohorts/utils.py
@@ -81,6 +81,10 @@ def get_gencode(annot_file, include_types=None):
         annot_file (str): A .gtf file, downloaded from eg.
                           www.gencodegenes.org/releases/22.html
 
+        include_types (list-like or set of :obj:`str`), optional
+            Additional annotation record types to load, eg. 'transcript' or
+            'exon'. The default only loads gene-level data.
+
     Returns:
         gn_annot (dict): Dictionary with keys corresponding to Ensembl gene
                          IDs and values consisting of dicts with
@@ -96,10 +100,10 @@ def get_gencode(annot_file, include_types=None):
     if include_types:
         use_types |= set(include_types)
 
-    if 'exon' in include_types:
+    if 'exon' in use_types:
         use_types |= {'UTR'}
 
-    # remove annotation records that are non-relevant or on sex chromosomes
+    # remove annotation records that are irrelevant or on sex chromosomes
     chroms_use = {'chr' + str(i+1) for i in range(22)}
     annot = annot.loc[annot['Type'].isin(use_types)
                       & annot['Chr'].isin(chroms_use), :]
@@ -127,17 +131,20 @@ def get_gencode(annot_file, include_types=None):
                          info_flds.reset_index(drop=True)],
                         axis=1)
 
+    # find records corresponding to protein-coding genes
     gene_df = info_df[(info_df.Type == 'gene')
                       & (info_df.gene_type == 'protein_coding')]
     gene_df = gene_df.set_index('gene_id')
 
-    gn_annot = {gn: dict(recs[['Chr', 'Start', 'End', 'Strand', 'gene_name']])
+    # create dictionary using these records with Ensembl gene ids as keys
+    gn_annot = {gn: {'Chr': recs.Chr, 'Start': recs.Start, 'End': recs.End,
+                     'Strand': recs.Strand, 'gene_name': recs.gene_name}
                 for gn, recs in gene_df.iterrows()}
+
     if len(use_types) > 1:
         info_df = info_df[info_df.transcript_type == 'protein_coding']
 
-    # group transcript records according to parent gene, transform gene
-    # records into a dictionary
+    # group transcript records according to parent gene
     if 'transcript' in use_types:
         tx_groups = info_df[(info_df.Type == 'transcript')
                             & info_df.gene_id.isin(gene_df.index)].groupby(
@@ -146,8 +153,11 @@ def get_gencode(annot_file, include_types=None):
         # insert the transcripts for each gene into the gene record dictionary
         for gn, tx_df in tx_groups:
             gn_annot[gn]['Transcripts'] = {
-                tx: dict(recs[['Start', 'End', 'transcript_name']])
-                for tx, recs in tx_df.set_index('transcript_id').iterrows()
+                tx_df.transcript_id.iloc[i]: {
+                    'Start': tx_df.Start.iloc[i], 'End': tx_df.End.iloc[i],
+                    'transcript_name': tx_df.transcript_name.iloc[i]
+                    }
+                for i in range(tx_df.shape[0])
                 }
 
     if 'exon' in use_types:
@@ -155,26 +165,26 @@ def get_gencode(annot_file, include_types=None):
             raise ValueError("Cannot load gene exon information without also "
                              "loading transcript information!")
 
-        # likewise, group exon records according to parent gene
-        regn_groups = info_df[info_df.Type.isin(['exon', 'UTR'])
-                              & info_df.gene_id.isin(gene_df.index)].groupby(
-                                  ['gene_id', 'transcript_id'])
+        # likewise, group exon and UTR records by parent gene and transcript
+        use_df = info_df[info_df.gene_id.isin(gene_df.index)]
+        exn_groups = use_df[use_df.Type == 'exon'].groupby(
+            ['gene_id', 'transcript_id'])
+        utr_groups = use_df[use_df.Type == 'UTR'].groupby(
+            ['gene_id', 'transcript_id'])
 
-        for (gn, tx), regn_df in regn_groups:
-            exn_df = regn_df[regn_df.Type == 'exon']
-            utr_df = regn_df[regn_df.Type == 'UTR']
+        for (gn, tx), exn_df in exn_groups:
+            gn_annot[gn]['Transcripts'][tx]['UTRs'] = []
+            gn_annot[gn]['Transcripts'][tx]['Exons'] = exn_df[[
+                'Start', 'End', 'exon_id']].to_dict(orient='records')
 
-            gn_annot[gn]['Transcripts'][tx]['Exons'] = exn_df.sort_values(
-                by='exon_number')[['Start', 'End', 'exon_id']].apply(
-                    dict, axis=1).tolist()
             exn_count = len(gn_annot[gn]['Transcripts'][tx]['Exons'])
-
-            for i in range(len(gn_annot[gn]['Transcripts'][tx]['Exons'])):
+            for i in range(exn_count):
                 gn_annot[gn]['Transcripts'][tx]['Exons'][i][
                     'number'] = "{}/{}".format(i + 1, exn_count)
 
+        for (gn, tx), utr_df in utr_groups:
             gn_annot[gn]['Transcripts'][tx]['UTRs'] = utr_df.sort_values(
-                by='Start')[['Start', 'End']].apply(dict, axis=1).tolist()
+                by='Start')[['Start', 'End']].to_dict(orient='records')
  
     return gn_annot
 
diff --git a/dryadic/tests/test_cohorts.py b/dryadic/tests/test_cohorts.py
index ab3751e..5b51b00 100644
--- a/dryadic/tests/test_cohorts.py
+++ b/dryadic/tests/test_cohorts.py
@@ -6,7 +6,7 @@
 sys.path.extend([os.path.join(base_dir, '../..')])
 
 from dryadic.features.cohorts.base import UniCohort
-from dryadic.features.cohorts import *
+from dryadic.features.cohorts import BaseMutationCohort
 from dryadic.features.mutations import MuType
 
 import numpy as np
@@ -19,6 +19,16 @@ def load_omic_data(data_lbl):
                        sep='\t', index_col=0)
 
 
+def load_muts(muts_lbl):
+    return pd.read_csv(
+        os.path.join(os.path.dirname(__file__), 'resources',
+                     "muts_{}.tsv".format(muts_lbl)),
+        engine='python', sep='\t', comment='#',
+        names=['Gene', 'Form', 'Sample', 'Protein', 'Transcript', 'Exon',
+               'ref_count', 'alt_count', 'PolyPhen']
+        )
+
+
 def check_samp_split(cdata, expr_samps):
     assert ((set(cdata.get_train_samples()) | set(cdata.get_test_samples()))
             == set(expr_samps)), (
diff --git a/dryadic/tests/test_kbtl.py b/dryadic/tests/test_kbtl.py
index ae40441..ef246e4 100644
--- a/dryadic/tests/test_kbtl.py
+++ b/dryadic/tests/test_kbtl.py
@@ -105,7 +105,6 @@ def main():
                                            cv_seed=None, test_prop=0.3)
     trs_cdata.update_split(new_seed=101)
 
-    import pdb; pdb.set_trace()
     mult_clf.tune_coh(trs_cdata, sing_mtype,
                       test_count=4, tune_splits=2, parallel_jobs=1)
     print(mult_clf)
diff --git a/dryadic/tests/test_mtrees.py b/dryadic/tests/test_mtrees.py
index 161edfa..1780bc0 100644
--- a/dryadic/tests/test_mtrees.py
+++ b/dryadic/tests/test_mtrees.py
@@ -10,24 +10,14 @@
 
 from ..features.mutations import MuType, MuTree
 from .test_mtypes import mtype_tester
+from .test_cohorts import load_muts
 import pytest
 
-import os
 import pandas as pd
 from itertools import product, chain
 from itertools import combinations as combn
 
 
-def load_muts(muts_lbl):
-    return pd.read_csv(
-        os.path.join(os.path.dirname(__file__), 'resources',
-                     "muts_{}.tsv".format(muts_lbl)),
-        engine='python', sep='\t', comment='#',
-        names=['Gene', 'Form', 'Sample', 'Protein', 'Transcript', 'Exon',
-               'ref_count', 'alt_count', 'PolyPhen']
-        )
-
-
 def pytest_generate_tests(metafunc):
     if metafunc.function.__code__.co_argcount == 1:
         pass
diff --git a/setup.py b/setup.py
index 8d6aa4b..37cba2b 100644
--- a/setup.py
+++ b/setup.py
@@ -1,7 +1,7 @@
 import setuptools
 
 setuptools.setup(name='dryad',
-      version='0.4.2a3',
+      version='0.4.2',
       description='Prediction of Cancer Phenotypes Using Mutation Trees',
       author='Michal Radoslaw Grzadkowski',
       author_email='grzadkow@ohsu.edu',
@@ -9,7 +9,7 @@
           exclude=["dryadic.tests.*", "dryadic.tests"]),
       url = 'https://github.com/ohsu-comp-bio/dryad',
       download_url = ('https://github.com/ohsu-comp-bio/'
-                      'dryad/archive/v0.4.2a3.tar.gz'),
+                      'dryad/archive/v0.4.2.tar.gz'),
       install_requires=[
           'numpy>=1.16',
           'pandas>=0.25',