Skip to content

Commit

Permalink
Merge pull request #342 from CGATOxford/Py3-migration
Browse files Browse the repository at this point in the history
Merge Python 3 code into master
  • Loading branch information
sebastian-luna-valero authored Jul 7, 2017
2 parents 19292f6 + 56ea408 commit 754f0fa
Show file tree
Hide file tree
Showing 12 changed files with 42 additions and 54 deletions.
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ env:
- TEST_ALL=1

python:
- "2.7"
# - "2.7"
- "3.5"

# Using xvfb to Run Tests That Require a GUI
Expand Down
4 changes: 3 additions & 1 deletion CGAT/Expression.py
Original file line number Diff line number Diff line change
Expand Up @@ -2211,7 +2211,9 @@ def runEdgeR(outfile,
ref_regex)

# output heatmap plot
R.png('%(outfile_prefix)sheatmap.png' % locals())
fn = '%(outfile_prefix)sheatmap.png' % locals()
E.info("outputing heatmap to {}".format(fn))
R.png(fn)
plotCorrelationHeatmap()
r['dev.off']()

Expand Down
1 change: 0 additions & 1 deletion CGAT/FastaIterator.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,6 @@ def iterate(infile, comment="#", fold=False):
------
FastaRecord
'''

h = infile.readline()[:-1]

if not h:
Expand Down
43 changes: 17 additions & 26 deletions CGAT/IOTools.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def getFirstLine(filename, nlines=1):
return line


def getLastLine(filename, nlines=1, read_size=1024):
def getLastLine(filename, nlines=1, read_size=1024, encoding="utf-8"):
"""return the last line of a file.
This method works by working back in blocks of `read_size` until
Expand All @@ -90,8 +90,8 @@ def getLastLine(filename, nlines=1, read_size=1024):
"""

# U is to open it with Universal newline support
f = open(filename, 'rU')
# py3 requires binary mode for negative seeks
f = open(filename, 'rb')
offset = read_size
f.seek(0, 2)
file_size = f.tell()
Expand All @@ -102,10 +102,8 @@ def getLastLine(filename, nlines=1, read_size=1024):
offset = file_size
f.seek(-1 * offset, 2)
read_str = f.read(offset)
# Remove newline at the end
if read_str[offset - 1] == '\n':
read_str = read_str[:-1]
lines = read_str.split('\n')
read_str = read_str.decode(encoding)
lines = read_str.strip().splitlines()
if len(lines) >= nlines + 1:
return "\n".join(lines[-nlines:])
if offset == file_size: # reached the beginning
Expand Down Expand Up @@ -189,19 +187,21 @@ def touchFile(filename, times=None):
as empty 'gzip' files, i.e., with a header.
'''
existed = os.path.exists(filename)
fhandle = open(filename, 'a')

if filename.endswith(".gz") and not existed:
# this will automatically add a gzip header
fhandle = open(filename, 'a+b')
fhandle = gzip.GzipFile(filename, fileobj=fhandle)
else:
fhandle = open(filename, 'a')

try:
os.utime(filename, times)
finally:
fhandle.close()


def openFile(filename, mode="r", create_dir=False):
def openFile(filename, mode="r", create_dir=False, encoding="utf-8"):
'''open file called *filename* with mode *mode*.
gzip - compressed files are recognized by the
Expand Down Expand Up @@ -235,16 +235,15 @@ def openFile(filename, mode="r", create_dir=False):
if ext.lower() in (".gz", ".z"):
if sys.version_info.major >= 3:
if mode == "r":
return gzip.open(filename, 'rt', encoding="ascii")
return gzip.open(filename, 'rt', encoding=encoding)
elif mode == "w":
return gzip.open(filename, 'wt', encoding="ascii")
else:
raise NotImplementedError(
"mode '{}' not implemented".format(mode))
return gzip.open(filename, 'wt', encoding=encoding)
elif mode == "a":
return gzip.open(filename, 'wt', encoding=encoding)
else:
return gzip.open(filename, mode)
else:
return open(filename, mode)
return open(filename, mode, encoding=encoding)


def force_str(iterator, encoding="ascii"):
Expand Down Expand Up @@ -812,14 +811,6 @@ def __init__(self,

self.mFiles = {}
self.mOutputPattern = output_pattern

self.open = open

if output_pattern:
_, ext = os.path.splitext(output_pattern)
if ext.lower() in (".gz", ".z"):
self.open = gzip.open

self.mCounts = collections.defaultdict(int)
self.mHeader = header
if force and output_pattern:
Expand Down Expand Up @@ -881,7 +872,7 @@ def openFile(self, filename, mode="w"):
if dirname and not os.path.exists(dirname):
os.makedirs(dirname)

return self.open(filename, mode)
return openFile(filename, mode)

def write(self, identifier, line):
"""write `line` to file specified by `identifier`"""
Expand All @@ -894,7 +885,7 @@ def write(self, identifier, line):
f.close()
self.mFiles = {}

self.mFiles[filename] = self.openFile(filename, "a")
self.mFiles[filename] = openFile(filename, "a")
if self.mHeader:
self.mFiles[filename].write(self.mHeader)

Expand Down Expand Up @@ -947,7 +938,7 @@ def close(self):
raise IOError("write on closed FilePool in close()")

for filename, data in self.data.items():
f = self.openFile(filename, "a")
f = openFile(filename, "a")
if self.mHeader:
f.write(self.mHeader)
f.write("".join(data))
Expand Down
14 changes: 6 additions & 8 deletions CGAT/Masker.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,13 +141,11 @@ def maskSequence(self, peptide_sequence):
def maskSequences(self, sequences):
'''mask a collection of sequences.'''

outfile, infile = tempfile.mkstemp()

for x, s in enumerate(sequences):
os.write(outfile, ">%i\n%s\n" % (x, s))

os.close(outfile)
with tempfile.NamedTemporaryFile(mode="w+t", delete=False) as outf:
for x, s in enumerate(sequences):
outf.write(">%i\n%s\n" % (x, s))

infile = outf.name
statement = self.mCommand % locals()

E.debug("statement: %s" % statement)
Expand All @@ -164,9 +162,9 @@ def maskSequences(self, sequences):
raise RuntimeError(
"Error in running %s \n%s\nTemporary directory" %
(statement, err))

result = [
x.sequence for x in FastaIterator.iterate(StringIO(out))]
x.sequence for x in FastaIterator.iterate(StringIO(out.decode()))]

os.remove(infile)

Expand Down
3 changes: 3 additions & 0 deletions CGAT/scripts/_bam2stats.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,9 @@ def count(AlignmentFile samfile,

if count_fastq:
read_name = pysam_bam_get_qname(read._delegate)
if read_name == NULL:
raise ValueError("file does not contain read names, can't count using fastq")

# terminate string at first space to
# truncate read names containing more than
# just the id
Expand Down
2 changes: 1 addition & 1 deletion CGAT/scripts/bed2table.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,7 @@ def _count(self, bed, bamfiles, offsets):
# if offsets are given, shift tags.
for samfile, offset in zip(bamfiles, offsets):

shift = offset / 2
shift = offset // 2
# for peak counting I follow the MACS protocol,
# see the function def __tags_call_peak in PeakDetect.py
# In words
Expand Down
14 changes: 4 additions & 10 deletions CGAT/scripts/cgat_fasta2cDNA.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,12 @@
'''
cgat_fasta2cDNA.py - template for CGAT scripts
====================================================
cgat_fasta2cDNA.py - converting multi-fasta of exon features into a multi-fasta of spliced cDNAs/RNAs
======================================================================================================
:Author:
:Release: $Id$
:Date: |today|
:Tags: Python
Purpose
-------
.. Mike transcript processing - converting multi-fasta of exon
features into a multi-fasta of spliced cDNAs/RNAs
Usage
-----
Expand Down Expand Up @@ -45,15 +39,15 @@ def makeSplicedFasta(infile):
'''

fasta_dict = {}
with IOTools.openFile(infile, "rb") as fafile:
with IOTools.openFile(infile) as fafile:
for line in fafile.readlines():
if line[0] == '>':
header = line.rstrip("\n")
fasta_dict[header] = ''
else:
fasta_dict[header] += line.rstrip("\n")

for key, value in fasta_dict.items():
for key, value in sorted(fasta_dict.items()):
yield "%s\n%s\n" % (key, value)


Expand Down
3 changes: 1 addition & 2 deletions CGAT/scripts/csv_select.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,7 @@ def main(argv=None):
reader = csv.DictReader(CSV.CommentStripper(sys.stdin),
dialect=options.csv_dialect)

exec("f = lambda r: %s" % statement, locals())

exec("f = lambda r: %s" % statement, globals())
counter = E.Counter()
writer = csv.DictWriter(options.stdout,
reader.fieldnames,
Expand Down
6 changes: 4 additions & 2 deletions CGAT/scripts/extract_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,9 +95,11 @@ def cleanStatsTable(stats_file):
Take in a table containing aggregated stats
and clean by removing duplicate columns
'''

# AH: the mangle_dupe_cols=False argument was removed from the call below because
# pandas raises "ValueError: Setting mangle_dupe_cols=False is not supported yet"
_df = pd.read_table(stats_file, sep="\t", header=0,
index_col=None, mangle_dupe_cols=False)
index_col=None)

# dropping duplicates is case sensitive, so convert all names to the
# same case - SQL is not case sensitive and will raise an error
# for column names that differ only in case
Expand Down
2 changes: 1 addition & 1 deletion CGAT/scripts/psl2psl.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ def iterator_psl_intervals(options):
except KeyError:
tx = []

if options.stdlog >= 2:
if options.loglevel >= 2:
options.stdlog.write(
"###################################################\n")
options.stdlog.write("# testing %s\n" % (str(match)))
Expand Down
2 changes: 1 addition & 1 deletion CGAT/scripts/runExpression.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,7 @@ def main(argv=None):
(options, args) = E.Start(parser, argv=argv, add_output_options=True)

if options.input_filename_tags == "-":
fh = tempfile.NamedTemporaryFile(delete=False)
fh = tempfile.NamedTemporaryFile(delete=False, mode="w+t")
fh.write("".join([x for x in options.stdin]))
fh.close()
options.input_filename_tags = fh.name
Expand Down

0 comments on commit 754f0fa

Please sign in to comment.