-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
fix: cleaned plot_mapping_length script and added logging output, closes
- Loading branch information
Rina Ahmed-Begrich
committed
Jul 24, 2024
1 parent
a8f6e3c
commit 5fb31af
Showing
3 changed files
with
64 additions
and
70 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,22 +1,18 @@ | ||
#!/usr/bin/python | ||
|
||
import os | ||
import sys | ||
import argparse | ||
# PLOT MAPPING LENGTH | ||
# ----------------------------------------------------------------------------- | ||
# | ||
# This script plots the length of all alignments contained in the bam file. | ||
# Input bam file is checked for alignments. | ||
|
||
import matplotlib | ||
import pysam | ||
import pandas as pd | ||
from collections import Counter | ||
from matplotlib import pyplot as plt | ||
|
||
|
||
__authors__ = "Rina Ahmed-Begrich" | ||
__mail__ = "[email protected]" | ||
__program_name__ = "plot_mapping_length.py" | ||
__version_info__ = ('0', '0', '1') | ||
__version__ = '.'.join(__version_info__) | ||
|
||
|
||
def get_aln_length(bamfile, sample): | ||
lengths = [] | ||
for aln in bamfile.fetch(): | ||
|
@@ -25,64 +21,62 @@ def get_aln_length(bamfile, sample): | |
|
||
cnt_dict = Counter(lengths) | ||
df = pd.DataFrame.from_dict(cnt_dict, orient='index').sort_index() | ||
df.columns=[sample] | ||
df.columns = [sample] | ||
return df | ||
|
||
|
||
if __name__ == "__main__": | ||
parser = argparse.ArgumentParser( | ||
description=__doc__, | ||
epilog=""" | ||
Example Usage: | ||
plot_mapping_length.py -b <bam_file> -s <sample_name> -o <output_folder> | ||
Written by %s (%s), | ||
Max Planck Unit for the Science of Pathogens. Copyright (c) 2024. | ||
Copyright Holder All Rights Reserved. | ||
""" % (__authors__, __mail__), | ||
formatter_class=argparse.RawDescriptionHelpFormatter) | ||
|
||
input = str(snakemake.input["bam"]) | ||
sample_name = snakemake.wildcards.sample | ||
output_tsv = snakemake.output["tsv"] | ||
output_pdf = snakemake.output["pdf"] | ||
|
||
try: | ||
if not os.path.exists(input): | ||
sys.stderr.write( | ||
"Input file %s does not exist. " + "Exit forced\n." % input | ||
) | ||
sys.exit(1) | ||
bamfile = pysam.AlignmentFile(input, "rb") | ||
df = get_aln_length(bamfile=bamfile, sample=sample_name) | ||
|
||
# plot pdf | ||
matplotlib.rcParams['axes.grid'] = True | ||
|
||
x = df.index | ||
y = list(df[sample_name]) | ||
zipped = zip(x, y) | ||
max_index, max_val = max(zipped, key=lambda x: x[1]) | ||
|
||
fig, ax = plt.subplots() | ||
ax.plot(x, y, marker='o') | ||
ax.set_title('{}, mode at {}'.format(sample_name, max_index)) | ||
ax.axvline(max_index, c='r', linestyle='--', alpha=0.3) | ||
ax.axvspan(20, 40, color='k', alpha=0.1) | ||
ax.set_ylabel('Counts') | ||
ax.set_xlabel('Read length') | ||
ax.get_yaxis().get_major_formatter().set_scientific(False) | ||
plt.tight_layout() | ||
plt.savefig(output_pdf, bbox_inches='tight') | ||
|
||
# export coount table | ||
df.index.set_names(['Length'], inplace=True) | ||
df.reset_index(inplace=True) | ||
df.to_csv(output_tsv, sep='\t', index=False) | ||
|
||
except: | ||
sys.stderr.write("Error ocurred when processing bam file: %s.\n" | ||
% input) | ||
raise RuntimeError | ||
sys.exit(1) | ||
input = str(snakemake.input["bam"]) | ||
sample_name = snakemake.wildcards.sample | ||
output_tsv = snakemake.output["tsv"] | ||
output_pdf = snakemake.output["pdf"] | ||
output_log = snakemake.log["path"] | ||
log = [] | ||
error = [] | ||
|
||
bamfile = pysam.AlignmentFile(input, "rb") | ||
num_aln = bamfile.count() | ||
|
||
# check if bam file is empty | ||
if num_aln != 0: | ||
df = get_aln_length(bamfile=bamfile, sample=sample_name) | ||
|
||
# plot pdf | ||
matplotlib.rcParams['axes.grid'] = True | ||
|
||
x = df.index | ||
y = list(df[sample_name]) | ||
zipped = zip(x, y) | ||
max_index, max_val = max(zipped, key=lambda x: x[1]) | ||
|
||
fig, ax = plt.subplots() | ||
ax.plot(x, y, marker='o') | ||
ax.set_title('{}, mode at {}'.format(sample_name, max_index)) | ||
ax.axvline(max_index, c='r', linestyle='--', alpha=0.3) | ||
ax.axvspan(20, 40, color='k', alpha=0.1) | ||
ax.set_ylabel('Counts') | ||
ax.set_xlabel('Read length') | ||
ax.get_yaxis().get_major_formatter().set_scientific(False) | ||
plt.tight_layout() | ||
plt.savefig(output_pdf, bbox_inches='tight') | ||
|
||
# export count table | ||
df.index.set_names(['Length'], inplace=True) | ||
df.reset_index(inplace=True) | ||
df.to_csv(output_tsv, sep='\t', index=False) | ||
|
||
log += [f"{num_aln} alignments processed."] | ||
else: | ||
# no alignment found. | ||
error += ["Bam file does not contain any alignment."] | ||
|
||
# print error / log messages | ||
if error: | ||
print("\n".join(error)) | ||
raise ValueError( | ||
"Error occurred while processing BAM file, exit forced." | ||
) | ||
else: | ||
log += ["Module finished successfully!\n"] | ||
log = ["PLOT_MAPPING_LENGTH: " + i for i in log] | ||
with open(output_log, "w") as log_file: | ||
log_file.write("\n".join(log)) |