Skip to content

Commit

Permalink
Merge pull request #148 from ypriverol/conversion_bruker_data
Browse files Browse the repository at this point in the history
Conversion bruker data
  • Loading branch information
ypriverol authored Sep 5, 2023
2 parents f809f10 + 9c304a7 commit 03d8e8b
Show file tree
Hide file tree
Showing 11 changed files with 122 additions and 72 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/pythonapp.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ jobs:
for sdrf in testdata/*/*.sdrf.tsv
do
pushd $(dirname $sdrf)
python ../../parse_sdrf.py convert-openms -s $(pwd)/../../$sdrf -t2
python ../../parse_sdrf.py convert-openms -s $(pwd)/../../$sdrf -t2 --extension_convert raw:mzML
diff -c experimental_design.tsv expected_experimental_design.tsv >> ../../failures.txt
diff -c expected_openms.tsv openms.tsv >> ../../failures.txt
popd
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/pythonpackage.yml
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ jobs:
for sdrf in testdata/*/*.sdrf.tsv
do
pushd $(dirname $sdrf)
python ../../parse_sdrf.py convert-openms -s $(pwd)/../../$sdrf -t2
python ../../parse_sdrf.py convert-openms -s $(pwd)/../../$sdrf -t2 --extension_convert raw:mzML
diff -c experimental_design.tsv expected_experimental_design.tsv >> ../../failures.txt
diff -c expected_openms.tsv openms.tsv >> ../../failures.txt
popd
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@ pytest
requests
pyyaml
numpy
defusedxml
2 changes: 1 addition & 1 deletion sdrf_pipelines/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.0.22"
__version__ = "0.0.23"
68 changes: 51 additions & 17 deletions sdrf_pipelines/openms/openms.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,48 @@ class FileToColumnEntries:
file2technical_rep = {}


def get_openms_file_name(raw, extension_convert: str = None):
    """
    Convert file name for OpenMS. If extension_convert is set, the extension will be converted to the specified format.

    ``extension_convert`` is a comma-separated list of ``current:new`` pairs. Whitespace around
    each pair and around each extension is ignored, so ``"raw:mzML, d:d"`` is accepted.
    - file.raw -> file.mzML (extension_convert=raw:mzML)
    - file.mzML -> file.mzML (extension_convert=mzML:mzML)
    - file.mzML -> file.mzml (extension_convert=mzML:mzml)
    - file.mzml -> file.mzML (extension_convert=mzml:mzML)
    - file.d -> file.mzML (extension_convert=d:mzML)
    - file.d -> file.d (extension_convert=d:d)

    :param raw: raw file name
    :param extension_convert: convert extension to specified format; ``None`` returns ``raw`` unchanged
    :return: converted file name
    :raises ValueError: if a pair is not of the form ``current:new``, uses an unsupported extension,
        repeats a source extension, or if the extension of ``raw`` has no matching conversion
    """
    if extension_convert is None:
        return raw

    possible_extension = ["raw", "mzML", "mzml", "d"]
    extension_convert_dict = {}
    # A dedicated loop variable keeps the ``extension_convert`` parameter intact.
    for conversion in extension_convert.split(","):
        # partition() never fails to unpack, so malformed pairs reach the explicit checks below
        # instead of raising a cryptic tuple-unpacking error.
        current_extension, separator, new_extension = (part.strip() for part in conversion.partition(":"))
        if not separator:
            raise ValueError(
                "Invalid extension conversion. Each conversion must have the form 'current:new', got '"
                + conversion
                + "'"
            )
        if current_extension not in possible_extension or new_extension not in possible_extension:
            raise ValueError(
                "Invalid extension conversion. Please use one of the following formats: " + str(possible_extension)
            )
        if current_extension in extension_convert_dict:
            raise ValueError("Invalid extension conversion. Please use only one conversion per extension")
        extension_convert_dict[current_extension] = new_extension

    base_name, dotted_extension = os.path.splitext(raw)
    current_extension = dotted_extension[1:]  # drop the leading "."
    if current_extension not in extension_convert_dict:
        raise ValueError(
            "Invalid extension conversion. The current extension of the file do not match the provided extension {}".format(
                current_extension
            )
        )
    return base_name + "." + extension_convert_dict[current_extension]


class OpenMS:
def __init__(self) -> None:
super().__init__()
Expand Down Expand Up @@ -173,11 +215,11 @@ def openms_ify_mods(self, sdrf_mods):
def openms_convert(
self,
sdrf_file: str = None,
keep_raw: bool = False,
one_table: bool = False,
legacy: bool = False,
verbose: bool = False,
split_by_columns: str = None,
extension_convert: str = None,
):
print("PROCESSING: " + sdrf_file + '"')

Expand Down Expand Up @@ -387,7 +429,7 @@ def openms_convert(
source_name2n_reps,
f2c.file2combined_factors,
f2c.file2label,
keep_raw,
extension_convert,
f2c.file2fraction,
)
else: # two table format
Expand All @@ -398,7 +440,7 @@ def openms_convert(
source_name_list,
source_name2n_reps,
f2c.file2label,
keep_raw,
extension_convert,
f2c.file2fraction,
f2c.file2combined_factors,
)
Expand All @@ -422,7 +464,7 @@ def openms_convert(
source_name2n_reps,
f2c.file2combined_factors,
f2c.file2label,
keep_raw,
extension_convert,
f2c.file2fraction,
)
else: # two table format
Expand All @@ -433,7 +475,7 @@ def openms_convert(
source_name_list,
source_name2n_reps,
f2c.file2label,
keep_raw,
extension_convert,
f2c.file2fraction,
f2c.file2combined_factors,
)
Expand Down Expand Up @@ -484,7 +526,7 @@ def writeTwoTableExperimentalDesign(
source_name_list,
source_name2n_reps,
file2label,
keep_raw,
extension_convert,
file2fraction,
file2combined_factors,
):
Expand Down Expand Up @@ -581,11 +623,7 @@ def writeTwoTableExperimentalDesign(
else:
label = str(self.itraq4plex[label[label_index[raw]].lower()])
label_index[raw] = label_index[raw] + 1
if not keep_raw:
ext = os.path.splitext(raw)
out = ext[0] + ".mzML"
else:
out = raw
out = get_openms_file_name(raw, extension_convert)

f.write(
str(Fraction_group[raw])
Expand Down Expand Up @@ -675,7 +713,7 @@ def writeOneTableExperimentalDesign(
source_name2n_reps,
file2combined_factors,
file2label,
keep_raw,
extension_convert,
file2fraction,
):
f = open(output_filename, "w+")
Expand Down Expand Up @@ -840,11 +878,7 @@ def writeOneTableExperimentalDesign(
label = str(self.itraq4plex[label[label_index[raw]].lower()])
label_index[raw] = label_index[raw] + 1

if not keep_raw:
ext = os.path.splitext(raw)
out = ext[0] + ".mzML"
else:
out = raw
out = get_openms_file_name(raw, extension_convert)

if "MSstats_Mixture" in open_ms_experimental_design_header:
if raw not in mixture_raw_tag.keys():
Expand Down
56 changes: 16 additions & 40 deletions sdrf_pipelines/openms/unimod.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import re
import xml.etree.ElementTree as et

import defusedxml.ElementTree as et
import pkg_resources


Expand All @@ -9,6 +9,12 @@ def __init__(self, site: str, position: str) -> None:
self._site = site
self._position = position

def get_site(self):
    """Return the ``site`` value this object was constructed with."""
    return self._site

def get_position(self):
    """Return the ``position`` value this object was constructed with."""
    return self._position


class OntologyTerm:
def __init__(self, accession: str, name: str) -> None:
Expand All @@ -35,6 +41,15 @@ def get_name(self):
def get_accession(self):
return self._ontology_term.get_accession()

def get_delta_mono_mass(self):
    """Return the stored ``_delta_mono_mass`` attribute."""
    # NOTE(review): presumably set in __init__ from the Unimod "delta_mono_mass" field - confirm.
    return self._delta_mono_mass

def get_delta_composition(self):
    """Return the stored ``_delta_composition`` attribute."""
    # NOTE(review): presumably set in __init__ from the Unimod "delta_composition" field - confirm.
    return self._delta_composition

def to_str(self):
    """Return accession, name, delta mono mass and delta composition separated by single spaces."""
    fields = (
        self.get_accession(),
        self.get_name(),
        self.get_delta_mono_mass(),
        self.get_delta_composition(),
    )
    return " ".join(str(field) for field in fields)


class UnimodDatabase:
"""Wrapper for the Unimod database"""
Expand Down Expand Up @@ -108,45 +123,6 @@ def _get_modifications(self, node):
mod = PostTranslationalModification(ontology_term, ma["delta_composition"], sites, ma["delta_mono_mass"])
self.modifications.append(mod)

def get_label(self, label):
mod = self.modifications.get(label, None)
return mod

def get_element(self, name):
el = self.elements.get(name, None)
return el

def list_labels(self, search):
labels = []
lre = re.compile(search)
for k in self.modifications.keys():
l = lre.search(k)
if l is not None:
labels.append(k)
return labels

def get_neutral_loss(self, label, site):
mod = self.modifications.get(label, None)
if mod is not None:
try:
nl = []
for n in mod["sites"][site]["NeutralLoss"]:
if n["composition"] != "0":
nl.append(n)
return nl
except:
return []
return []

def get_delta_mono(self, label):
mod = self.modifications.get(label, None)
if mod is not None:
try:
val = float(mod["delta_mono_mass"])
return val
except:
pass

def get_by_accession(self, accession):
for mod in self.modifications:
if mod.get_accession() == accession:
Expand Down
20 changes: 15 additions & 5 deletions sdrf_pipelines/parse_sdrf.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,21 +35,31 @@ def cli():

@click.command("convert-openms", short_help="convert sdrf to openms file output")
@click.option("--sdrf", "-s", help="SDRF file")
@click.option("--raw", "-r", help="Keep filenames in experimental design output as raw.")
@click.option(
"--legacy/--modern", "-l/-m", default=False, help="legacy=Create artificial sample column not needed in OpenMS 2.6."
)
@click.option("--onetable/--twotables", "-t1/-t2", default=False, help="Create one-table or two-tables format.")
@click.option("--verbose/--quiet", "-v/-q", default=False, help="Output debug information.")
@click.option("--onetable/--twotables", "-t1/-t2", help="Create one-table or two-tables format.", default=False)
@click.option("--verbose/--quiet", "-v/-q", help="Output debug information.", default=False)
@click.option("--conditionsfromcolumns", "-c", help="Create conditions from provided (e.g., factor) columns.")
@click.option(
"--extension_convert",
"-e",
help="convert extensions of files from one type to other 'raw:mzML,mzml:MZML,mzML:mzML,d:d'",
)
@click.pass_context
def openms_from_sdrf(
ctx, sdrf: str, raw: bool, onetable: bool, legacy: bool, verbose: bool, conditionsfromcolumns: str
ctx,
sdrf: str,
onetable: bool,
legacy: bool,
verbose: bool,
conditionsfromcolumns: str,
extension_convert: str,
):
if sdrf is None:
help()
try:
OpenMS().openms_convert(sdrf, raw, onetable, legacy, verbose, conditionsfromcolumns)
OpenMS().openms_convert(sdrf, onetable, legacy, verbose, conditionsfromcolumns, extension_convert)
except Exception as ex:
print("Error: " + str(ex))

Expand Down
4 changes: 2 additions & 2 deletions sdrf_pipelines/sdrf_merge/add_data_analysis_param.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def verify_content(pname, pvalue, ptype):
# exit("ERROR: " + pname + " needs to be a numeric value!!")
elif ptype == "class":
not_matching = [x for x in pvalue.split(",") if x not in p["value"]]
if not_matching != []:
if len(not_matching) != 0:
exit(
"ERROR: "
+ pname
Expand Down Expand Up @@ -98,7 +98,7 @@ def add_ptms(mods, pname, mod_columns):
modname = tmod[0]
modpos = tmod[1]
found = [x for x in unimod.modifications if modname == x.get_name()]
if found == []:
if len(found) == 0:
exit(
"ERROR: "
+ m
Expand Down
19 changes: 15 additions & 4 deletions sdrf_pipelines/tests/test_sdrfchecker.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,17 +12,28 @@ def test_validate_srdf():
runner = CliRunner()
result = runner.invoke(cli, ["validate-sdrf", "--sdrf_file", "testdata/PXD000288.sdrf.tsv", "--check_ms"])

print(result.output)
assert "ERROR" not in result.output
print("validate sdrf " + result.output)


def test_convert_openms():
"""
:return:
"""
runner = CliRunner()
result = runner.invoke(cli, ["convert-openms", "-t2", "l", "-s", "testdata/sdrf.tsv"])
print("convert to openms" + result.output)
result = runner.invoke(cli, ["convert-openms", "-t2", "-s", "testdata/PXD000288.sdrf.tsv"])
print("convert to openms " + result.output)
assert "ERROR" not in result.output


def test_convert_openms_file_extensions():
    """
    Convert an SDRF file to the OpenMS two-table format while renaming ``raw`` files to ``mzML``.

    The ``convert-openms`` command catches every exception and prints it as
    ``"Error: <message>"``, so that prefix is checked in addition to the
    upper-case ``"ERROR"`` marker; otherwise a failed conversion would pass.
    :return:
    """
    runner = CliRunner()
    result = runner.invoke(
        cli, ["convert-openms", "-t2", "-s", "testdata/PXD000288.sdrf.tsv", "--extension_convert", "raw:mzML"]
    )
    print("convert to openms " + result.output)
    assert "ERROR" not in result.output
    assert "Error: " not in result.output


Expand Down
18 changes: 18 additions & 0 deletions sdrf_pipelines/tests/test_unimod.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
from sdrf_pipelines.openms.unimod import UnimodDatabase


def test_search_mods_by_accession():
    """Look up a modification by Unimod accession and check that the matching entry is returned."""
    unimod = UnimodDatabase()
    ptm = unimod.get_by_accession("UNIMOD:21")
    # Fail with a clear message instead of an AttributeError when nothing is found.
    assert ptm is not None, "UNIMOD:21 was not found in the Unimod database"
    assert ptm.get_accession() == "UNIMOD:21"
    print(ptm.get_name())


def test_search_mods_by_keyword():
    """Search the Unimod database for "Phospho" and print each result's ``to_str()`` summary."""
    unimod = UnimodDatabase()
    ptms = unimod.search_mods_by_keyword("Phospho")
    # NOTE(review): this only prints the matches; nothing is asserted about them.
    for ptm in ptms:
        print(ptm.to_str())


if __name__ == "__main__":
test_search_mods_by_keyword()
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def get_version(rel_path):
},
url="https://github.com/bigbio/sdrf-pipelines",
packages=find_packages(),
install_requires=["click", "pandas", "pandas_schema", "requests", "pytest", "pyyaml"],
install_requires=["click", "pandas", "pandas_schema", "requests", "pytest", "pyyaml", "defusedxml"],
entry_points={"console_scripts": ["parse_sdrf = sdrf_pipelines.parse_sdrf:main"]},
platforms=["any"],
classifiers=[
Expand Down

0 comments on commit 03d8e8b

Please sign in to comment.