Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions examples/aptanet_tutorial.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": null,
"id": "3737da88",
"metadata": {},
"outputs": [],
Expand All @@ -49,7 +49,7 @@
"import torch\n",
"\n",
"from pyaptamer.datasets import load_1gnh_structure\n",
"from pyaptamer.utils.struct_to_aaseq import struct_to_aaseq"
"from pyaptamer.utils import struct_to_aaseq"
]
},
{
Expand Down
2 changes: 1 addition & 1 deletion pyaptamer/datasets/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from pyaptamer.datasets._loaders._one_gnh import load_1gnh_structure
from pyaptamer.datasets._loaders._online_databank import load_from_rcsb
from pyaptamer.datasets._loaders._pfoa_loader import load_pfoa_structure
from pyaptamer.datasets._loaders._pfoa import load_pfoa_structure

__all__ = [
"load_pfoa_structure",
Expand Down
2 changes: 1 addition & 1 deletion pyaptamer/datasets/_loaders/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""Loaders for different data structures."""

from pyaptamer.datasets._loaders._one_gnh import load_1gnh_structure
from pyaptamer.datasets._loaders._pfoa_loader import load_pfoa_structure
from pyaptamer.datasets._loaders._pfoa import load_pfoa_structure

__all__ = ["load_pfoa_structure", "load_1gnh_structure"]
2 changes: 1 addition & 1 deletion pyaptamer/datasets/_loaders/_online_databank.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

from Bio.PDB import PDBList

from pyaptamer.utils.pdb_to_struct import pdb_to_struct
from pyaptamer.utils import pdb_to_struct


def load_from_rcsb(pdb_id, overwrite=False):
Expand Down
6 changes: 4 additions & 2 deletions pyaptamer/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,15 @@
"rna2vec",
"pdb_to_struct",
"struct_to_aaseq",
"pdb_to_aaseq",
]

from pyaptamer.utils._pdb_to_aaseq import pdb_to_aaseq
from pyaptamer.utils._pdb_to_struct import pdb_to_struct
from pyaptamer.utils._rna import (
dna2rna,
encode_rna,
generate_all_aptamer_triplets,
rna2vec,
)
from pyaptamer.utils.pdb_to_struct import pdb_to_struct
from pyaptamer.utils.struct_to_aaseq import struct_to_aaseq
from pyaptamer.utils._struct_to_aaseq import struct_to_aaseq
49 changes: 49 additions & 0 deletions pyaptamer/utils/_pdb_to_aaseq.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
__author__ = "satvshr"
__all__ = ["pdb_to_aaseq"]

import os

import pandas as pd
from Bio import SeqIO


def pdb_to_aaseq(pdb_file_path, return_type="list"):
    """
    Extract amino-acid sequences (SEQRES) from a PDB file.

    Parameters
    ----------
    pdb_file_path : str or os.PathLike
        Path to a PDB file.
    return_type : {'list', 'pd.df'}, optional, default='list'
        Format of the returned value:

        - ``'list'`` : return a Python list of sequence strings, one per
          SEQRES record yielded by the parser.
        - ``'pd.df'`` : return a pandas.DataFrame with a single column
          ``'sequence'`` and an index named ``'chain'``.

    Returns
    -------
    list of str or pandas.DataFrame
        Depending on ``return_type``. If ``'list'``, returns a list of sequence
        strings in the order the parser yields them. If ``'pd.df'``, returns a
        DataFrame whose index (named ``'chain'``) holds each record's ``id``
        attribute (``None`` if a record has no ``id``) and whose ``'sequence'``
        column holds the sequence strings. If no SEQRES records are found,
        returns an empty list or empty DataFrame respectively.

    Raises
    ------
    ValueError
        If ``return_type`` is neither ``'list'`` nor ``'pd.df'``.
    """
    # Validate before touching the file so a typo in ``return_type`` fails
    # loudly instead of parsing the file and then silently returning None.
    if return_type not in ("list", "pd.df"):
        raise ValueError(
            f"return_type must be 'list' or 'pd.df', got {return_type!r}"
        )

    pdb_path = os.fspath(pdb_file_path)
    sequences = []
    chains = []

    with open(pdb_path) as handle:
        for record in SeqIO.parse(handle, "pdb-seqres"):
            sequences.append(str(record.seq))
            # NOTE(review): the record id is used as the chain label; confirm
            # whether Biopython prefixes it with the PDB id for this format.
            chains.append(getattr(record, "id", None))

    if return_type == "list":
        return sequences

    # Only 'pd.df' can reach this point thanks to the validation above.
    df = pd.DataFrame({"chain": chains, "sequence": sequences})
    return df.set_index("chain")
File renamed without changes.
File renamed without changes.
32 changes: 32 additions & 0 deletions pyaptamer/utils/tests/test_pdb_to_aaseq.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
__author__ = "satvshr"

import os

from pyaptamer.utils import pdb_to_aaseq


def test_pdb_to_aaseq():
    """
    Check both output formats of `pdb_to_aaseq` on the bundled ``1gnh.pdb`` file.

    The default call must yield a non-empty list of non-empty strings, and the
    ``return_type="pd.df"`` call must yield a non-empty DataFrame whose
    ``'sequence'`` column contains non-empty strings.
    """
    here = os.path.dirname(__file__)
    pdb_file_path = os.path.join(here, "..", "..", "datasets", "data", "1gnh.pdb")

    # Default return type: plain list of sequence strings.
    as_list = pdb_to_aaseq(pdb_file_path)
    assert isinstance(as_list, list), "pdb_to_aaseq should return a list"
    assert len(as_list) > 0, "Returned list should not be empty"
    for entry in as_list:
        assert isinstance(entry, str), "Each entry should be a string"
        assert len(entry) > 0, "Each sequence string should not be empty"

    # DataFrame return type: same content exposed through a 'sequence' column.
    as_frame = pdb_to_aaseq(pdb_file_path, return_type="pd.df")
    assert not as_frame.empty, "Returned DataFrame should not be empty"
    assert "sequence" in as_frame.columns, "DataFrame should have a 'sequence' column"
    for entry in as_frame["sequence"]:
        assert isinstance(entry, str), "Each entry should be a string"
        assert len(entry) > 0, "Each sequence string should not be empty"
2 changes: 1 addition & 1 deletion pyaptamer/utils/tests/test_pdb_to_struct.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import os

from pyaptamer.utils.pdb_to_struct import pdb_to_struct
from pyaptamer.utils import pdb_to_struct


def test_pdb_to_struct():
Expand Down
2 changes: 1 addition & 1 deletion pyaptamer/utils/tests/test_struct_to_aaseq.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
__author__ = "satvshr"

from pyaptamer.datasets import load_1gnh_structure
from pyaptamer.utils.struct_to_aaseq import struct_to_aaseq
from pyaptamer.utils import struct_to_aaseq


def test_struct_to_aaseq():
Expand Down
Loading