Skip to content

Chemical Formula & Minor Edits #31

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 9 commits into
base: dev
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
## [Unreleased]
### Added
- `equality` operator overload
- `chemical_formula` attribute
## [0.1] - 2024-11-27
### Added
- `MeltingTemperature` enum
Expand Down
49 changes: 48 additions & 1 deletion opr/functions.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
# -*- coding: utf-8 -*-
"""OPR functions."""
from .params import A_WEIGHT, T_WEIGHT, C_WEIGHT, G_WEIGHT, ANHYDROUS_MOLECULAR_WEIGHT_CONSTANT
from .params import A_WEIGHT, T_WEIGHT, C_WEIGHT, G_WEIGHT
from .params import ANHYDROUS_MOLECULAR_WEIGHT_CONSTANT
from .params import CHEMICAL_FORMULA_FORMAT, CHEMICAL_FORMULA_FORMAT_SHORT
from .params import CHEMICAL_FORMULA_BASES, CHEMICAL_FORMULA_WATER, CHEMICAL_FORMULA_PHOSPHODIESTER


def molecular_weight_calc(sequence):
Expand Down Expand Up @@ -36,3 +39,47 @@ def basic_melting_temperature_calc(sequence):
else:
melting_temperature = 64.9 + 41 * ((g_count + c_count - 16.4) / (a_count + t_count + g_count + c_count))
return melting_temperature


def gc_content_calc(sequence):
    """
    Compute the fraction of G and C bases in a sequence.

    :param sequence: primer nucleotides sequence
    :type sequence: str
    :return: gc content as float
    """
    total_bases = len(sequence)
    gc_bases = sum(sequence.count(base) for base in ('G', 'C'))
    return gc_bases / total_bases


def chemical_formula_calc(sequence):
    """
    Calculate the chemical formula.

    The per-base atom counts from CHEMICAL_FORMULA_BASES are summed, then for
    every phosphodiester bond (one fewer than the number of bases) one
    CHEMICAL_FORMULA_WATER is subtracted and one CHEMICAL_FORMULA_PHOSPHODIESTER
    is added.

    :param sequence: primer nucleotides sequence
    :type sequence: str
    :return: chemical formula as str
    """
    base_counts = {base: sequence.count(base) for base in CHEMICAL_FORMULA_BASES}
    # An empty sequence has no bonds; clamping at zero keeps the atom counts
    # from going negative (len - 1 would otherwise be -1).
    bond_count = max(len(sequence) - 1, 0)

    atom_counts = {
        element: sum(base_counts[base] * atoms[element] for base, atoms in CHEMICAL_FORMULA_BASES.items())
        for element in ('C', 'H', 'N', 'O')
    }
    # A water is removed from the formula for each phosphodiester bond
    for element in ('H', 'O'):
        atom_counts[element] += bond_count * (
            CHEMICAL_FORMULA_PHOSPHODIESTER[element] - CHEMICAL_FORMULA_WATER[element])
    phosphor_count = bond_count * CHEMICAL_FORMULA_PHOSPHODIESTER['P']

    # Without any bond there is no phosphorus, so the short format is used.
    if bond_count == 0:
        return CHEMICAL_FORMULA_FORMAT_SHORT.format(
            atom_counts['C'], atom_counts['H'], atom_counts['N'], atom_counts['O'])
    return CHEMICAL_FORMULA_FORMAT.format(
        atom_counts['C'], atom_counts['H'], atom_counts['N'], atom_counts['O'], phosphor_count)
28 changes: 26 additions & 2 deletions opr/params.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,28 @@
OPR_VERSION = "0.1"
VALID_BASES = set('ATCG')
DNA_COMPLEMENT_MAP = {"A": "T", "C": "G", "G": "C", "T": "A"}
# Atom counts of the free 2'-deoxynucleoside carrying each base
# (deoxyadenosine, thymidine, deoxycytidine, deoxyguanosine).
CHEMICAL_FORMULA_BASES = {
    'A': {
        'C': 10, 'H': 13, 'N': 5, 'O': 3,
    },
    'T': {
        'C': 10, 'H': 14, 'N': 2, 'O': 5,
    },
    'C': {
        'C': 9, 'H': 13, 'N': 3, 'O': 4,
    },
    'G': {
        'C': 10, 'H': 13, 'N': 5, 'O': 4,
    },
}
# Atoms removed once per phosphodiester bond.
CHEMICAL_FORMULA_WATER = {
    'H': 2, 'O': 1,
}
# Atoms added once per phosphodiester bond (HPO3). Together with the removed
# water, each linkage changes the formula by H-1, O+2, P+1.
CHEMICAL_FORMULA_PHOSPHODIESTER = {
    'H': 1, 'O': 3, 'P': 1,
}
# Output templates: the short one is for a formula without phosphorus.
CHEMICAL_FORMULA_FORMAT = "C{0}H{1}N{2}O{3}P{4}"
CHEMICAL_FORMULA_FORMAT_SHORT = "C{0}H{1}N{2}O{3}"

PRIMER_LOWER_LENGTH = 18
PRIMER_HIGHEST_LENGTH = 30
Expand All @@ -16,9 +38,11 @@
ANHYDROUS_MOLECULAR_WEIGHT_CONSTANT = 61.96

PRIMER_SEQUENCE_TYPE_ERROR = "Primer sequence should be a string variable."
PRIMER_SEQUENCE_LENGTH_WARNING = "The recommended range for primer length is between 18 and 30."
PRIMER_SEQUENCE_LENGTH_WARNING = "The recommended range for primer length is between {0} and {1}.".format(
PRIMER_LOWER_LENGTH, PRIMER_HIGHEST_LENGTH)
PRIMER_SEQUENCE_VALID_BASES_ERROR = "Primer sequence should only contain the nucleotide bases A, T, C, and G."
PRIMER_SEQUENCE_VALID_GC_CONTENT_RANGE_WARNING = "The recommended range for GC content is between 30% and 80%."
# The bounds are multiplied by 100 to show percentages; the ``g`` presentation
# type drops the trailing ".0" a float product would otherwise print
# (e.g. "30%" rather than "30.0%").
# NOTE(review): assumes the two range constants are fractions such as 0.3 - confirm.
PRIMER_SEQUENCE_VALID_GC_CONTENT_RANGE_WARNING = "The recommended range for GC content is between {0:g}% and {1:g}%.".format(
    PRIMER_LOWEST_GC_RANGE * 100, PRIMER_HIGHEST_GC_RANGE * 100)
PRIMER_READ_ONLY_ATTRIBUTE_ERROR = "This attribute is read-only."
PRIMER_NOT_REMOVABLE_ATTRIBUTE_ERROR = "This attribute is not removable."

Expand Down
37 changes: 31 additions & 6 deletions opr/primer.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,17 @@
from warnings import warn
from .errors import OPRBaseError
from .params import VALID_BASES
from .params import PRIMER_SEQUENCE_TYPE_ERROR, PRIMER_SEQUENCE_LENGTH_WARNING, PRIMER_SEQUENCE_VALID_BASES_ERROR, PRIMER_SEQUENCE_VALID_GC_CONTENT_RANGE_WARNING
from .params import PRIMER_LOWER_LENGTH, PRIMER_HIGHEST_LENGTH, PRIMER_LOWEST_GC_RANGE, PRIMER_HIGHEST_GC_RANGE
from .params import PRIMER_SEQUENCE_TYPE_ERROR, PRIMER_SEQUENCE_VALID_BASES_ERROR
from .params import PRIMER_READ_ONLY_ATTRIBUTE_ERROR, PRIMER_NOT_REMOVABLE_ATTRIBUTE_ERROR
from .params import DNA_COMPLEMENT_MAP
from .params import PRIMER_ADDITION_ERROR, PRIMER_MULTIPLICATION_ERROR
from .params import PRIMER_MELTING_TEMPERATURE_NOT_IMPLEMENTED_ERROR
from .params import PRIMER_SEQUENCE_LENGTH_WARNING, PRIMER_SEQUENCE_VALID_GC_CONTENT_RANGE_WARNING
from .params import PRIMER_LOWER_LENGTH, PRIMER_HIGHEST_LENGTH
from .params import PRIMER_LOWEST_GC_RANGE, PRIMER_HIGHEST_GC_RANGE
from .params import DNA_COMPLEMENT_MAP

from .functions import molecular_weight_calc, basic_melting_temperature_calc
from .functions import gc_content_calc, chemical_formula_calc


class MeltingTemperature(Enum):
Expand Down Expand Up @@ -45,6 +49,7 @@ def __init__(self, sequence):
MeltingTemperature.SALT_ADJUSTED: None,
MeltingTemperature.NEAREST_NEIGHBOR: None,
}
self._chemical_formula = None

def __len__(self):
"""
Expand Down Expand Up @@ -182,9 +187,9 @@ def gc_content(self):

:return: gc content
"""
if self._gc_content is None:
gc_count = self._sequence.count('G') + self._sequence.count('C')
self._gc_content = gc_count / len(self._sequence)
if self._gc_content is not None:
return self._gc_content
self._gc_content = gc_content_calc(self._sequence)
if self._gc_content < PRIMER_LOWEST_GC_RANGE or self._gc_content > PRIMER_HIGHEST_GC_RANGE:
warn(PRIMER_SEQUENCE_VALID_GC_CONTENT_RANGE_WARNING, RuntimeWarning)
return self._gc_content
Expand All @@ -196,6 +201,26 @@ def gc_content(self, _):
@gc_content.deleter
def gc_content(self):
    """
    Reject deletion of the gc content attribute.

    :raises OPRBaseError: always, since the attribute is not removable
    """
    # A property deleter is called with the instance only; an extra positional
    # parameter would make ``del obj.gc_content`` fail with TypeError instead.
    raise OPRBaseError(PRIMER_NOT_REMOVABLE_ATTRIBUTE_ERROR)

@property
def chemical_formula(self):
    """
    Return the chemical formula, computing and caching it on first access.

    :return: chemical formula
    """
    if self._chemical_formula is None:
        self._chemical_formula = chemical_formula_calc(self._sequence)
    return self._chemical_formula

@chemical_formula.setter
def chemical_formula(self, _):
    """
    Reject assignment to the chemical formula attribute.

    :raises OPRBaseError: always, with the read-only attribute message
    """
    raise OPRBaseError(PRIMER_READ_ONLY_ATTRIBUTE_ERROR)

@chemical_formula.deleter
def chemical_formula(self):
    """
    Reject deletion of the chemical formula attribute.

    :raises OPRBaseError: always, since the attribute is not removable
    """
    # A property deleter is called with the instance only; an extra positional
    # parameter would make ``del obj.chemical_formula`` fail with TypeError instead.
    raise OPRBaseError(PRIMER_NOT_REMOVABLE_ATTRIBUTE_ERROR)

def melting_temperature(self, method=MeltingTemperature.BASIC):
"""
Expand Down
6 changes: 6 additions & 0 deletions tests/test_operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,12 @@ def test_complement_3(): #Reference: https://www.qiagen.com/us/applications/enzy
oprimer.complement(inplace=True)
assert oprimer.sequence == "TAGCCGATTTAGCCGATT"


def test_chemical_formula():
    """Check the chemical formula of an 18-base primer."""
    # Reference: https://atdbio.com/tools/oligo-calculator
    expected_formula = "C176H221N70O104P17"
    primer = Primer("ATCGGCTAAATCGGCTAA")
    assert primer.chemical_formula == expected_formula


def test_length():
    """Check that len() of a primer is the number of bases in its sequence."""
    expected_length = 18
    primer = Primer("ATCGGCTAAATCGGCTAA")
    assert len(primer) == expected_length
Expand Down
Loading