seq-lang · inumanag · Dec 9, 2019 · Dec 9, 2019 · Dec 10, 2019 · Dec 10, 2019
@@ -0,0 +1,98 @@
+
+# TODO/ CAVEATS:
+# @jordan - crc32() needs to be implemented
+# def crc32(seq):
+#     """
+#     crc32(seq)
+
+#     Return the crc32 checksum for a sequence (string or Seq object).
+#     """
+#     return _crc32(seq)
+
+def _init_table_h() -> list[int]:
+    _table_h = list[int]()
+    for i in range(256):
+        part_l = i
+        part_h = 0
+        for j in range(8):
+            rflag = part_l & 1
+            part_l >>= 1
+            if part_h & 1:
+                part_l |= 1 << 31
+            part_h >>= 1
+            if rflag:
+                part_h ^= 0xD8000000
+        _table_h.append(part_h)
+    return _table_h
+
+#Initialisation
+_table_h = _init_table_h()
+
+def crc64(s) -> str:
+    """
+    crc64(s)
+
+    Return the crc64 checksum for a sequence (string or Seq object).
+
+    TODO/CAVEATS:
+    @jordan - return formatting crch and crcl as hex
+    """
+    crcl = 0
+    crch = 0
+    for c in s:
+        shr = (crch & 0xFF) << 24
+        temp1h = crch >> 8
+        temp1l = (crcl >> 8) | shr
+        idx = (crcl ^ ord(c)) & 0xFF
+        crch = temp1h ^ _table_h[idx]
+        crcl = temp1l
+
+    # need it in 08X return f"CRC-{crch}{crcl}" 
+    return f"CRC-{crch}{crcl}"
+    #"CRC-%08X%08X" % (crch, crcl)
+
+def gcg(s) -> int:
+    """
+    gcg(s)
+
+    Given a nucleotide or amino-acid secuence (or any string),
+    returns the GCG checksum (int). Checksum used by GCG program.
+    seq type = str.
+    """
+
+    index = checksum = 0
+
+    for char in s:
+        index += 1
+        checksum += index * ord(char.upper())
+        if index == 57:
+            index = 0
+    return checksum % 10000
+
+# TODO/CAVEATS:
+# @jordan - This still needs to be implemented
+#hashlib and base64??
+# def seguid(s) -> str:
+#     """
+#     seguid(seq: seq)
+
+#     Return the SEGUID (string) for a sequence (string or Seq object).
+
+#     Given a nucleotide or amino-acid secuence (or any string),
+#     returns the SEGUID string (A SEquence Globally Unique IDentifier).
+#     seq type = str.
+#     """
+#     import hashlib
+#     import base64
+
+#     m = hashlib.sha1()
+#     m.update(_as_bytes(s.upper()))
+#     try:
+#         # For Python 3+
+#         tmp = base64.encodebytes(m.digest())
+#         return tmp.decode().replace("\n", "").rstrip("=")
+#     except AttributeError:
+#         pass
+#     # For all other Pythons
+#     return base64.b64encode(m.digest()).rstrip("=")
+
@@ -0,0 +1,194 @@
+
+"""Methods for codon usage calculations."""
+
+import math
+from biopy.sequtils.codonusageindices import SharpEcoliIndex
+import bio
+#DONT HAVE YET from Bio import SeqIO
+
+# Turn black code style off
+# fmt: off
+
+CodonsDict = {
+    "TTT": 0, "TTC": 0, "TTA": 0, "TTG": 0,
+    "CTT": 0, "CTC": 0, "CTA": 0, "CTG": 0,
+    "ATT": 0, "ATC": 0, "ATA": 0, "ATG": 0,
+    "GTT": 0, "GTC": 0, "GTA": 0, "GTG": 0,
+    "TAT": 0, "TAC": 0, "TAA": 0, "TAG": 0,
+    "CAT": 0, "CAC": 0, "CAA": 0, "CAG": 0,
+    "AAT": 0, "AAC": 0, "AAA": 0, "AAG": 0,
+    "GAT": 0, "GAC": 0, "GAA": 0, "GAG": 0,
+    "TCT": 0, "TCC": 0, "TCA": 0, "TCG": 0,
+    "CCT": 0, "CCC": 0, "CCA": 0, "CCG": 0,
+    "ACT": 0, "ACC": 0, "ACA": 0, "ACG": 0,
+    "GCT": 0, "GCC": 0, "GCA": 0, "GCG": 0,
+    "TGT": 0, "TGC": 0, "TGA": 0, "TGG": 0,
+    "CGT": 0, "CGC": 0, "CGA": 0, "CGG": 0,
+    "AGT": 0, "AGC": 0, "AGA": 0, "AGG": 0,
+    "GGT": 0, "GGC": 0, "GGA": 0, "GGG": 0}
+
+# Turn black code style on
+# fmt: on
+
+# this dictionary shows which codons encode the same AA
+SynonymousCodons = {
+    "CYS": ["TGT", "TGC"],
+    "ASP": ["GAT", "GAC"],
+    "SER": ["TCT", "TCG", "TCA", "TCC", "AGC", "AGT"],
+    "GLN": ["CAA", "CAG"],
+    "MET": ["ATG"],
+    "ASN": ["AAC", "AAT"],
+    "PRO": ["CCT", "CCG", "CCA", "CCC"],
+    "LYS": ["AAG", "AAA"],
+    "STOP": ["TAG", "TGA", "TAA"],
+    "THR": ["ACC", "ACA", "ACG", "ACT"],
+    "PHE": ["TTT", "TTC"],
+    "ALA": ["GCA", "GCC", "GCG", "GCT"],
+    "GLY": ["GGT", "GGG", "GGA", "GGC"],
+    "ILE": ["ATC", "ATA", "ATT"],
+    "LEU": ["TTA", "TTG", "CTC", "CTT", "CTG", "CTA"],
+    "HIS": ["CAT", "CAC"],
+    "ARG": ["CGA", "CGC", "CGG", "CGT", "AGG", "AGA"],
+    "TRP": ["TGG"],
+    "VAL": ["GTA", "GTC", "GTG", "GTT"],
+    "GLU": ["GAG", "GAA"],
+    "TYR": ["TAT", "TAC"],
+}
+
+class CodonAdaptationIndex:
+    """
+    A codon adaptation index (CAI) implementation.
+
+    Implements the codon adaptation index (CAI) described by Sharp and
+    Li (Nucleic Acids Res. 1987 Feb 11;15(3):1281-95).
+
+    NOTE - This implementation does not currently cope with alternative genetic
+    codes: only the synonymous codons in the standard table are considered.
+    """
+    index: dict[str, float]
+    codon_count: dict[str, int]
+
+    def __init__(self: CodonAdaptationIndex):
+        self.index = dict[str, float]()
+        self.codon_count = dict[str, int]()
+
+    # use this method with predefined CAI index
+    def set_cai_index(self, index):
+        """
+        set_cai_index(self, index)
+
+        Set up an index to be used when calculating CAI for a gene.
+
+        Just pass a dictionary similar to the SharpEcoliIndex in the
+        CodonUsageIndices module.
+        """
+        self.index = index
+
+    def generate_index(self, fasta_file):
+        """
+        generate_index(self, fasta_file)
+
+        Generate a codon usage index from a FASTA file of CDS sequences.
+
+        Takes a location of a Fasta file containing CDS sequences
+        (which must all have a whole number of codons) and generates a codon
+        usage index.
+
+        RCSU values
+        """
+        # first make sure we're not overwriting an existing index:
+        if self.index != dict[str, float]() or self.codon_count != dict[str, int]():
+            raise ValueError(
+                "an index has already been set or a codon count "
+                "has been done. Cannot overwrite either."
+            )
+
+        # count codon occurrences in the file.
+        self._count_codons(fasta_file)
+
+        # now to calculate the index we first need to sum the number of times
+        # synonymous codons were used all together.
+        for aa in SynonymousCodons:
+            total = 0.0
+            # RCSU values are CodonCount/((1/num of synonymous codons) * sum of
+            # all synonymous codons)
+            rcsu = list[float]()
+            codons = SynonymousCodons[aa]
+
+            for codon in codons:
+                total += self.codon_count[codon]
+
+            # calculate the RSCU value for each of the codons
+            for codon in codons:
+                denominator = float(total) / len(codons)
+                rcsu.append(self.codon_count[codon] / denominator)
+
+            # now generate the index W=RCSUi/RCSUmax:
+            rcsu_max = max(rcsu)
+            for codon_index, codon in enumerate(codons):
+                self.index[codon] = rcsu[codon_index] / rcsu_max
+
+    def cai_for_gene(self, dna_sequence: str) -> float:
+        """
+        cai_for_gene(self, dna_sequence)
+
+        Calculate the CAI (float) for the provided DNA sequence (string).
+
+        This method uses the Index (either the one you set or the one you
+        generated) and returns the CAI for the DNA sequence.
+        """
+        cai_value, cai_length = 0.0, 0
+
+        # if no index is set or generated, the default SharpEcoliIndex will
+        # be used.
+        if self.index == dict[str, float]():
+            self.set_cai_index(SharpEcoliIndex)
+
+        if dna_sequence.islower():
+            dna_sequence = dna_sequence.upper()
+
+        for i in range(0, len(dna_sequence), 3):
+            codon = dna_sequence[i : i + 3]
+            if codon in self.index:
+                # these two codons are always one, exclude them:
+                if codon not in ["ATG", "TGG"]:
+                    cai_value += math.log(self.index[codon])
+                    cai_length += 1
+            # some indices may not include stop codons:
+            elif codon not in ["TGA", "TAA", "TAG"]:
+                raise TypeError(
+                    f"illegal codon in sequence: {codon}.\n{self.index}"
+                )
+
+        return math.exp(cai_value / (cai_length - 1.0))
+
+    def _count_codons(self, fasta_file):
+
+        # make the codon dictionary local
+        self.codon_count = CodonsDict.copy()
+
+        dna_sequence = ''
+
+        # iterate over sequence and count all the codons in the FastaFile.
+        for cur_record in bio.FASTA(fasta_file):
+            # make sure the sequence is lower case
+            if str(cur_record.seq).islower():
+                dna_sequence = str(cur_record.seq).upper()
+            else:
+                dna_sequence = str(cur_record.seq)
+            for i in range(0, len(dna_sequence), 3):
+                codon = dna_sequence[i : i + 3]
+                if codon in self.codon_count:
+                    self.codon_count[codon] += 1
+                else:
+                    raise TypeError(
+                        f"illegal codon {codon} in gene: {cur_record}" 
+                    )
+
+    def print_index(self):
+        """
+        Print out the index used.
+        This just gives the index when the objects is printed.
+        """
+        for i in sorted(self.index):
+            print(f"{i}\t{self.index[i]}")
@@ -0,0 +1,25 @@
+"""
+Codon adaption indxes, including Sharp and Li (1987) E. coli index.
+
+Currently this module only defines a single codon adaption index from
+Sharp & Li, Nucleic Acids Res. 1987.
+"""
+# Turn black code style off
+# fmt: off
+
+SharpEcoliIndex = {
+    "GCA": 0.586, "GCC": 0.122, "GCG": 0.424, "GCT": 1.0, "AGA": 0.004,
+    "AGG": 0.002, "CGA": 0.004, "CGC": 0.356, "CGG": 0.004, "CGT": 1.0, "AAC": 1.0,
+    "AAT": 0.051, "GAC": 1.0, "GAT": 0.434, "TGC": 1.0, "TGT": 0.5, "CAA": 0.124,
+    "CAG": 1.0, "GAA": 1.0, "GAG": 0.259, "GGA": 0.01, "GGC": 0.724, "GGG": 0.019,
+    "GGT": 1.0, "CAC": 1.0, "CAT": 0.291, "ATA": 0.003, "ATC": 1.0, "ATT": 0.185,
+    "CTA": 0.007, "CTC": 0.037, "CTG": 1.0, "CTT": 0.042, "TTA": 0.02,
+    "TTG": 0.02, "AAA": 1.0, "AAG": 0.253, "ATG": 1.0, "TTC": 1.0, "TTT": 0.296,
+    "CCA": 0.135, "CCC": 0.012, "CCG": 1.0, "CCT": 0.07, "AGC": 0.41,
+    "AGT": 0.085, "TCA": 0.077, "TCC": 0.744, "TCG": 0.017, "TCT": 1.0,
+    "ACA": 0.076, "ACC": 1.0, "ACG": 0.099, "ACT": 0.965, "TGG": 1.0, "TAC": 1.0,
+    "TAT": 0.239, "GTA": 0.495, "GTC": 0.066, "GTG": 0.221, "GTT": 1.0
+}
+
+# Turn black code style on
+# fmt: on
@@ -0,0 +1,145 @@
+
+"""Local Composition Complexity."""
+
+import math
+
+def lcc_mult(s, wsize: int):
+    """
+    lcc_mult(s, wsize)
+
+    Calculate Local Composition Complexity (LCC) values over sliding window.
+
+    Returns a list of floats, the LCC values for a sliding window over
+    the sequence.
+    """
+    l2 = math.log(2.0)
+    tamseq = len(s)
+
+    upper = s.upper()
+
+    compone = [0.0]
+    lccsal = [0.0]
+    for i in range(wsize):
+        compone.append(
+            ((i + 1) / float(wsize)) * ((math.log((i + 1) / float(wsize))) / l2)
+        )
+    window = s[0:wsize]
+    cant_a = window.count("A")
+    cant_c = window.count("C")
+    cant_t = window.count("T")
+    cant_g = window.count("G")
+    term_a = compone[cant_a]
+    term_c = compone[cant_c]
+    term_t = compone[cant_t]
+    term_g = compone[cant_g]
+    lccsal.append(-(term_a + term_c + term_t + term_g))
+    tail = s[0]
+    for x in range(tamseq - wsize):
+        window = upper[x + 1 : wsize + x + 1]
+        if tail == window[-1]:
+            lccsal.append(lccsal[-1])
+        elif tail == "A":
+            cant_a -= 1
+            if window.endswith("C"):
+                cant_c += 1
+                term_a = compone[cant_a]
+                term_c = compone[cant_c]
+                lccsal.append(-(term_a + term_c + term_t + term_g))
+            elif window.endswith("T"):
+                cant_t += 1
+                term_a = compone[cant_a]
+                term_t = compone[cant_t]
+                lccsal.append(-(term_a + term_c + term_t + term_g))
+            elif window.endswith("G"):
+                cant_g += 1
+                term_a = compone[cant_a]
+                term_g = compone[cant_g]
+                lccsal.append(-(term_a + term_c + term_t + term_g))
+        elif tail == "C":
+            cant_c -= 1
+            if window.endswith("A"):
+                cant_a += 1
+                term_a = compone[cant_a]
+                term_c = compone[cant_c]
+                lccsal.append(-(term_a + term_c + term_t + term_g))
+            elif window.endswith("T"):
+                cant_t += 1
+                term_c = compone[cant_c]
+                term_t = compone[cant_t]
+                lccsal.append(-(term_a + term_c + term_t + term_g))
+            elif window.endswith("G"):
+                cant_g += 1
+                term_c = compone[cant_c]
+                term_g = compone[cant_g]
+                lccsal.append(-(term_a + term_c + term_t + term_g))
+        elif tail == "T":
+            cant_t -= 1
+            if window.endswith("A"):
+                cant_a += 1
+                term_a = compone[cant_a]
+                term_t = compone[cant_t]
+                lccsal.append(-(term_a + term_c + term_t + term_g))
+            elif window.endswith("C"):
+                cant_c += 1
+                term_c = compone[cant_c]
+                term_t = compone[cant_t]
+                lccsal.append(-(term_a + term_c + term_t + term_g))
+            elif window.endswith("G"):
+                cant_g += 1
+                term_t = compone[cant_t]
+                term_g = compone[cant_g]
+                lccsal.append(-(term_a + term_c + term_t + term_g))
+        elif tail == "G":
+            cant_g -= 1
+            if window.endswith("A"):
+                cant_a += 1
+                term_a = compone[cant_a]
+                term_g = compone[cant_g]
+                lccsal.append(-(term_a + term_c + term_t + term_g))
+            elif window.endswith("C"):
+                cant_c += 1
+                term_c = compone[cant_c]
+                term_g = compone[cant_g]
+                lccsal.append(-(term_a + term_c + term_t + term_g))
+            elif window.endswith("T"):
+                cant_t += 1
+                term_t = compone[cant_t]
+                term_g = compone[cant_g]
+                lccsal.append(-(term_a + term_c + term_t + term_g))
+        tail = window[0]
+    return lccsal
+
+
+def lcc_simp(s):
+    """
+    lcc_simp(s)
+
+    Calculate Local Composition Complexity (LCC) for a sequence.
+    """
+    wsize = len(s)
+    term_a = 0.0
+    term_c = 0.0
+    term_t = 0.0
+    term_g = 0.0
+
+
+    upper = s.upper()
+
+    l2 = math.log(2.0)
+    if "A" in s:
+        term_a = ((upper.count("A")) / float(wsize)) * (
+            (math.log((upper.count("A")) / float(wsize))) / l2
+        )
+    if "C" in s:
+        term_c = ((upper.count("C")) / float(wsize)) * (
+            (math.log((upper.count("C")) / float(wsize))) / l2
+        )
+    if "T" in s:
+        term_t = ((upper.count("T")) / float(wsize)) * (
+            (math.log((upper.count("T")) / float(wsize))) / l2
+        )
+    if "G" in s:
+        term_g = ((upper.count("G")) / float(wsize)) * (
+            (math.log((upper.count("G")) / float(wsize))) / l2
+        )
+    return -(term_a + term_c + term_t + term_g)
@@ -0,0 +1,154 @@
+
+"""Indices to be used with ProtParam."""
+
+# Turn black code style off
+# fmt: off
+
+# Kyte & Doolittle index of hydrophobicity
+# J. Mol. Biol. 157:105-132(1982).
+kd = {"A": 1.8, "R": -4.5, "N": -3.5, "D": -3.5, "C": 2.5,
+      "Q": -3.5, "E": -3.5, "G": -0.4, "H": -3.2, "I": 4.5,
+      "L": 3.8, "K": -3.9, "M": 1.9, "F": 2.8, "P": -1.6,
+      "S": -0.8, "T": -0.7, "W": -0.9, "Y": -1.3, "V": 4.2}
+
+# Flexibility
+# Normalized flexibility parameters (B-values), average
+# Vihinen M., Torkkila E., Riikonen P. Proteins. 19(2):141-9(1994).
+Flex = {"A": 0.984, "C": 0.906, "E": 1.094, "D": 1.068,
+        "G": 1.031, "F": 0.915, "I": 0.927, "H": 0.950,
+        "K": 1.102, "M": 0.952, "L": 0.935, "N": 1.048,
+        "Q": 1.037, "P": 1.049, "S": 1.046, "R": 1.008,
+        "T": 0.997, "W": 0.904, "V": 0.931, "Y": 0.929}
+
+# Hydrophilicity
+# 1 Hopp & Wood
+# Proc. Natl. Acad. Sci. U.S.A. 78:3824-3828(1981).
+hw = {"A": -0.5, "R": 3.0, "N": 0.2, "D": 3.0, "C": -1.0,
+      "Q": 0.2, "E": 3.0, "G": 0.0, "H": -0.5, "I": -1.8,
+      "L": -1.8, "K": 3.0, "M": -1.3, "F": -2.5, "P": 0.0,
+      "S": 0.3, "T": -0.4, "W": -3.4, "Y": -2.3, "V": -1.5}
+
+# Surface accessibility
+# Vergoten G & Theophanides T, Biomolecular Structure and Dynamics,
+# pg.138 (1997).
+# 1 Emini Surface fractional probability
+em = {"A": 0.815, "R": 1.475, "N": 1.296, "D": 1.283, "C": 0.394,
+      "Q": 1.348, "E": 1.445, "G": 0.714, "H": 1.180, "I": 0.603,
+      "L": 0.603, "K": 1.545, "M": 0.714, "F": 0.695, "P": 1.236,
+      "S": 1.115, "T": 1.184, "W": 0.808, "Y": 1.089, "V": 0.606}
+
+# 2 Janin Interior to surface transfer energy scale
+ja = {"A": 0.28, "R": -1.14, "N": -0.55, "D": -0.52, "C": 0.97,
+      "Q": -0.69, "E": -1.01, "G": 0.43, "H": -0.31, "I": 0.60,
+      "L": 0.60, "K": -1.62, "M": 0.43, "F": 0.46, "P": -0.42,
+      "S": -0.19, "T": -0.32, "W": 0.29, "Y": -0.15, "V": 0.60}
+
+
+# A two dimensional dictionary for calculating the instability index.
+# Guruprasad K., Reddy B.V.B., Pandit M.W. Protein Engineering 4:155-161(1990).
+# It is based on dipeptide values; therefore, the value for the dipeptide DG
+# is DIWV['D']['G'].
+DIWV = {"A": {"A": 1.0, "C": 44.94, "E": 1.0, "D": -7.49,
+              "G": 1.0, "F": 1.0, "I": 1.0, "H": -7.49,
+              "K": 1.0, "M": 1.0, "L": 1.0, "N": 1.0,
+              "Q": 1.0, "P": 20.26, "S": 1.0, "R": 1.0,
+              "T": 1.0, "W": 1.0, "V": 1.0, "Y": 1.0},
+        "C": {"A": 1.0, "C": 1.0, "E": 1.0, "D": 20.26,
+              "G": 1.0, "F": 1.0, "I": 1.0, "H": 33.60,
+              "K": 1.0, "M": 33.60, "L": 20.26, "N": 1.0,
+              "Q": -6.54, "P": 20.26, "S": 1.0, "R": 1.0,
+              "T": 33.60, "W": 24.68, "V": -6.54, "Y": 1.0},
+        "E": {"A": 1.0, "C": 44.94, "E": 33.60, "D": 20.26,
+              "G": 1.0, "F": 1.0, "I": 20.26, "H": -6.54,
+              "K": 1.0, "M": 1.0, "L": 1.0, "N": 1.0,
+              "Q": 20.26, "P": 20.26, "S": 20.26, "R": 1.0,
+              "T": 1.0, "W": -14.03, "V": 1.0, "Y": 1.0},
+        "D": {"A": 1.0, "C": 1.0, "E": 1.0, "D": 1.0,
+              "G": 1.0, "F": -6.54, "I": 1.0, "H": 1.0,
+              "K": -7.49, "M": 1.0, "L": 1.0, "N": 1.0,
+              "Q": 1.0, "P": 1.0, "S": 20.26, "R": -6.54,
+              "T": -14.03, "W": 1.0, "V": 1.0, "Y": 1.0},
+        "G": {"A": -7.49, "C": 1.0, "E": -6.54, "D": 1.0,
+              "G": 13.34, "F": 1.0, "I": -7.49, "H": 1.0,
+              "K": -7.49, "M": 1.0, "L": 1.0, "N": -7.49,
+              "Q": 1.0, "P": 1.0, "S": 1.0, "R": 1.0,
+              "T": -7.49, "W": 13.34, "V": 1.0, "Y": -7.49},
+        "F": {"A": 1.0, "C": 1.0, "E": 1.0, "D": 13.34,
+              "G": 1.0, "F": 1.0, "I": 1.0, "H": 1.0,
+              "K": -14.03, "M": 1.0, "L": 1.0, "N": 1.0,
+              "Q": 1.0, "P": 20.26, "S": 1.0, "R": 1.0,
+              "T": 1.0, "W": 1.0, "V": 1.0, "Y": 33.601},
+        "I": {"A": 1.0, "C": 1.0, "E": 44.94, "D": 1.0,
+              "G": 1.0, "F": 1.0, "I": 1.0, "H": 13.34,
+              "K": -7.49, "M": 1.0, "L": 20.26, "N": 1.0,
+              "Q": 1.0, "P": -1.88, "S": 1.0, "R": 1.0,
+              "T": 1.0, "W": 1.0, "V": -7.49, "Y": 1.0},
+        "H": {"A": 1.0, "C": 1.0, "E": 1.0, "D": 1.0,
+              "G": -9.37, "F": -9.37, "I": 44.94, "H": 1.0,
+              "K": 24.68, "M": 1.0, "L": 1.0, "N": 24.68,
+              "Q": 1.0, "P": -1.88, "S": 1.0, "R": 1.0,
+              "T": -6.54, "W": -1.88, "V": 1.0, "Y": 44.94},
+        "K": {"A": 1.0, "C": 1.0, "E": 1.0, "D": 1.0,
+              "G": -7.49, "F": 1.0, "I": -7.49, "H": 1.0,
+              "K": 1.0, "M": 33.60, "L": -7.49, "N": 1.0,
+              "Q": 24.64, "P": -6.54, "S": 1.0, "R": 33.60,
+              "T": 1.0, "W": 1.0, "V": -7.49, "Y": 1.0},
+        "M": {"A": 13.34, "C": 1.0, "E": 1.0, "D": 1.0,
+              "G": 1.0, "F": 1.0, "I": 1.0, "H": 58.28,
+              "K": 1.0, "M": -1.88, "L": 1.0, "N": 1.0,
+              "Q": -6.54, "P": 44.94, "S": 44.94, "R": -6.54,
+              "T": -1.88, "W": 1.0, "V": 1.0, "Y": 24.68},
+        "L": {"A": 1.0, "C": 1.0, "E": 1.0, "D": 1.0,
+              "G": 1.0, "F": 1.0, "I": 1.0, "H": 1.0,
+              "K": -7.49, "M": 1.0, "L": 1.0, "N": 1.0,
+              "Q": 33.60, "P": 20.26, "S": 1.0, "R": 20.26,
+              "T": 1.0, "W": 24.68, "V": 1.0, "Y": 1.0},
+        "N": {"A": 1.0, "C": -1.88, "E": 1.0, "D": 1.0,
+              "G": -14.03, "F": -14.03, "I": 44.94, "H": 1.0,
+              "K": 24.68, "M": 1.0, "L": 1.0, "N": 1.0,
+              "Q": -6.54, "P": -1.88, "S": 1.0, "R": 1.0,
+              "T": -7.49, "W": -9.37, "V": 1.0, "Y": 1.0},
+        "Q": {"A": 1.0, "C": -6.54, "E": 20.26, "D": 20.26,
+              "G": 1.0, "F": -6.54, "I": 1.0, "H": 1.0,
+              "K": 1.0, "M": 1.0, "L": 1.0, "N": 1.0,
+              "Q": 20.26, "P": 20.26, "S": 44.94, "R": 1.0,
+              "T": 1.0, "W": 1.0, "V": -6.54, "Y": -6.54},
+        "P": {"A": 20.26, "C": -6.54, "E": 18.38, "D": -6.54,
+              "G": 1.0, "F": 20.26, "I": 1.0, "H": 1.0,
+              "K": 1.0, "M": -6.54, "L": 1.0, "N": 1.0,
+              "Q": 20.26, "P": 20.26, "S": 20.26, "R": -6.54,
+              "T": 1.0, "W": -1.88, "V": 20.26, "Y": 1.0},
+        "S": {"A": 1.0, "C": 33.60, "E": 20.26, "D": 1.0,
+              "G": 1.0, "F": 1.0, "I": 1.0, "H": 1.0,
+              "K": 1.0, "M": 1.0, "L": 1.0, "N": 1.0,
+              "Q": 20.26, "P": 44.94, "S": 20.26, "R": 20.26,
+              "T": 1.0, "W": 1.0, "V": 1.0, "Y": 1.0},
+        "R": {"A": 1.0, "C": 1.0, "E": 1.0, "D": 1.0,
+              "G": -7.49, "F": 1.0, "I": 1.0, "H": 20.26,
+              "K": 1.0, "M": 1.0, "L": 1.0, "N": 13.34,
+              "Q": 20.26, "P": 20.26, "S": 44.94, "R": 58.28,
+              "T": 1.0, "W": 58.28, "V": 1.0, "Y": -6.54},
+        "T": {"A": 1.0, "C": 1.0, "E": 20.26, "D": 1.0,
+              "G": -7.49, "F": 13.34, "I": 1.0, "H": 1.0,
+              "K": 1.0, "M": 1.0, "L": 1.0, "N": -14.03,
+              "Q": -6.54, "P": 1.0, "S": 1.0, "R": 1.0,
+              "T": 1.0, "W": -14.03, "V": 1.0, "Y": 1.0},
+        "W": {"A": -14.03, "C": 1.0, "E": 1.0, "D": 1.0,
+              "G": -9.37, "F": 1.0, "I": 1.0, "H": 24.68,
+              "K": 1.0, "M": 24.68, "L": 13.34, "N": 13.34,
+              "Q": 1.0, "P": 1.0, "S": 1.0, "R": 1.0,
+              "T": -14.03, "W": 1.0, "V": -7.49, "Y": 1.0},
+        "V": {"A": 1.0, "C": 1.0, "E": 1.0, "D": -14.03,
+              "G": -7.49, "F": 1.0, "I": 1.0, "H": 1.0,
+              "K": -1.88, "M": 1.0, "L": 1.0, "N": 1.0,
+              "Q": 1.0, "P": 20.26, "S": 1.0, "R": 1.0,
+              "T": -7.49, "W": 1.0, "V": 1.0, "Y": -6.54},
+        "Y": {"A": 24.68, "C": 1.0, "E": -6.54, "D": 24.68,
+              "G": -7.49, "F": 1.0, "I": 1.0, "H": 13.34,
+              "K": 1.0, "M": 44.94, "L": 1.0, "N": 1.0,
+              "Q": 1.0, "P": 13.34, "S": 1.0, "R": -15.91,
+              "T": -7.49, "W": -9.37, "V": 1.0, "Y": 13.34},
+        }
+
+# Turn black code style on
+# fmt: on
@@ -149,6 +149,32 @@ class StopIteration:
     def message(self: StopIteration):
         return self._hdr.msg
 
+class AttributeError:
+    _hdr: ExcHeader
+
+    def __init__(self: AttributeError):
+        self._hdr = ('AttributeError', '', '', '', 0, 0)
+
+    def __init__(self: AttributeError, message: str):
+        self._hdr = ('AttributeError', message, '', '', 0, 0)
+
+    @property
+    def message(self: AttributeError):
+        return self._hdr.msg
+
+class RuntimeError:
+    _hdr: ExcHeader
+
+    def __init__(self: RuntimeError):
+        self._hdr = ('RuntimeError', '', '', '', 0, 0)
+
+    def __init__(self: RuntimeError, message: str):
+        self._hdr = ('RuntimeError', message, '', '', 0, 0)
+
+    @property
+    def message(self: RuntimeError):
+        return self._hdr.msg
+
 def check_errno(prefix: str):
     msg = _C.seq_check_errno()
     if msg:

@@ -0,0 +1,122 @@
+from Bio.SeqFeature import FeatureLocation, Position
+from Bio.SeqFeature import CompoundLocation, UnknownPosition, SeqFeature
+from Bio.SeqFeature import WithinPosition, BetweenPosition
+
+
+####### Test FeatureLocation #######
+@test
+def test_eq_identical():
+    """
+    Test two identical locations are equal.
+    """
+    loc1 = FeatureLocation(23, 42, 1, '', '')
+    loc2 = FeatureLocation(23, 42, 1, '', '')   
+    assert loc1 == loc2
+
+    loc1 = FeatureLocation(23, 42, -1, '', '')
+    loc2 = FeatureLocation(23, 42, -1, '', '')
+    assert loc1 == loc2
+
+    loc1 = FeatureLocation(23, 42, -1, '', '')
+    loc2 = FeatureLocation(23, 42, -1, '', '')
+    assert loc1 == loc2
+
+    loc1 = FeatureLocation(Position(23, 1, 0), Position(42, 2, 0), 1, '', '')
+    loc2 = FeatureLocation(23, 42, 1, '', '')
+    assert loc1 == loc2
+
+    loc1 = FeatureLocation(23, 42, 1, "foo", "bar")
+    loc2 = FeatureLocation(23, 42, 1, "foo", "bar")
+    assert loc1 == loc2
+test_eq_identical()
+
+@test
+def test_eq_not_identical():
+    """
+    Test two different locations are not equal.
+    """
+    loc1 = FeatureLocation(22, 42, 1, '', '')
+    loc2 = FeatureLocation(23, 42, 1, '', '')
+    assert loc1 != loc2
+
+    loc1 = FeatureLocation(23, 42, 1, '', '')
+    loc2 = FeatureLocation(23, 43, 1, '', '')
+    assert loc1 != loc2
+
+    loc1 = FeatureLocation(23, 42, 1, '', '')
+    loc2 = FeatureLocation(23, 42, -1, '', '')
+    assert loc1 != loc2
+
+    loc1 = FeatureLocation(23, 42, 1, 'foo', '')
+    loc2 = FeatureLocation(23, 42, 1, 'bar', '')
+    assert loc1 != loc2
+
+    loc1 = FeatureLocation(23, 42, 1, 'foo', 'bar')
+    loc2 = FeatureLocation(23, 42, 1, 'foo', 'baz')
+    assert loc1 != loc2
+test_eq_not_identical()
+
+@test
+# not finished need to have raise error with start > end
+def test_start_before_end():
+    expected = "must be greater than or equal to start location"
+    print FeatureLocation(42, 23, 1, '', '')
+
+test_start_before_end()
+
+####### Test CompoundLocation #######
+
+@test
+def test_eq_identical1():
+    loc1 = FeatureLocation(12, 17, 1, '', '') + FeatureLocation(23, 42, 1, '', '')
+    loc2 = FeatureLocation(12, 17, 1, '', '') + FeatureLocation(23, 42, 1, '', '')
+    assert loc1 == loc2
+
+    loc1 = FeatureLocation(12, 17, 1, '', '') + FeatureLocation(23, 42, 1, '', '')
+    loc2 = CompoundLocation([FeatureLocation(12, 17, 1, '', ''), FeatureLocation(23, 42, 1, '', '')], 'join')
+    assert loc1 == loc2
+test_eq_identical1()
+
+@test
+def test_eq_not_identical1():
+    loc1 = FeatureLocation(12, 17, 1, '', '') + FeatureLocation(23, 42, 1, '', '')
+    loc2 = FeatureLocation(12, 17, 1, '', '') + FeatureLocation(23, 42, 1, '', '') + FeatureLocation(50, 60, 1, '', '')
+    assert loc1 != loc2
+
+    loc1 = FeatureLocation(12, 17, 1, '', '') + FeatureLocation(23, 42, 1, '', '')
+    loc2 = FeatureLocation(12, 17, -1, '', '') + FeatureLocation(23, 42, -1, '', '')
+    assert loc1 != loc2
+
+    loc1 = CompoundLocation([FeatureLocation(12, 17, 1, '', ''), FeatureLocation(23, 42, 1, '', '')], "join")
+    loc2 = CompoundLocation([FeatureLocation(12, 17, 1, '', ''), FeatureLocation(23, 42, 1, '', '')], 'order')
+    assert loc1 != loc2
+test_eq_not_identical1()  
+
+####### Test Locations #######
+
+@test
+# Not finished
+def test_fuzzy():
+    exact_pos = Position(5, 0, 0)
+    within_pos_s = WithinPosition(10, left=10, right=13)
+    within_pos_e = WithinPosition(13, left=10, right=13)
+    between_pos_e = BetweenPosition(24, 20, 24)
+    before_pos = Position(15, 1, 0)
+    after_pos = Position(40, 2, 0)
+
+    assert int(within_pos_s) == 10
+    assert str(within_pos_s) == "(10.13)"
+    assert int(within_pos_e) == 13
+    assert str(within_pos_e) == "(10.13)"
+    assert int(between_pos_e) == 24
+    assert str(between_pos_e) == "(20^24)"
+    assert str(before_pos) == "<15"
+    assert str(after_pos) == ">40"
+
+    # TODO/CAVEATS:
+    # FeatureLocation does not support these types for now.
+    # location1 = FeatureLocation(exact_pos, within_pos_e, 1, '', '')
+    # location2 = FeatureLocation(before_pos, between_pos_e)
+    # location3 = FeatureLocation(within_pos_s, after_pos)
+test_fuzzy()
+
@@ -0,0 +1,23 @@
+from biopy.sequtils.checksum import crc64, gcg
+
+str_light_chain_one =  ("QSALTQPASVSGSPGQSITISCTGTSSDVGSYNLVSWYQQ"
+                        "HPGKAPKLMIYEGSKRPSGVSNRFSGSKSGNTASLTISGL"
+                        "QAEDEADYYCSSYAGSSTLVFGGGTKLTVL")
+
+str_light_chain_two =  ("QSALTQPASVSGSPGQSITISCTGTSSDVGSYNLVSWYQQ"
+                        "HPGKAPKLMIYEGSKRPSGVSNRFSGSKSGNTASLTISGL"
+                        "QAEDEADYYCCSYAGSSTWVFGGGTKLTVL")
+
+@test
+def test_checksum():
+    num1 = crc64(str_light_chain_one)
+    num2 = crc64(str_light_chain_two)
+    assert num1 == num2
+test_checksum()
+
+@test
+def test_gcg():
+    num1 = gcg(str_light_chain_one)
+    num2 = gcg(str_light_chain_two)
+    assert num1 != num2
+test_gcg()
@@ -0,0 +1,21 @@
+from biopy.sequtils.codonusage import CodonAdaptationIndex
+import math
+
+X = CodonAdaptationIndex()
+path = "/Users/jordanwatson1/seq/test/stdlib/biopy_test/sequtils_test/scratch.fasta"
+
+X.generate_index(path)
+
+@test
+def cai_for_gene():
+    PRECISION = 1e-6
+
+    # Test Codon Adaptation Index (CAI) using default E. coli data.
+    CAI = CodonAdaptationIndex()
+    value = CAI.cai_for_gene("ATGCGTATCGATCGCGATACGATTAGGCGGATG")
+    assert math.fabs(value - 0.09978) < PRECISION
+cai_for_gene()
+
+def print_index():
+    X.print_index()
+# print_index()
@@ -0,0 +1,25 @@
+from biopy.sequtils.lcc import lcc_simp, lcc_mult
+import math
+
+str_light_chain_one =  ("QSALTQPASVSGSPGQSITISCTGTSSDVGSYNLVSWYQQ"
+                        "HPGKAPKLMIYEGSKRPSGVSNRFSGSKSGNTASLTISGL"
+                        "QAEDEADYYCSSYAGSSTLVFGGGTKLTVL")
+
+exp_simple_LCC = 1.03
+exp_window_LCC = (0.00, 1.00, 0.96, 0.96, 0.96, 0.65, 0.43, 0.35, 0.35, 0.35, 0.35, 0.53, 0.59, 0.26)
+
+@test
+def lcc_simp_test():
+    PRECISION = 1e2
+    assert math.fabs(lcc_simp(str_light_chain_one) - exp_simple_LCC) < PRECISION
+lcc_simp_test()
+
+@test
+def lcc_mult_test():
+    PRECISION = 1e2
+    values = lcc_mult(str_light_chain_one, 20)
+    assert len(values) == len(exp_window_LCC)
+
+    for val1, val2 in zip(values, exp_window_LCC):
+        assert math.fabs(val1 - val2) < PRECISION
+lcc_mult_test()