Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
94 changes: 70 additions & 24 deletions gene_finder.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"""
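Gene finder: locates open reading frames (ORFs) on both strands of a DNA
sequence and translates the ORFs that are longer than a threshold, derived
from shuffled copies of the sequence, into amino acid sequences.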

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In future mini projects, please leave a header comment that describes what this code is about. It's going to help you a lot when the code base gets large.


@author: YOUR NAME HERE
@author:

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please write your name, just as you would on any other quiz or project.


"""

def get_complement(nucleotide):
    """ Returns the complementary nucleotide.

        nucleotide: a nucleotide (A, C, G, or T) represented as a string
        returns: the complementary nucleotide (A <-> T, C <-> G)
        raises: ValueError if nucleotide is not one of A, C, G, or T
    >>> get_complement('A')
    'T'
    >>> get_complement('C')
    'G'
    """
    complements = {"A": "T", "T": "A", "C": "G", "G": "C"}
    # Fail loudly on anything unexpected instead of silently returning None,
    # which would only surface later as a confusing error in the caller.
    if nucleotide not in complements:
        raise ValueError("invalid nucleotide: {!r}".format(nucleotide))
    return complements[nucleotide]


def get_reverse_complement(dna):
    """ Computes the reverse complement of the specified DNA sequence.

        dna: a DNA sequence represented as a string
        returns: the reverse complementary DNA sequence represented as a
                 string
    >>> get_reverse_complement("CCGCGTTCA")
    'TGAACGCGG'
    """
    # Walk the strand back to front so that complementing and reversing
    # happen in a single pass.
    return "".join(get_complement(nucleotide) for nucleotide in reversed(dna))


def rest_of_ORF(dna):
    """ Returns the part of dna that precedes the first in-frame stop codon.

        The sequence is read codon by codon starting at index 0. If no
        in-frame stop codon (TAG, TAA or TGA) is found, the whole sequence
        is returned.

        dna: a DNA sequence
        returns: the open reading frame represented as a string
    >>> rest_of_ORF("ATGAGATAGG")
    'ATGAGA'
    """
    stop_codons = ("TAG", "TAA", "TGA")
    for i in range(0, len(dna), 3):
        if dna[i:i + 3] in stop_codons:
            return dna[:i]
    return dna

def find_all_ORFs_oneframe(dna):
    """ Finds all non-nested open reading frames in the given DNA sequence,
        reading codons in the frame that starts at index 0.

        dna: a DNA sequence
        returns: a list of non-nested ORFs
    >>> find_all_ORFs_oneframe("ATGCATGAATGTAGATAGATGTGCCC")
    ['ATGCATGAATGTAGA', 'ATGTGCCC']
    """
    start_codon = "ATG"
    orfs = []
    i = 0
    while i < len(dna):
        if dna[i:i + 3] == start_codon:
            orf = rest_of_ORF(dna[i:])
            orfs.append(orf)
            # Jump past the ORF so start codons nested inside it are not
            # reported as separate ORFs.
            i += len(orf)
        # Advance one codon; right after an ORF this skips its stop codon.
        i += 3
    return orfs


def find_all_ORFs(dna):
    """ Finds all non-nested open reading frames in the three possible
        reading frames of dna (offsets 0, 1 and 2), in that order.

        dna: a DNA sequence
        returns: a list of non-nested ORFs
    >>> find_all_ORFs("ATGCATGAATGTAG")
    ['ATGCATGAATGTAG', 'ATGAATGTAG', 'ATG']
    """
    frames = (dna[offset:] for offset in range(3))
    return [orf for frame in frames for orf in find_all_ORFs_oneframe(frame)]

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice use of a list comprehension! 👍



def find_all_ORFs_both_strands(dna):
    """ Finds all non-nested open reading frames on dna and on its reverse
        complement. ORFs from dna itself come first in the result.

        dna: a DNA sequence
        returns: a list of non-nested ORFs
    >>> find_all_ORFs_both_strands("ATGCGAATGTAGCATCAAA")
    ['ATGCGAATG', 'ATGCTACATTCGCAT']
    """
    forward_orfs = find_all_ORFs(dna)
    reverse_orfs = find_all_ORFs(get_reverse_complement(dna))
    return forward_orfs + reverse_orfs


def longest_ORF(dna):
    """ Finds the longest ORF on both strands of the specified DNA.

        dna: a DNA sequence
        returns: the longest ORF as a string, or '' when there is none; on
                 a tie the ORF found first is returned
    >>> longest_ORF("ATGCGAATGTAGCATCAAA")
    'ATGCTACATTCGCAT'
    """
    # max() keeps the first of several equally long ORFs, and the default
    # covers the case where no ORF is found at all.
    return max(find_all_ORFs_both_strands(dna), key=len, default="")

def longest_ORF_noncoding(dna, num_trials):
    """ Computes the maximum length of the longest ORF over num_trials shuffles
        of the specified DNA sequence.

        dna: a DNA sequence
        num_trials: the number of random shuffles
        returns: the maximum length longest ORF (0 when num_trials is 0) """
    longest = 0
    for _ in range(num_trials):
        shuffled = shuffle_string(dna)
        # Search each shuffled sequence once; the ORF search is the
        # expensive step of every trial.
        longest = max(longest, len(longest_ORF(shuffled)))
    return longest


def coding_strand_to_AA(dna):
    """ Translates a DNA sequence into a string of amino acids by looking
        each complete codon up in aa_table. Trailing nucleotides that do not
        fill a whole codon are ignored.

        dna: a DNA sequence represented as a string
        returns: a string containing the sequence of amino acids encoded by
                 dna
    >>> coding_strand_to_AA("ATGCCCGCTTT")
    'MPA'
    """
    # Integer arithmetic avoids the float round trip of int(len(dna) / 3).
    usable_length = len(dna) - len(dna) % 3
    return "".join(aa_table[dna[i:i + 3]]
                   for i in range(0, usable_length, 3))


def gene_finder(dna, num_trials=1500):
    """ Returns the amino acid sequences that are likely coded by the
        specified dna.

        An ORF is kept only when it is strictly longer than the longest ORF
        found in num_trials random shuffles of dna.

        dna: a DNA sequence
        num_trials: the number of random shuffles used to compute the length
                    threshold (defaults to 1500)
        returns: a list of all amino acid sequences coded by the sequence dna.
    """
    threshold = longest_ORF_noncoding(dna, num_trials)
    return [coding_strand_to_AA(orf)
            for orf in find_all_ORFs_both_strands(dna)
            if len(orf) > threshold]


if __name__ == "__main__":
    # Load the sequence stored in ./data/X73525.fa and print the amino acid
    # sequences that gene_finder reports for it.
    dna = load_seq("./data/X73525.fa")
    print(gene_finder(dna))