-
Notifications
You must be signed in to change notification settings - Fork 18
Gene Finder #8
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Gene Finder #8
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2,7 +2,7 @@ | |
| """ | ||
| YOUR HEADER COMMENT HERE | ||
|
|
||
| @author: YOUR NAME HERE | ||
| @author: | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Please write your name here, just as you would on any other quiz or project. |
||
|
|
||
| """ | ||
|
|
||
|
|
@@ -30,8 +30,15 @@ def get_complement(nucleotide): | |
| >>> get_complement('C') | ||
| 'G' | ||
| """ | ||
| # TODO: implement this | ||
| pass | ||
|
|
||
| if nucleotide == "A": | ||
| return "T" | ||
| if nucleotide == "T": | ||
| return "A" | ||
| if nucleotide == "C": | ||
| return "G" | ||
| if nucleotide == "G": | ||
| return "C" | ||
|
|
||
|
|
||
| def get_reverse_complement(dna): | ||
|
|
@@ -45,8 +52,15 @@ def get_reverse_complement(dna): | |
| >>> get_reverse_complement("CCGCGTTCA") | ||
| 'TGAACGCGG' | ||
| """ | ||
| # TODO: implement this | ||
| pass | ||
|
|
||
| c_dna =[] | ||
| for letter in dna: | ||
| n_dna = get_complement(letter) | ||
| c_dna.append(n_dna) | ||
|
|
||
| complementary_dna = "".join(c_dna) | ||
|
|
||
| return complementary_dna[ : :-1] | ||
|
|
||
|
|
||
| def rest_of_ORF(dna): | ||
|
|
@@ -62,9 +76,14 @@ def rest_of_ORF(dna): | |
| >>> rest_of_ORF("ATGAGATAGG") | ||
| 'ATGAGA' | ||
| """ | ||
| # TODO: implement this | ||
| pass | ||
|
|
||
| stop_codons = ["TAG", "TAA","TGA"] | ||
| for i in range(0,len(dna),3): | ||
| codon = dna[i:i+3] | ||
| for stop in stop_codons: | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. You can do `if codon in stop_codons:` here instead of looping over each stop codon. |
||
| if (codon == stop): | ||
| return dna [:i] | ||
| return dna | ||
|
|
||
| def find_all_ORFs_oneframe(dna): | ||
| """ Finds all non-nested open reading frames in the given DNA | ||
|
|
@@ -79,8 +98,19 @@ def find_all_ORFs_oneframe(dna): | |
| >>> find_all_ORFs_oneframe("ATGCATGAATGTAGATAGATGTGCCC") | ||
| ['ATGCATGAATGTAGA', 'ATGTGCCC'] | ||
| """ | ||
| # TODO: implement this | ||
| pass | ||
| stop_codons = ['TAA', 'TAG', 'TGA'] | ||
| start_codon = 'ATG' | ||
| orfs = [] | ||
| i =0 | ||
|
|
||
| while i < len(dna): | ||
| codon = dna[i:i+3] | ||
| if (codon == start_codon): | ||
| orf = rest_of_ORF(dna[i:]) | ||
| orfs.append(orf) | ||
| i = i + len(orf) | ||
| i = i +3 | ||
| return orfs | ||
|
|
||
|
|
||
| def find_all_ORFs(dna): | ||
|
|
@@ -96,8 +126,7 @@ def find_all_ORFs(dna): | |
| >>> find_all_ORFs("ATGCATGAATGTAG") | ||
| ['ATGCATGAATGTAG', 'ATGAATGTAG', 'ATG'] | ||
| """ | ||
| # TODO: implement this | ||
| pass | ||
| return [i for j in range(0,3) for i in find_all_ORFs_oneframe(dna[j:])] | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Nice use of a list comprehension! 👍 |
||
|
|
||
|
|
||
| def find_all_ORFs_both_strands(dna): | ||
|
|
@@ -109,8 +138,7 @@ def find_all_ORFs_both_strands(dna): | |
| >>> find_all_ORFs_both_strands("ATGCGAATGTAGCATCAAA") | ||
| ['ATGCGAATG', 'ATGCTACATTCGCAT'] | ||
| """ | ||
| # TODO: implement this | ||
| pass | ||
| return find_all_ORFs(dna) + find_all_ORFs(get_reverse_complement(dna)) | ||
|
|
||
|
|
||
| def longest_ORF(dna): | ||
|
|
@@ -119,9 +147,11 @@ def longest_ORF(dna): | |
| >>> longest_ORF("ATGCGAATGTAGCATCAAA") | ||
| 'ATGCTACATTCGCAT' | ||
| """ | ||
| # TODO: implement this | ||
| pass | ||
|
|
||
| longest = '' | ||
| for orf in find_all_ORFs_both_strands(dna): | ||
| if len(orf) > len(longest): | ||
| longest = orf | ||
| return longest | ||
|
|
||
| def longest_ORF_noncoding(dna, num_trials): | ||
| """ Computes the maximum length of the longest ORF over num_trials shuffles | ||
|
|
@@ -130,8 +160,12 @@ def longest_ORF_noncoding(dna, num_trials): | |
| dna: a DNA sequence | ||
| num_trials: the number of random shuffles | ||
| returns: the maximum length longest ORF """ | ||
| # TODO: implement this | ||
| pass | ||
| res = 0 | ||
| for i in range(num_trials): | ||
| working_dna = shuffle_string(dna) | ||
| if len(longest_ORF(working_dna)) > res: | ||
| res = len(longest_ORF(working_dna)) | ||
| return res | ||
|
|
||
|
|
||
| def coding_strand_to_AA(dna): | ||
|
|
@@ -148,8 +182,10 @@ def coding_strand_to_AA(dna): | |
| >>> coding_strand_to_AA("ATGCCCGCTTT") | ||
| 'MPA' | ||
| """ | ||
| # TODO: implement this | ||
| pass | ||
| acid = '' | ||
| for i in range(0,int(len(dna)/3)): | ||
| acid = acid + aa_table[dna[3*i:3*(i+1)]] | ||
| return acid | ||
|
|
||
|
|
||
| def gene_finder(dna): | ||
|
|
@@ -158,9 +194,19 @@ def gene_finder(dna): | |
| dna: a DNA sequence | ||
| returns: a list of all amino acid sequences coded by the sequence dna. | ||
| """ | ||
| # TODO: implement this | ||
| pass | ||
| threshold = longest_ORF_noncoding(dna,1500) | ||
| orfs = find_all_ORFs_both_strands(dna) | ||
| genes = [] | ||
| for orf in orfs: | ||
| if len(orf) > threshold: | ||
| genes.append(coding_strand_to_AA(orf)) | ||
| return genes | ||
|
|
||
|
|
||
| if __name__ == "__main__": | ||
| import doctest | ||
| doctest.testmod() | ||
| # import doctest | ||
| # # doctest.testmod() | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Please remove the commented-out code before the final code submission. |
||
| # doctest.run_docstring_examples(coding_strand_to_AA,globals(), verbose=True) | ||
|
|
||
| dna = load_seq("./data/X73525.fa") | ||
| print(gene_finder(dna)) | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In future mini projects, please leave a header comment that describes what this code is about. It's going to help you a lot when the code base gets large.