File tree 3 files changed +58
-0
lines changed 3 files changed +58
-0
lines changed Original file line number Diff line number Diff line change
1
+ ## Mutating DNA sequence with NumPy
2
+
3
+ Create a NumPy character array representing a DNA sequence.
4
+ Then use advanced indexing and the NumPy random module to make random
5
+ mutations to the DNA sequence:
6
+
7
+ * Choose N random mutation sites using `numpy.random.randint()`
8
+ (a single site can be selected multiple times)
9
+ * Create N random letters representing the mutations
10
+ * Use advanced indexing for creating mutated sequence
11
+
12
+ The end result should look something like:
13
+
14
+ ```
15
+ Original DNA: ATGCTACAGT
16
+ Mutated DNA: AGGCTACAGA
17
+ ```
18
+
19
+ You can start from the provided skeleton code [skeleton.py](skeleton.py).
Original file line number Diff line number Diff line change
1
+ import numpy as np
2
+
3
+ # Generate N element long random character array from given alphabet
4
+ def generate_string (N , alphabet = 'ACGT' ):
5
+ base = np .array (alphabet , dtype = 'c' )
6
+ # Draw N random samples from alphabet
7
+ dna = np .random .choice (base , N )
8
+ return dna
9
+
10
# Build a 20-character random sequence and print it as plain text
dna = generate_string(20)
original_text = dna.tobytes().decode()
print("Original DNA", original_text)
12
+
13
+ # TODO
14
+ # Use numpy.random.randint for selecting N mutation sites
15
+ # Then utilise numpy.random.choice for generating the mutations
16
+ # and use advanced indexing for creating mutated DNA
Original file line number Diff line number Diff line change
1
+ import numpy as np
2
+
3
+ # Generate N element long random character array from given alphabet
4
+ def generate_string (N , alphabet = 'ACGT' ):
5
+ base = np .array (alphabet , dtype = 'c' )
6
+ # Draw N random samples from alphabet
7
+ dna = np .random .choice (base , N )
8
+ return dna
9
+
10
+ # Perform N random mutations to DNA string
11
+ def mutate (dna , N ):
12
+ mutated = dna .copy ()
13
+ mutation_sites = np .random .random_integers (0 , dna .size - 1 , size = N )
14
+ base = np .array ('ACGT' , dtype = 'c' )
15
+ new_bases = np .random .choice (base , N )
16
+ mutated [mutation_sites ] = new_bases
17
+ return mutated
18
+
19
# Build a random sequence, mutate five sites, and report both versions
dna = generate_string(20)
dna_mutated = mutate(dna, 5)

original_text = dna.tobytes().decode()
mutated_text = dna_mutated.tobytes().decode()
# Fraction of positions where the two sequences hold the same character
matching_fraction = np.sum(dna == dna_mutated) / float(dna.size)

print("Original DNA:", original_text)
print("Mutated DNA:", mutated_text)
print("Similarity ", matching_fraction)
You can’t perform that action at this time.
0 commit comments