Add DNA mutation exercise

jussienko · jussienko · commit d31d4269278a · 2018-04-11T11:14:53.000+03:00
diff --git a/numpy/dna-mutation/README.md b/numpy/dna-mutation/README.md
@@ -0,0 +1,19 @@
+## Mutating DNA sequence with NumPy
+
+Create a NumPy character array representing a DNA sequence.
+Then use advanced indexing and the NumPy random module to make random
+mutations to the DNA sequence:
+
+ * Choose N random mutation sites using `numpy.random.random_integers()`
+   (a single site can be selected multiple times)
+ * Create N random letters representing the mutations
+ * Use advanced indexing to create the mutated sequence
+
+The end result should look something like:
+
+```
+Original DNA: ATGCTACAGT
+Mutated  DNA: AGGCTACAGA
+```
+
+You can start from the provided skeleton code [skeleton.py](skeleton.py).
diff --git a/numpy/dna-mutation/skeleton.py b/numpy/dna-mutation/skeleton.py
@@ -0,0 +1,16 @@
+import numpy as np
+
+# Build a random sequence: an N-element character array sampled from alphabet
+def generate_string(N, alphabet='ACGT'):
+    # dtype='c' splits the string into an array of single characters
+    letters = np.array(alphabet, dtype='c')
+    # Pick N letters at random (the same letter may be drawn repeatedly)
+    return np.random.choice(letters, size=N)
+
+dna = generate_string(20)  # 20 random letters from the default 'ACGT' alphabet
+print("Original DNA", dna.tobytes().decode())  # character array -> printable str
+
+# TODO
+# Use numpy.random.random_integers (deprecated in favour of
+# numpy.random.randint) to select N mutation sites, then numpy.random.choice
+# to generate the mutations, and advanced indexing to create the mutated DNA
diff --git a/numpy/dna-mutation/solution/mutate.py b/numpy/dna-mutation/solution/mutate.py
@@ -0,0 +1,23 @@
+import numpy as np
+
+# Build a random sequence: an N-element character array sampled from alphabet
+def generate_string(N, alphabet='ACGT'):
+    # dtype='c' splits the string into an array of single characters
+    letters = np.array(alphabet, dtype='c')
+    # Pick N letters at random (the same letter may be drawn repeatedly)
+    return np.random.choice(letters, size=N)
+
+# Return a copy of dna with N random sites (repeats allowed) set to random letters
+def mutate(dna, N, alphabet='ACGT'):
+    mutated = dna.copy()  # leave the caller's array untouched
+    # randint's upper bound is exclusive; it replaces the deprecated random_integers
+    mutation_sites = np.random.randint(0, dna.size, size=N)
+    new_bases = np.random.choice(np.array(alphabet, dtype='c'), N)
+    mutated[mutation_sites] = new_bases  # advanced (integer-array) indexing
+    return mutated
+
+dna = generate_string(20)
+dna_mutated = mutate(dna, 5)  # 5 random sites, possibly repeated, are rewritten
+print("Original DNA:", dna.tobytes().decode())
+print("Mutated  DNA:", dna_mutated.tobytes().decode())
+print("Similarity ", np.mean(dna == dna_mutated))  # fraction of equal positions