diff --git a/ch2/filteringdata.py b/ch2/filteringdata.py index 8ff2afe..ba45a8f 100644 --- a/ch2/filteringdata.py +++ b/ch2/filteringdata.py @@ -8,23 +8,50 @@ from math import sqrt -users = {"Angelica": {"Blues Traveler": 3.5, "Broken Bells": 2.0, "Norah Jones": 4.5, "Phoenix": 5.0, "Slightly Stoopid": 1.5, "The Strokes": 2.5, "Vampire Weekend": 2.0}, - "Bill":{"Blues Traveler": 2.0, "Broken Bells": 3.5, "Deadmau5": 4.0, "Phoenix": 2.0, "Slightly Stoopid": 3.5, "Vampire Weekend": 3.0}, - "Chan": {"Blues Traveler": 5.0, "Broken Bells": 1.0, "Deadmau5": 1.0, "Norah Jones": 3.0, "Phoenix": 5, "Slightly Stoopid": 1.0}, - "Dan": {"Blues Traveler": 3.0, "Broken Bells": 4.0, "Deadmau5": 4.5, "Phoenix": 3.0, "Slightly Stoopid": 4.5, "The Strokes": 4.0, "Vampire Weekend": 2.0}, - "Hailey": {"Broken Bells": 4.0, "Deadmau5": 1.0, "Norah Jones": 4.0, "The Strokes": 4.0, "Vampire Weekend": 1.0}, - "Jordyn": {"Broken Bells": 4.5, "Deadmau5": 4.0, "Norah Jones": 5.0, "Phoenix": 5.0, "Slightly Stoopid": 4.5, "The Strokes": 4.0, "Vampire Weekend": 4.0}, - "Sam": {"Blues Traveler": 5.0, "Broken Bells": 2.0, "Norah Jones": 3.0, "Phoenix": 5.0, "Slightly Stoopid": 4.0, "The Strokes": 5.0}, - "Veronica": {"Blues Traveler": 3.0, "Norah Jones": 5.0, "Phoenix": 4.0, "Slightly Stoopid": 2.5, "The Strokes": 3.0} - } - +users = { + "Angelica": { + "Blues Traveler": 3.5, "Broken Bells": 2.0, "Norah Jones": 4.5, + "Phoenix": 5.0, "Slightly Stoopid": 1.5, "The Strokes": 2.5, + "Vampire Weekend": 2.0 + }, + "Bill": { + "Blues Traveler": 2.0, "Broken Bells": 3.5, "Deadmau5": 4.0, + "Phoenix": 2.0, "Slightly Stoopid": 3.5, "Vampire Weekend": 3.0 + }, + "Chan": { + "Blues Traveler": 5.0, "Broken Bells": 1.0, "Deadmau5": 1.0, + "Norah Jones": 3.0, "Phoenix": 5, "Slightly Stoopid": 1.0 + }, + "Dan": { + "Blues Traveler": 3.0, "Broken Bells": 4.0, "Deadmau5": 4.5, + "Phoenix": 3.0, "Slightly Stoopid": 4.5, "The Strokes": 4.0, + "Vampire Weekend": 2.0 + }, + "Hailey": { + "Broken Bells": 4.0, "Deadmau5": 1.0, "Norah Jones": 4.0, + "The Strokes": 4.0, "Vampire Weekend": 1.0 + }, + "Jordyn": { + "Broken Bells": 4.5, "Deadmau5": 4.0, "Norah Jones": 5.0, + "Phoenix": 5.0, "Slightly Stoopid": 4.5, "The Strokes": 4.0, + "Vampire Weekend": 4.0 + }, + "Sam": { + "Blues Traveler": 5.0, "Broken Bells": 2.0, "Norah Jones": 3.0, + "Phoenix": 5.0, "Slightly Stoopid": 4.0, "The Strokes": 5.0 + }, + "Veronica": { + "Blues Traveler": 3.0, "Norah Jones": 5.0, "Phoenix": 4.0, + "Slightly Stoopid": 2.5, "The Strokes": 3.0 + } +} def manhattan(rating1, rating2): """Computes the Manhattan distance. 
Both rating1 and rating2 are dictionaries of the form {'The Strokes': 3.0, 'Slightly Stoopid': 2.5}""" distance = 0 - commonRatings = False + commonRatings = False for key in rating1: if key in rating2: distance += abs(rating1[key] - rating2[key]) @@ -32,7 +59,7 @@ def manhattan(rating1, rating2): if commonRatings: return distance else: - return -1 #Indicates no ratings in common + return -1 # Indicates no ratings in common def computeNearestNeighbor(username, users): @@ -46,6 +73,7 @@ def computeNearestNeighbor(username, users): distances.sort() return distances + def recommend(username, users): """Give list of recommendations""" # first find nearest neighbor @@ -56,12 +84,16 @@ def recommend(username, users): neighborRatings = users[nearest] userRatings = users[username] for artist in neighborRatings: - if not artist in userRatings: + if artist not in userRatings: recommendations.append((artist, neighborRatings[artist])) # using the fn sorted for variety - sort is more efficient - return sorted(recommendations, key=lambda artistTuple: artistTuple[1], reverse = True) + return sorted( + recommendations, + key=lambda artistTuple: artistTuple[1], + reverse=True + ) # examples - uncomment to run -print( recommend('Hailey', users)) -#print( recommend('Chan', users)) +print(recommend('Hailey', users)) +# print( recommend('Chan', users)) diff --git a/ch2/filteringdataPearson.py b/ch2/filteringdataPearson.py index 68a0f2b..6b03b45 100644 --- a/ch2/filteringdataPearson.py +++ b/ch2/filteringdataPearson.py @@ -8,16 +8,43 @@ from math import sqrt -users = {"Angelica": {"Blues Traveler": 3.5, "Broken Bells": 2.0, "Norah Jones": 4.5, "Phoenix": 5.0, "Slightly Stoopid": 1.5, "The Strokes": 2.5, "Vampire Weekend": 2.0}, - "Bill":{"Blues Traveler": 2.0, "Broken Bells": 3.5, "Deadmau5": 4.0, "Phoenix": 2.0, "Slightly Stoopid": 3.5, "Vampire Weekend": 3.0}, - "Chan": {"Blues Traveler": 5.0, "Broken Bells": 1.0, "Deadmau5": 1.0, "Norah Jones": 3.0, "Phoenix": 5, "Slightly Stoopid": 1.0}, - "Dan": {"Blues Traveler": 3.0, "Broken Bells": 4.0, "Deadmau5": 4.5, "Phoenix": 3.0, "Slightly Stoopid": 4.5, "The Strokes": 4.0, "Vampire Weekend": 2.0}, - "Hailey": {"Broken Bells": 4.0, "Deadmau5": 1.0, "Norah Jones": 4.0, "The Strokes": 4.0, "Vampire Weekend": 1.0}, - "Jordyn": {"Broken Bells": 4.5, "Deadmau5": 4.0, "Norah Jones": 5.0, "Phoenix": 5.0, "Slightly Stoopid": 4.5, "The Strokes": 4.0, "Vampire Weekend": 4.0}, - "Sam": {"Blues Traveler": 5.0, "Broken Bells": 2.0, "Norah Jones": 3.0, "Phoenix": 5.0, "Slightly Stoopid": 4.0, "The Strokes": 5.0}, - "Veronica": {"Blues Traveler": 3.0, "Norah Jones": 5.0, "Phoenix": 4.0, "Slightly Stoopid": 2.5, "The Strokes": 3.0} - } - +users = { + "Angelica": { + "Blues Traveler": 3.5, "Broken Bells": 2.0, "Norah Jones": 4.5, + "Phoenix": 5.0, "Slightly Stoopid": 1.5, "The Strokes": 2.5, + "Vampire Weekend": 2.0 + }, + "Bill": { + "Blues Traveler": 2.0, "Broken Bells": 3.5, "Deadmau5": 4.0, + "Phoenix": 2.0, "Slightly Stoopid": 3.5, "Vampire Weekend": 3.0 + }, + "Chan": { + "Blues Traveler": 5.0, "Broken Bells": 1.0, "Deadmau5": 1.0, + "Norah Jones": 3.0, "Phoenix": 5, "Slightly Stoopid": 1.0 + }, + "Dan": { + "Blues Traveler": 3.0, "Broken Bells": 4.0, "Deadmau5": 4.5, + "Phoenix": 3.0, "Slightly Stoopid": 4.5, "The Strokes": 4.0, + "Vampire Weekend": 2.0 + }, + "Hailey": { + "Broken Bells": 4.0, "Deadmau5": 1.0, "Norah Jones": 4.0, + "The Strokes": 4.0, "Vampire Weekend": 1.0 + }, + "Jordyn": { + "Broken Bells": 4.5, "Deadmau5": 4.0, "Norah Jones": 5.0, + "Phoenix": 5.0, 
"Slightly Stoopid": 4.5, "The Strokes": 4.0, + "Vampire Weekend": 4.0 + }, + "Sam": { + "Blues Traveler": 5.0, "Broken Bells": 2.0, "Norah Jones": 3.0, + "Phoenix": 5.0, "Slightly Stoopid": 4.0, "The Strokes": 5.0 + }, + "Veronica": { + "Blues Traveler": 3.0, "Norah Jones": 5.0, "Phoenix": 4.0, + "Slightly Stoopid": 2.5, "The Strokes": 3.0 + } +} def manhattan(rating1, rating2): @@ -32,8 +59,7 @@ def manhattan(rating1, rating2): if total > 0: return distance / total else: - return -1 #Indicates no ratings in common - + return -1 # Indicates no ratings in common def pearson(rating1, rating2): @@ -54,12 +80,16 @@ def pearson(rating1, rating2): sum_x2 += pow(x, 2) sum_y2 += pow(y, 2) # now compute denominator - denominator = sqrt(sum_x2 - pow(sum_x, 2) / n) * sqrt(sum_y2 - pow(sum_y, 2) / n) + denominator = ( + sqrt(sum_x2 - pow(sum_x, 2) / n) * + sqrt(sum_y2 - pow(sum_y, 2) / n) + ) + if denominator == 0: return 0 else: return (sum_xy - (sum_x * sum_y) / n) / denominator - + def computeNearestNeighbor(username, users): """creates a sorted list of users based on their distance to username""" @@ -72,6 +102,7 @@ def computeNearestNeighbor(username, users): distances.sort() return distances + def recommend(username, users): """Give list of recommendations""" # first find nearest neighbor @@ -82,8 +113,11 @@ def recommend(username, users): neighborRatings = users[nearest] userRatings = users[username] for artist in neighborRatings: - if not artist in userRatings: + if artist not in userRatings: recommendations.append((artist, neighborRatings[artist])) # using the fn sorted for variety - sort is more efficient - return sorted(recommendations, key=lambda artistTuple: artistTuple[1], reverse = True) - + return sorted( + recommendations, + key=lambda artistTuple: artistTuple[1], + reverse=True + ) diff --git a/ch2/recommender.py b/ch2/recommender.py index 4c38f2a..48bd974 100644 --- a/ch2/recommender.py +++ b/ch2/recommender.py @@ -1,42 +1,41 @@ -import codecs +import codecs from math import sqrt users = {"Angelica": {"Blues Traveler": 3.5, "Broken Bells": 2.0, "Norah Jones": 4.5, "Phoenix": 5.0, "Slightly Stoopid": 1.5, "The Strokes": 2.5, "Vampire Weekend": 2.0}, - - "Bill":{"Blues Traveler": 2.0, "Broken Bells": 3.5, - "Deadmau5": 4.0, "Phoenix": 2.0, - "Slightly Stoopid": 3.5, "Vampire Weekend": 3.0}, - + + "Bill": {"Blues Traveler": 2.0, "Broken Bells": 3.5, + "Deadmau5": 4.0, "Phoenix": 2.0, + "Slightly Stoopid": 3.5, "Vampire Weekend": 3.0}, + "Chan": {"Blues Traveler": 5.0, "Broken Bells": 1.0, "Deadmau5": 1.0, "Norah Jones": 3.0, "Phoenix": 5, "Slightly Stoopid": 1.0}, - + "Dan": {"Blues Traveler": 3.0, "Broken Bells": 4.0, "Deadmau5": 4.5, "Phoenix": 3.0, "Slightly Stoopid": 4.5, "The Strokes": 4.0, "Vampire Weekend": 2.0}, - + "Hailey": {"Broken Bells": 4.0, "Deadmau5": 1.0, "Norah Jones": 4.0, "The Strokes": 4.0, "Vampire Weekend": 1.0}, - + "Jordyn": {"Broken Bells": 4.5, "Deadmau5": 4.0, "Norah Jones": 5.0, "Phoenix": 5.0, "Slightly Stoopid": 4.5, "The Strokes": 4.0, "Vampire Weekend": 4.0}, - + "Sam": {"Blues Traveler": 5.0, "Broken Bells": 2.0, "Norah Jones": 3.0, "Phoenix": 5.0, "Slightly Stoopid": 4.0, "The Strokes": 5.0}, - + "Veronica": {"Blues Traveler": 3.0, "Norah Jones": 5.0, "Phoenix": 4.0, "Slightly Stoopid": 2.5, "The Strokes": 3.0} - } - + } class recommender: @@ -71,7 +70,6 @@ def convertProductID2name(self, id): else: return id - def userRatings(self, id, n): """Return n top ratings for user with id""" print ("Ratings for " + self.userid2name[id]) @@ -82,13 +80,10 @@ def 
userRatings(self, id, n): for (k, v) in ratings] # finally sort and return ratings.sort(key=lambda artistTuple: artistTuple[1], - reverse = True) + reverse=True) ratings = ratings[:n] for rating in ratings: print("%s\t%i" % (rating[0], rating[1])) - - - def loadBookDB(self, path=''): """loads the BX book dataset. Path is where the BX files are @@ -101,7 +96,7 @@ def loadBookDB(self, path=''): f = codecs.open(path + "BX-Book-Ratings.csv", 'r', 'utf8') for line in f: i += 1 - #separate line into fields + # separate line into fields fields = line.split(';') user = fields[0].strip('"') book = fields[1].strip('"') @@ -120,7 +115,7 @@ def loadBookDB(self, path=''): f = codecs.open(path + "BX-Books.csv", 'r', 'utf8') for line in f: i += 1 - #separate line into fields + # separate line into fields fields = line.split(';') isbn = fields[0].strip('"') title = fields[1].strip('"') @@ -135,8 +130,8 @@ def loadBookDB(self, path=''): f = codecs.open(path + "BX-Users.csv", 'r', 'utf8') for line in f: i += 1 - #print(line) - #separate line into fields + # print(line) + # separate line into fields fields = line.split(';') userid = fields[0].strip('"') location = fields[1].strip('"') @@ -152,8 +147,7 @@ def loadBookDB(self, path=''): self.username2id[location] = userid f.close() print(i) - - + def pearson(self, rating1, rating2): sum_xy = 0 sum_x = 0 @@ -181,7 +175,6 @@ def pearson(self, rating1, rating2): else: return (sum_xy - (sum_x * sum_y) / n) / denominator - def computeNearestNeighbor(self, username): """creates a sorted list of users based on their distance to username""" @@ -197,46 +190,45 @@ def computeNearestNeighbor(self, username): return distances def recommend(self, user): - """Give list of recommendations""" - recommendations = {} - # first get list of users ordered by nearness - nearest = self.computeNearestNeighbor(user) - # - # now get the ratings for the user - # - userRatings = self.data[user] - # - # determine the total distance - totalDistance = 0.0 - for i in range(self.k): - totalDistance += nearest[i][1] - # now iterate through the k nearest neighbors - # accumulating their ratings - for i in range(self.k): - # compute slice of pie - weight = nearest[i][1] / totalDistance - # get the name of the person - name = nearest[i][0] - # get the ratings for this person - neighborRatings = self.data[name] - # get the name of the person - # now find bands neighbor rated that user didn't - for artist in neighborRatings: - if not artist in userRatings: - if artist not in recommendations: - recommendations[artist] = (neighborRatings[artist] - * weight) - else: - recommendations[artist] = (recommendations[artist] - + neighborRatings[artist] - * weight) - # now make list from dictionary - recommendations = list(recommendations.items()) - recommendations = [(self.convertProductID2name(k), v) - for (k, v) in recommendations] - # finally sort and return - recommendations.sort(key=lambda artistTuple: artistTuple[1], - reverse = True) - # Return the first n items - return recommendations[:self.n] - + """Give list of recommendations""" + recommendations = {} + # first get list of users ordered by nearness + nearest = self.computeNearestNeighbor(user) + # + # now get the ratings for the user + # + userRatings = self.data[user] + # + # determine the total distance + totalDistance = 0.0 + for i in range(self.k): + totalDistance += nearest[i][1] + # now iterate through the k nearest neighbors + # accumulating their ratings + for i in range(self.k): + # compute slice of pie + weight = nearest[i][1] / 
totalDistance
+            # get the name of the person
+            name = nearest[i][0]
+            # get the ratings for this person
+            neighborRatings = self.data[name]
+            # now find bands the neighbor rated
+            # that this user didn't
+            for artist in neighborRatings:
+                if artist not in userRatings:
+                    if artist not in recommendations:
+                        recommendations[artist] = (neighborRatings[artist]
+                                                   * weight)
+                    else:
+                        recommendations[artist] = (recommendations[artist]
+                                                   + neighborRatings[artist]
+                                                   * weight)
+        # now make list from dictionary
+        recommendations = list(recommendations.items())
+        recommendations = [(self.convertProductID2name(k), v)
+                           for (k, v) in recommendations]
+        # finally sort and return
+        recommendations.sort(key=lambda artistTuple: artistTuple[1],
+                             reverse=True)
+        # Return the first n items
+        return recommendations[:self.n]
diff --git a/ch3/cosineSimilarity.py b/ch3/cosineSimilarity.py
index 5c34140..92365a4 100644
--- a/ch3/cosineSimilarity.py
+++ b/ch3/cosineSimilarity.py
@@ -1,400 +1,426 @@
-import codecs 
+import codecs
 from math import sqrt
 
-users2 = {"Amy": {"Taylor Swift": 4, "PSY": 3, "Whitney Houston": 4},
-          "Ben": {"Taylor Swift": 5, "PSY": 2},
-          "Clara": {"PSY": 3.5, "Whitney Houston": 4},
-          "Daisy": {"Taylor Swift": 5, "Whitney Houston": 3}}
+users2 = {
+    "Amy": {"Taylor Swift": 4, "PSY": 3, "Whitney Houston": 4},
+    "Ben": {"Taylor Swift": 5, "PSY": 2},
+    "Clara": {"PSY": 3.5, "Whitney Houston": 4},
+    "Daisy": {"Taylor Swift": 5, "Whitney Houston": 3}
+}
 
-users = {"Angelica": {"Blues Traveler": 3.5, "Broken Bells": 2.0,
-                      "Norah Jones": 4.5, "Phoenix": 5.0,
-                      "Slightly Stoopid": 1.5, "The Strokes": 2.5,
-                      "Vampire Weekend": 2.0},
-         "Bill":{"Blues Traveler": 2.0, "Broken Bells": 3.5,
-                 "Deadmau5": 4.0, "Phoenix": 2.0,
-                 "Slightly Stoopid": 3.5, "Vampire Weekend": 3.0},
-         "Chan": {"Blues Traveler": 5.0, "Broken Bells": 1.0,
-                  "Deadmau5": 1.0, "Norah Jones": 3.0,
-                  "Phoenix": 5, "Slightly Stoopid": 1.0},
-         "Dan": {"Blues Traveler": 3.0, "Broken Bells": 4.0,
-                 "Deadmau5": 4.5, "Phoenix": 3.0,
-                 "Slightly Stoopid": 4.5, "The Strokes": 4.0,
-                 "Vampire Weekend": 2.0},
-         "Hailey": {"Broken Bells": 4.0, "Deadmau5": 1.0,
-                    "Norah Jones": 4.0, "The Strokes": 4.0,
-                    "Vampire Weekend": 1.0},
-         "Jordyn": {"Broken Bells": 4.5, "Deadmau5": 4.0,
-                    "Norah Jones": 5.0, "Phoenix": 5.0,
-                    "Slightly Stoopid": 4.5, "The Strokes": 4.0,
-                    "Vampire Weekend": 4.0},
-         "Sam": {"Blues Traveler": 5.0, "Broken Bells": 2.0,
-                 "Norah Jones": 3.0, "Phoenix": 5.0,
-                 "Slightly Stoopid": 4.0, "The Strokes": 5.0},
-         "Veronica": {"Blues Traveler": 3.0, "Norah Jones": 5.0,
-                      "Phoenix": 4.0, "Slightly Stoopid": 2.5,
-                      "The Strokes": 3.0}
-        }
+users = {
+    "Angelica": {
+        "Blues Traveler": 3.5, "Broken Bells": 2.0, "Norah Jones": 4.5,
+        "Phoenix": 5.0, "Slightly Stoopid": 1.5, "The Strokes": 2.5,
+        "Vampire Weekend": 2.0
+    },
+    "Bill": {
+        "Blues Traveler": 2.0, "Broken Bells": 3.5, "Deadmau5": 4.0,
+        "Phoenix": 2.0, "Slightly Stoopid": 3.5, "Vampire Weekend": 3.0
+    },
+    "Chan": {
+        "Blues Traveler": 5.0, "Broken Bells": 1.0, "Deadmau5": 1.0,
+        "Norah Jones": 3.0, "Phoenix": 5, "Slightly Stoopid": 1.0
+    },
+    "Dan": {
+        "Blues Traveler": 3.0, "Broken Bells": 4.0, "Deadmau5": 4.5,
+        "Phoenix": 3.0, "Slightly Stoopid": 4.5, "The Strokes": 4.0,
+        "Vampire Weekend": 2.0
+    },
+    "Hailey": {
+        "Broken Bells": 4.0, "Deadmau5": 1.0, "Norah Jones": 4.0,
+        "The Strokes": 4.0, "Vampire Weekend": 1.0
+    },
+    "Jordyn": {
+        "Broken Bells": 4.5, "Deadmau5": 4.0, "Norah Jones": 5.0,
+        "Phoenix": 5.0, "Slightly Stoopid": 4.5, "The Strokes": 4.0,
+ "Vampire Weekend": 4.0 + }, + "Sam": { + "Blues Traveler": 5.0, "Broken Bells": 2.0, "Norah Jones": 3.0, + "Phoenix": 5.0, "Slightly Stoopid": 4.0, "The Strokes": 5.0 + }, + "Veronica": { + "Blues Traveler": 3.0, "Norah Jones": 5.0, "Phoenix": 4.0, + "Slightly Stoopid": 2.5, "The Strokes": 3.0 + } +} -users3 = {"David": {"Imagine Dragons": 3, "Daft Punk": 5, - "Lorde": 4, "Fall Out Boy": 1}, - "Matt": {"Imagine Dragons": 3, "Daft Punk": 4, - "Lorde": 4, "Fall Out Boy": 1}, - "Ben": {"Kacey Musgraves": 4, "Imagine Dragons": 3, - "Lorde": 3, "Fall Out Boy": 1}, - "Chris": {"Kacey Musgraves": 4, "Imagine Dragons": 4, - "Daft Punk": 4, "Lorde": 3, "Fall Out Boy": 1}, - "Tori": {"Kacey Musgraves": 5, "Imagine Dragons": 4, - "Daft Punk": 5, "Fall Out Boy": 3}} +users3 = { + "David": { + "Imagine Dragons": 3, "Daft Punk": 5, "Lorde": 4, "Fall Out Boy": 1 + }, + "Matt": { + "Imagine Dragons": 3, "Daft Punk": 4, "Lorde": 4, "Fall Out Boy": 1 + }, + "Ben": { + "Kacey Musgraves": 4, "Imagine Dragons": 3, "Lorde": 3, + "Fall Out Boy": 1 + }, + "Chris": { + "Kacey Musgraves": 4, "Imagine Dragons": 4, "Daft Punk": 4, + "Lorde": 3, "Fall Out Boy": 1 + }, + "Tori": { + "Kacey Musgraves": 5, "Imagine Dragons": 4, "Daft Punk": 5, + "Fall Out Boy": 3 + } +} -def computeUserAverages(users): - results = {} - for (key, ratings) in users.items(): - results[key] = float(sum(ratings.values())) / len(ratings.values()) - return results -def computeSimilarity(band1, band2, userRatings): - averages = {} - for (key, ratings) in userRatings.items(): - averages[key] = (float(sum(ratings.values())) - / len(ratings.values())) +def computeUserAverages(users): + results = {} + for (key, ratings) in users.items(): + results[key] = float(sum(ratings.values())) / len(ratings.values()) + return results - num = 0 # numerator - dem1 = 0 # first half of denominator - dem2 = 0 - for (user, ratings) in userRatings.items(): - if band1 in ratings and band2 in ratings: - avg = averages[user] - num += (ratings[band1] - avg) * (ratings[band2] - avg) - dem1 += (ratings[band1] - avg)**2 - dem2 += (ratings[band2] - avg)**2 - return num / (sqrt(dem1) * sqrt(dem2)) -class recommender: +def computeSimilarity(band1, band2, userRatings): + averages = {} + for (key, ratings) in userRatings.items(): + averages[key] = ( + float(sum(ratings.values())) / + len(ratings.values()) + ) - def __init__(self, data, k=1, metric='pearson', n=5): - """ initialize recommender - currently, if data is dictionary the recommender is initialized - to it. 
- For all other data types of data, no initialization occurs - k is the k value for k nearest neighbor - metric is which distance formula to use - n is the maximum number of recommendations to make""" - self.k = k - self.n = n - self.username2id = {} - self.userid2name = {} - self.productid2name = {} - # - # The following two variables are used for Slope One - # - self.frequencies = {} - self.deviations = {} - # for some reason I want to save the name of the metric - self.metric = metric - if self.metric == 'pearson': - self.fn = self.pearson - # - # if data is dictionary set recommender data to it - # - if type(data).__name__ == 'dict': - self.data = data + num = 0 # numerator + dem1 = 0 # first half of denominator + dem2 = 0 + for (user, ratings) in userRatings.items(): + if band1 in ratings and band2 in ratings: + avg = averages[user] + num += (ratings[band1] - avg) * (ratings[band2] - avg) + dem1 += (ratings[band1] - avg)**2 + dem2 += (ratings[band2] - avg)**2 + return num / (sqrt(dem1) * sqrt(dem2)) - def convertProductID2name(self, id): - """Given product id number return product name""" - if id in self.productid2name: - return self.productid2name[id] - else: - return id +class recommender: - def userRatings(self, id, n): - """Return n top ratings for user with id""" - print ("Ratings for " + self.userid2name[id]) - ratings = self.data[id] - print(len(ratings)) - ratings = list(ratings.items())[:n] - ratings = [(self.convertProductID2name(k), v) - for (k, v) in ratings] - # finally sort and return - ratings.sort(key=lambda artistTuple: artistTuple[1], - reverse = True) - for rating in ratings: - print("%s\t%i" % (rating[0], rating[1])) + def __init__(self, data, k=1, metric='pearson', n=5): + """ initialize recommender + currently, if data is dictionary the recommender is initialized + to it. 
+ For all other data types of data, no initialization occurs + k is the k value for k nearest neighbor + metric is which distance formula to use + n is the maximum number of recommendations to make""" + self.k = k + self.n = n + self.username2id = {} + self.userid2name = {} + self.productid2name = {} + # + # The following two variables are used for Slope One + # + self.frequencies = {} + self.deviations = {} + # for some reason I want to save the name of the metric + self.metric = metric + if self.metric == 'pearson': + self.fn = self.pearson + # + # if data is dictionary set recommender data to it + # + if type(data).__name__ == 'dict': + self.data = data + def convertProductID2name(self, id): + """Given product id number return product name""" + if id in self.productid2name: + return self.productid2name[id] + else: + return id - def showUserTopItems(self, user, n): - """ show top n items for user""" - items = list(self.data[user].items()) - items.sort(key=lambda itemTuple: itemTuple[1], reverse=True) - for i in range(n): - print("%s\t%i" % (self.convertProductID2name(items[i][0]), - items[i][1])) - - def loadMovieLens(self, path=''): - self.data = {} - # - # first load movie ratings - # - i = 0 - # - # First load book ratings into self.data - # - #f = codecs.open(path + "u.data", 'r', 'utf8') - f = codecs.open(path + "u.data", 'r', 'ascii') - # f = open(path + "u.data") - for line in f: - i += 1 - #separate line into fields - fields = line.split('\t') - user = fields[0] - movie = fields[1] - rating = int(fields[2].strip().strip('"')) - if user in self.data: - currentRatings = self.data[user] - else: - currentRatings = {} - currentRatings[movie] = rating - self.data[user] = currentRatings - f.close() - # - # Now load movie into self.productid2name - # the file u.item contains movie id, title, release date among - # other fields - # - #f = codecs.open(path + "u.item", 'r', 'utf8') - f = codecs.open(path + "u.item", 'r', 'iso8859-1', 'ignore') - #f = open(path + "u.item") - for line in f: - i += 1 - #separate line into fields - fields = line.split('|') - mid = fields[0].strip() - title = fields[1].strip() - self.productid2name[mid] = title - f.close() - # - # Now load user info into both self.userid2name - # and self.username2id - # - #f = codecs.open(path + "u.user", 'r', 'utf8') - f = open(path + "u.user") - for line in f: - i += 1 - fields = line.split('|') - userid = fields[0].strip('"') - self.userid2name[userid] = line - self.username2id[line] = userid - f.close() - print(i) + def userRatings(self, id, n): + """Return n top ratings for user with id""" + print ("Ratings for " + self.userid2name[id]) + ratings = self.data[id] + print(len(ratings)) + ratings = list(ratings.items())[:n] + ratings = [(self.convertProductID2name(k), v) + for (k, v) in ratings] + # finally sort and return + ratings.sort(key=lambda artistTuple: artistTuple[1], + reverse=True) + for rating in ratings: + print("%s\t%i" % (rating[0], rating[1])) + def showUserTopItems(self, user, n): + """ show top n items for user""" + items = list(self.data[user].items()) + items.sort(key=lambda itemTuple: itemTuple[1], reverse=True) + for i in range(n): + print("%s\t%i" % (self.convertProductID2name(items[i][0]), + items[i][1])) + def loadMovieLens(self, path=''): + self.data = {} + # + # first load movie ratings + # + i = 0 + # + # First load book ratings into self.data + # + # f = codecs.open(path + "u.data", 'r', 'utf8') + f = codecs.open(path + "u.data", 'r', 'ascii') + # f = open(path + "u.data") + for line in f: + i += 
1
+            # separate line into fields
+            fields = line.split('\t')
+            user = fields[0]
+            movie = fields[1]
+            rating = int(fields[2].strip().strip('"'))
+            if user in self.data:
+                currentRatings = self.data[user]
+            else:
+                currentRatings = {}
+            currentRatings[movie] = rating
+            self.data[user] = currentRatings
+        f.close()
+        #
+        # Now load movie into self.productid2name
+        # the file u.item contains movie id, title, release date among
+        # other fields
+        #
+        # f = codecs.open(path + "u.item", 'r', 'utf8')
+        f = codecs.open(path + "u.item", 'r', 'iso8859-1', 'ignore')
+        # f = open(path + "u.item")
+        for line in f:
+            i += 1
+            # separate line into fields
+            fields = line.split('|')
+            mid = fields[0].strip()
+            title = fields[1].strip()
+            self.productid2name[mid] = title
+        f.close()
+        #
+        # Now load user info into both self.userid2name
+        # and self.username2id
+        #
+        # f = codecs.open(path + "u.user", 'r', 'utf8')
+        f = open(path + "u.user")
+        for line in f:
+            i += 1
+            fields = line.split('|')
+            userid = fields[0].strip('"')
+            self.userid2name[userid] = line
+            self.username2id[line] = userid
+        f.close()
+        print(i)
 
+    def loadBookDB(self, path=''):
+        """loads the BX book dataset. Path is where the BX files are
+        located"""
+        self.data = {}
+        i = 0
+        #
+        # First load book ratings into self.data
+        #
+        f = codecs.open(path + "BX-Book-Ratings.csv", 'r', 'utf8')
+        for line in f:
+            i += 1
+            # separate line into fields
+            fields = line.split(';')
+            user = fields[0].strip('"')
+            book = fields[1].strip('"')
+            rating = int(fields[2].strip().strip('"'))
+            if rating > 5:
+                print("EXCEEDING ", rating)
+            if user in self.data:
+                currentRatings = self.data[user]
+            else:
+                currentRatings = {}
+            currentRatings[book] = rating
+            self.data[user] = currentRatings
+        f.close()
+        #
+        # Now load books into self.productid2name
+        # Books contains isbn, title, and author among other fields
+        #
+        f = codecs.open(path + "BX-Books.csv", 'r', 'utf8')
+        for line in f:
+            i += 1
+            # separate line into fields
+            fields = line.split(';')
+            isbn = fields[0].strip('"')
+            title = fields[1].strip('"')
+            author = fields[2].strip().strip('"')
+            title = title + ' by ' + author
+            self.productid2name[isbn] = title
+        f.close()
+        #
+        # Now load user info into both self.userid2name and
+        # self.username2id
+        #
+        f = codecs.open(path + "BX-Users.csv", 'r', 'utf8')
+        for line in f:
+            i += 1
+            # separate line into fields
+            fields = line.split(';')
+            userid = fields[0].strip('"')
+            location = fields[1].strip('"')
+            if len(fields) > 3:
+                age = fields[2].strip().strip('"')
+            else:
+                age = 'NULL'
+            if age != 'NULL':
+                value = location + ' (age: ' + age + ')'
+            else:
+                value = location
+            self.userid2name[userid] = value
+            self.username2id[location] = userid
+        f.close()
+        print(i)
 
-    def loadBookDB(self, path=''):
-        """loads the BX book dataset. 
Path is where the BX files are - located""" - self.data = {} - i = 0 - # - # First load book ratings into self.data - # - f = codecs.open(path + "u.data", 'r', 'utf8') - for line in f: - i += 1 - # separate line into fields - fields = line.split(';') - user = fields[0].strip('"') - book = fields[1].strip('"') - rating = int(fields[2].strip().strip('"')) - if rating > 5: - print("EXCEEDING ", rating) - if user in self.data: - currentRatings = self.data[user] - else: - currentRatings = {} - currentRatings[book] = rating - self.data[user] = currentRatings - f.close() - # - # Now load books into self.productid2name - # Books contains isbn, title, and author among other fields - # - f = codecs.open(path + "BX-Books.csv", 'r', 'utf8') - for line in f: - i += 1 - # separate line into fields - fields = line.split(';') - isbn = fields[0].strip('"') - title = fields[1].strip('"') - author = fields[2].strip().strip('"') - title = title + ' by ' + author - self.productid2name[isbn] = title - f.close() - # - # Now load user info into both self.userid2name and - # self.username2id - # - f = codecs.open(path + "BX-Users.csv", 'r', 'utf8') - for line in f: - i += 1 - # separate line into fields - fields = line.split(';') - userid = fields[0].strip('"') - location = fields[1].strip('"') - if len(fields) > 3: - age = fields[2].strip().strip('"') - else: - age = 'NULL' - if age != 'NULL': - value = location + ' (age: ' + age + ')' - else: - value = location - self.userid2name[userid] = value - self.username2id[location] = userid - f.close() - print(i) - - - def computeDeviations(self): - # for each person in the data: - # get their ratings - for ratings in self.data.values(): - # for each item & rating in that set of ratings: - for (item, rating) in ratings.items(): - self.frequencies.setdefault(item, {}) - self.deviations.setdefault(item, {}) - # for each item2 & rating2 in that set of ratings: - for (item2, rating2) in ratings.items(): - if item != item2: - # add the difference between the ratings to our - # computation - self.frequencies[item].setdefault(item2, 0) - self.deviations[item].setdefault(item2, 0.0) - self.frequencies[item][item2] += 1 - self.deviations[item][item2] += rating - rating2 - - for (item, ratings) in self.deviations.items(): - for item2 in ratings: - ratings[item2] /= self.frequencies[item][item2] + def computeDeviations(self): + # for each person in the data: + # get their ratings + for ratings in self.data.values(): + # for each item & rating in that set of ratings: + for (item, rating) in ratings.items(): + self.frequencies.setdefault(item, {}) + self.deviations.setdefault(item, {}) + # for each item2 & rating2 in that set of ratings: + for (item2, rating2) in ratings.items(): + if item != item2: + # add the difference between the ratings to our + # computation + self.frequencies[item].setdefault(item2, 0) + self.deviations[item].setdefault(item2, 0.0) + self.frequencies[item][item2] += 1 + self.deviations[item][item2] += rating - rating2 + for (item, ratings) in self.deviations.items(): + for item2 in ratings: + ratings[item2] /= self.frequencies[item][item2] - def slopeOneRecommendations(self, userRatings): - recommendations = {} - frequencies = {} - # for every item and rating in the user's recommendations - for (userItem, userRating) in userRatings.items(): - # for every item in our dataset that the user didn't rate - for (diffItem, diffRatings) in self.deviations.items(): - if diffItem not in userRatings and \ - userItem in self.deviations[diffItem]: - freq = 
self.frequencies[diffItem][userItem] - recommendations.setdefault(diffItem, 0.0) - frequencies.setdefault(diffItem, 0) - # add to the running sum representing the numerator - # of the formula - recommendations[diffItem] += (diffRatings[userItem] + - userRating) * freq - # keep a running sum of the frequency of diffitem - frequencies[diffItem] += freq - recommendations = [(self.convertProductID2name(k), - v / frequencies[k]) - for (k, v) in recommendations.items()] - # finally sort and return - recommendations.sort(key=lambda artistTuple: artistTuple[1], - reverse = True) - # I am only going to return the first 50 recommendations - return recommendations[:50] - - def pearson(self, rating1, rating2): - sum_xy = 0 - sum_x = 0 - sum_y = 0 - sum_x2 = 0 - sum_y2 = 0 - n = 0 - for key in rating1: - if key in rating2: - n += 1 - x = rating1[key] - y = rating2[key] - sum_xy += x * y - sum_x += x - sum_y += y - sum_x2 += pow(x, 2) - sum_y2 += pow(y, 2) - if n == 0: - return 0 - # now compute denominator - denominator = sqrt(sum_x2 - pow(sum_x, 2) / n) * \ - sqrt(sum_y2 - pow(sum_y, 2) / n) - if denominator == 0: - return 0 - else: - return (sum_xy - (sum_x * sum_y) / n) / denominator + def slopeOneRecommendations(self, userRatings): + recommendations = {} + frequencies = {} + # for every item and rating in the user's recommendations + for (userItem, userRating) in userRatings.items(): + # for every item in our dataset that the user didn't rate + for (diffItem, diffRatings) in self.deviations.items(): + if diffItem not in userRatings and \ + userItem in self.deviations[diffItem]: + freq = self.frequencies[diffItem][userItem] + recommendations.setdefault(diffItem, 0.0) + frequencies.setdefault(diffItem, 0) + # add to the running sum representing the numerator + # of the formula + recommendations[diffItem] += ( + (diffRatings[userItem] + userRating) * freq + ) + # keep a running sum of the frequency of diffitem + frequencies[diffItem] += freq + recommendations = [(self.convertProductID2name(k), + v / frequencies[k]) + for (k, v) in recommendations.items()] + # finally sort and return + recommendations.sort(key=lambda artistTuple: artistTuple[1], + reverse=True) + # I am only going to return the first 50 recommendations + return recommendations[:50] + def pearson(self, rating1, rating2): + sum_xy = 0 + sum_x = 0 + sum_y = 0 + sum_x2 = 0 + sum_y2 = 0 + n = 0 + for key in rating1: + if key in rating2: + n += 1 + x = rating1[key] + y = rating2[key] + sum_xy += x * y + sum_x += x + sum_y += y + sum_x2 += pow(x, 2) + sum_y2 += pow(y, 2) + if n == 0: + return 0 + # now compute denominator + denominator = ( + sqrt(sum_x2 - pow(sum_x, 2) / n) * + sqrt(sum_y2 - pow(sum_y, 2) / n) + ) + if denominator == 0: + return 0 + else: + return (sum_xy - (sum_x * sum_y) / n) / denominator - def computeNearestNeighbor(self, username): - """creates a sorted list of users based on their distance - to username""" - distances = [] - for instance in self.data: - if instance != username: - distance = self.fn(self.data[username], - self.data[instance]) - distances.append((instance, distance)) - # sort based on distance -- closest first - distances.sort(key=lambda artistTuple: artistTuple[1], - reverse=True) - return distances + def computeNearestNeighbor(self, username): + """creates a sorted list of users based on their distance + to username""" + distances = [] + for instance in self.data: + if instance != username: + distance = self.fn(self.data[username], + self.data[instance]) + distances.append((instance, distance)) + # sort 
based on distance -- closest first
+        distances.sort(key=lambda artistTuple: artistTuple[1],
+                       reverse=True)
+        return distances
 
-    def recommend(self, user):
-        """Give list of recommendations"""
-        recommendations = {}
-        # first get list of users ordered by nearness
-        nearest = self.computeNearestNeighbor(user)
-        #
-        # now get the ratings for the user
-        #
-        userRatings = self.data[user]
-        #
-        # determine the total distance
-        totalDistance = 0.0
-        for i in range(self.k):
-            totalDistance += nearest[i][1]
-        # now iterate through the k nearest neighbors
-        # accumulating their ratings
-        for i in range(self.k):
-            # compute slice of pie
-            weight = nearest[i][1] / totalDistance
-            # get the name of the person
-            name = nearest[i][0]
-            # get the ratings for this person
-            neighborRatings = self.data[name]
-            # get the name of the person
-            # now find bands neighbor rated that user didn't
-            for artist in neighborRatings:
-                if not artist in userRatings:
-                    if artist not in recommendations:
-                        recommendations[artist] = neighborRatings[artist] * \
-                                                  weight
-                    else:
-                        recommendations[artist] = recommendations[artist] + \
-                                                  neighborRatings[artist] * \
-                                                  weight
-        # now make list from dictionary and only get the first n items
-        recommendations = list(recommendations.items())[:self.n]
-        recommendations = [(self.convertProductID2name(k), v)
-                           for (k, v) in recommendations]
-        # finally sort and return
-        recommendations.sort(key=lambda artistTuple: artistTuple[1],
-                             reverse = True)
-        return recommendations
+    def recommend(self, user):
+        """Give list of recommendations"""
+        recommendations = {}
+        # first get list of users ordered by nearness
+        nearest = self.computeNearestNeighbor(user)
+        #
+        # now get the ratings for the user
+        #
+        userRatings = self.data[user]
+        #
+        # determine the total distance
+        totalDistance = 0.0
+        for i in range(self.k):
+            totalDistance += nearest[i][1]
+        # now iterate through the k nearest neighbors
+        # accumulating their ratings
+        for i in range(self.k):
+            # compute slice of pie
+            weight = nearest[i][1] / totalDistance
+            # get the name of the person
+            name = nearest[i][0]
+            # get the ratings for this person
+            neighborRatings = self.data[name]
+            # now find bands the neighbor rated
+            # that this user didn't
+            for artist in neighborRatings:
+                if artist not in userRatings:
+                    if artist not in recommendations:
+                        recommendations[artist] = (
+                            neighborRatings[artist] * weight
+                        )
+                    else:
+                        recommendations[artist] = (
+                            recommendations[artist] +
+                            neighborRatings[artist] * weight
+                        )
+        # now make list from dictionary
+        recommendations = list(recommendations.items())
+        recommendations = [(self.convertProductID2name(k), v)
+                           for (k, v) in recommendations]
+        # finally sort, then return only the first n items
+        recommendations.sort(key=lambda artistTuple: artistTuple[1],
+                             reverse=True)
+        return recommendations[:self.n]
 
-bands = ['Kacey Musgraves', 'Daft Punk', 'Imagine Dragons', 'Lorde', 'Fall Out Boy']
+bands = [
+    'Kacey Musgraves', 'Daft Punk', 'Imagine Dragons',
+    'Lorde', 'Fall Out Boy'
+]
 for b in bands:
-   for x in bands:
-      print("%20s%20s%10.5f" % (b, x, computeSimilarity(b, x, users3)))
+    for x in bands:
+        print("%20s%20s%10.5f" % (b, x, computeSimilarity(b, x, users3)))
 print (computeUserAverages(users3))
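
A quick check of computeSimilarity above, which implements the adjusted
cosine similarity: each rating is centered on its user's average before the
cosine is taken, so heavy raters and light raters become comparable. The
names below (toy, u1, u2, A, B) are made up for illustration, and the
arithmetic can be verified by hand against the function:

    # minimal sketch: two users who rate bands A and B in exact opposition
    toy = {"u1": {"A": 5, "B": 1},
           "u2": {"A": 1, "B": 5}}
    # both user averages are 3, so centered ratings are (+2, -2) and (-2, +2)
    # numerator   = (2)(-2) + (-2)(2) = -8
    # denominator = sqrt(8) * sqrt(8) =  8
    print(computeSimilarity("A", "B", toy))  # -1.0: perfectly opposed bands

diff --git a/ch3/recommender3.py b/ch3/recommender3.py
index 31093a0..2b54e57 100644
--- a/ch3/recommender3.py
+++ b/ch3/recommender3.py
@@ -1,360 +1,365 @@
-import codecs 
+import codecs
 from math import sqrt
 
-users2 = {"Amy": {"Taylor Swift": 4, "PSY": 3, "Whitney 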
Houston": 4}, - "Ben": {"Taylor Swift": 5, "PSY": 2}, - "Clara": {"PSY": 3.5, "Whitney Houston": 4}, - "Daisy": {"Taylor Swift": 5, "Whitney Houston": 3}} - -users = {"Angelica": {"Blues Traveler": 3.5, "Broken Bells": 2.0, - "Norah Jones": 4.5, "Phoenix": 5.0, - "Slightly Stoopid": 1.5, "The Strokes": 2.5, - "Vampire Weekend": 2.0}, - "Bill":{"Blues Traveler": 2.0, "Broken Bells": 3.5, - "Deadmau5": 4.0, "Phoenix": 2.0, - "Slightly Stoopid": 3.5, "Vampire Weekend": 3.0}, - "Chan": {"Blues Traveler": 5.0, "Broken Bells": 1.0, - "Deadmau5": 1.0, "Norah Jones": 3.0, - "Phoenix": 5, "Slightly Stoopid": 1.0}, - "Dan": {"Blues Traveler": 3.0, "Broken Bells": 4.0, - "Deadmau5": 4.5, "Phoenix": 3.0, - "Slightly Stoopid": 4.5, "The Strokes": 4.0, - "Vampire Weekend": 2.0}, - "Hailey": {"Broken Bells": 4.0, "Deadmau5": 1.0, - "Norah Jones": 4.0, "The Strokes": 4.0, - "Vampire Weekend": 1.0}, - "Jordyn": {"Broken Bells": 4.5, "Deadmau5": 4.0, - "Norah Jones": 5.0, "Phoenix": 5.0, - "Slightly Stoopid": 4.5, "The Strokes": 4.0, - "Vampire Weekend": 4.0}, - "Sam": {"Blues Traveler": 5.0, "Broken Bells": 2.0, - "Norah Jones": 3.0, "Phoenix": 5.0, - "Slightly Stoopid": 4.0, "The Strokes": 5.0}, - "Veronica": {"Blues Traveler": 3.0, "Norah Jones": 5.0, - "Phoenix": 4.0, "Slightly Stoopid": 2.5, - "The Strokes": 3.0} - } +users2 = { + "Amy": {"Taylor Swift": 4, "PSY": 3, "Whitney Houston": 4}, + "Ben": {"Taylor Swift": 5, "PSY": 2}, + "Clara": {"PSY": 3.5, "Whitney Houston": 4}, + "Daisy": {"Taylor Swift": 5, "Whitney Houston": 3} +} +users = { + "Angelica": { + "Blues Traveler": 3.5, "Broken Bells": 2.0, "Norah Jones": 4.5, + "Phoenix": 5.0, "Slightly Stoopid": 1.5, "The Strokes": 2.5, + "Vampire Weekend": 2.0 + }, + "Bill": { + "Blues Traveler": 2.0, "Broken Bells": 3.5, "Deadmau5": 4.0, + "Phoenix": 2.0, "Slightly Stoopid": 3.5, "Vampire Weekend": 3.0 + }, + "Chan": { + "Blues Traveler": 5.0, "Broken Bells": 1.0, "Deadmau5": 1.0, + "Norah Jones": 3.0, "Phoenix": 5, "Slightly Stoopid": 1.0 + }, + "Dan": { + "Blues Traveler": 3.0, "Broken Bells": 4.0, "Deadmau5": 4.5, + "Phoenix": 3.0, "Slightly Stoopid": 4.5, "The Strokes": 4.0, + "Vampire Weekend": 2.0 + }, + "Hailey": { + "Broken Bells": 4.0, "Deadmau5": 1.0, "Norah Jones": 4.0, + "The Strokes": 4.0, "Vampire Weekend": 1.0 + }, + "Jordyn": { + "Broken Bells": 4.5, "Deadmau5": 4.0, "Norah Jones": 5.0, + "Phoenix": 5.0, "Slightly Stoopid": 4.5, "The Strokes": 4.0, + "Vampire Weekend": 4.0 + }, + "Sam": { + "Blues Traveler": 5.0, "Broken Bells": 2.0, "Norah Jones": 3.0, + "Phoenix": 5.0, "Slightly Stoopid": 4.0, "The Strokes": 5.0 + }, + "Veronica": { + "Blues Traveler": 3.0, "Norah Jones": 5.0, "Phoenix": 4.0, + "Slightly Stoopid": 2.5, "The Strokes": 3.0 + } +} class recommender: - def __init__(self, data, k=1, metric='pearson', n=5): - """ initialize recommender - currently, if data is dictionary the recommender is initialized - to it. 
- For all other data types of data, no initialization occurs - k is the k value for k nearest neighbor - metric is which distance formula to use - n is the maximum number of recommendations to make""" - self.k = k - self.n = n - self.username2id = {} - self.userid2name = {} - self.productid2name = {} - # - # The following two variables are used for Slope One - # - self.frequencies = {} - self.deviations = {} - # for some reason I want to save the name of the metric - self.metric = metric - if self.metric == 'pearson': - self.fn = self.pearson - # - # if data is dictionary set recommender data to it - # - if type(data).__name__ == 'dict': - self.data = data - - def convertProductID2name(self, id): - """Given product id number return product name""" - if id in self.productid2name: - return self.productid2name[id] - else: - return id - - - def userRatings(self, id, n): - """Return n top ratings for user with id""" - print ("Ratings for " + self.userid2name[id]) - ratings = self.data[id] - print(len(ratings)) - ratings = list(ratings.items())[:n] - ratings = [(self.convertProductID2name(k), v) - for (k, v) in ratings] - # finally sort and return - ratings.sort(key=lambda artistTuple: artistTuple[1], - reverse = True) - for rating in ratings: - print("%s\t%i" % (rating[0], rating[1])) + def __init__(self, data, k=1, metric='pearson', n=5): + """ initialize recommender + currently, if data is dictionary the recommender is initialized + to it. + For all other data types of data, no initialization occurs + k is the k value for k nearest neighbor + metric is which distance formula to use + n is the maximum number of recommendations to make""" + self.k = k + self.n = n + self.username2id = {} + self.userid2name = {} + self.productid2name = {} + # + # The following two variables are used for Slope One + # + self.frequencies = {} + self.deviations = {} + # for some reason I want to save the name of the metric + self.metric = metric + if self.metric == 'pearson': + self.fn = self.pearson + # + # if data is dictionary set recommender data to it + # + if type(data).__name__ == 'dict': + self.data = data + def convertProductID2name(self, id): + """Given product id number return product name""" + if id in self.productid2name: + return self.productid2name[id] + else: + return id - def showUserTopItems(self, user, n): - """ show top n items for user""" - items = list(self.data[user].items()) - items.sort(key=lambda itemTuple: itemTuple[1], reverse=True) - for i in range(n): - print("%s\t%i" % (self.convertProductID2name(items[i][0]), - items[i][1])) - - def loadMovieLens(self, path=''): - self.data = {} - # - # first load movie ratings - # - i = 0 - # - # First load book ratings into self.data - # - #f = codecs.open(path + "u.data", 'r', 'utf8') - f = codecs.open(path + "u.data", 'r', 'ascii') - # f = open(path + "u.data") - for line in f: - i += 1 - #separate line into fields - fields = line.split('\t') - user = fields[0] - movie = fields[1] - rating = int(fields[2].strip().strip('"')) - if user in self.data: - currentRatings = self.data[user] - else: - currentRatings = {} - currentRatings[movie] = rating - self.data[user] = currentRatings - f.close() - # - # Now load movie into self.productid2name - # the file u.item contains movie id, title, release date among - # other fields - # - #f = codecs.open(path + "u.item", 'r', 'utf8') - f = codecs.open(path + "u.item", 'r', 'iso8859-1', 'ignore') - #f = open(path + "u.item") - for line in f: - i += 1 - #separate line into fields - fields = line.split('|') - mid 
= fields[0].strip() - title = fields[1].strip() - self.productid2name[mid] = title - f.close() - # - # Now load user info into both self.userid2name - # and self.username2id - # - #f = codecs.open(path + "u.user", 'r', 'utf8') - f = open(path + "u.user") - for line in f: - i += 1 - fields = line.split('|') - userid = fields[0].strip('"') - self.userid2name[userid] = line - self.username2id[line] = userid - f.close() - print(i) - + def userRatings(self, id, n): + """Return n top ratings for user with id""" + print ("Ratings for " + self.userid2name[id]) + ratings = self.data[id] + print(len(ratings)) + ratings = list(ratings.items())[:n] + ratings = [(self.convertProductID2name(k), v) + for (k, v) in ratings] + # finally sort and return + ratings.sort(key=lambda artistTuple: artistTuple[1], + reverse=True) + for rating in ratings: + print("%s\t%i" % (rating[0], rating[1])) + def showUserTopItems(self, user, n): + """ show top n items for user""" + items = list(self.data[user].items()) + items.sort(key=lambda itemTuple: itemTuple[1], reverse=True) + for i in range(n): + print("%s\t%i" % (self.convertProductID2name(items[i][0]), + items[i][1])) + def loadMovieLens(self, path=''): + self.data = {} + # + # first load movie ratings + # + i = 0 + # + # First load book ratings into self.data + # + # f = codecs.open(path + "u.data", 'r', 'utf8') + f = codecs.open(path + "u.data", 'r', 'ascii') + # f = open(path + "u.data") + for line in f: + i += 1 + # separate line into fields + fields = line.split('\t') + user = fields[0] + movie = fields[1] + rating = int(fields[2].strip().strip('"')) + if user in self.data: + currentRatings = self.data[user] + else: + currentRatings = {} + currentRatings[movie] = rating + self.data[user] = currentRatings + f.close() + # + # Now load movie into self.productid2name + # the file u.item contains movie id, title, release date among + # other fields + # + # f = codecs.open(path + "u.item", 'r', 'utf8') + f = codecs.open(path + "u.item", 'r', 'iso8859-1', 'ignore') + # f = open(path + "u.item") + for line in f: + i += 1 + # separate line into fields + fields = line.split('|') + mid = fields[0].strip() + title = fields[1].strip() + self.productid2name[mid] = title + f.close() + # + # Now load user info into both self.userid2name + # and self.username2id + # + # f = codecs.open(path + "u.user", 'r', 'utf8') + f = open(path + "u.user") + for line in f: + i += 1 + fields = line.split('|') + userid = fields[0].strip('"') + self.userid2name[userid] = line + self.username2id[line] = userid + f.close() + print(i) - def loadBookDB(self, path=''): - """loads the BX book dataset. 
Path is where the BX files are - located""" - self.data = {} - i = 0 - # - # First load book ratings into self.data - # - f = codecs.open(path + "u.data", 'r', 'utf8') - for line in f: - i += 1 - # separate line into fields - fields = line.split(';') - user = fields[0].strip('"') - book = fields[1].strip('"') - rating = int(fields[2].strip().strip('"')) - if rating > 5: - print("EXCEEDING ", rating) - if user in self.data: - currentRatings = self.data[user] - else: - currentRatings = {} - currentRatings[book] = rating - self.data[user] = currentRatings - f.close() - # - # Now load books into self.productid2name - # Books contains isbn, title, and author among other fields - # - f = codecs.open(path + "BX-Books.csv", 'r', 'utf8') - for line in f: - i += 1 - # separate line into fields - fields = line.split(';') - isbn = fields[0].strip('"') - title = fields[1].strip('"') - author = fields[2].strip().strip('"') - title = title + ' by ' + author - self.productid2name[isbn] = title - f.close() - # - # Now load user info into both self.userid2name and - # self.username2id - # - f = codecs.open(path + "BX-Users.csv", 'r', 'utf8') - for line in f: - i += 1 - # separate line into fields - fields = line.split(';') - userid = fields[0].strip('"') - location = fields[1].strip('"') - if len(fields) > 3: - age = fields[2].strip().strip('"') - else: - age = 'NULL' - if age != 'NULL': - value = location + ' (age: ' + age + ')' - else: - value = location - self.userid2name[userid] = value - self.username2id[location] = userid - f.close() - print(i) - - - def computeDeviations(self): - # for each person in the data: - # get their ratings - for ratings in self.data.values(): - # for each item & rating in that set of ratings: - for (item, rating) in ratings.items(): - self.frequencies.setdefault(item, {}) - self.deviations.setdefault(item, {}) - # for each item2 & rating2 in that set of ratings: - for (item2, rating2) in ratings.items(): - if item != item2: - # add the difference between the ratings to our - # computation - self.frequencies[item].setdefault(item2, 0) - self.deviations[item].setdefault(item2, 0.0) - self.frequencies[item][item2] += 1 - self.deviations[item][item2] += rating - rating2 - - for (item, ratings) in self.deviations.items(): - for item2 in ratings: - ratings[item2] /= self.frequencies[item][item2] + def loadBookDB(self, path=''): + """loads the BX book dataset. 
Path is where the BX files are + located""" + self.data = {} + i = 0 + # + # First load book ratings into self.data + # + f = codecs.open(path + "u.data", 'r', 'utf8') + for line in f: + i += 1 + # separate line into fields + fields = line.split(';') + user = fields[0].strip('"') + book = fields[1].strip('"') + rating = int(fields[2].strip().strip('"')) + if rating > 5: + print("EXCEEDING ", rating) + if user in self.data: + currentRatings = self.data[user] + else: + currentRatings = {} + currentRatings[book] = rating + self.data[user] = currentRatings + f.close() + # + # Now load books into self.productid2name + # Books contains isbn, title, and author among other fields + # + f = codecs.open(path + "BX-Books.csv", 'r', 'utf8') + for line in f: + i += 1 + # separate line into fields + fields = line.split(';') + isbn = fields[0].strip('"') + title = fields[1].strip('"') + author = fields[2].strip().strip('"') + title = title + ' by ' + author + self.productid2name[isbn] = title + f.close() + # + # Now load user info into both self.userid2name and + # self.username2id + # + f = codecs.open(path + "BX-Users.csv", 'r', 'utf8') + for line in f: + i += 1 + # separate line into fields + fields = line.split(';') + userid = fields[0].strip('"') + location = fields[1].strip('"') + if len(fields) > 3: + age = fields[2].strip().strip('"') + else: + age = 'NULL' + if age != 'NULL': + value = location + ' (age: ' + age + ')' + else: + value = location + self.userid2name[userid] = value + self.username2id[location] = userid + f.close() + print(i) + def computeDeviations(self): + # for each person in the data: + # get their ratings + for ratings in self.data.values(): + # for each item & rating in that set of ratings: + for (item, rating) in ratings.items(): + self.frequencies.setdefault(item, {}) + self.deviations.setdefault(item, {}) + # for each item2 & rating2 in that set of ratings: + for (item2, rating2) in ratings.items(): + if item != item2: + # add the difference between the ratings to our + # computation + self.frequencies[item].setdefault(item2, 0) + self.deviations[item].setdefault(item2, 0.0) + self.frequencies[item][item2] += 1 + self.deviations[item][item2] += rating - rating2 - def slopeOneRecommendations(self, userRatings): - recommendations = {} - frequencies = {} - # for every item and rating in the user's recommendations - for (userItem, userRating) in userRatings.items(): - # for every item in our dataset that the user didn't rate - for (diffItem, diffRatings) in self.deviations.items(): - if diffItem not in userRatings and \ - userItem in self.deviations[diffItem]: - freq = self.frequencies[diffItem][userItem] - recommendations.setdefault(diffItem, 0.0) - frequencies.setdefault(diffItem, 0) - # add to the running sum representing the numerator - # of the formula - recommendations[diffItem] += (diffRatings[userItem] + - userRating) * freq - # keep a running sum of the frequency of diffitem - frequencies[diffItem] += freq - recommendations = [(self.convertProductID2name(k), - v / frequencies[k]) - for (k, v) in recommendations.items()] - # finally sort and return - recommendations.sort(key=lambda artistTuple: artistTuple[1], - reverse = True) - # I am only going to return the first 50 recommendations - return recommendations[:50] - - def pearson(self, rating1, rating2): - sum_xy = 0 - sum_x = 0 - sum_y = 0 - sum_x2 = 0 - sum_y2 = 0 - n = 0 - for key in rating1: - if key in rating2: - n += 1 - x = rating1[key] - y = rating2[key] - sum_xy += x * y - sum_x += x - sum_y += y - sum_x2 += 
pow(x, 2) - sum_y2 += pow(y, 2) - if n == 0: - return 0 - # now compute denominator - denominator = sqrt(sum_x2 - pow(sum_x, 2) / n) * \ - sqrt(sum_y2 - pow(sum_y, 2) / n) - if denominator == 0: - return 0 - else: - return (sum_xy - (sum_x * sum_y) / n) / denominator + for (item, ratings) in self.deviations.items(): + for item2 in ratings: + ratings[item2] /= self.frequencies[item][item2] + def slopeOneRecommendations(self, userRatings): + recommendations = {} + frequencies = {} + # for every item and rating in the user's recommendations + for (userItem, userRating) in userRatings.items(): + # for every item in our dataset that the user didn't rate + for (diffItem, diffRatings) in self.deviations.items(): + if diffItem not in userRatings and \ + userItem in self.deviations[diffItem]: + freq = self.frequencies[diffItem][userItem] + recommendations.setdefault(diffItem, 0.0) + frequencies.setdefault(diffItem, 0) + # add to the running sum representing the numerator + # of the formula + recommendations[diffItem] += (diffRatings[userItem] + + userRating) * freq + # keep a running sum of the frequency of diffitem + frequencies[diffItem] += freq + recommendations = [(self.convertProductID2name(k), + v / frequencies[k]) + for (k, v) in recommendations.items()] + # finally sort and return + recommendations.sort(key=lambda artistTuple: artistTuple[1], + reverse=True) + # I am only going to return the first 50 recommendations + return recommendations[:50] - def computeNearestNeighbor(self, username): - """creates a sorted list of users based on their distance - to username""" - distances = [] - for instance in self.data: - if instance != username: - distance = self.fn(self.data[username], - self.data[instance]) - distances.append((instance, distance)) - # sort based on distance -- closest first - distances.sort(key=lambda artistTuple: artistTuple[1], - reverse=True) - return distances + def pearson(self, rating1, rating2): + sum_xy = 0 + sum_x = 0 + sum_y = 0 + sum_x2 = 0 + sum_y2 = 0 + n = 0 + for key in rating1: + if key in rating2: + n += 1 + x = rating1[key] + y = rating2[key] + sum_xy += x * y + sum_x += x + sum_y += y + sum_x2 += pow(x, 2) + sum_y2 += pow(y, 2) + if n == 0: + return 0 + # now compute denominator + denominator = ( + sqrt(sum_x2 - pow(sum_x, 2) / n) * + sqrt(sum_y2 - pow(sum_y, 2) / n) + ) + if denominator == 0: + return 0 + else: + return (sum_xy - (sum_x * sum_y) / n) / denominator - def recommend(self, user): - """Give list of recommendations""" - recommendations = {} - # first get list of users ordered by nearness - nearest = self.computeNearestNeighbor(user) - # - # now get the ratings for the user - # - userRatings = self.data[user] - # - # determine the total distance - totalDistance = 0.0 - for i in range(self.k): - totalDistance += nearest[i][1] - # now iterate through the k nearest neighbors - # accumulating their ratings - for i in range(self.k): - # compute slice of pie - weight = nearest[i][1] / totalDistance - # get the name of the person - name = nearest[i][0] - # get the ratings for this person - neighborRatings = self.data[name] - # get the name of the person - # now find bands neighbor rated that user didn't - for artist in neighborRatings: - if not artist in userRatings: - if artist not in recommendations: - recommendations[artist] = neighborRatings[artist] * \ - weight - else: - recommendations[artist] = recommendations[artist] + \ - neighborRatings[artist] * \ - weight - # now make list from dictionary and only get the first n items - recommendations = 
list(recommendations.items())[:self.n]
-        recommendations = [(self.convertProductID2name(k), v)
-                           for (k, v) in recommendations]
-        # finally sort and return
-        recommendations.sort(key=lambda artistTuple: artistTuple[1],
-                             reverse = True)
-        return recommendations
+    def computeNearestNeighbor(self, username):
+        """creates a sorted list of users based on their distance
+        to username"""
+        distances = []
+        for instance in self.data:
+            if instance != username:
+                distance = self.fn(self.data[username],
+                                   self.data[instance])
+                distances.append((instance, distance))
+        # sort based on distance -- closest first
+        distances.sort(key=lambda artistTuple: artistTuple[1],
+                       reverse=True)
+        return distances
 
+    def recommend(self, user):
+        """Give list of recommendations"""
+        recommendations = {}
+        # first get list of users ordered by nearness
+        nearest = self.computeNearestNeighbor(user)
+        #
+        # now get the ratings for the user
+        #
+        userRatings = self.data[user]
+        #
+        # determine the total distance
+        totalDistance = 0.0
+        for i in range(self.k):
+            totalDistance += nearest[i][1]
+        # now iterate through the k nearest neighbors
+        # accumulating their ratings
+        for i in range(self.k):
+            # compute slice of pie
+            weight = nearest[i][1] / totalDistance
+            # get the name of the person
+            name = nearest[i][0]
+            # get the ratings for this person
+            neighborRatings = self.data[name]
+            # now find bands the neighbor rated
+            # that this user didn't
+            for artist in neighborRatings:
+                if artist not in userRatings:
+                    if artist not in recommendations:
+                        recommendations[artist] = (
+                            neighborRatings[artist] * weight
+                        )
+                    else:
+                        recommendations[artist] = (
+                            recommendations[artist] +
+                            neighborRatings[artist] * weight
+                        )
+        # now make list from dictionary
+        recommendations = list(recommendations.items())
+        recommendations = [(self.convertProductID2name(k), v)
+                           for (k, v) in recommendations]
+        # finally sort, then return only the first n items
+        recommendations.sort(key=lambda artistTuple: artistTuple[1],
+                             reverse=True)
+        return recommendations[:self.n]
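
recommender3.py keeps the Slope One methods (computeDeviations and
slopeOneRecommendations). A minimal usage sketch, using the users2
dictionary defined at the top of the file; the numbers follow directly
from the code and can be checked by hand:

    r = recommender(users2)
    r.computeDeviations()
    # dev(Whitney Houston, Taylor Swift) = ((4 - 4) + (3 - 5)) / 2 = -1.0
    # dev(Whitney Houston, PSY)          = ((4 - 3) + (4 - 3.5)) / 2 = 0.75
    print(r.slopeOneRecommendations(users2['Ben']))
    # [('Whitney Houston', 3.375)]
    # = ((-1.0 + 5) * 2 + (0.75 + 2) * 2) / (2 + 2)

diff --git a/ch4/ch4-filteringdata.py b/ch4/ch4-filteringdata.py
index a6d7208..97316c4 100644
--- a/ch4/ch4-filteringdata.py
+++ b/ch4/ch4-filteringdata.py
@@ -2,7 +2,8 @@
 # ch4-filteringdata.py
 #
 # Code for the first example from chapter 4.
-# The only change from the original filteringdata.py is the addition of the music dictionary.
+# The only change from the original filteringdata.py is the addition of the
+# music dictionary. 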
# # Code file for the book Programmer's Guide to Data Mining # http://guidetodatamining.com @@ -11,30 +12,94 @@ from math import sqrt -users = {"Angelica": {"Blues Traveler": 3.5, "Broken Bells": 2.0, "Norah Jones": 4.5, "Phoenix": 5.0, "Slightly Stoopid": 1.5, "The Strokes": 2.5, "Vampire Weekend": 2.0}, - "Bill":{"Blues Traveler": 2.0, "Broken Bells": 3.5, "Deadmau5": 4.0, "Phoenix": 2.0, "Slightly Stoopid": 3.5, "Vampire Weekend": 3.0}, - "Chan": {"Blues Traveler": 5.0, "Broken Bells": 1.0, "Deadmau5": 1.0, "Norah Jones": 3.0, "Phoenix": 5, "Slightly Stoopid": 1.0}, - "Dan": {"Blues Traveler": 3.0, "Broken Bells": 4.0, "Deadmau5": 4.5, "Phoenix": 3.0, "Slightly Stoopid": 4.5, "The Strokes": 4.0, "Vampire Weekend": 2.0}, - "Hailey": {"Broken Bells": 4.0, "Deadmau5": 1.0, "Norah Jones": 4.0, "The Strokes": 4.0, "Vampire Weekend": 1.0}, - "Jordyn": {"Broken Bells": 4.5, "Deadmau5": 4.0, "Norah Jones": 5.0, "Phoenix": 5.0, "Slightly Stoopid": 4.5, "The Strokes": 4.0, "Vampire Weekend": 4.0}, - "Sam": {"Blues Traveler": 5.0, "Broken Bells": 2.0, "Norah Jones": 3.0, "Phoenix": 5.0, "Slightly Stoopid": 4.0, "The Strokes": 5.0}, - "Veronica": {"Blues Traveler": 3.0, "Norah Jones": 5.0, "Phoenix": 4.0, "Slightly Stoopid": 2.5, "The Strokes": 3.0} - } +users = { + "Angelica": { + "Blues Traveler": 3.5, "Broken Bells": 2.0, "Norah Jones": 4.5, + "Phoenix": 5.0, "Slightly Stoopid": 1.5, "The Strokes": 2.5, + "Vampire Weekend": 2.0 + }, + "Bill": { + "Blues Traveler": 2.0, "Broken Bells": 3.5, "Deadmau5": 4.0, + "Phoenix": 2.0, "Slightly Stoopid": 3.5, "Vampire Weekend": 3.0 + }, + "Chan": { + "Blues Traveler": 5.0, "Broken Bells": 1.0, "Deadmau5": 1.0, + "Norah Jones": 3.0, "Phoenix": 5, "Slightly Stoopid": 1.0 + }, + "Dan": { + "Blues Traveler": 3.0, "Broken Bells": 4.0, "Deadmau5": 4.5, + "Phoenix": 3.0, "Slightly Stoopid": 4.5, "The Strokes": 4.0, + "Vampire Weekend": 2.0 + }, + "Hailey": { + "Broken Bells": 4.0, "Deadmau5": 1.0, "Norah Jones": 4.0, + "The Strokes": 4.0, "Vampire Weekend": 1.0 + }, + "Jordyn": { + "Broken Bells": 4.5, "Deadmau5": 4.0, "Norah Jones": 5.0, + "Phoenix": 5.0, "Slightly Stoopid": 4.5, "The Strokes": 4.0, + "Vampire Weekend": 4.0 + }, + "Sam": { + "Blues Traveler": 5.0, "Broken Bells": 2.0, "Norah Jones": 3.0, + "Phoenix": 5.0, "Slightly Stoopid": 4.0, "The Strokes": 5.0 + }, + "Veronica": { + "Blues Traveler": 3.0, "Norah Jones": 5.0, "Phoenix": 4.0, + "Slightly Stoopid": 2.5, "The Strokes": 3.0 + } +} + +music = { + "Dr Dog/Fate": { + "piano": 2.5, "vocals": 4, "beat": 3.5, "blues": 3, "guitar": 5, + "backup vocals": 4, "rap": 1 + }, + "Phoenix/Lisztomania": { + "piano": 2, "vocals": 5, "beat": 5, "blues": 3, "guitar": 2, + "backup vocals": 1, "rap": 1 + }, + "Heartless Bastards/Out at Sea": { + "piano": 1, "vocals": 5, "beat": 4, "blues": 2, "guitar": 4, + "backup vocals": 1, "rap": 1 + }, + "Todd Snider/Don't Tempt Me": { + "piano": 4, "vocals": 5, "beat": 4, "blues": 4, "guitar": 1, + "backup vocals": 5, "rap": 1 + }, + "The Black Keys/Magic Potion": { + "piano": 1, "vocals": 4, "beat": 5, "blues": 3.5, "guitar": 5, + "backup vocals": 1, "rap": 1 + }, + "Glee Cast/Jessie's Girl": { + "piano": 1, "vocals": 5, "beat": 3.5, "blues": 3, "guitar": 4, + "backup vocals": 5, "rap": 1 + }, + "La Roux/Bulletproof": { + "piano": 5, "vocals": 5, "beat": 4, "blues": 2, "guitar": 1, + "backup vocals": 1, "rap": 1 + }, + "Mike Posner": { + "piano": 2.5, "vocals": 4, "beat": 4, "blues": 1, "guitar": 1, + "backup vocals": 1, "rap": 1 + }, + "Black Eyed Peas/Rock That Body": { + 
"piano": 2, "vocals": 5, "beat": 5, "blues": 1, "guitar": 2, + "backup vocals": 2, "rap": 4 + }, + "Lady Gaga/Alejandro": { + "piano": 1, "vocals": 5, "beat": 3, "blues": 2, "guitar": 1, + "backup vocals": 2, "rap": 1 + } +} -music = {"Dr Dog/Fate": {"piano": 2.5, "vocals": 4, "beat": 3.5, "blues": 3, "guitar": 5, "backup vocals": 4, "rap": 1}, - "Phoenix/Lisztomania": {"piano": 2, "vocals": 5, "beat": 5, "blues": 3, "guitar": 2, "backup vocals": 1, "rap": 1}, - "Heartless Bastards/Out at Sea": {"piano": 1, "vocals": 5, "beat": 4, "blues": 2, "guitar": 4, "backup vocals": 1, "rap": 1}, - "Todd Snider/Don't Tempt Me": {"piano": 4, "vocals": 5, "beat": 4, "blues": 4, "guitar": 1, "backup vocals": 5, "rap": 1}, - "The Black Keys/Magic Potion": {"piano": 1, "vocals": 4, "beat": 5, "blues": 3.5, "guitar": 5, "backup vocals": 1, "rap": 1}, - "Glee Cast/Jessie's Girl": {"piano": 1, "vocals": 5, "beat": 3.5, "blues": 3, "guitar":4, "backup vocals": 5, "rap": 1}, - "La Roux/Bulletproof": {"piano": 5, "vocals": 5, "beat": 4, "blues": 2, "guitar": 1, "backup vocals": 1, "rap": 1}, - "Mike Posner": {"piano": 2.5, "vocals": 4, "beat": 4, "blues": 1, "guitar": 1, "backup vocals": 1, "rap": 1}, - "Black Eyed Peas/Rock That Body": {"piano": 2, "vocals": 5, "beat": 5, "blues": 1, "guitar": 2, "backup vocals": 2, "rap": 4}, - "Lady Gaga/Alejandro": {"piano": 1, "vocals": 5, "beat": 3, "blues": 2, "guitar": 1, "backup vocals": 2, "rap": 1}} def manhattan(rating1, rating2): - """Computes the Manhattan distance. Both rating1 and rating2 are dictionaries - of the form {'The Strokes': 3.0, 'Slightly Stoopid': 2.5}""" + """Computes the Manhattan distance. + + Both rating1 and rating2 are dictionaries of the form: + {'The Strokes': 3.0, 'Slightly Stoopid': 2.5} + """ distance = 0 total = 0 for key in rating1: @@ -44,7 +109,6 @@ def manhattan(rating1, rating2): return distance - def computeNearestNeighbor(username, users): """creates a sorted list of users based on their distance to username""" distances = [] @@ -56,6 +120,7 @@ def computeNearestNeighbor(username, users): distances.sort() return distances + def recommend(username, users): """Give list of recommendations""" # first find nearest neighbor @@ -66,8 +131,11 @@ def recommend(username, users): neighborRatings = users[nearest] userRatings = users[username] for artist in neighborRatings: - if not artist in userRatings: + if artist not in userRatings: recommendations.append((artist, neighborRatings[artist])) # using the fn sorted for variety - sort is more efficient - return sorted(recommendations, key=lambda artistTuple: artistTuple[1], reverse = True) - + return sorted( + recommendations, + key=lambda artistTuple: artistTuple[1], + reverse=True + ) diff --git a/ch4/classifyTemplate.py b/ch4/classifyTemplate.py index 9adfdc6..758f2a3 100644 --- a/ch4/classifyTemplate.py +++ b/ch4/classifyTemplate.py @@ -1,5 +1,5 @@ # -# Classify Template +# Classify Template # # Finish the code for the method, nearestNeighbor # @@ -10,13 +10,12 @@ # - class Classifier: def __init__(self, filename): self.medianAndDeviation = [] - + # reading the data in from the file f = open(filename) lines = f.readlines() @@ -41,13 +40,10 @@ def __init__(self, filename): # now normalize the data for i in range(self.vlen): self.normalizeColumn(i) - - - ################################################## ### - ### CODE TO COMPUTE THE MODIFIED STANDARD SCORE + # CODE TO COMPUTE THE MODIFIED STANDARD SCORE def getMedian(self, alist): """return median of alist""" @@ -57,13 +53,12 @@ def 
getMedian(self, alist): length = len(alist) if length % 2 == 1: # length of list is odd so return middle element - return blist[int(((length + 1) / 2) - 1)] + return blist[int(((length + 1) / 2) - 1)] else: # length of list is even so compute midpoint v1 = blist[int(length / 2)] - v2 =blist[(int(length / 2) - 1)] + v2 = blist[(int(length / 2) - 1)] return (v1 + v2) / 2.0 - def getAbsoluteStandardDeviation(self, alist, median): """given alist and median return absolute standard deviation""" @@ -72,18 +67,16 @@ def getAbsoluteStandardDeviation(self, alist, median): sum += abs(item - median) return sum / len(alist) - def normalizeColumn(self, columnNumber): - """given a column number, normalize that column in self.data""" - # first extract values to list - col = [v[1][columnNumber] for v in self.data] - median = self.getMedian(col) - asd = self.getAbsoluteStandardDeviation(col, median) - #print("Median: %f ASD = %f" % (median, asd)) - self.medianAndDeviation.append((median, asd)) - for v in self.data: - v[1][columnNumber] = (v[1][columnNumber] - median) / asd - + """given a column number, normalize that column in self.data""" + # first extract values to list + col = [v[1][columnNumber] for v in self.data] + median = self.getMedian(col) + asd = self.getAbsoluteStandardDeviation(col, median) + # print("Median: %f ASD = %f" % (median, asd)) + self.medianAndDeviation.append((median, asd)) + for v in self.data: + v[1][columnNumber] = (v[1][columnNumber] - median) / asd def normalizeVector(self, v): """We have stored the median and asd for each column. @@ -94,27 +87,23 @@ def normalizeVector(self, v): vector[i] = (vector[i] - median) / asd return vector - ### - ### END NORMALIZATION + # END NORMALIZATION ################################################## - - def manhattan(self, vector1, vector2): """Computes the Manhattan distance.""" return sum(map(lambda v1, v2: abs(v1 - v2), vector1, vector2)) - def nearestNeighbor(self, itemVector): """return nearest neighbor to itemVector""" - + return ((0, ("REPLACE THIS LINE WITH CORRECT RETURN", [0], []))) - + def classify(self, itemVector): """Return class we think item Vector is in""" return(self.nearestNeighbor(self.normalizeVector(itemVector))[1][0]) - + def unitTest(): classifier = Classifier('athletesTrainingSet.txt') @@ -129,16 +118,19 @@ def unitTest(): assert(nlNorm == classifier.data[-1][1]) print('normalizeVector fn OK') # check distance - assert (round(classifier.manhattan(clNorm, classifier.data[1][1]), 5) == 1.16823) + assert ( + round(classifier.manhattan(clNorm, classifier.data[1][1]), 5) == + 1.16823 + ) assert(classifier.manhattan(brNorm, classifier.data[1][1]) == 0) assert(classifier.manhattan(nlNorm, classifier.data[-1][1]) == 0) print('Manhattan distance fn OK') # Brittainey Raven's nearest neighbor should be herself result = classifier.nearestNeighbor(brNorm) - assert(result[1][2]== br[2]) + assert(result[1][2] == br[2]) # Nastia Liukin's nearest neighbor should be herself result = classifier.nearestNeighbor(nlNorm) - assert(result[1][2]== nl[2]) + assert(result[1][2] == nl[2]) # Crystal Langhorne's nearest neighbor is Jennifer Lacy" assert(classifier.nearestNeighbor(clNorm)[1][2][0] == "Jennifer Lacy") print("Nearest Neighbor fn OK") @@ -149,6 +141,4 @@ def unitTest(): print('Classify fn OK') - unitTest() - diff --git a/ch4/nearestNeighborClassifier.py b/ch4/nearestNeighborClassifier.py index f96ca56..a563c49 100644 --- a/ch4/nearestNeighborClassifier.py +++ b/ch4/nearestNeighborClassifier.py @@ -1,5 +1,5 @@ # -# Nearest Neighbor 
Classifier +# Nearest Neighbor Classifier # # # Code file for the book Programmer's Guide to Data Mining @@ -9,51 +9,51 @@ # -## I am trying to make the classifier more general purpose -## by reading the data from a file. -## Each line of the file contains tab separated fields. -## The first line of the file describes how those fields (columns) should -## be interpreted. The descriptors in the fields of the first line are: -## -## comment - this field should be interpreted as a comment -## class - this field describes the class of the field -## num - this field describes an integer attribute that should -## be included in the computation. -## -## more to be described as needed -## -## -## So, for example, if our file describes athletes and is of the form: -## Shavonte Zellous basketball 70 155 -## The first line might be: -## comment class num num -## -## Meaning the first column (name of the player) should be considered a comment; -## the next column represents the class of the entry (the sport); -## and the next 2 represent attributes to use in the calculations. -## -## The classifer reads this file into the list called data. -## The format of each entry in that list is a tuple -## -## (class, normalized attribute-list, comment-list) -## -## so, for example -## -## [('basketball', [1.28, 1.71], ['Brittainey Raven']), -## ('basketball', [0.89, 1.47], ['Shavonte Zellous']), -## ('gymnastics', [-1.68, -0.75], ['Shawn Johnson']), -## ('gymnastics', [-2.27, -1.2], ['Ksenia Semenova']), -## ('track', [0.09, -0.06], ['Blake Russell'])] -## - - +# I am trying to make the classifier more general purpose +# by reading the data from a file. +# Each line of the file contains tab separated fields. +# The first line of the file describes how those fields (columns) should +# be interpreted. The descriptors in the fields of the first line are: +# +# comment - this field should be interpreted as a comment +# class - this field describes the class of the field +# num - this field describes an integer attribute that should +# be included in the computation. +# +# more to be described as needed +# +# +# So, for example, if our file describes athletes and is of the form: +# Shavonte Zellous basketball 70 155 +# The first line might be: +# comment class num num +# +# Meaning the first column (name of the player) should be considered a +# comment; +# the next column represents the class of the entry (the sport); +# and the next 2 represent attributes to use in the calculations. +# +# The classifer reads this file into the list called data. 
+# The format of each entry in that list is a tuple +# +# (class, normalized attribute-list, comment-list) +# +# so, for example +# +# [('basketball', [1.28, 1.71], ['Brittainey Raven']), +# ('basketball', [0.89, 1.47], ['Shavonte Zellous']), +# ('gymnastics', [-1.68, -0.75], ['Shawn Johnson']), +# ('gymnastics', [-2.27, -1.2], ['Ksenia Semenova']), +# ('track', [0.09, -0.06], ['Blake Russell'])] +# + class Classifier: def __init__(self, filename): self.medianAndDeviation = [] - + # reading the data in from the file f = open(filename) lines = f.readlines() @@ -78,13 +78,10 @@ def __init__(self, filename): # now normalize the data for i in range(self.vlen): self.normalizeColumn(i) - - - ################################################## ### - ### CODE TO COMPUTE THE MODIFIED STANDARD SCORE + # CODE TO COMPUTE THE MODIFIED STANDARD SCORE def getMedian(self, alist): """return median of alist""" @@ -94,13 +91,12 @@ def getMedian(self, alist): length = len(alist) if length % 2 == 1: # length of list is odd so return middle element - return blist[int(((length + 1) / 2) - 1)] + return blist[int(((length + 1) / 2) - 1)] else: # length of list is even so compute midpoint v1 = blist[int(length / 2)] - v2 =blist[(int(length / 2) - 1)] + v2 = blist[(int(length / 2) - 1)] return (v1 + v2) / 2.0 - def getAbsoluteStandardDeviation(self, alist, median): """given alist and median return absolute standard deviation""" @@ -109,18 +105,16 @@ def getAbsoluteStandardDeviation(self, alist, median): sum += abs(item - median) return sum / len(alist) - def normalizeColumn(self, columnNumber): - """given a column number, normalize that column in self.data""" - # first extract values to list - col = [v[1][columnNumber] for v in self.data] - median = self.getMedian(col) - asd = self.getAbsoluteStandardDeviation(col, median) - #print("Median: %f ASD = %f" % (median, asd)) - self.medianAndDeviation.append((median, asd)) - for v in self.data: - v[1][columnNumber] = (v[1][columnNumber] - median) / asd - + """given a column number, normalize that column in self.data""" + # first extract values to list + col = [v[1][columnNumber] for v in self.data] + median = self.getMedian(col) + asd = self.getAbsoluteStandardDeviation(col, median) + # print("Median: %f ASD = %f" % (median, asd)) + self.medianAndDeviation.append((median, asd)) + for v in self.data: + v[1][columnNumber] = (v[1][columnNumber] - median) / asd def normalizeVector(self, v): """We have stored the median and asd for each column. 
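
normalizeColumn above rescales every column with the Modified Standard Score, (value - median) / asd, where asd is the absolute standard deviation returned by getAbsoluteStandardDeviation. A small worked sketch with made-up heights:

heights = [70, 72, 78, 65, 66]                 # hypothetical column values
blist = sorted(heights)                        # [65, 66, 70, 72, 78]
median = blist[len(blist) // 2]                # 70 (odd-length list)
# absolute standard deviation: mean absolute difference from the median
asd = sum(abs(h - median) for h in heights) / len(heights)   # 19 / 5 = 3.8
print([(h - median) / asd for h in heights])
# [0.0, 0.526..., 2.105..., -1.315..., -1.052...]
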
@@ -131,27 +125,23 @@ def normalizeVector(self, v): vector[i] = (vector[i] - median) / asd return vector - ### - ### END NORMALIZATION + # END NORMALIZATION ################################################## - - def manhattan(self, vector1, vector2): """Computes the Manhattan distance.""" return sum(map(lambda v1, v2: abs(v1 - v2), vector1, vector2)) - def nearestNeighbor(self, itemVector): """return nearest neighbor to itemVector""" - return min([ (self.manhattan(itemVector, item[1]), item) - for item in self.data]) - + return min([(self.manhattan(itemVector, item[1]), item) + for item in self.data]) + def classify(self, itemVector): """Return class we think item Vector is in""" return(self.nearestNeighbor(self.normalizeVector(itemVector))[1][0]) - + def unitTest(): classifier = Classifier('athletesTrainingSet.txt') @@ -166,16 +156,19 @@ def unitTest(): assert(nlNorm == classifier.data[-1][1]) print('normalizeVector fn OK') # check distance - assert (round(classifier.manhattan(clNorm, classifier.data[1][1]), 5) == 1.16823) + assert ( + round(classifier.manhattan(clNorm, classifier.data[1][1]), 5) == + 1.16823 + ) assert(classifier.manhattan(brNorm, classifier.data[1][1]) == 0) assert(classifier.manhattan(nlNorm, classifier.data[-1][1]) == 0) print('Manhattan distance fn OK') # Brittainey Raven's nearest neighbor should be herself result = classifier.nearestNeighbor(brNorm) - assert(result[1][2]== br[2]) + assert(result[1][2] == br[2]) # Nastia Liukin's nearest neighbor should be herself result = classifier.nearestNeighbor(nlNorm) - assert(result[1][2]== nl[2]) + assert(result[1][2] == nl[2]) # Crystal Langhorne's nearest neighbor is Jennifer Lacy" assert(classifier.nearestNeighbor(clNorm)[1][2][0] == "Jennifer Lacy") print("Nearest Neighbor fn OK") @@ -185,6 +178,7 @@ def unitTest(): assert(classifier.classify(nl[1]) == 'Gymnastics') print('Classify fn OK') + def test(training_filename, test_filename): """Test the classifier on a test set of data""" classifier = Classifier(training_filename) @@ -197,24 +191,23 @@ def test(training_filename, test_filename): vector = [] classInColumn = -1 for i in range(len(classifier.format)): - if classifier.format[i] == 'num': - vector.append(float(data[i])) - elif classifier.format[i] == 'class': - classInColumn = i - theClass= classifier.classify(vector) + if classifier.format[i] == 'num': + vector.append(float(data[i])) + elif classifier.format[i] == 'class': + classInColumn = i + theClass = classifier.classify(vector) prefix = '-' if theClass == data[classInColumn]: # it is correct numCorrect += 1 prefix = '+' print("%s %12s %s" % (prefix, theClass, line)) - print("%4.2f%% correct" % (numCorrect * 100/ len(lines))) - + print("%4.2f%% correct" % (numCorrect * 100 / len(lines))) + ## -## Here are examples of how the classifier is used on different data sets -## in the book. +# Here are examples of how the classifier is used on different data sets +# in the book. 
# test('athletesTrainingSet.txt', 'athletesTestSet.txt') # test("irisTrainingSet.data", "irisTestSet.data") # test("mpgTrainingSet.txt", "mpgTestSet.txt") - diff --git a/ch4/normalizeColumnTemplate.py b/ch4/normalizeColumnTemplate.py index 56a228c..23ba985 100644 --- a/ch4/normalizeColumnTemplate.py +++ b/ch4/normalizeColumnTemplate.py @@ -1,5 +1,5 @@ # -# normalize column +# normalize column # # This is the template for you to write and test the method # @@ -13,14 +13,12 @@ # Ron Zacharski # - - class Classifier: def __init__(self, filename): - self.medianAndDeviation = [] + self.medianAndDeviation = [] # reading the data in from the file f = open(filename) lines = f.readlines() @@ -45,9 +43,6 @@ def __init__(self, filename): # now normalize the data for i in range(self.vlen): self.normalizeColumn(i) - - - def getMedian(self, alist): """return median of alist""" @@ -57,13 +52,12 @@ def getMedian(self, alist): length = len(alist) if length % 2 == 1: # length of list is odd so return middle element - return blist[int(((length + 1) / 2) - 1)] + return blist[int(((length + 1) / 2) - 1)] else: # length of list is even so compute midpoint v1 = blist[int(length / 2)] - v2 =blist[(int(length / 2) - 1)] + v2 = blist[(int(length / 2) - 1)] return (v1 + v2) / 2.0 - def getAbsoluteStandardDeviation(self, alist, median): """given alist and median return absolute standard deviation""" @@ -72,26 +66,21 @@ def getAbsoluteStandardDeviation(self, alist, median): sum += abs(item - median) return sum / len(alist) - ################################################## ### - ### FINISH WRITING THIS METHOD + # FINISH WRITING THIS METHOD - def normalizeColumn(self, columnNumber): - """given a column number, normalize that column in self.data - using the Modified Standard Score""" - - """ TO BE DONE""" + """given a column number, normalize that column in self.data + using the Modified Standard Score""" + """ TO BE DONE""" - ### - ### + # ################################################## - def unitTest(): classifier = Classifier('athletesTrainingSet.txt') # @@ -104,8 +93,12 @@ def unitTest(): assert(round(m1, 3) == 65.5) m2 = classifier.getMedian(list2) assert(round(m2, 3) == 107) - assert(round(classifier.getAbsoluteStandardDeviation(list1, m1),3) == 5.95) - assert(round(classifier.getAbsoluteStandardDeviation(list2, m2),3) == 33.65) + assert( + round(classifier.getAbsoluteStandardDeviation(list1, m1), 3) == 5.95 + ) + assert( + round(classifier.getAbsoluteStandardDeviation(list2, m2), 3) == 33.65 + ) print("getMedian and getAbsoluteStandardDeviation are OK") # test normalizeColumn @@ -116,11 +109,10 @@ def unitTest(): [-1.2605, -0.8915], [0.7563, 0.0297], [0.7563, 1.4264], [0.7563, 1.4264], [-0.4202, 0.0297], [-0.084, -0.0297], [0.084, -0.2972], [-0.7563, -0.9212]] - for i in range(len(list1)): - assert(round(classifier.data[i][1][0],4) == list1[i][0]) - assert(round(classifier.data[i][1][1],4) == list1[i][1]) + assert(round(classifier.data[i][1][0], 4) == list1[i][0]) + assert(round(classifier.data[i][1][1], 4) == list1[i][1]) print("normalizeColumn is OK") - + unitTest() diff --git a/ch4/testMedianAndASD.py b/ch4/testMedianAndASD.py index 1ec2d27..189f2a6 100644 --- a/ch4/testMedianAndASD.py +++ b/ch4/testMedianAndASD.py @@ -5,15 +5,14 @@ # # also download the file athletesTrainingSet.txt, which you should # put in the same folder as this file. 
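
The two methods left "TO BE DONE" in the template below have completed counterparts in ch4/nearestNeighborClassifier.py above. For reference, a lightly adapted copy of that version, written as methods to paste into the Classifier class (the hunks elide the sort step, so blist = sorted(alist) is assumed, and the builtin-shadowing name sum is renamed to total):

    def getMedian(self, alist):
        """return median of alist"""
        blist = sorted(alist)
        length = len(alist)
        if length % 2 == 1:
            # odd length: return the middle element
            return blist[int(((length + 1) / 2) - 1)]
        else:
            # even length: average the two middle elements
            v1 = blist[int(length / 2)]
            v2 = blist[(int(length / 2) - 1)]
            return (v1 + v2) / 2.0

    def getAbsoluteStandardDeviation(self, alist, median):
        """given alist and median return absolute standard deviation"""
        total = 0
        for item in alist:
            total += abs(item - median)
        return total / len(alist)
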
- - + class Classifier: def __init__(self, filename): self.medianAndDeviation = [] - + # reading the data in from the file f = open(filename) lines = f.readlines() @@ -33,20 +32,16 @@ def __init__(self, filename): classification = fields[i] self.data.append((classification, vector, ignore)) self.rawData = list(self.data) - - - ################################################## ### - ### FINISH THE FOLLOWING TWO METHODS + # FINISH THE FOLLOWING TWO METHODS def getMedian(self, alist): """return median of alist""" """TO BE DONE""" return 0 - def getAbsoluteStandardDeviation(self, alist, median): """given alist and median return absolute standard deviation""" @@ -54,13 +49,11 @@ def getAbsoluteStandardDeviation(self, alist, median): """TO BE DONE""" return 0 - ### - ### + # ################################################## - def unitTest(): list1 = [54, 72, 78, 49, 65, 63, 75, 67, 54] list2 = [54, 72, 78, 49, 65, 63, 75, 67, 54, 68] @@ -83,8 +76,7 @@ def unitTest(): assert(round(asd2, 3) == 7.5) assert(round(asd3, 3) == 0) assert(round(asd4, 3) == 1.5) - + print("getMedian and getAbsoluteStandardDeviation work correctly") unitTest() - diff --git a/ch5/crossValidation.py b/ch5/crossValidation.py index b98b30e..f360e0a 100644 --- a/ch5/crossValidation.py +++ b/ch5/crossValidation.py @@ -1,6 +1,6 @@ -# -# -# Nearest Neighbor Classifier for mpg dataset +# +# +# Nearest Neighbor Classifier for mpg dataset # # for chapter 5 page 14 # @@ -11,6 +11,7 @@ # import copy + class Classifier: def __init__(self, bucketPrefix, testBucketNumber, dataFormat): @@ -21,11 +22,11 @@ def __init__(self, bucketPrefix, testBucketNumber, dataFormat): "class num num num num num comment" """ - + self.medianAndDeviation = [] - + # reading the data in from the file - + self.format = dataFormat.strip().split('\t') self.data = [] # for each of the buckets numbered 1 through 10: @@ -41,7 +42,7 @@ def __init__(self, bucketPrefix, testBucketNumber, dataFormat): ignore = [] vector = [] for j in range(len(fields)): - + if self.format[j] == 'num': vector.append(float(fields[j])) elif self.format[j] == 'comment': @@ -55,13 +56,10 @@ def __init__(self, bucketPrefix, testBucketNumber, dataFormat): # now normalize the data for i in range(self.vlen): self.normalizeColumn(i) - - - ################################################## ### - ### CODE TO COMPUTE THE MODIFIED STANDARD SCORE + # CODE TO COMPUTE THE MODIFIED STANDARD SCORE def getMedian(self, alist): """return median of alist""" @@ -71,13 +69,12 @@ def getMedian(self, alist): length = len(alist) if length % 2 == 1: # length of list is odd so return middle element - return blist[int(((length + 1) / 2) - 1)] + return blist[int(((length + 1) / 2) - 1)] else: # length of list is even so compute midpoint v1 = blist[int(length / 2)] - v2 =blist[(int(length / 2) - 1)] + v2 = blist[(int(length / 2) - 1)] return (v1 + v2) / 2.0 - def getAbsoluteStandardDeviation(self, alist, median): """given alist and median return absolute standard deviation""" @@ -86,18 +83,16 @@ def getAbsoluteStandardDeviation(self, alist, median): sum += abs(item - median) return sum / len(alist) - def normalizeColumn(self, columnNumber): - """given a column number, normalize that column in self.data""" - # first extract values to list - col = [v[1][columnNumber] for v in self.data] - median = self.getMedian(col) - asd = self.getAbsoluteStandardDeviation(col, median) - #print("Median: %f ASD = %f" % (median, asd)) - self.medianAndDeviation.append((median, asd)) - for v in self.data: - v[1][columnNumber] = 
(v[1][columnNumber] - median) / asd - + """given a column number, normalize that column in self.data""" + # first extract values to list + col = [v[1][columnNumber] for v in self.data] + median = self.getMedian(col) + asd = self.getAbsoluteStandardDeviation(col, median) + # print("Median: %f ASD = %f" % (median, asd)) + self.medianAndDeviation.append((median, asd)) + for v in self.data: + v[1][columnNumber] = (v[1][columnNumber] - median) / asd def normalizeVector(self, v): """We have stored the median and asd for each column. @@ -107,14 +102,15 @@ def normalizeVector(self, v): (median, asd) = self.medianAndDeviation[i] vector[i] = (vector[i] - median) / asd return vector + ### - ### END NORMALIZATION + # END NORMALIZATION ################################################## def testBucket(self, bucketPrefix, bucketNumber): """Evaluate the classifier with data from the file bucketPrefix-bucketNumber""" - + filename = "%s-%02i" % (bucketPrefix, bucketNumber) f = open(filename) lines = f.readlines() @@ -125,10 +121,10 @@ def testBucket(self, bucketPrefix, bucketNumber): vector = [] classInColumn = -1 for i in range(len(self.format)): - if self.format[i] == 'num': - vector.append(float(data[i])) - elif self.format[i] == 'class': - classInColumn = i + if self.format[i] == 'num': + vector.append(float(data[i])) + elif self.format[i] == 'class': + classInColumn = i theRealClass = data[classInColumn] classifiedAs = self.classify(vector) totals.setdefault(theRealClass, {}) @@ -136,24 +132,20 @@ def testBucket(self, bucketPrefix, bucketNumber): totals[theRealClass][classifiedAs] += 1 return totals - - def manhattan(self, vector1, vector2): """Computes the Manhattan distance.""" return sum(map(lambda v1, v2: abs(v1 - v2), vector1, vector2)) - def nearestNeighbor(self, itemVector): """return nearest neighbor to itemVector""" - return min([ (self.manhattan(itemVector, item[1]), item) - for item in self.data]) - + return min([(self.manhattan(itemVector, item[1]), item) + for item in self.data]) + def classify(self, itemVector): """Return class we think item Vector is in""" return(self.nearestNeighbor(self.normalizeVector(itemVector))[1][0]) - - + def tenfold(bucketPrefix, dataFormat): results = {} for i in range(1, 11): @@ -164,18 +156,18 @@ def tenfold(bucketPrefix, dataFormat): for (ckey, cvalue) in value.items(): results[key].setdefault(ckey, 0) results[key][ckey] += cvalue - + # now print results categories = list(results.keys()) categories.sort() - print( "\n Classified as: ") - header = " " + print("\n Classified as: ") + header = " " subheader = " +" for category in categories: header += category + " " subheader += "----+" - print (header) - print (subheader) + print(header) + print(subheader) total = 0.0 correct = 0.0 for category in categories: @@ -191,9 +183,8 @@ def tenfold(bucketPrefix, dataFormat): correct += count print(row) print(subheader) - print("\n%5.3f percent correct" %((correct * 100) / total)) + print("\n%5.3f percent correct" % ((correct * 100) / total)) print("total of %i instances" % total) -tenfold("../../data/mpgData/mpgData", "class num num num num num comment") - +tenfold("../../data/mpgData/mpgData", "class num num num num num comment") diff --git a/ch5/divide.py b/ch5/divide.py index 9048c84..86fb207 100644 --- a/ch5/divide.py +++ b/ch5/divide.py @@ -1,6 +1,7 @@ # divide data into 10 buckets import random + def buckets(filename, bucketName, separator, classColumn): """the original data is in the file named filename bucketName is the prefix for all the bucket names @@ 
-24,10 +25,10 @@ def buckets(filename, bucketName, separator, classColumn): # initialize the buckets buckets = [] for i in range(numberOfBuckets): - buckets.append([]) + buckets.append([]) # now for each category put the data into the buckets for k in data.keys(): - #randomize order of instances for each class + # randomize order of instances for each class random.shuffle(data[k]) bNum = 0 # divide into buckets @@ -42,5 +43,5 @@ def buckets(filename, bucketName, separator, classColumn): f.write(item) f.close() -# example of how to use this code -buckets("pimaSmall.txt", 'pimaSmall',',',8) +# example of how to use this code +buckets("pimaSmall.txt", 'pimaSmall', ',', 8) diff --git a/ch5/nearestNeighborClassifier.py b/ch5/nearestNeighborClassifier.py index f96ca56..2322004 100644 --- a/ch5/nearestNeighborClassifier.py +++ b/ch5/nearestNeighborClassifier.py @@ -1,5 +1,5 @@ # -# Nearest Neighbor Classifier +# Nearest Neighbor Classifier # # # Code file for the book Programmer's Guide to Data Mining @@ -9,51 +9,51 @@ # -## I am trying to make the classifier more general purpose -## by reading the data from a file. -## Each line of the file contains tab separated fields. -## The first line of the file describes how those fields (columns) should -## be interpreted. The descriptors in the fields of the first line are: -## -## comment - this field should be interpreted as a comment -## class - this field describes the class of the field -## num - this field describes an integer attribute that should -## be included in the computation. -## -## more to be described as needed -## -## -## So, for example, if our file describes athletes and is of the form: -## Shavonte Zellous basketball 70 155 -## The first line might be: -## comment class num num -## -## Meaning the first column (name of the player) should be considered a comment; -## the next column represents the class of the entry (the sport); -## and the next 2 represent attributes to use in the calculations. -## -## The classifer reads this file into the list called data. -## The format of each entry in that list is a tuple -## -## (class, normalized attribute-list, comment-list) -## -## so, for example -## -## [('basketball', [1.28, 1.71], ['Brittainey Raven']), -## ('basketball', [0.89, 1.47], ['Shavonte Zellous']), -## ('gymnastics', [-1.68, -0.75], ['Shawn Johnson']), -## ('gymnastics', [-2.27, -1.2], ['Ksenia Semenova']), -## ('track', [0.09, -0.06], ['Blake Russell'])] -## - - +# I am trying to make the classifier more general purpose +# by reading the data from a file. +# Each line of the file contains tab separated fields. +# The first line of the file describes how those fields (columns) should +# be interpreted. The descriptors in the fields of the first line are: +# +# comment - this field should be interpreted as a comment +# class - this field describes the class of the field +# num - this field describes an integer attribute that should +# be included in the computation. +# +# more to be described as needed +# +# +# So, for example, if our file describes athletes and is of the form: +# Shavonte Zellous basketball 70 155 +# The first line might be: +# comment class num num +# +# Meaning the first column (name of the player) should be considered a +# comment; +# the next column represents the class of the entry (the sport); +# and the next 2 represent attributes to use in the calculations. +# +# The classifer reads this file into the list called data. 
+# The format of each entry in that list is a tuple +# +# (class, normalized attribute-list, comment-list) +# +# so, for example +# +# [('basketball', [1.28, 1.71], ['Brittainey Raven']), +# ('basketball', [0.89, 1.47], ['Shavonte Zellous']), +# ('gymnastics', [-1.68, -0.75], ['Shawn Johnson']), +# ('gymnastics', [-2.27, -1.2], ['Ksenia Semenova']), +# ('track', [0.09, -0.06], ['Blake Russell'])] +# + class Classifier: def __init__(self, filename): self.medianAndDeviation = [] - + # reading the data in from the file f = open(filename) lines = f.readlines() @@ -78,13 +78,10 @@ def __init__(self, filename): # now normalize the data for i in range(self.vlen): self.normalizeColumn(i) - - - ################################################## ### - ### CODE TO COMPUTE THE MODIFIED STANDARD SCORE + # CODE TO COMPUTE THE MODIFIED STANDARD SCORE def getMedian(self, alist): """return median of alist""" @@ -94,13 +91,12 @@ def getMedian(self, alist): length = len(alist) if length % 2 == 1: # length of list is odd so return middle element - return blist[int(((length + 1) / 2) - 1)] + return blist[int(((length + 1) / 2) - 1)] else: # length of list is even so compute midpoint v1 = blist[int(length / 2)] - v2 =blist[(int(length / 2) - 1)] + v2 = blist[(int(length / 2) - 1)] return (v1 + v2) / 2.0 - def getAbsoluteStandardDeviation(self, alist, median): """given alist and median return absolute standard deviation""" @@ -109,18 +105,16 @@ def getAbsoluteStandardDeviation(self, alist, median): sum += abs(item - median) return sum / len(alist) - def normalizeColumn(self, columnNumber): - """given a column number, normalize that column in self.data""" - # first extract values to list - col = [v[1][columnNumber] for v in self.data] - median = self.getMedian(col) - asd = self.getAbsoluteStandardDeviation(col, median) - #print("Median: %f ASD = %f" % (median, asd)) - self.medianAndDeviation.append((median, asd)) - for v in self.data: - v[1][columnNumber] = (v[1][columnNumber] - median) / asd - + """given a column number, normalize that column in self.data""" + # first extract values to list + col = [v[1][columnNumber] for v in self.data] + median = self.getMedian(col) + asd = self.getAbsoluteStandardDeviation(col, median) + # print("Median: %f ASD = %f" % (median, asd)) + self.medianAndDeviation.append((median, asd)) + for v in self.data: + v[1][columnNumber] = (v[1][columnNumber] - median) / asd def normalizeVector(self, v): """We have stored the median and asd for each column. 
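
normalizeVector above replays the stored per-column (median, asd) pairs on a fresh vector, so query items land in the same normalized space as the training data. A tiny sketch with hypothetical stored values:

medianAndDeviation = [(70, 3.8), (140, 12.5)]   # hypothetical (median, asd) per column
v = [72, 150]
print([(x - m) / asd for (x, (m, asd)) in zip(v, medianAndDeviation)])
# [0.526..., 0.8]
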
@@ -131,27 +125,23 @@ def normalizeVector(self, v): vector[i] = (vector[i] - median) / asd return vector - ### - ### END NORMALIZATION + # END NORMALIZATION ################################################## - - def manhattan(self, vector1, vector2): """Computes the Manhattan distance.""" return sum(map(lambda v1, v2: abs(v1 - v2), vector1, vector2)) - def nearestNeighbor(self, itemVector): """return nearest neighbor to itemVector""" - return min([ (self.manhattan(itemVector, item[1]), item) - for item in self.data]) - + return min([(self.manhattan(itemVector, item[1]), item) + for item in self.data]) + def classify(self, itemVector): """Return class we think item Vector is in""" return(self.nearestNeighbor(self.normalizeVector(itemVector))[1][0]) - + def unitTest(): classifier = Classifier('athletesTrainingSet.txt') @@ -166,16 +156,19 @@ def unitTest(): assert(nlNorm == classifier.data[-1][1]) print('normalizeVector fn OK') # check distance - assert (round(classifier.manhattan(clNorm, classifier.data[1][1]), 5) == 1.16823) + assert ( + round(classifier.manhattan(clNorm, classifier.data[1][1]), 5) == + 1.16823 + ) assert(classifier.manhattan(brNorm, classifier.data[1][1]) == 0) assert(classifier.manhattan(nlNorm, classifier.data[-1][1]) == 0) print('Manhattan distance fn OK') # Brittainey Raven's nearest neighbor should be herself result = classifier.nearestNeighbor(brNorm) - assert(result[1][2]== br[2]) + assert(result[1][2] == br[2]) # Nastia Liukin's nearest neighbor should be herself result = classifier.nearestNeighbor(nlNorm) - assert(result[1][2]== nl[2]) + assert(result[1][2] == nl[2]) # Crystal Langhorne's nearest neighbor is Jennifer Lacy" assert(classifier.nearestNeighbor(clNorm)[1][2][0] == "Jennifer Lacy") print("Nearest Neighbor fn OK") @@ -185,6 +178,7 @@ def unitTest(): assert(classifier.classify(nl[1]) == 'Gymnastics') print('Classify fn OK') + def test(training_filename, test_filename): """Test the classifier on a test set of data""" classifier = Classifier(training_filename) @@ -197,24 +191,23 @@ def test(training_filename, test_filename): vector = [] classInColumn = -1 for i in range(len(classifier.format)): - if classifier.format[i] == 'num': - vector.append(float(data[i])) - elif classifier.format[i] == 'class': - classInColumn = i - theClass= classifier.classify(vector) + if classifier.format[i] == 'num': + vector.append(float(data[i])) + elif classifier.format[i] == 'class': + classInColumn = i + theClass = classifier.classify(vector) prefix = '-' if theClass == data[classInColumn]: # it is correct numCorrect += 1 prefix = '+' print("%s %12s %s" % (prefix, theClass, line)) - print("%4.2f%% correct" % (numCorrect * 100/ len(lines))) - + print("%4.2f%% correct" % (numCorrect * 100 / len(lines))) -## -## Here are examples of how the classifier is used on different data sets -## in the book. + +# +# Here are examples of how the classifier is used on different data sets +# in the book. 
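
Each test() call below trains a Classifier on the first file, classifies every line of the second, prints a '+' or '-' prefix per instance, and finishes with an accuracy line; with hypothetical tallies that final line looks like:

numCorrect, numLines = 14, 20                   # hypothetical counts
print("%4.2f%% correct" % (numCorrect * 100 / numLines))   # 70.00% correct
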
# test('athletesTrainingSet.txt', 'athletesTestSet.txt') # test("irisTrainingSet.data", "irisTestSet.data") # test("mpgTrainingSet.txt", "mpgTestSet.txt") - diff --git a/ch5/pimaKNN.py b/ch5/pimaKNN.py index fd30f42..f698b05 100644 --- a/ch5/pimaKNN.py +++ b/ch5/pimaKNN.py @@ -1,5 +1,5 @@ -# -# +# +# # Nearest Neighbor Classifier for Pima dataset # # @@ -11,6 +11,7 @@ import heapq import random + class Classifier: def __init__(self, bucketPrefix, testBucketNumber, dataFormat, k): @@ -21,11 +22,11 @@ def __init__(self, bucketPrefix, testBucketNumber, dataFormat, k): "class num num num num num comment" """ - + self.medianAndDeviation = [] self.k = k # reading the data in from the file - + self.format = dataFormat.strip().split('\t') self.data = [] # for each of the buckets numbered 1 through 10: @@ -54,13 +55,10 @@ def __init__(self, bucketPrefix, testBucketNumber, dataFormat, k): # now normalize the data for i in range(self.vlen): self.normalizeColumn(i) - - - ################################################## ### - ### CODE TO COMPUTE THE MODIFIED STANDARD SCORE + # CODE TO COMPUTE THE MODIFIED STANDARD SCORE def getMedian(self, alist): """return median of alist""" @@ -70,13 +68,12 @@ def getMedian(self, alist): length = len(alist) if length % 2 == 1: # length of list is odd so return middle element - return blist[int(((length + 1) / 2) - 1)] + return blist[int(((length + 1) / 2) - 1)] else: # length of list is even so compute midpoint v1 = blist[int(length / 2)] - v2 =blist[(int(length / 2) - 1)] + v2 = blist[(int(length / 2) - 1)] return (v1 + v2) / 2.0 - def getAbsoluteStandardDeviation(self, alist, median): """given alist and median return absolute standard deviation""" @@ -85,18 +82,16 @@ def getAbsoluteStandardDeviation(self, alist, median): sum += abs(item - median) return sum / len(alist) - def normalizeColumn(self, columnNumber): - """given a column number, normalize that column in self.data""" - # first extract values to list - col = [v[1][columnNumber] for v in self.data] - median = self.getMedian(col) - asd = self.getAbsoluteStandardDeviation(col, median) - #print("Median: %f ASD = %f" % (median, asd)) - self.medianAndDeviation.append((median, asd)) - for v in self.data: - v[1][columnNumber] = (v[1][columnNumber] - median) / asd - + """given a column number, normalize that column in self.data""" + # first extract values to list + col = [v[1][columnNumber] for v in self.data] + median = self.getMedian(col) + asd = self.getAbsoluteStandardDeviation(col, median) + # print("Median: %f ASD = %f" % (median, asd)) + self.medianAndDeviation.append((median, asd)) + for v in self.data: + v[1][columnNumber] = (v[1][columnNumber] - median) / asd def normalizeVector(self, v): """We have stored the median and asd for each column. 
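
pimaKNN's knn method (in the hunks below) replaces the single nearest neighbor with a vote among the k closest items, using heapq.nsmallest and a random tie-break. A compact sketch of that tally with made-up distances:

import heapq
import random

# (distance, (class, vector)) pairs, as built from self.data
neighbors = [(0.4, ('1', [0.1])), (0.9, ('0', [0.5])),
             (0.6, ('1', [0.2])), (1.2, ('0', [0.9]))]
closest = heapq.nsmallest(3, neighbors)         # k = 3 smallest distances
votes = {}
for (_, (theClass, _)) in closest:
    votes[theClass] = votes.get(theClass, 0) + 1
maxVotes = max(votes.values())
# randomly pick among the classes tied for the most votes
print(random.choice([c for (c, v) in votes.items() if v == maxVotes]))  # '1'
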
@@ -107,13 +102,13 @@ def normalizeVector(self, v): vector[i] = (vector[i] - median) / asd return vector ### - ### END NORMALIZATION + # END NORMALIZATION ################################################## def testBucket(self, bucketPrefix, bucketNumber): """Evaluate the classifier with data from the file bucketPrefix-bucketNumber""" - + filename = "%s-%02i" % (bucketPrefix, bucketNumber) f = open(filename) lines = f.readlines() @@ -124,59 +119,59 @@ def testBucket(self, bucketPrefix, bucketNumber): vector = [] classInColumn = -1 for i in range(len(self.format)): - if self.format[i] == 'num': - vector.append(float(data[i])) - elif self.format[i] == 'class': - classInColumn = i + if self.format[i] == 'num': + vector.append(float(data[i])) + elif self.format[i] == 'class': + classInColumn = i theRealClass = data[classInColumn] - #print("REAL ", theRealClass) + # print("REAL ", theRealClass) classifiedAs = self.classify(vector) totals.setdefault(theRealClass, {}) totals[theRealClass].setdefault(classifiedAs, 0) totals[theRealClass][classifiedAs] += 1 return totals - - def manhattan(self, vector1, vector2): """Computes the Manhattan distance.""" return sum(map(lambda v1, v2: abs(v1 - v2), vector1, vector2)) - def nearestNeighbor(self, itemVector): """return nearest neighbor to itemVector""" - return min([ (self.manhattan(itemVector, item[1]), item) - for item in self.data]) - + return min([(self.manhattan(itemVector, item[1]), item) + for item in self.data]) + def knn(self, itemVector): """returns the predicted class of itemVector using k Nearest Neighbors""" # changed from min to heapq.nsmallest to get the # k closest neighbors - neighbors = heapq.nsmallest(self.k, - [(self.manhattan(itemVector, item[1]), item) - for item in self.data]) + neighbors = heapq.nsmallest( + self.k, + [(self.manhattan(itemVector, item[1]), item) + for item in self.data] + ) # each neighbor gets a vote results = {} - for neighbor in neighbors: + for neighbor in neighbors: theClass = neighbor[1][0] results.setdefault(theClass, 0) results[theClass] += 1 - resultList = sorted([(i[1], i[0]) for i in results.items()], reverse=True) - #get all the classes that have the maximum votes + resultList = sorted( + [(i[1], i[0]) for i in results.items()], reverse=True + ) + # get all the classes that have the maximum votes maxVotes = resultList[0][0] possibleAnswers = [i[1] for i in resultList if i[0] == maxVotes] # randomly select one of the classes that received the max votes answer = random.choice(possibleAnswers) - return( answer) - + return (answer) + def classify(self, itemVector): """Return class we think item Vector is in""" # k represents how many nearest neighbors to use - return(self.knn(self.normalizeVector(itemVector))) - + return(self.knn(self.normalizeVector(itemVector))) + - def tenfold(bucketPrefix, dataFormat, k): results = {} for i in range(1, 11): @@ -187,12 +182,12 @@ def tenfold(bucketPrefix, dataFormat, k): for (ckey, cvalue) in value.items(): results[key].setdefault(ckey, 0) results[key][ckey] += cvalue - + # now print results categories = list(results.keys()) categories.sort() - print( "\n Classified as: ") - header = " " + print("\n Classified as: ") + header = " " subheader = " +" for category in categories: header += "% 2s " % category @@ -202,7 +197,7 @@ def tenfold(bucketPrefix, dataFormat, k): total = 0.0 correct = 0.0 for category in categories: - row = " %s |" % category + row = " %s |" % category for c2 in categories: if c2 in results[category]: count = results[category][c2] @@ -214,7 +209,7 @@ 
def tenfold(bucketPrefix, dataFormat, k): correct += count print(row) print(subheader) - print("\n%5.3f percent correct" %((correct * 100) / total)) + print("\n%5.3f percent correct" % ((correct * 100) / total)) print("total of %i instances" % total) print("SMALL DATA SET") diff --git a/ch6/naiveBayes.py b/ch6/naiveBayes.py index e6d44b3..7708b3e 100644 --- a/ch6/naiveBayes.py +++ b/ch6/naiveBayes.py @@ -1,5 +1,5 @@ - -# + +# # Naive Bayes Classifier chapter 6 # @@ -15,14 +15,13 @@ def __init__(self, bucketPrefix, testBucketNumber, dataFormat): for the iHealth data the format is: "attr attr attr attr class" """ - + total = 0 classes = {} counts = {} - - + # reading the data in from the file - + self.format = dataFormat.strip().split('\t') self.prior = {} self.conditional = {} @@ -42,7 +41,7 @@ def __init__(self, bucketPrefix, testBucketNumber, dataFormat): if self.format[i] == 'num': vector.append(float(fields[i])) elif self.format[i] == 'attr': - vector.append(fields[i]) + vector.append(fields[i]) elif self.format[i] == 'comment': ignore.append(fields[i]) elif self.format[i] == 'class': @@ -59,7 +58,7 @@ def __init__(self, bucketPrefix, testBucketNumber, dataFormat): counts[category].setdefault(col, {}) counts[category][col].setdefault(columnValue, 0) counts[category][col][columnValue] += 1 - + # # ok done counting. now compute probabilities # @@ -71,20 +70,18 @@ def __init__(self, bucketPrefix, testBucketNumber, dataFormat): # now compute conditional probabilities p(D|h) # for (category, columns) in counts.items(): - self.conditional.setdefault(category, {}) - for (col, valueCounts) in columns.items(): - self.conditional[category].setdefault(col, {}) - for (attrValue, count) in valueCounts.items(): - self.conditional[category][col][attrValue] = ( - count / classes[category]) - self.tmp = counts - - - + self.conditional.setdefault(category, {}) + for (col, valueCounts) in columns.items(): + self.conditional[category].setdefault(col, {}) + for (attrValue, count) in valueCounts.items(): + self.conditional[category][col][attrValue] = ( + count / classes[category]) + self.tmp = counts + def testBucket(self, bucketPrefix, bucketNumber): """Evaluate the classifier with data from the file bucketPrefix-bucketNumber""" - + filename = "%s-%02i" % (bucketPrefix, bucketNumber) f = open(filename) lines = f.readlines() @@ -97,12 +94,12 @@ def testBucket(self, bucketPrefix, bucketNumber): vector = [] classInColumn = -1 for i in range(len(self.format)): - if self.format[i] == 'num': - vector.append(float(data[i])) - elif self.format[i] == 'attr': - vector.append(data[i]) - elif self.format[i] == 'class': - classInColumn = i + if self.format[i] == 'num': + vector.append(float(data[i])) + elif self.format[i] == 'attr': + vector.append(data[i]) + elif self.format[i] == 'class': + classInColumn = i theRealClass = data[classInColumn] classifiedAs = self.classify(vector) totals.setdefault(theRealClass, {}) @@ -110,8 +107,6 @@ def testBucket(self, bucketPrefix, bucketNumber): totals[theRealClass][classifiedAs] += 1 return totals - - def classify(self, itemVector): """Return class we think item Vector is in""" results = [] @@ -119,7 +114,7 @@ def classify(self, itemVector): prob = prior col = 1 for attrValue in itemVector: - if not attrValue in self.conditional[category][col]: + if attrValue not in self.conditional[category][col]: # we did not find any instances of this attribute value # occurring with this category so prob = 0 prob = 0 @@ -129,7 +124,7 @@ def classify(self, itemVector): results.append((prob, 
category)) # return the category with the highest probability return(max(results)[1]) - + def tenfold(bucketPrefix, dataFormat): results = {} @@ -141,22 +136,22 @@ def tenfold(bucketPrefix, dataFormat): for (ckey, cvalue) in value.items(): results[key].setdefault(ckey, 0) results[key][ckey] += cvalue - + # now print results categories = list(results.keys()) categories.sort() - print( "\n Classified as: ") - header = " " + print("\n Classified as: ") + header = " " subheader = " +" for category in categories: header += "% 10s " % category subheader += "-------+" - print (header) - print (subheader) + print(header) + print(subheader) total = 0.0 correct = 0.0 for category in categories: - row = " %10s |" % category + row = " %10s |" % category for c2 in categories: if c2 in results[category]: count = results[category][c2] @@ -168,18 +163,29 @@ def tenfold(bucketPrefix, dataFormat): correct += count print(row) print(subheader) - print("\n%5.3f percent correct" %((correct * 100) / total)) + print("\n%5.3f percent correct" % ((correct * 100) / total)) print("total of %i instances" % total) -tenfold("house-votes/hv", "class\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr") -#c = Classifier("house-votes/hv", 0, -# "class\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr") - -#c = Classifier("iHealth/i", 10, -# "attr\tattr\tattr\tattr\tclass") -#print(c.classify(['health', 'moderate', 'moderate', 'yes'])) +tenfold( + "house-votes/hv", + "class\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\t" + "attr\tattr\tattr\tattr\tattr\tattr\tattr" +) +# c = Classifier( +# "house-votes/hv", +# 0, +# "class\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\t" +# "attr\tattr\tattr\tattr\tattr\tattr\tattr" +# ) -#c = Classifier("house-votes-filtered/hv", 5, "class\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr") -#t = c.testBucket("house-votes-filtered/hv", 5) -#print(t) +# c = Classifier("iHealth/i", 10, "attr\tattr\tattr\tattr\tclass") +# print(c.classify(['health', 'moderate', 'moderate', 'yes'])) +# c = Classifier( +# "house-votes-filtered/hv", +# 5, +# "class\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\t" +# "attr\tattr\tattr\tattr\tattr\tattr\tattr" +# ) +# t = c.testBucket("house-votes-filtered/hv", 5) +# print(t) diff --git a/ch6/naiveBayesDensityFunction copy.py b/ch6/naiveBayesDensityFunction copy.py index afb9b2c..f670f32 100644 --- a/ch6/naiveBayesDensityFunction copy.py +++ b/ch6/naiveBayesDensityFunction copy.py @@ -1,5 +1,5 @@ - -# + +# # Naive Bayes Classifier chapter 6 # @@ -8,6 +8,7 @@ import math + class Classifier: def __init__(self, bucketPrefix, testBucketNumber, dataFormat): @@ -17,25 +18,24 @@ def __init__(self, bucketPrefix, testBucketNumber, dataFormat): for the iHealth data the format is: "attr attr attr attr class" """ - + total = 0 classes = {} # counts used for attributes that are not numeric counts = {} # totals used for attributes that are numereric - # we will use these to compute the mean and sample standard deviation for - # each attribute - class pair. + # we will use these to compute the mean and sample standard deviation + # for each attribute - class pair. 
totals = {} numericValues = {} - - + # reading the data in from the file - + self.format = dataFormat.strip().split('\t') - # + # self.prior = {} self.conditional = {} - + # for each of the buckets numbered 1 through 10: for i in range(1, 11): # if it is not the bucket we should ignore, read in the data @@ -53,7 +53,7 @@ def __init__(self, bucketPrefix, testBucketNumber, dataFormat): if self.format[i] == 'num': nums.append(float(fields[i])) elif self.format[i] == 'attr': - vector.append(fields[i]) + vector.append(fields[i]) elif self.format[i] == 'comment': ignore.append(fields[i]) elif self.format[i] == 'class': @@ -77,12 +77,11 @@ def __init__(self, bucketPrefix, testBucketNumber, dataFormat): for columnValue in nums: col += 1 totals[category].setdefault(col, 0) - #totals[category][col].setdefault(columnValue, 0) + # totals[category][col].setdefault(columnValue, 0) totals[category][col] += columnValue numericValues[category].setdefault(col, []) numericValues[category][col].append(columnValue) - - + # # ok done counting. now compute probabilities # @@ -94,13 +93,13 @@ def __init__(self, bucketPrefix, testBucketNumber, dataFormat): # now compute conditional probabilities p(h|D) # for (category, columns) in counts.items(): - self.conditional.setdefault(category, {}) - for (col, valueCounts) in columns.items(): - self.conditional[category].setdefault(col, {}) - for (attrValue, count) in valueCounts.items(): - self.conditional[category][col][attrValue] = ( - count / classes[category]) - self.tmp = counts + self.conditional.setdefault(category, {}) + for (col, valueCounts) in columns.items(): + self.conditional[category].setdefault(col, {}) + for (attrValue, count) in valueCounts.items(): + self.conditional[category][col][attrValue] = ( + count / classes[category]) + self.tmp = counts # # now compute mean and sample standard deviation # @@ -113,7 +112,7 @@ def __init__(self, bucketPrefix, testBucketNumber, dataFormat): # standard deviation self.ssd = {} for (category, columns) in numericValues.items(): - + self.ssd.setdefault(category, {}) for (col, values) in columns.items(): SumOfSquareDifferences = 0 @@ -121,14 +120,13 @@ def __init__(self, bucketPrefix, testBucketNumber, dataFormat): for value in values: SumOfSquareDifferences += (value - theMean)**2 columns[col] = 0 - self.ssd[category][col] = math.sqrt(SumOfSquareDifferences / (classes[category] - 1)) - + self.ssd[category][col] = math.sqrt( + SumOfSquareDifferences / (classes[category] - 1)) - def testBucket(self, bucketPrefix, bucketNumber): """Evaluate the classifier with data from the file bucketPrefix-bucketNumber""" - + filename = "%s-%02i" % (bucketPrefix, bucketNumber) f = open(filename) lines = f.readlines() @@ -142,12 +140,12 @@ def testBucket(self, bucketPrefix, bucketNumber): numV = [] classInColumn = -1 for i in range(len(self.format)): - if self.format[i] == 'num': - numV.append(float(data[i])) - elif self.format[i] == 'attr': - vector.append(data[i]) - elif self.format[i] == 'class': - classInColumn = i + if self.format[i] == 'num': + numV.append(float(data[i])) + elif self.format[i] == 'attr': + vector.append(data[i]) + elif self.format[i] == 'class': + classInColumn = i theRealClass = data[classInColumn] classifiedAs = self.classify(vector, numV) totals.setdefault(theRealClass, {}) @@ -155,8 +153,6 @@ def testBucket(self, bucketPrefix, bucketNumber): totals[theRealClass][classifiedAs] += 1 return totals - - def classify(self, itemVector, numVector): """Return class we think item Vector is in""" results = [] @@ -165,7 
+161,7 @@ def classify(self, itemVector, numVector): prob = prior col = 1 for attrValue in itemVector: - if not attrValue in self.conditional[category][col]: + if attrValue not in self.conditional[category][col]: # we did not find any instances of this attribute value # occurring with this category so prob = 0 prob = 0 @@ -173,7 +169,7 @@ def classify(self, itemVector, numVector): prob = prob * self.conditional[category][col][attrValue] col += 1 col = 1 - for x in numVector: + for x in numVector: mean = self.means[category][col] ssd = self.ssd[category][col] ePart = math.pow(math.e, -(x - mean)**2/(2*ssd**2)) @@ -181,9 +177,9 @@ def classify(self, itemVector, numVector): col += 1 results.append((prob, category)) # return the category with the highest probability - #print(results) + # print(results) return(max(results)[1]) - + def tenfold(bucketPrefix, dataFormat): results = {} @@ -195,22 +191,22 @@ def tenfold(bucketPrefix, dataFormat): for (ckey, cvalue) in value.items(): results[key].setdefault(ckey, 0) results[key][ckey] += cvalue - + # now print results categories = list(results.keys()) categories.sort() - print( "\n Classified as: ") - header = " " + print("\n Classified as: ") + header = " " subheader = " +" for category in categories: header += "% 10s " % category subheader += "-------+" - print (header) - print (subheader) + print(header) + print(subheader) total = 0.0 correct = 0.0 for category in categories: - row = " %10s |" % category + row = " %10s |" % category for c2 in categories: if c2 in results[category]: count = results[category][c2] @@ -222,31 +218,42 @@ def tenfold(bucketPrefix, dataFormat): correct += count print(row) print(subheader) - print("\n%5.3f percent correct" %((correct * 100) / total)) + print("\n%5.3f percent correct" % ((correct * 100) / total)) print("total of %i instances" % total) def pdf(mean, ssd, x): - """Probability Density Function computing P(x|y) - input is the mean, sample standard deviation for all the items in y, - and x.""" - ePart = math.pow(math.e, -(x-mean)**2/(2*ssd**2)) - print (ePart) - return (1.0 / (math.sqrt(2*math.pi)*ssd)) * ePart - -#tenfold("house-votes/hv", "class\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr") -#c = Classifier("house-votes/hv", 0, -# "class\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr") -#tenfold("pimaSmall/pimaSmall", "num num num num num num num num class") -#tenfold("pima/pima", "num num num num num num num num class") -tenfold("mpgData/mpgData", "class attr num num num num comment") + """Probability Density Function computing P(x|y) + input is the mean, sample standard deviation for all the items in y, + and x.""" + ePart = math.pow(math.e, -(x-mean)**2/(2*ssd**2)) + print (ePart) + return (1.0 / (math.sqrt(2*math.pi)*ssd)) * ePart -#c = Classifier("iHealth/i", 10, -# "attr\tattr\tattr\tattr\tclass") -#print(c.classify([], [3, 78, 50, 32, 88, 31.0, 0.248, 26])) -#c = Classifier("mpgData/mpgData", 5, "class num num num num num comment") +# tenfold( +# "house-votes/hv", +# "class\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\t" +# "attr\tattr\tattr\tattr\tattr\tattr\tattr" +# ) +# c = Classifier( +# "house-votes/hv", +# 0, +# "class\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\t" +# "attr\tattr\tattr\tattr\tattr\tattr\tattr" +# ) +# tenfold("pimaSmall/pimaSmall", "num num num num num num num num class") +# tenfold("pima/pima", "num num num num num num num num class") 
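
The pdf function above is the Gaussian probability density P(x|y) = e^(-(x-mean)^2 / (2*ssd^2)) / (sqrt(2*pi)*ssd). A self-contained evaluation at hypothetical values (mean 100, sample standard deviation 15, x = 110):

import math

mean, ssd, x = 100.0, 15.0, 110.0               # hypothetical inputs
ePart = math.pow(math.e, -(x - mean)**2 / (2 * ssd**2))
print((1.0 / (math.sqrt(2 * math.pi) * ssd)) * ePart)   # ≈ 0.0213
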
+tenfold("mpgData/mpgData", "class attr num num num num comment") -#c = Classifier("house-votes-filtered/hv", 5, "class\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr") -#t = c.testBucket("house-votes-filtered/hv", 5) -#print(t) +# c = Classifier("iHealth/i", 10, "attr\tattr\tattr\tattr\tclass") +# print(c.classify([], [3, 78, 50, 32, 88, 31.0, 0.248, 26])) +# c = Classifier("mpgData/mpgData", 5, "class num num num num num comment") +# c = Classifier( +# "house-votes-filtered/hv", +# 5, +# "class\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\t" +# "attr\tattr\tattr\tattr\tattr\tattr\tattr" +# ) +# t = c.testBucket("house-votes-filtered/hv", 5) +# print(t) diff --git a/ch6/naiveBayesDensityFunction.py b/ch6/naiveBayesDensityFunction.py index a28d08f..26b623f 100644 --- a/ch6/naiveBayesDensityFunction.py +++ b/ch6/naiveBayesDensityFunction.py @@ -1,5 +1,5 @@ - -# + +# # Naive Bayes Classifier chapter 6 # @@ -8,6 +8,7 @@ import math + class Classifier: def __init__(self, bucketPrefix, testBucketNumber, dataFormat): @@ -17,25 +18,24 @@ def __init__(self, bucketPrefix, testBucketNumber, dataFormat): for the iHealth data the format is: "attr attr attr attr class" """ - + total = 0 classes = {} # counts used for attributes that are not numeric counts = {} # totals used for attributes that are numereric - # we will use these to compute the mean and sample standard deviation for - # each attribute - class pair. + # we will use these to compute the mean and sample standard deviation + # for each attribute - class pair. totals = {} numericValues = {} - - + # reading the data in from the file - + self.format = dataFormat.strip().split('\t') - # + # self.prior = {} self.conditional = {} - + # for each of the buckets numbered 1 through 10: for i in range(1, 11): # if it is not the bucket we should ignore, read in the data @@ -53,7 +53,7 @@ def __init__(self, bucketPrefix, testBucketNumber, dataFormat): if self.format[i] == 'num': nums.append(float(fields[i])) elif self.format[i] == 'attr': - vector.append(fields[i]) + vector.append(fields[i]) elif self.format[i] == 'comment': ignore.append(fields[i]) elif self.format[i] == 'class': @@ -77,12 +77,11 @@ def __init__(self, bucketPrefix, testBucketNumber, dataFormat): for columnValue in nums: col += 1 totals[category].setdefault(col, 0) - #totals[category][col].setdefault(columnValue, 0) + # totals[category][col].setdefault(columnValue, 0) totals[category][col] += columnValue numericValues[category].setdefault(col, []) numericValues[category][col].append(columnValue) - - + # # ok done counting. 
now compute probabilities # @@ -94,13 +93,13 @@ def __init__(self, bucketPrefix, testBucketNumber, dataFormat): # now compute conditional probabilities p(h|D) # for (category, columns) in counts.items(): - self.conditional.setdefault(category, {}) - for (col, valueCounts) in columns.items(): - self.conditional[category].setdefault(col, {}) - for (attrValue, count) in valueCounts.items(): - self.conditional[category][col][attrValue] = ( - count / classes[category]) - self.tmp = counts + self.conditional.setdefault(category, {}) + for (col, valueCounts) in columns.items(): + self.conditional[category].setdefault(col, {}) + for (attrValue, count) in valueCounts.items(): + self.conditional[category][col][attrValue] = ( + count / classes[category]) + self.tmp = counts # # now compute mean and sample standard deviation # @@ -113,7 +112,7 @@ def __init__(self, bucketPrefix, testBucketNumber, dataFormat): # standard deviation self.ssd = {} for (category, columns) in numericValues.items(): - + self.ssd.setdefault(category, {}) for (col, values) in columns.items(): SumOfSquareDifferences = 0 @@ -121,14 +120,13 @@ def __init__(self, bucketPrefix, testBucketNumber, dataFormat): for value in values: SumOfSquareDifferences += (value - theMean)**2 columns[col] = 0 - self.ssd[category][col] = math.sqrt(SumOfSquareDifferences / (classes[category] - 1)) - + self.ssd[category][col] = math.sqrt( + SumOfSquareDifferences / (classes[category] - 1)) - def testBucket(self, bucketPrefix, bucketNumber): """Evaluate the classifier with data from the file bucketPrefix-bucketNumber""" - + filename = "%s-%02i" % (bucketPrefix, bucketNumber) f = open(filename) lines = f.readlines() @@ -142,12 +140,12 @@ def testBucket(self, bucketPrefix, bucketNumber): numV = [] classInColumn = -1 for i in range(len(self.format)): - if self.format[i] == 'num': - numV.append(float(data[i])) - elif self.format[i] == 'attr': - vector.append(data[i]) - elif self.format[i] == 'class': - classInColumn = i + if self.format[i] == 'num': + numV.append(float(data[i])) + elif self.format[i] == 'attr': + vector.append(data[i]) + elif self.format[i] == 'class': + classInColumn = i theRealClass = data[classInColumn] classifiedAs = self.classify(vector, numV) totals.setdefault(theRealClass, {}) @@ -155,8 +153,6 @@ def testBucket(self, bucketPrefix, bucketNumber): totals[theRealClass][classifiedAs] += 1 return totals - - def classify(self, itemVector, numVector): """Return class we think item Vector is in""" results = [] @@ -165,7 +161,7 @@ def classify(self, itemVector, numVector): prob = prior col = 1 for attrValue in itemVector: - if not attrValue in self.conditional[category][col]: + if attrValue not in self.conditional[category][col]: # we did not find any instances of this attribute value # occurring with this category so prob = 0 prob = 0 @@ -173,7 +169,7 @@ def classify(self, itemVector, numVector): prob = prob * self.conditional[category][col][attrValue] col += 1 col = 1 - for x in numVector: + for x in numVector: mean = self.means[category][col] ssd = self.ssd[category][col] ePart = math.pow(math.e, -(x - mean)**2/(2*ssd**2)) @@ -181,9 +177,9 @@ def classify(self, itemVector, numVector): col += 1 results.append((prob, category)) # return the category with the highest probability - #print(results) + # print(results) return(max(results)[1]) - + def tenfold(bucketPrefix, dataFormat): results = {} @@ -195,22 +191,22 @@ def tenfold(bucketPrefix, dataFormat): for (ckey, cvalue) in value.items(): results[key].setdefault(ckey, 0) results[key][ckey] 
+= cvalue - + # now print results categories = list(results.keys()) categories.sort() - print( "\n Classified as: ") - header = " " + print("\n Classified as: ") + header = " " subheader = " +" for category in categories: header += "% 10s " % category subheader += "-------+" - print (header) - print (subheader) + print(header) + print(subheader) total = 0.0 correct = 0.0 for category in categories: - row = " %10s |" % category + row = " %10s |" % category for c2 in categories: if c2 in results[category]: count = results[category][c2] @@ -222,29 +218,41 @@ def tenfold(bucketPrefix, dataFormat): correct += count print(row) print(subheader) - print("\n%5.3f percent correct" %((correct * 100) / total)) + print("\n%5.3f percent correct" % ((correct * 100) / total)) print("total of %i instances" % total) def pdf(mean, ssd, x): - """Probability Density Function computing P(x|y) - input is the mean, sample standard deviation for all the items in y, - and x.""" - ePart = math.pow(math.e, -(x-mean)**2/(2*ssd**2)) - print (ePart) - return (1.0 / (math.sqrt(2*math.pi)*ssd)) * ePart - -#tenfold("house-votes/hv", "class\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr") -#c = Classifier("house-votes/hv", 0, -# "class\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr") + """Probability Density Function computing P(x|y) + input is the mean, sample standard deviation for all the items in y, + and x.""" + ePart = math.pow(math.e, -(x-mean)**2/(2*ssd**2)) + print (ePart) + return (1.0 / (math.sqrt(2*math.pi)*ssd)) * ePart + +# tenfold( +# "house-votes/hv", +# "class\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\t" +# "attr\tattr\tattr\tattr\tattr\tattr\tattr" +# ) +# c = Classifier( +# "house-votes/hv", +# 0, +# "class\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\t" +# "attr\tattr\tattr\tattr\tattr\tattr\tattr" +# ) tenfold("pimaSmall/pimaSmall", "num num num num num num num num class") tenfold("pima/pima", "num num num num num num num num class") -#c = Classifier("iHealth/i", 10, +# c = Classifier("iHealth/i", 10, # "attr\tattr\tattr\tattr\tclass") -#print(c.classify([], [3, 78, 50, 32, 88, 31.0, 0.248, 26])) - -#c = Classifier("house-votes-filtered/hv", 5, "class\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr") -#t = c.testBucket("house-votes-filtered/hv", 5) -#print(t) +# print(c.classify([], [3, 78, 50, 32, 88, 31.0, 0.248, 26])) +# c = Classifier( +# "house-votes-filtered/hv", +# 5, +# "class\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\t" +# "attr\tattr\tattr\tattr\tattr\tattr\tattr" +# ) +# t = c.testBucket("house-votes-filtered/hv", 5) +# print(t) diff --git a/ch6/naiveBayesDensityFunctionTraining.py b/ch6/naiveBayesDensityFunctionTraining.py index 3c16f06..0ee841b 100644 --- a/ch6/naiveBayesDensityFunctionTraining.py +++ b/ch6/naiveBayesDensityFunctionTraining.py @@ -1,5 +1,5 @@ - -# + +# # Naive Bayes Classifier chapter 6 # @@ -8,6 +8,7 @@ import math + class Classifier: def __init__(self, bucketPrefix, testBucketNumber, dataFormat): @@ -17,25 +18,24 @@ def __init__(self, bucketPrefix, testBucketNumber, dataFormat): for the iHealth data the format is: "attr attr attr attr class" """ - + total = 0 classes = {} # counts used for attributes that are not numeric counts = {} # totals used for attributes that are numereric - # we will use these to compute the mean and sample standard deviation for - # each attribute - class pair. 
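# Aside: tenfold() above trains ten classifiers, each with one bucket held
# out, and sums the per-bucket confusion counts returned by testBucket().
# Accuracy is the diagonal of the summed matrix over the grand total. A toy
# aggregation with made-up counts (not results from the book's data):
toy_results = {'0': {'0': 80, '1': 20}, '1': {'0': 15, '1': 85}}
toy_correct = sum(toy_results[c].get(c, 0) for c in toy_results)      # 165
toy_total = sum(sum(row.values()) for row in toy_results.values())    # 200
print("%5.3f percent correct" % ((toy_correct * 100) / toy_total))    # 82.500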
diff --git a/ch6/naiveBayesDensityFunctionTraining.py b/ch6/naiveBayesDensityFunctionTraining.py
index 3c16f06..0ee841b 100644
--- a/ch6/naiveBayesDensityFunctionTraining.py
+++ b/ch6/naiveBayesDensityFunctionTraining.py
@@ -1,5 +1,5 @@
-
-#
+
+#
 # Naive Bayes Classifier chapter 6
 #
@@ -8,6 +8,7 @@
 import math

+
 class Classifier:

     def __init__(self, bucketPrefix, testBucketNumber, dataFormat):
@@ -17,25 +18,24 @@ def __init__(self, bucketPrefix, testBucketNumber, dataFormat):
        for the iHealth data the format is:
        "attr attr attr attr class"
        """
-
+
         total = 0
         classes = {}
         # counts used for attributes that are not numeric
         counts = {}
         # totals used for attributes that are numeric
-        # we will use these to compute the mean and sample standard deviation for
-        # each attribute - class pair.
+        # we will use these to compute the mean and sample standard deviation
+        # for each attribute - class pair.
         totals = {}
         numericValues = {}
-
-
+
         # reading the data in from the file
-
+
         self.format = dataFormat.strip().split('\t')
-        #
+        #
         self.prior = {}
         self.conditional = {}
-
+
         # for each of the buckets numbered 1 through 10:
         for i in range(1, 11):
             # if it is not the bucket we should ignore, read in the data
@@ -53,7 +53,7 @@ def __init__(self, bucketPrefix, testBucketNumber, dataFormat):
                     if self.format[i] == 'num':
                         nums.append(float(fields[i]))
                     elif self.format[i] == 'attr':
-                        vector.append(fields[i])
+                        vector.append(fields[i])
                     elif self.format[i] == 'comment':
                         ignore.append(fields[i])
                     elif self.format[i] == 'class':
@@ -77,12 +77,11 @@ def __init__(self, bucketPrefix, testBucketNumber, dataFormat):
             for columnValue in nums:
                 col += 1
                 totals[category].setdefault(col, 0)
-                #totals[category][col].setdefault(columnValue, 0)
+                # totals[category][col].setdefault(columnValue, 0)
                 totals[category][col] += columnValue
                 numericValues[category].setdefault(col, [])
                 numericValues[category][col].append(columnValue)
-
-
+
         #
         # ok done counting. now compute probabilities
         #
@@ -94,24 +93,25 @@ def __init__(self, bucketPrefix, testBucketNumber, dataFormat):
         # now compute conditional probabilities p(h|D)
         #
         for (category, columns) in counts.items():
-              self.conditional.setdefault(category, {})
-              for (col, valueCounts) in columns.items():
-                  self.conditional[category].setdefault(col, {})
-                  for (attrValue, count) in valueCounts.items():
-                      self.conditional[category][col][attrValue] = (
-                          count / classes[category])
-              self.tmp = counts
+            self.conditional.setdefault(category, {})
+            for (col, valueCounts) in columns.items():
+                self.conditional[category].setdefault(col, {})
+                for (attrValue, count) in valueCounts.items():
+                    self.conditional[category][col][attrValue] = (
+                        count / classes[category])
+            self.tmp = counts
         #
         # now compute mean and sample standard deviation
         #
         self.means = {}
         self.ssd = {}
         # ADD YOUR CODE HERE
-
-    # test the code
-c = Classifier("pimaSmall/pimaSmall", 1, "num num num num num num num num class")
+# test the code
+
+c = Classifier(
+    "pimaSmall/pimaSmall", 1, "num num num num num num num num class")

 # test means computation
 assert('1' in c.means)
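# Aside: the "ADD YOUR CODE HERE" exercise above asks for the mean and the
# *sample* standard deviation, which divides by n - 1 (Bessel's correction),
# matching classes[category] - 1 in the solution file that follows. A
# minimal standalone sketch with toy numbers (not the pimaSmall data):
import math

def sample_std(values):
    mean = sum(values) / len(values)
    sum_sq = sum((v - mean) ** 2 for v in values)
    return math.sqrt(sum_sq / (len(values) - 1))

assert abs(sample_std([2.0, 4.0, 6.0]) - 2.0) < 1e-12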
diff --git a/ch6/naiveBayesDensityFunctionTrainingSolution.py b/ch6/naiveBayesDensityFunctionTrainingSolution.py
index d62fe1f..eb525a4 100644
--- a/ch6/naiveBayesDensityFunctionTrainingSolution.py
+++ b/ch6/naiveBayesDensityFunctionTrainingSolution.py
@@ -1,5 +1,5 @@
-
-#
+
+#
 # Naive Bayes Classifier chapter 6
 #
@@ -8,6 +8,7 @@
 import math

+
 class Classifier:

     def __init__(self, bucketPrefix, testBucketNumber, dataFormat):
@@ -17,25 +18,24 @@ def __init__(self, bucketPrefix, testBucketNumber, dataFormat):
        for the iHealth data the format is:
        "attr attr attr attr class"
        """
-
+
         total = 0
         classes = {}
         # counts used for attributes that are not numeric
         counts = {}
         # totals used for attributes that are numeric
-        # we will use these to compute the mean and sample standard deviation for
-        # each attribute - class pair.
+        # we will use these to compute the mean and sample standard deviation
+        # for each attribute - class pair.
         totals = {}
         numericValues = {}
-
-
+
         # reading the data in from the file
-
+
         self.format = dataFormat.strip().split('\t')
-        #
+        #
         self.prior = {}
         self.conditional = {}
-
+
         # for each of the buckets numbered 1 through 10:
         for i in range(1, 11):
             # if it is not the bucket we should ignore, read in the data
@@ -53,7 +53,7 @@ def __init__(self, bucketPrefix, testBucketNumber, dataFormat):
                     if self.format[i] == 'num':
                         nums.append(float(fields[i]))
                     elif self.format[i] == 'attr':
-                        vector.append(fields[i])
+                        vector.append(fields[i])
                     elif self.format[i] == 'comment':
                         ignore.append(fields[i])
                     elif self.format[i] == 'class':
@@ -77,12 +77,11 @@ def __init__(self, bucketPrefix, testBucketNumber, dataFormat):
             for columnValue in nums:
                 col += 1
                 totals[category].setdefault(col, 0)
-                #totals[category][col].setdefault(columnValue, 0)
+                # totals[category][col].setdefault(columnValue, 0)
                 totals[category][col] += columnValue
                 numericValues[category].setdefault(col, [])
                 numericValues[category][col].append(columnValue)
-
-
+
         #
         # ok done counting. now compute probabilities
         #
@@ -94,13 +93,13 @@ def __init__(self, bucketPrefix, testBucketNumber, dataFormat):
         # now compute conditional probabilities p(h|D)
         #
         for (category, columns) in counts.items():
-              self.conditional.setdefault(category, {})
-              for (col, valueCounts) in columns.items():
-                  self.conditional[category].setdefault(col, {})
-                  for (attrValue, count) in valueCounts.items():
-                      self.conditional[category][col][attrValue] = (
-                          count / classes[category])
-              self.tmp = counts
+            self.conditional.setdefault(category, {})
+            for (col, valueCounts) in columns.items():
+                self.conditional[category].setdefault(col, {})
+                for (attrValue, count) in valueCounts.items():
+                    self.conditional[category][col][attrValue] = (
+                        count / classes[category])
+            self.tmp = counts
         #
         # now compute mean and sample standard deviation
         #
@@ -112,9 +111,9 @@ def __init__(self, bucketPrefix, testBucketNumber, dataFormat):
             for (col, cTotal) in columns.items():
                 self.means[category][col] = cTotal / classes[category]
         # standard deviation
-
+
         for (category, columns) in numericValues.items():
-
+
             self.ssd.setdefault(category, {})
             for (col, values) in columns.items():
                 SumOfSquareDifferences = 0
@@ -122,12 +121,15 @@ def __init__(self, bucketPrefix, testBucketNumber, dataFormat):
                 for value in values:
                     SumOfSquareDifferences += (value - theMean)**2
                 columns[col] = 0
-                self.ssd[category][col] = math.sqrt(SumOfSquareDifferences / (classes[category] - 1))
-
+                self.ssd[category][col] = math.sqrt(
+                    SumOfSquareDifferences / (classes[category] - 1)
+                )
+
-    # test the code
+# test the code
-c = Classifier("pimaSmall/pimaSmall", 1, "num num num num num num num num class")
+c = Classifier(
+    "pimaSmall/pimaSmall", 1, "num num num num num num num num class")

 # test means computation
 assert(c.means['1'][1] == 5.25)
diff --git a/ch7/bayesSentiment.py b/ch7/bayesSentiment.py
index a29ab13..7057768 100644
--- a/ch7/bayesSentiment.py
+++ b/ch7/bayesSentiment.py
@@ -1,5 +1,8 @@
 from __future__ import print_function
-import os, codecs, math
+import os
+import codecs
+import math
+

 class BayesText:

@@ -22,12 +25,12 @@ def __init__(self, trainingdir, stopwordlist, ignoreBucket):
             self.stopwords[line.strip()] = 1
         f.close()
         categories = os.listdir(trainingdir)
-        #filter out files that are not directories
+        # filter out files that are not directories
         self.categories = [filename for filename in categories
                            if os.path.isdir(trainingdir + filename)]
         print("Counting ...")
         for category in self.categories:
-            #print(' ' + category)
+            # print(' ' + category)
             (self.prob[category],
              self.totals[category]) = self.train(trainingdir, category,
                                                  ignoreBucket)
@@ -45,9 +48,9 @@ def __init__(self, trainingdir, stopwordlist, ignoreBucket):
                 del self.vocabulary[word]
         # now compute probabilities
         vocabLength = len(self.vocabulary)
-        #print("Computing probabilities:")
+        # print("Computing probabilities:")
         for category in self.categories:
-            #print(' ' + category)
+            # print(' ' + category)
             denominator = self.totals[category] + vocabLength
             for word in self.vocabulary:
                 if word in self.prob[category]:
@@ -56,8 +59,7 @@ def __init__(self, trainingdir, stopwordlist, ignoreBucket):
                     count = 1
                 self.prob[category][word] = (float(count + 1)
                                              / denominator)
-        #print ("DONE TRAINING\n\n")
-
+        # print ("DONE TRAINING\n\n")

     def train(self, trainingdir, category, bucketNumberToIgnore):
         """counts word occurrences for a particular category"""
@@ -70,16 +72,17 @@ def train(self, trainingdir, category, bucketNumberToIgnore):
             if directory != ignore:
                 currentBucket = trainingdir + category + "/" + directory
                 files = os.listdir(currentBucket)
-                #print("   " + currentBucket)
+                # print("   " + currentBucket)
                 for file in files:
-                    f = codecs.open(currentBucket + '/' + file, 'r', 'iso8859-1')
+                    f = codecs.open(
+                        currentBucket + '/' + file, 'r', 'iso8859-1')
                     for line in f:
                         tokens = line.split()
                         for token in tokens:
                             # get rid of punctuation and lowercase token
                             token = token.strip('\'".,?:-')
                             token = token.lower()
-                            if token != '' and not token in self.stopwords:
+                            if token != '' and token not in self.stopwords:
                                 self.vocabulary.setdefault(token, 0)
                                 self.vocabulary[token] += 1
                                 counts.setdefault(token, 0)
@@ -87,8 +90,7 @@ def train(self, trainingdir, category, bucketNumberToIgnore):
                                 total += 1
                     f.close()
         return(counts, total)
-
-
+
     def classify(self, filename):
         results = {}
         for category in self.categories:
@@ -97,7 +99,7 @@ def classify(self, filename):
         for line in f:
             tokens = line.split()
             for token in tokens:
-                #print(token)
+                # print(token)
                 token = token.strip('\'".,?:-').lower()
                 if token in self.vocabulary:
                     for category in self.categories:
@@ -107,14 +109,14 @@ def classify(self, filename):
                             self.prob[category][token])
        f.close()
        results = list(results.items())
-        results.sort(key=lambda tuple: tuple[1], reverse = True)
+        results.sort(key=lambda tuple: tuple[1], reverse=True)
        # for debugging I can change this to give me the entire list
        return results[0][0]

    def testCategory(self, direc, category, bucketNumber):
        results = {}
        directory = direc + ("%i/" % bucketNumber)
-        #print("Testing " + directory)
+        # print("Testing " + directory)
        files = os.listdir(directory)
        total = 0
        correct = 0
@@ -123,8 +125,8 @@ def testCategory(self, direc, category, bucketNumber):
            result = self.classify(directory + file)
            results.setdefault(result, 0)
            results[result] += 1
-            #if result == category:
-            #    correct += 1
+            # if result == category:
+            #     correct += 1
        return results

    def test(self, testdir, bucketNumber):
@@ -133,20 +135,21 @@ def test(self, testdir, bucketNumber):
        category"""
        results = {}
        categories = os.listdir(testdir)
-        #filter out files that are not directories
+        # filter out files that are not directories
        categories = [filename for filename in categories if
                      os.path.isdir(testdir + filename)]
        correct = 0
        total = 0
        for category in categories:
-            #print(".", end="")
+            # print(".", end="")
            results[category] = self.testCategory(
                testdir + category + '/', category, bucketNumber)
        return results

+
def tenfold(dataPrefix, stoplist):
    results = {}
-    for i in range(0,10):
+    for i in range(0, 10):
        bT = BayesText(dataPrefix, stoplist, i)
        r = bT.test(theDir, i)
        for (key, value) in r.items():
@@ -156,18 +159,18 @@ def tenfold(dataPrefix, stoplist):
                results[key][ckey] += cvalue
    categories = list(results.keys())
    categories.sort()
-    print( "\n Classified as: ")
-    header = " "
+    print("\n Classified as: ")
+    header = " "
    subheader = " +"
    for category in categories:
        header += "% 2s " % category
        subheader += "-----+"
-    print (header)
-    print (subheader)
+    print(header)
+    print(subheader)
    total = 0.0
    correct = 0.0
    for category in categories:
-        row = " %s |" % category
+        row = " %s |" % category
        for c2 in categories:
            if c2 in results[category]:
                count = results[category][c2]
@@ -179,7 +182,7 @@ def tenfold(dataPrefix, stoplist):
            correct += count
        print(row)
        print(subheader)
-    print("\n%5.3f percent correct" %((correct * 100) / total))
+    print("\n%5.3f percent correct" % ((correct * 100) / total))
    print("total of %i instances" % total)

 # change these to match your directory structure
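# Aside: the training step in bayesSentiment.py above uses add-one (Laplace)
# smoothing,
#
#     P(word | category) = (count + 1) / (totals[category] + |vocabulary|)
#
# so a word never seen with a category still gets a small nonzero
# probability instead of zeroing out the whole product. A toy check with
# hypothetical counts (a category with 100 word tokens, 50-word vocabulary):
assert (0 + 1) / (100 + 50) == 1 / 150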
stoplistfile = "/Users/raz/Dropbox/guide/data/20news-bydate/emptyStoplist.txt" bT = BayesText(trainingDir, stoplistfile) print("Running Test ...") -result = bT.classify("/Users/raz/Dropbox/guide/data/20news-bydate/20news-bydate-test/rec.motorcycles/104673") +result = bT.classify( + "/Users/raz/Dropbox/guide/data/20news-bydate/20news-bydate-test/" + "rec.motorcycles/104673" +) print(result) -result = bT.classify("/Users/raz/Dropbox/guide/data/20news-bydate/20news-bydate-test/sci.med/59246") +result = bT.classify( + "/Users/raz/Dropbox/guide/data/20news-bydate/20news-bydate-test/" + "sci.med/59246" +) print(result) -result = bT.classify("/Users/raz/Dropbox/guide/data/20news-bydate/20news-bydate-test/soc.religion.christian/21424") +result = bT.classify( + "/Users/raz/Dropbox/guide/data/20news-bydate/20news-bydate-test/" + "soc.religion.christian/21424" +) print(result) - diff --git a/ch7/bayesText.py b/ch7/bayesText.py index 78b3a5a..40cd887 100644 --- a/ch7/bayesText.py +++ b/ch7/bayesText.py @@ -1,5 +1,8 @@ from __future__ import print_function -import os, codecs, math +import os +import codecs +import math + class BayesText: @@ -22,7 +25,7 @@ def __init__(self, trainingdir, stopwordlist): self.stopwords[line.strip()] = 1 f.close() categories = os.listdir(trainingdir) - #filter out files that are not directories + # filter out files that are not directories self.categories = [filename for filename in categories if os.path.isdir(trainingdir + filename)] print("Counting ...") @@ -56,7 +59,6 @@ def __init__(self, trainingdir, stopwordlist): self.prob[category][word] = (float(count + 1) / denominator) print ("DONE TRAINING\n\n") - def train(self, trainingdir, category): """counts word occurrences for a particular category""" @@ -65,7 +67,7 @@ def train(self, trainingdir, category): counts = {} total = 0 for file in files: - #print(currentdir + '/' + file) + # print(currentdir + '/' + file) f = codecs.open(currentdir + '/' + file, 'r', 'iso8859-1') for line in f: tokens = line.split() @@ -73,7 +75,7 @@ def train(self, trainingdir, category): # get rid of punctuation and lowercase token token = token.strip('\'".,?:-') token = token.lower() - if token != '' and not token in self.stopwords: + if token != '' and token not in self.stopwords: self.vocabulary.setdefault(token, 0) self.vocabulary[token] += 1 counts.setdefault(token, 0) @@ -81,8 +83,7 @@ def train(self, trainingdir, category): total += 1 f.close() return(counts, total) - - + def classify(self, filename): results = {} for category in self.categories: @@ -91,7 +92,7 @@ def classify(self, filename): for line in f: tokens = line.split() for token in tokens: - #print(token) + # print(token) token = token.strip('\'".,?:-').lower() if token in self.vocabulary: for category in self.categories: @@ -101,7 +102,7 @@ def classify(self, filename): self.prob[category][token]) f.close() results = list(results.items()) - results.sort(key=lambda tuple: tuple[1], reverse = True) + results.sort(key=lambda tuple: tuple[1], reverse=True) # for debugging I can change this to give me the entire list return results[0][0] @@ -121,7 +122,7 @@ def test(self, testdir): organized into subdirectories--each subdir is a classification category""" categories = os.listdir(testdir) - #filter out files that are not directories + # filter out files that are not directories categories = [filename for filename in categories if os.path.isdir(testdir + filename)] correct = 0 @@ -134,7 +135,7 @@ def test(self, testdir): total += catTotal print("\n\nAccuracy is %f%% (%i test 
instances)" % ((float(correct) / total) * 100, total)) - + # change these to match your directory structure baseDirectory = "/Users/raz/Dropbox/guide/data/20news-bydate/" trainingDir = baseDirectory + "20news-bydate-train/" diff --git a/ch7/naiveBayes.py b/ch7/naiveBayes.py index e6d44b3..c625fcd 100644 --- a/ch7/naiveBayes.py +++ b/ch7/naiveBayes.py @@ -1,5 +1,5 @@ - -# + +# # Naive Bayes Classifier chapter 6 # @@ -15,14 +15,13 @@ def __init__(self, bucketPrefix, testBucketNumber, dataFormat): for the iHealth data the format is: "attr attr attr attr class" """ - + total = 0 classes = {} counts = {} - - + # reading the data in from the file - + self.format = dataFormat.strip().split('\t') self.prior = {} self.conditional = {} @@ -42,7 +41,7 @@ def __init__(self, bucketPrefix, testBucketNumber, dataFormat): if self.format[i] == 'num': vector.append(float(fields[i])) elif self.format[i] == 'attr': - vector.append(fields[i]) + vector.append(fields[i]) elif self.format[i] == 'comment': ignore.append(fields[i]) elif self.format[i] == 'class': @@ -59,7 +58,7 @@ def __init__(self, bucketPrefix, testBucketNumber, dataFormat): counts[category].setdefault(col, {}) counts[category][col].setdefault(columnValue, 0) counts[category][col][columnValue] += 1 - + # # ok done counting. now compute probabilities # @@ -71,20 +70,18 @@ def __init__(self, bucketPrefix, testBucketNumber, dataFormat): # now compute conditional probabilities p(D|h) # for (category, columns) in counts.items(): - self.conditional.setdefault(category, {}) - for (col, valueCounts) in columns.items(): - self.conditional[category].setdefault(col, {}) - for (attrValue, count) in valueCounts.items(): - self.conditional[category][col][attrValue] = ( - count / classes[category]) - self.tmp = counts - - - + self.conditional.setdefault(category, {}) + for (col, valueCounts) in columns.items(): + self.conditional[category].setdefault(col, {}) + for (attrValue, count) in valueCounts.items(): + self.conditional[category][col][attrValue] = ( + count / classes[category]) + self.tmp = counts + def testBucket(self, bucketPrefix, bucketNumber): """Evaluate the classifier with data from the file bucketPrefix-bucketNumber""" - + filename = "%s-%02i" % (bucketPrefix, bucketNumber) f = open(filename) lines = f.readlines() @@ -97,12 +94,12 @@ def testBucket(self, bucketPrefix, bucketNumber): vector = [] classInColumn = -1 for i in range(len(self.format)): - if self.format[i] == 'num': - vector.append(float(data[i])) - elif self.format[i] == 'attr': - vector.append(data[i]) - elif self.format[i] == 'class': - classInColumn = i + if self.format[i] == 'num': + vector.append(float(data[i])) + elif self.format[i] == 'attr': + vector.append(data[i]) + elif self.format[i] == 'class': + classInColumn = i theRealClass = data[classInColumn] classifiedAs = self.classify(vector) totals.setdefault(theRealClass, {}) @@ -110,8 +107,6 @@ def testBucket(self, bucketPrefix, bucketNumber): totals[theRealClass][classifiedAs] += 1 return totals - - def classify(self, itemVector): """Return class we think item Vector is in""" results = [] @@ -119,7 +114,7 @@ def classify(self, itemVector): prob = prior col = 1 for attrValue in itemVector: - if not attrValue in self.conditional[category][col]: + if attrValue not in self.conditional[category][col]: # we did not find any instances of this attribute value # occurring with this category so prob = 0 prob = 0 @@ -129,7 +124,7 @@ def classify(self, itemVector): results.append((prob, category)) # return the category with the highest 
probability return(max(results)[1]) - + def tenfold(bucketPrefix, dataFormat): results = {} @@ -141,22 +136,22 @@ def tenfold(bucketPrefix, dataFormat): for (ckey, cvalue) in value.items(): results[key].setdefault(ckey, 0) results[key][ckey] += cvalue - + # now print results categories = list(results.keys()) categories.sort() - print( "\n Classified as: ") - header = " " + print("\n Classified as: ") + header = " " subheader = " +" for category in categories: header += "% 10s " % category subheader += "-------+" - print (header) - print (subheader) + print(header) + print(subheader) total = 0.0 correct = 0.0 for category in categories: - row = " %10s |" % category + row = " %10s |" % category for c2 in categories: if c2 in results[category]: count = results[category][c2] @@ -168,18 +163,29 @@ def tenfold(bucketPrefix, dataFormat): correct += count print(row) print(subheader) - print("\n%5.3f percent correct" %((correct * 100) / total)) + print("\n%5.3f percent correct" % ((correct * 100) / total)) print("total of %i instances" % total) -tenfold("house-votes/hv", "class\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr") -#c = Classifier("house-votes/hv", 0, -# "class\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr") - -#c = Classifier("iHealth/i", 10, -# "attr\tattr\tattr\tattr\tclass") -#print(c.classify(['health', 'moderate', 'moderate', 'yes'])) +tenfold( + "house-votes/hv", + "class\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\t" + "attr\tattr\tattr\tattr\tattr\tattr" +) +# c = Classifier( +# "house-votes/hv", +# 0, +# "class\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\t" +# "attr\tattr\tattr\tattr\tattr\tattr\tattr" +# ) -#c = Classifier("house-votes-filtered/hv", 5, "class\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr") -#t = c.testBucket("house-votes-filtered/hv", 5) -#print(t) +# c = Classifier("iHealth/i", 10, "attr\tattr\tattr\tattr\tclass") +# print(c.classify(['health', 'moderate', 'moderate', 'yes'])) +# c = Classifier( +# "house-votes-filtered/hv", +# 5, +# "class\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\t" +# "attr\tattr\tattr\tattr\tattr\tattr\tattr" +# ) +# t = c.testBucket("house-votes-filtered/hv", 5) +# print(t) diff --git a/ch7/naiveBayesDensityFunction copy.py b/ch7/naiveBayesDensityFunction copy.py index afb9b2c..57dd9f7 100644 --- a/ch7/naiveBayesDensityFunction copy.py +++ b/ch7/naiveBayesDensityFunction copy.py @@ -1,5 +1,5 @@ - -# + +# # Naive Bayes Classifier chapter 6 # @@ -8,6 +8,7 @@ import math + class Classifier: def __init__(self, bucketPrefix, testBucketNumber, dataFormat): @@ -17,25 +18,24 @@ def __init__(self, bucketPrefix, testBucketNumber, dataFormat): for the iHealth data the format is: "attr attr attr attr class" """ - + total = 0 classes = {} # counts used for attributes that are not numeric counts = {} # totals used for attributes that are numereric - # we will use these to compute the mean and sample standard deviation for - # each attribute - class pair. + # we will use these to compute the mean and sample standard deviation + # for each attribute - class pair. 
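# Aside: note how classify() in naiveBayes.py above zeroes a category's
# probability the moment one attribute value was never seen with it in
# training -- a single unseen value vetoes the class. A tiny illustration;
# the table below is hypothetical, not learned from the house-votes data:
toy_conditional = {'democrat': {1: {'yes': 0.9}},
                   'republican': {1: {'yes': 0.1, 'no': 0.8}}}
toy_prior = {'democrat': 0.5, 'republican': 0.5}
toy_scores = []
for category in toy_prior:
    prob = toy_prior[category]
    for col, value in enumerate(['no'], start=1):
        prob = prob * toy_conditional[category][col].get(value, 0)  # 0 if unseen
    toy_scores.append((prob, category))
print(max(toy_scores)[1])   # republican: 'no' never co-occurred with democrat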
diff --git a/ch7/naiveBayesDensityFunction copy.py b/ch7/naiveBayesDensityFunction copy.py
index afb9b2c..57dd9f7 100644
--- a/ch7/naiveBayesDensityFunction copy.py
+++ b/ch7/naiveBayesDensityFunction copy.py
@@ -1,5 +1,5 @@
-
-#
+
+#
 # Naive Bayes Classifier chapter 6
 #
@@ -8,6 +8,7 @@
 import math

+
 class Classifier:

     def __init__(self, bucketPrefix, testBucketNumber, dataFormat):
@@ -17,25 +18,24 @@ def __init__(self, bucketPrefix, testBucketNumber, dataFormat):
        for the iHealth data the format is:
        "attr attr attr attr class"
        """
-
+
         total = 0
         classes = {}
         # counts used for attributes that are not numeric
         counts = {}
         # totals used for attributes that are numeric
-        # we will use these to compute the mean and sample standard deviation for
-        # each attribute - class pair.
+        # we will use these to compute the mean and sample standard deviation
+        # for each attribute - class pair.
         totals = {}
         numericValues = {}
-
-
+
         # reading the data in from the file
-
+
         self.format = dataFormat.strip().split('\t')
-        #
+        #
         self.prior = {}
         self.conditional = {}
-
+
         # for each of the buckets numbered 1 through 10:
         for i in range(1, 11):
             # if it is not the bucket we should ignore, read in the data
@@ -53,7 +53,7 @@ def __init__(self, bucketPrefix, testBucketNumber, dataFormat):
                     if self.format[i] == 'num':
                         nums.append(float(fields[i]))
                     elif self.format[i] == 'attr':
-                        vector.append(fields[i])
+                        vector.append(fields[i])
                     elif self.format[i] == 'comment':
                         ignore.append(fields[i])
                     elif self.format[i] == 'class':
@@ -77,12 +77,11 @@ def __init__(self, bucketPrefix, testBucketNumber, dataFormat):
             for columnValue in nums:
                 col += 1
                 totals[category].setdefault(col, 0)
-                #totals[category][col].setdefault(columnValue, 0)
+                # totals[category][col].setdefault(columnValue, 0)
                 totals[category][col] += columnValue
                 numericValues[category].setdefault(col, [])
                 numericValues[category][col].append(columnValue)
-
-
+
         #
         # ok done counting. now compute probabilities
         #
@@ -94,13 +93,13 @@ def __init__(self, bucketPrefix, testBucketNumber, dataFormat):
         # now compute conditional probabilities p(h|D)
         #
         for (category, columns) in counts.items():
-              self.conditional.setdefault(category, {})
-              for (col, valueCounts) in columns.items():
-                  self.conditional[category].setdefault(col, {})
-                  for (attrValue, count) in valueCounts.items():
-                      self.conditional[category][col][attrValue] = (
-                          count / classes[category])
-              self.tmp = counts
+            self.conditional.setdefault(category, {})
+            for (col, valueCounts) in columns.items():
+                self.conditional[category].setdefault(col, {})
+                for (attrValue, count) in valueCounts.items():
+                    self.conditional[category][col][attrValue] = (
+                        count / classes[category])
+            self.tmp = counts
         #
         # now compute mean and sample standard deviation
         #
@@ -113,7 +112,7 @@ def __init__(self, bucketPrefix, testBucketNumber, dataFormat):
         # standard deviation
         self.ssd = {}
         for (category, columns) in numericValues.items():
-
+
             self.ssd.setdefault(category, {})
             for (col, values) in columns.items():
                 SumOfSquareDifferences = 0
@@ -121,14 +120,13 @@ def __init__(self, bucketPrefix, testBucketNumber, dataFormat):
                 for value in values:
                     SumOfSquareDifferences += (value - theMean)**2
                 columns[col] = 0
-                self.ssd[category][col] = math.sqrt(SumOfSquareDifferences / (classes[category] - 1))
-
+                self.ssd[category][col] = math.sqrt(
+                    SumOfSquareDifferences / (classes[category] - 1))
-
     def testBucket(self, bucketPrefix, bucketNumber):
         """Evaluate the classifier with data from the file bucketPrefix-bucketNumber"""
-
+
         filename = "%s-%02i" % (bucketPrefix, bucketNumber)
         f = open(filename)
         lines = f.readlines()
@@ -142,12 +140,12 @@ def testBucket(self, bucketPrefix, bucketNumber):
             numV = []
             classInColumn = -1
             for i in range(len(self.format)):
-                  if self.format[i] == 'num':
-                      numV.append(float(data[i]))
-                  elif self.format[i] == 'attr':
-                      vector.append(data[i])
-                  elif self.format[i] == 'class':
-                      classInColumn = i
+                if self.format[i] == 'num':
+                    numV.append(float(data[i]))
+                elif self.format[i] == 'attr':
+                    vector.append(data[i])
+                elif self.format[i] == 'class':
+                    classInColumn = i
             theRealClass = data[classInColumn]
             classifiedAs = self.classify(vector, numV)
             totals.setdefault(theRealClass, {})
@@ -155,8 +153,6 @@ def testBucket(self, bucketPrefix, bucketNumber):
             totals[theRealClass][classifiedAs] += 1
         return totals
-
-
     def classify(self, itemVector, numVector):
         """Return class we think item Vector is in"""
         results = []
@@ -165,7 +161,7 @@ def classify(self, itemVector, numVector):
             prob = prior
             col = 1
             for attrValue in itemVector:
-                if not attrValue in self.conditional[category][col]:
+                if attrValue not in self.conditional[category][col]:
                     # we did not find any instances of this attribute value
                     # occurring with this category so prob = 0
                     prob = 0
@@ -173,7 +169,7 @@ def classify(self, itemVector, numVector):
                    prob = prob * self.conditional[category][col][attrValue]
                 col += 1
             col = 1
-            for x in numVector:
+            for x in numVector:
                 mean = self.means[category][col]
                 ssd = self.ssd[category][col]
                 ePart = math.pow(math.e, -(x - mean)**2/(2*ssd**2))
@@ -181,9 +177,9 @@ def classify(self, itemVector, numVector):
                 col += 1
             results.append((prob, category))
         # return the category with the highest probability
-        #print(results)
+        # print(results)
         return(max(results)[1])
-
+

def tenfold(bucketPrefix, dataFormat):
    results = {}
@@ -195,22 +191,22 @@ def tenfold(bucketPrefix, dataFormat):
        for (ckey, cvalue) in value.items():
            results[key].setdefault(ckey, 0)
            results[key][ckey] += cvalue
-
+
    # now print results
    categories = list(results.keys())
    categories.sort()
-    print( "\n Classified as: ")
-    header = " "
+    print("\n Classified as: ")
+    header = " "
    subheader = " +"
    for category in categories:
        header += "% 10s " % category
        subheader += "-------+"
-    print (header)
-    print (subheader)
+    print(header)
+    print(subheader)
    total = 0.0
    correct = 0.0
    for category in categories:
-        row = " %10s |" % category
+        row = " %10s |" % category
        for c2 in categories:
            if c2 in results[category]:
                count = results[category][c2]
@@ -222,31 +218,42 @@ def tenfold(bucketPrefix, dataFormat):
            correct += count
        print(row)
        print(subheader)
-    print("\n%5.3f percent correct" %((correct * 100) / total))
+    print("\n%5.3f percent correct" % ((correct * 100) / total))
    print("total of %i instances" % total)


def pdf(mean, ssd, x):
-    """Probability Density Function computing P(x|y)
-    input is the mean, sample standard deviation for all the items in y,
-    and x."""
-    ePart = math.pow(math.e, -(x-mean)**2/(2*ssd**2))
-    print (ePart)
-    return (1.0 / (math.sqrt(2*math.pi)*ssd)) * ePart
-
-#tenfold("house-votes/hv", "class\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr")
-#c = Classifier("house-votes/hv", 0,
-#               "class\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr")
-#tenfold("pimaSmall/pimaSmall", "num num num num num num num num class")
-#tenfold("pima/pima", "num num num num num num num num class")
-tenfold("mpgData/mpgData", "class attr num num num num comment")
+    """Probability Density Function computing P(x|y)
+    input is the mean, sample standard deviation for all the items in y,
+    and x."""
+    ePart = math.pow(math.e, -(x-mean)**2/(2*ssd**2))
+    print(ePart)
+    return (1.0 / (math.sqrt(2*math.pi)*ssd)) * ePart

-#c = Classifier("iHealth/i", 10,
-#               "attr\tattr\tattr\tattr\tclass")
-#print(c.classify([], [3, 78, 50, 32, 88, 31.0, 0.248, 26]))
-#c = Classifier("mpgData/mpgData", 5, "class num num num num num comment")
+# tenfold(
+#     "house-votes/hv",
+#     "class\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\t"
+#     "attr\tattr\tattr\tattr\tattr\tattr\tattr"
+# )
+# c = Classifier(
+#     "house-votes/hv",
+#     0,
+#     "class\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\t"
+#     "attr\tattr\tattr\tattr\tattr\tattr\tattr"
+# )
+# tenfold("pimaSmall/pimaSmall", "num num num num num num num num class")
+# tenfold("pima/pima", "num num num num num num num num class")
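# Aside: a caution on the dataFormat argument -- __init__ splits it with
# dataFormat.strip().split('\t'), so the tokens must be tab-separated for
# 'class', 'attr', 'num' and 'comment' to be recognized column by column.
# If a format string were space-separated (as the calls here appear when
# rendered), splitting on any whitespace would be the forgiving parse:
fmt = "class attr num num num num comment".split()
assert fmt[0] == 'class' and fmt[1] == 'attr' and fmt[2] == 'num'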
+tenfold("mpgData/mpgData", "class attr num num num num comment") -#c = Classifier("house-votes-filtered/hv", 5, "class\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr") -#t = c.testBucket("house-votes-filtered/hv", 5) -#print(t) +# c = Classifier("iHealth/i", 10, "attr\tattr\tattr\tattr\tclass") +# print(c.classify([], [3, 78, 50, 32, 88, 31.0, 0.248, 26])) +# c = Classifier( +# "house-votes-filtered/hv", +# 5, +# "class\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\t" +# "attr\tattr\tattr\tattr\tattr\tattr\tattr" +# ) +# t = c.testBucket("house-votes-filtered/hv", 5) +# print(t) diff --git a/ch7/naiveBayesDensityFunction.py b/ch7/naiveBayesDensityFunction.py index a28d08f..04ed066 100644 --- a/ch7/naiveBayesDensityFunction.py +++ b/ch7/naiveBayesDensityFunction.py @@ -1,5 +1,5 @@ - -# + +# # Naive Bayes Classifier chapter 6 # @@ -8,6 +8,7 @@ import math + class Classifier: def __init__(self, bucketPrefix, testBucketNumber, dataFormat): @@ -17,25 +18,24 @@ def __init__(self, bucketPrefix, testBucketNumber, dataFormat): for the iHealth data the format is: "attr attr attr attr class" """ - + total = 0 classes = {} # counts used for attributes that are not numeric counts = {} # totals used for attributes that are numereric - # we will use these to compute the mean and sample standard deviation for - # each attribute - class pair. + # we will use these to compute the mean and sample standard deviation + # for each attribute - class pair. totals = {} numericValues = {} - - + # reading the data in from the file - + self.format = dataFormat.strip().split('\t') - # + # self.prior = {} self.conditional = {} - + # for each of the buckets numbered 1 through 10: for i in range(1, 11): # if it is not the bucket we should ignore, read in the data @@ -53,7 +53,7 @@ def __init__(self, bucketPrefix, testBucketNumber, dataFormat): if self.format[i] == 'num': nums.append(float(fields[i])) elif self.format[i] == 'attr': - vector.append(fields[i]) + vector.append(fields[i]) elif self.format[i] == 'comment': ignore.append(fields[i]) elif self.format[i] == 'class': @@ -77,12 +77,11 @@ def __init__(self, bucketPrefix, testBucketNumber, dataFormat): for columnValue in nums: col += 1 totals[category].setdefault(col, 0) - #totals[category][col].setdefault(columnValue, 0) + # totals[category][col].setdefault(columnValue, 0) totals[category][col] += columnValue numericValues[category].setdefault(col, []) numericValues[category][col].append(columnValue) - - + # # ok done counting. 
now compute probabilities # @@ -94,13 +93,13 @@ def __init__(self, bucketPrefix, testBucketNumber, dataFormat): # now compute conditional probabilities p(h|D) # for (category, columns) in counts.items(): - self.conditional.setdefault(category, {}) - for (col, valueCounts) in columns.items(): - self.conditional[category].setdefault(col, {}) - for (attrValue, count) in valueCounts.items(): - self.conditional[category][col][attrValue] = ( - count / classes[category]) - self.tmp = counts + self.conditional.setdefault(category, {}) + for (col, valueCounts) in columns.items(): + self.conditional[category].setdefault(col, {}) + for (attrValue, count) in valueCounts.items(): + self.conditional[category][col][attrValue] = ( + count / classes[category]) + self.tmp = counts # # now compute mean and sample standard deviation # @@ -113,7 +112,7 @@ def __init__(self, bucketPrefix, testBucketNumber, dataFormat): # standard deviation self.ssd = {} for (category, columns) in numericValues.items(): - + self.ssd.setdefault(category, {}) for (col, values) in columns.items(): SumOfSquareDifferences = 0 @@ -121,14 +120,13 @@ def __init__(self, bucketPrefix, testBucketNumber, dataFormat): for value in values: SumOfSquareDifferences += (value - theMean)**2 columns[col] = 0 - self.ssd[category][col] = math.sqrt(SumOfSquareDifferences / (classes[category] - 1)) - + self.ssd[category][col] = math.sqrt( + SumOfSquareDifferences / (classes[category] - 1)) - def testBucket(self, bucketPrefix, bucketNumber): """Evaluate the classifier with data from the file bucketPrefix-bucketNumber""" - + filename = "%s-%02i" % (bucketPrefix, bucketNumber) f = open(filename) lines = f.readlines() @@ -142,12 +140,12 @@ def testBucket(self, bucketPrefix, bucketNumber): numV = [] classInColumn = -1 for i in range(len(self.format)): - if self.format[i] == 'num': - numV.append(float(data[i])) - elif self.format[i] == 'attr': - vector.append(data[i]) - elif self.format[i] == 'class': - classInColumn = i + if self.format[i] == 'num': + numV.append(float(data[i])) + elif self.format[i] == 'attr': + vector.append(data[i]) + elif self.format[i] == 'class': + classInColumn = i theRealClass = data[classInColumn] classifiedAs = self.classify(vector, numV) totals.setdefault(theRealClass, {}) @@ -155,8 +153,6 @@ def testBucket(self, bucketPrefix, bucketNumber): totals[theRealClass][classifiedAs] += 1 return totals - - def classify(self, itemVector, numVector): """Return class we think item Vector is in""" results = [] @@ -165,7 +161,7 @@ def classify(self, itemVector, numVector): prob = prior col = 1 for attrValue in itemVector: - if not attrValue in self.conditional[category][col]: + if attrValue not in self.conditional[category][col]: # we did not find any instances of this attribute value # occurring with this category so prob = 0 prob = 0 @@ -173,7 +169,7 @@ def classify(self, itemVector, numVector): prob = prob * self.conditional[category][col][attrValue] col += 1 col = 1 - for x in numVector: + for x in numVector: mean = self.means[category][col] ssd = self.ssd[category][col] ePart = math.pow(math.e, -(x - mean)**2/(2*ssd**2)) @@ -181,9 +177,9 @@ def classify(self, itemVector, numVector): col += 1 results.append((prob, category)) # return the category with the highest probability - #print(results) + # print(results) return(max(results)[1]) - + def tenfold(bucketPrefix, dataFormat): results = {} @@ -195,22 +191,22 @@ def tenfold(bucketPrefix, dataFormat): for (ckey, cvalue) in value.items(): results[key].setdefault(ckey, 0) results[key][ckey] 
+= cvalue - + # now print results categories = list(results.keys()) categories.sort() - print( "\n Classified as: ") - header = " " + print("\n Classified as: ") + header = " " subheader = " +" for category in categories: header += "% 10s " % category subheader += "-------+" - print (header) - print (subheader) + print(header) + print(subheader) total = 0.0 correct = 0.0 for category in categories: - row = " %10s |" % category + row = " %10s |" % category for c2 in categories: if c2 in results[category]: count = results[category][c2] @@ -222,29 +218,40 @@ def tenfold(bucketPrefix, dataFormat): correct += count print(row) print(subheader) - print("\n%5.3f percent correct" %((correct * 100) / total)) + print("\n%5.3f percent correct" % ((correct * 100) / total)) print("total of %i instances" % total) def pdf(mean, ssd, x): - """Probability Density Function computing P(x|y) - input is the mean, sample standard deviation for all the items in y, - and x.""" - ePart = math.pow(math.e, -(x-mean)**2/(2*ssd**2)) - print (ePart) - return (1.0 / (math.sqrt(2*math.pi)*ssd)) * ePart - -#tenfold("house-votes/hv", "class\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr") -#c = Classifier("house-votes/hv", 0, -# "class\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr") -tenfold("pimaSmall/pimaSmall", "num num num num num num num num class") -tenfold("pima/pima", "num num num num num num num num class") - -#c = Classifier("iHealth/i", 10, -# "attr\tattr\tattr\tattr\tclass") -#print(c.classify([], [3, 78, 50, 32, 88, 31.0, 0.248, 26])) - -#c = Classifier("house-votes-filtered/hv", 5, "class\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr") -#t = c.testBucket("house-votes-filtered/hv", 5) -#print(t) + """Probability Density Function computing P(x|y) + input is the mean, sample standard deviation for all the items in y, + and x.""" + ePart = math.pow(math.e, -(x-mean)**2/(2*ssd**2)) + print (ePart) + return (1.0 / (math.sqrt(2*math.pi)*ssd)) * ePart + +# tenfold( +# "house-votes/hv", +# "class\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\t" +# "attr\tattr\tattr\tattr\tattr\tattr\tattr" +# ) +# c = Classifier( +# "house-votes/hv", +# 0, +# "class\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\t" +# "attr\tattr\tattr\tattr\tattr\tattr\tattr" +# ) +tenfold("pimaSmall/pimaSmall", "num num num num num num num num class") +tenfold("pima/pima", "num num num num num num num num class") + +# c = Classifier("iHealth/i", 10, "attr\tattr\tattr\tattr\tclass") +# print(c.classify([], [3, 78, 50, 32, 88, 31.0, 0.248, 26])) +# c = Classifier( +# "house-votes-filtered/hv", +# 5, +# "class\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\tattr\t" +# "attr\tattr\tattr\tattr\tattr\tattr\tattr" +# ) +# t = c.testBucket("house-votes-filtered/hv", 5) +# print(t) diff --git a/ch7/naiveBayesDensityFunctionTraining.py b/ch7/naiveBayesDensityFunctionTraining.py index 3c16f06..36c231f 100644 --- a/ch7/naiveBayesDensityFunctionTraining.py +++ b/ch7/naiveBayesDensityFunctionTraining.py @@ -1,5 +1,5 @@ - -# + +# # Naive Bayes Classifier chapter 6 # @@ -8,6 +8,7 @@ import math + class Classifier: def __init__(self, bucketPrefix, testBucketNumber, dataFormat): @@ -17,25 +18,24 @@ def __init__(self, bucketPrefix, testBucketNumber, dataFormat): for the iHealth data the format is: "attr attr attr attr class" """ - + total = 0 classes = {} # counts used for attributes that are not numeric 
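# Aside: the training exercise that follows asks you to fill in self.means
# and self.ssd. Per the asserts at the bottom of these files, both are
# keyed by class label, then by 1-based numeric column, e.g.
# c.means['1'][1] == 5.25 for pimaSmall. A toy value with that shape
# (every number other than the asserted 5.25 is made up):
toy_means = {'1': {1: 5.25, 2: 120.0}, '0': {1: 3.0, 2: 110.0}}
assert '1' in toy_means and toy_means['1'][1] == 5.25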
diff --git a/ch7/naiveBayesDensityFunctionTraining.py b/ch7/naiveBayesDensityFunctionTraining.py
index 3c16f06..36c231f 100644
--- a/ch7/naiveBayesDensityFunctionTraining.py
+++ b/ch7/naiveBayesDensityFunctionTraining.py
@@ -1,5 +1,5 @@
-
-#
+
+#
 # Naive Bayes Classifier chapter 6
 #
@@ -8,6 +8,7 @@
 import math

+
 class Classifier:

     def __init__(self, bucketPrefix, testBucketNumber, dataFormat):
@@ -17,25 +18,24 @@ def __init__(self, bucketPrefix, testBucketNumber, dataFormat):
        for the iHealth data the format is:
        "attr attr attr attr class"
        """
-
+
         total = 0
         classes = {}
         # counts used for attributes that are not numeric
         counts = {}
         # totals used for attributes that are numeric
-        # we will use these to compute the mean and sample standard deviation for
-        # each attribute - class pair.
+        # we will use these to compute the mean and sample standard deviation
+        # for each attribute - class pair.
         totals = {}
         numericValues = {}
-
-
+
         # reading the data in from the file
-
+
         self.format = dataFormat.strip().split('\t')
-        #
+        #
         self.prior = {}
         self.conditional = {}
-
+
         # for each of the buckets numbered 1 through 10:
         for i in range(1, 11):
             # if it is not the bucket we should ignore, read in the data
@@ -53,7 +53,7 @@ def __init__(self, bucketPrefix, testBucketNumber, dataFormat):
                     if self.format[i] == 'num':
                         nums.append(float(fields[i]))
                     elif self.format[i] == 'attr':
-                        vector.append(fields[i])
+                        vector.append(fields[i])
                     elif self.format[i] == 'comment':
                         ignore.append(fields[i])
                     elif self.format[i] == 'class':
@@ -77,12 +77,11 @@ def __init__(self, bucketPrefix, testBucketNumber, dataFormat):
             for columnValue in nums:
                 col += 1
                 totals[category].setdefault(col, 0)
-                #totals[category][col].setdefault(columnValue, 0)
+                # totals[category][col].setdefault(columnValue, 0)
                 totals[category][col] += columnValue
                 numericValues[category].setdefault(col, [])
                 numericValues[category][col].append(columnValue)
-
-
+
         #
         # ok done counting. now compute probabilities
         #
@@ -94,24 +93,25 @@ def __init__(self, bucketPrefix, testBucketNumber, dataFormat):
         # now compute conditional probabilities p(h|D)
         #
         for (category, columns) in counts.items():
-              self.conditional.setdefault(category, {})
-              for (col, valueCounts) in columns.items():
-                  self.conditional[category].setdefault(col, {})
-                  for (attrValue, count) in valueCounts.items():
-                      self.conditional[category][col][attrValue] = (
-                          count / classes[category])
-              self.tmp = counts
+            self.conditional.setdefault(category, {})
+            for (col, valueCounts) in columns.items():
+                self.conditional[category].setdefault(col, {})
+                for (attrValue, count) in valueCounts.items():
+                    self.conditional[category][col][attrValue] = (
+                        count / classes[category])
+            self.tmp = counts
         #
         # now compute mean and sample standard deviation
         #
         self.means = {}
         self.ssd = {}
         # ADD YOUR CODE HERE
-
-    # test the code
-c = Classifier("pimaSmall/pimaSmall", 1, "num num num num num num num num class")
+# test the code
+
+c = Classifier(
+    "pimaSmall/pimaSmall", 1, "num num num num num num num num class")

 # test means computation
 assert('1' in c.means)
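# Aside: for the means half of the exercise, the solution that follows
# divides each running column total by the class's instance count:
#
#     self.means[category][col] = cTotal / classes[category]
#
# Toy numbers, chosen only to land on the asserted 5.25: a class with
# 4 instances and a column total of 21.0 has mean 21.0 / 4 == 5.25.
assert 21.0 / 4 == 5.25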
totals = {} numericValues = {} - - + # reading the data in from the file - + self.format = dataFormat.strip().split('\t') - # + # self.prior = {} self.conditional = {} - + # for each of the buckets numbered 1 through 10: for i in range(1, 11): # if it is not the bucket we should ignore, read in the data @@ -53,7 +53,7 @@ def __init__(self, bucketPrefix, testBucketNumber, dataFormat): if self.format[i] == 'num': nums.append(float(fields[i])) elif self.format[i] == 'attr': - vector.append(fields[i]) + vector.append(fields[i]) elif self.format[i] == 'comment': ignore.append(fields[i]) elif self.format[i] == 'class': @@ -77,12 +77,11 @@ def __init__(self, bucketPrefix, testBucketNumber, dataFormat): for columnValue in nums: col += 1 totals[category].setdefault(col, 0) - #totals[category][col].setdefault(columnValue, 0) + # totals[category][col].setdefault(columnValue, 0) totals[category][col] += columnValue numericValues[category].setdefault(col, []) numericValues[category][col].append(columnValue) - - + # # ok done counting. now compute probabilities # @@ -94,13 +93,13 @@ def __init__(self, bucketPrefix, testBucketNumber, dataFormat): # now compute conditional probabilities p(h|D) # for (category, columns) in counts.items(): - self.conditional.setdefault(category, {}) - for (col, valueCounts) in columns.items(): - self.conditional[category].setdefault(col, {}) - for (attrValue, count) in valueCounts.items(): - self.conditional[category][col][attrValue] = ( - count / classes[category]) - self.tmp = counts + self.conditional.setdefault(category, {}) + for (col, valueCounts) in columns.items(): + self.conditional[category].setdefault(col, {}) + for (attrValue, count) in valueCounts.items(): + self.conditional[category][col][attrValue] = ( + count / classes[category]) + self.tmp = counts # # now compute mean and sample standard deviation # @@ -112,9 +111,9 @@ def __init__(self, bucketPrefix, testBucketNumber, dataFormat): for (col, cTotal) in columns.items(): self.means[category][col] = cTotal / classes[category] # standard deviation - + for (category, columns) in numericValues.items(): - + self.ssd.setdefault(category, {}) for (col, values) in columns.items(): SumOfSquareDifferences = 0 @@ -122,12 +121,14 @@ def __init__(self, bucketPrefix, testBucketNumber, dataFormat): for value in values: SumOfSquareDifferences += (value - theMean)**2 columns[col] = 0 - self.ssd[category][col] = math.sqrt(SumOfSquareDifferences / (classes[category] - 1)) - + self.ssd[category][col] = math.sqrt( + SumOfSquareDifferences / (classes[category] - 1)) + - # test the code +# test the code -c = Classifier("pimaSmall/pimaSmall", 1, "num num num num num num num num class") +c = Classifier( + "pimaSmall/pimaSmall", 1, "num num num num num num num num class") # test means computation assert(c.means['1'][1] == 5.25) diff --git a/ch8/hierarchicalClusterer.py b/ch8/hierarchicalClusterer.py index dc73466..cab2c8a 100644 --- a/ch8/hierarchicalClusterer.py +++ b/ch8/hierarchicalClusterer.py @@ -6,6 +6,7 @@ Example code for hierarchical clustering """ + def getMedian(alist): """get median value of list alist""" tmp = list(alist) @@ -15,7 +16,7 @@ def getMedian(alist): return tmp[alen // 2] else: return (tmp[alen // 2] + tmp[(alen // 2) - 1]) / 2 - + def normalizeColumn(column): """Normalize column using Modified Standard Score""" @@ -24,10 +25,11 @@ def normalizeColumn(column): result = [(x - median) / asd for x in column] return result + class hClusterer: """ this clusterer assumes that the first column of the data is a label 
diff --git a/ch8/hierarchicalClusterer.py b/ch8/hierarchicalClusterer.py
index dc73466..cab2c8a 100644
--- a/ch8/hierarchicalClusterer.py
+++ b/ch8/hierarchicalClusterer.py
@@ -6,6 +6,7 @@
 Example code for hierarchical clustering
 """

+
 def getMedian(alist):
     """get median value of list alist"""
     tmp = list(alist)
@@ -15,7 +16,7 @@ def getMedian(alist):
         return tmp[alen // 2]
     else:
         return (tmp[alen // 2] + tmp[(alen // 2) - 1]) / 2
-
+

 def normalizeColumn(column):
     """Normalize column using Modified Standard Score"""
@@ -24,10 +25,11 @@ def normalizeColumn(column):
     result = [(x - median) / asd for x in column]
     return result

+
 class hClusterer:
     """ this clusterer assumes that the first column of the data is a label
     not used in the clustering. The other columns contain numeric data"""
-
+
     def __init__(self, filename):
         file = open(filename)
         self.data = {}
@@ -43,34 +45,32 @@ def __init__(self, filename):
             toggle = 0
             for cell in range(self.cols):
                 if toggle == 0:
-                   self.data[cell].append(cells[cell])
-                   toggle = 1
+                    self.data[cell].append(cells[cell])
+                    toggle = 1
                 else:
                     self.data[cell].append(float(cells[cell]))
         # now normalize number columns (that is, skip the first column)
         for i in range(1, self.cols):
-           self.data[i] = normalizeColumn(self.data[i])
+            self.data[i] = normalizeColumn(self.data[i])

         ###
-        ### I have read in the data and normalized the
-        ### columns. Now for each element i in the data, I am going to
-        ### 1. compute the Euclidean Distance from element i to all the
-        ###    other elements. This data will be placed in neighbors,
-        ###    which is a Python dictionary. Let's say i = 1, and I am
-        ###    computing the distance to the neighbor j and let's say j
-        ###    is 2. The neighbors dictionary for i will look like
-        ###    {2: ((1,2), 1.23), 3: ((1, 3), 2.3)... }
-        ###
-        ### 2. find the closest neighbor
-        ###
-        ### 3. place the element on a priority queue, called simply queue,
-        ###    based on the distance to the nearest neighbor (and a counter
-        ###    used to break ties.
-
-
-
-        # now push distances on queue
-        rows = len(self.data[0])
+        # I have read in the data and normalized the
+        # columns. Now for each element i in the data, I am going to
+        # 1. compute the Euclidean Distance from element i to all the
+        #    other elements. This data will be placed in neighbors,
+        #    which is a Python dictionary. Let's say i = 1, and I am
+        #    computing the distance to the neighbor j and let's say j
+        #    is 2. The neighbors dictionary for i will look like
+        #    {2: ((1,2), 1.23), 3: ((1, 3), 2.3)... }
+        #
+        # 2. find the closest neighbor
+        #
+        # 3. place the element on a priority queue, called simply queue,
+        #    based on the distance to the nearest neighbor (and a counter
+        #    used to break ties).
+
+        # now push distances on queue
+        rows = len(self.data[0])

         for i in range(rows):
             minDistance = 99999
@@ -80,9 +80,9 @@
                 if i != j:
                     dist = self.distance(i, j)
                     if i < j:
-                        pair = (i,j)
+                        pair = (i, j)
                     else:
-                        pair = (j,i)
+                        pair = (j, i)
                     neighbors[j] = (pair, dist)
                     if dist < minDistance:
                         minDistance = dist
@@ -93,97 +93,92 @@
                 nearestPair = (i, nearestNeighbor)
             else:
                 nearestPair = (nearestNeighbor, i)
-
-        # put instance on priority queue
+
+            # put instance on priority queue
             self.queue.put((minDistance, self.counter,
                             [[self.data[0][i]], nearestPair, neighbors]))
             self.counter += 1
-

     def distance(self, i, j):
         sumSquares = 0
         for k in range(1, self.cols):
             sumSquares += (self.data[k][i] - self.data[k][j])**2
         return math.sqrt(sumSquares)
-
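    # Aside: every entry placed on the queue in __init__ above, and popped
    # again in cluster() below, has the shape
    #     (distance, counter, [clusterLabel, nearestPair, neighbors])
    # e.g., with an illustrative dogs.csv row and made-up numbers,
    #     (1.23, 7, [['Border Collie'], (3, 7), {3: ((3, 7), 1.23), ...}])
    # PriorityQueue compares tuples element by element, so the steadily
    # increasing counter settles ties in distance before Python ever has
    # to compare the payload lists themselves.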

     def cluster(self):
-        done = False
-        while not done:
-            topOne = self.queue.get()
-            nearestPair = topOne[2][1]
-            if not self.queue.empty():
-                nextOne = self.queue.get()
-                nearPair = nextOne[2][1]
-                tmp = []
-                ##
-                ## I have just popped two elements off the queue,
-                ## topOne and nextOne. I need to check whether nextOne
-                ## is topOne's nearest neighbor and vice versa.
-                ## If not, I will pop another element off the queue
-                ## until I find topOne's nearest neighbor. That is what
-                ## this while loop does.
-                ##
-
-                while nearPair != nearestPair:
-                    tmp.append((nextOne[0], self.counter, nextOne[2]))
-                    self.counter += 1
-                    nextOne = self.queue.get()
-                    nearPair = nextOne[2][1]
-                ##
-                ## this for loop pushes the elements I popped off in the
-                ## above while loop.
-                ##
-                for item in tmp:
-                    self.queue.put(item)
-
-                if len(topOne[2][0]) == 1:
+        done = False
+        while not done:
+            topOne = self.queue.get()
+            nearestPair = topOne[2][1]
+            if not self.queue.empty():
+                nextOne = self.queue.get()
+                nearPair = nextOne[2][1]
+                tmp = []
+                ##
+                # I have just popped two elements off the queue,
+                # topOne and nextOne. I need to check whether nextOne
+                # is topOne's nearest neighbor and vice versa.
+                # If not, I will pop another element off the queue
+                # until I find topOne's nearest neighbor. That is what
+                # this while loop does.
+                ##
+
+                while nearPair != nearestPair:
+                    tmp.append((nextOne[0], self.counter, nextOne[2]))
+                    self.counter += 1
+                    nextOne = self.queue.get()
+                    nearPair = nextOne[2][1]
+                ##
+                # this for loop pushes the elements I popped off in the
+                # above while loop.
+                ##
+                for item in tmp:
+                    self.queue.put(item)
+
+                if len(topOne[2][0]) == 1:
                     item1 = topOne[2][0][0]
-                else:
-                    item1 = topOne[2][0]
-                if len(nextOne[2][0]) == 1:
+                else:
+                    item1 = topOne[2][0]
+                if len(nextOne[2][0]) == 1:
                     item2 = nextOne[2][0][0]
-                else:
-                    item2 = nextOne[2][0]
-                ## curCluster is, perhaps obviously, the new cluster
-                ## which combines cluster item1 with cluster item2.
-                curCluster = (item1, item2)
-
-                ## Now I am doing two things. First, finding the nearest
-                ## neighbor to this new cluster. Second, building a new
-                ## neighbors list by merging the neighbors lists of item1
-                ## and item2. If the distance between item1 and element 23
-                ## is 2 and the distance betweeen item2 and element 23 is 4
-                ## the distance between element 23 and the new cluster will
-                ## be 2 (i.e., the shortest distance).
-                ##
-
-                minDistance = 99999
-                nearestPair = ()
-                nearestNeighbor = ''
-                merged = {}
-                nNeighbors = nextOne[2][2]
-                for (key, value) in topOne[2][2].items():
+                else:
+                    item2 = nextOne[2][0]
+                # curCluster is, perhaps obviously, the new cluster
+                # which combines cluster item1 with cluster item2.
+                curCluster = (item1, item2)
+
+                # Now I am doing two things. First, finding the nearest
+                # neighbor to this new cluster. Second, building a new
+                # neighbors list by merging the neighbors lists of item1
+                # and item2. If the distance between item1 and element 23
+                # is 2 and the distance between item2 and element 23 is 4
+                # the distance between element 23 and the new cluster will
+                # be 2 (i.e., the shortest distance).
+                ##
+
+                minDistance = 99999
+                nearestPair = ()
+                nearestNeighbor = ''
+                merged = {}
+                nNeighbors = nextOne[2][2]
+                for (key, value) in topOne[2][2].items():
                     if key in nNeighbors:
                         if nNeighbors[key][1] < value[1]:
-                           dist = nNeighbors[key]
+                            dist = nNeighbors[key]
                         else:
                             dist = value
                         if dist[1] < minDistance:
-                           minDistance = dist[1]
-                           nearestPair = dist[0]
-                           nearestNeighbor = key
+                            minDistance = dist[1]
+                            nearestPair = dist[0]
+                            nearestNeighbor = key
                         merged[key] = dist
-
-                if merged == {}:
+
+                if merged == {}:
                     return curCluster
-                else:
-                    self.queue.put( (minDistance, self.counter,
-                                     [curCluster, nearestPair, merged]))
-                    self.counter += 1
-
-
-
+                else:
+                    self.queue.put((minDistance, self.counter,
+                                    [curCluster, nearestPair, merged]))
+                    self.counter += 1


 def printDendrogram(T, sep=3):
@@ -191,17 +186,17 @@
     length-2 tuple.
     printDendrogram is written and provided by David Eppstein 2002.
     Accessed on 14 April 2014:
     http://code.activestate.com/recipes/139422-dendrogram-drawing/ """
-
+
     def isPair(T):
         return type(T) == tuple and len(T) == 2
-
+
     def maxHeight(T):
         if isPair(T):
             h = max(maxHeight(T[0]), maxHeight(T[1]))
         else:
             h = len(str(T))
         return h + sep
-
+

     activeLevels = {}

     def traverse(T, h, isFirst):
@@ -215,14 +210,14 @@
         while len(s) < h:
             s.append('-')
-
+
         if (isFirst >= 0):
             s.append('+')
             if isFirst:
                 activeLevels[h] = 1
             else:
                 del activeLevels[h]
-
+
         A = list(activeLevels)
         A.sort()
         for L in A:
@@ -231,19 +226,15 @@
                 s.append(' ')
             s.append('|')

-        print (''.join(s))
-
+        print(''.join(s))
+
         if isPair(T):
             traverse(T[1], h-sep, 0)

     traverse(T, maxHeight(T), -1)
-
-
-

 filename = '//Users/raz/Dropbox/guide/data/dogs.csv'
 hg = hClusterer(filename)
 cluster = hg.cluster()
 printDendrogram(cluster)
-

diff --git a/ch8/hierarchicalClustererTemplate.py b/ch8/hierarchicalClustererTemplate.py
index eb97cfd..4ae6a84 100644
--- a/ch8/hierarchicalClustererTemplate.py
+++ b/ch8/hierarchicalClustererTemplate.py
@@ -6,6 +6,7 @@
 Example code for hierarchical clustering
 """

+
 def getMedian(alist):
     """get median value of list alist"""
     tmp = list(alist)
@@ -15,7 +16,7 @@ def getMedian(alist):
         return tmp[alen // 2]
     else:
         return (tmp[alen // 2] + tmp[(alen // 2) - 1]) / 2
-
+

 def normalizeColumn(column):
     """Normalize column using Modified Standard Score"""
@@ -24,10 +25,11 @@ def normalizeColumn(column):
     result = [(x - median) / asd for x in column]
     return result

+
 class hClusterer:
     """ this clusterer assumes that the first column of the data is a label
     not used in the clustering. The other columns contain numeric data"""
-
+
     def __init__(self, filename):
         file = open(filename)
         self.data = {}
@@ -43,8 +45,8 @@ def __init__(self, filename):
             toggle = 0
             for cell in range(self.cols):
                 if toggle == 0:
-                   self.data[cell].append(cells[cell])
-                   toggle = 1
+                    self.data[cell].append(cells[cell])
+                    toggle = 1
                 else:
                     self.data[cell].append(float(cells[cell]))
         # now normalize number columns (that is, skip the first column)
@@ -52,54 +54,52 @@ def __init__(self, filename):
             self.data[i] = normalizeColumn(self.data[i])

         ###
-        ### I have read in the data and normalized the
-        ### columns. Now for each element i in the data, I am going to
-        ### 1. compute the Euclidean Distance from element i to all the
-        ###    other elements. This data will be placed in neighbors, which
-        ###    is a Python dictionary. Let's say i = 1, and I am computing
-        ###    the distance to the neighbor j and let's say j is 2. The
-        ###    neighbors dictionary for i will look like
-        ###    {2: ((1,2), 1.23), 3: ((1, 3), 2.3)... }
-        ###
-        ### 2. find the closest neighbor
-        ###
-        ### 3. place the element on a priority queue, called simply queue,
-        ###    based on the distance to the nearest neighbor (and a counter
-        ###    used to break ties.
-
-
-
-        # TO DO
-
+        # I have read in the data and normalized the
+        # columns. Now for each element i in the data, I am going to
+        # 1. compute the Euclidean Distance from element i to all the
+        #    other elements. This data will be placed in neighbors, which
+        #    is a Python dictionary. Let's say i = 1, and I am computing
+        #    the distance to the neighbor j and let's say j is 2. The
+        #    neighbors dictionary for i will look like
+        #    {2: ((1,2), 1.23), 3: ((1, 3), 2.3)... }
+        #
+        # 2. find the closest neighbor
+        #
+        # 3. place the element on a priority queue, called simply queue,
+        #    based on the distance to the nearest neighbor (and a counter
+        #    used to break ties).
+
+        # TO DO
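        # A sketch of one way to fill in the TO DO above, mirroring the
        # finished hierarchicalClusterer.py earlier in this diff; it
        # assumes self.queue and self.counter were initialized above,
        # as they are in that file.
        rows = len(self.data[0])
        for i in range(rows):
            minDistance = 99999
            nearestNeighbor = 0
            neighbors = {}
            for j in range(rows):
                if i != j:
                    dist = self.distance(i, j)
                    pair = (i, j) if i < j else (j, i)
                    neighbors[j] = (pair, dist)
                    if dist < minDistance:
                        minDistance = dist
                        nearestNeighbor = j
            if i < nearestNeighbor:
                nearestPair = (i, nearestNeighbor)
            else:
                nearestPair = (nearestNeighbor, i)
            self.queue.put((minDistance, self.counter,
                            [[self.data[0][i]], nearestPair, neighbors]))
            self.counter += 1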

     def distance(self, i, j):
         sumSquares = 0
         for k in range(1, self.cols):
             sumSquares += (self.data[k][i] - self.data[k][j])**2
         return math.sqrt(sumSquares)
-

     def cluster(self):
         # TODO
         return "TO DO"
-

 def printDendrogram(T, sep=3):
-    """Print dendrogram of a binary tree. Each tree node is represented by a length-2 tuple.
-    printDendrogram is written and provided by David Eppstein 2002. Accessed on 14 April 2014:
-    http://code.activestate.com/recipes/139422-dendrogram-drawing/ """
-
+    """Print dendrogram of a binary tree. Each tree node is represented by
+    a length-2 tuple.
+
+    printDendrogram is written and provided by David Eppstein 2002. Accessed
+    on 14 April 2014:
+    http://code.activestate.com/recipes/139422-dendrogram-drawing/"""
+
     def isPair(T):
         return type(T) == tuple and len(T) == 2
-
+
     def maxHeight(T):
         if isPair(T):
             h = max(maxHeight(T[0]), maxHeight(T[1]))
         else:
             h = len(str(T))
         return h + sep
-
+

     activeLevels = {}

     def traverse(T, h, isFirst):
@@ -113,14 +113,14 @@
         while len(s) < h:
             s.append('-')
-
+
         if (isFirst >= 0):
             s.append('+')
             if isFirst:
                 activeLevels[h] = 1
             else:
                 del activeLevels[h]
-
+
         A = list(activeLevels)
         A.sort()
         for L in A:
@@ -129,20 +129,16 @@
                 s.append(' ')
             s.append('|')

-        print (''.join(s))
-
+        print(''.join(s))
+
         if isPair(T):
             traverse(T[1], h-sep, 0)

     traverse(T, maxHeight(T), -1)
-
-
-

 filename = '//Users/raz/Dropbox/guide/pg2dm-python/ch8/dogs.csv'
-#filename = '//Users/raz/Dropbox/guide/pg2dm-python/ch8/cerealTemp.csv'
+# filename = '//Users/raz/Dropbox/guide/pg2dm-python/ch8/cerealTemp.csv'
 hg = hClusterer(filename)
 cluster = hg.cluster()
 printDendrogram(cluster)
-
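(Both clustering files normalize with normalizeColumn, but the line that computes asd falls outside the hunks shown. It is presumably the book's absolute standard deviation, the mean absolute deviation from the median, in which case a self-contained equivalent would look like the sketch below; the function name is illustrative.)

def modifiedStandardScore(column):
    """(x - median) / asd for every x in the column, where asd is the
    mean absolute deviation from the median"""
    tmp = sorted(column)
    alen = len(tmp)
    if alen % 2 == 1:
        median = tmp[alen // 2]
    else:
        median = (tmp[alen // 2] + tmp[(alen // 2) - 1]) / 2
    asd = sum(abs(x - median) for x in column) / alen
    return [(x - median) / asd for x in column]

# e.g. modifiedStandardScore([54, 72, 78, 49, 65])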
diff --git a/ch8/kmeans.py b/ch8/kmeans.py
index a43b0b1..69dd99e 100644
--- a/ch8/kmeans.py
+++ b/ch8/kmeans.py
@@ -1,5 +1,5 @@
 import math
-import random
+import random


 """
@@ -9,6 +9,7 @@
 """

+
 def getMedian(alist):
     """get median of list"""
     tmp = list(alist)
@@ -18,7 +19,7 @@ def getMedian(alist):
         return tmp[alen // 2]
     else:
         return (tmp[alen // 2] + tmp[(alen // 2) - 1]) / 2
-
+

 def normalizeColumn(column):
     """normalize the values of a column using Modified Standard Score
@@ -34,7 +35,7 @@ class kClusterer:
     This clusterer assumes that the first column of the data is a label
     not used in the clustering. The other columns contain numeric data
     """
-
+
     def __init__(self, filename, k):
         """ k is the number of clusters to make
         This init method:
@@ -70,11 +71,11 @@ def __init__(self, filename, k):
             toggle = 0
             for cell in range(self.cols):
                 if toggle == 0:
-                   self.data[cell].append(cells[cell])
-                   toggle = 1
+                    self.data[cell].append(cells[cell])
+                    toggle = 1
                 else:
                     self.data[cell].append(float(cells[cell]))
-
+
         self.datasize = len(self.data[1])
         self.memberOf = [-1 for x in range(len(self.data[1]))]
         #
@@ -85,25 +86,22 @@ def __init__(self, filename, k):
         # select random centroids from existing points
         random.seed()
-        self.centroids = [[self.data[i][r] for i in range(1, len(self.data))]
-                           for r in random.sample(range(len(self.data[0])),
+        self.centroids = [[self.data[i][r] for i in range(1, len(self.data))]
+                          for r in random.sample(range(len(self.data[0])),
                                                  self.k)]
         self.assignPointsToCluster()
-
-
     def updateCentroids(self):
         """Using the points in the clusters,
         determine the centroid (mean point) of each cluster"""
         members = [self.memberOf.count(i) for i in range(len(self.centroids))]
         self.centroids = [[sum([self.data[k][i] for i in range(len(self.data[0]))
-                                if self.memberOf[i] == centroid])/members[centroid]
+                                if self.memberOf[i] == centroid]) /
+                           members[centroid]
                            for k in range(1, len(self.data))]
-                           for centroid in range(len(self.centroids))]
-
-
-
+                          for centroid in range(len(self.centroids))]
+
     def assignPointToCluster(self, i):
         """ assign point to cluster based on distance from centroids"""
         min = 999999
@@ -126,9 +124,7 @@ def assignPointsToCluster(self):
         self.sse = 0
         self.memberOf = [self.assignPointToCluster(i)
                          for i in range(len(self.data[1]))]
-
-

     def euclideanDistance(self, i, j):
         """ compute distance of point i from centroid j"""
         sumSquares = 0
@@ -141,10 +137,11 @@ def kCluster(self):
         As you can see this method repeatedly updates the centroids
         by computing the mean point of each cluster
         re-assign the points to clusters based on these new centroids
-        until the number of points that change cluster membership is less than 1%.
+        until the number of points that change cluster membership is less
+        than 1%.
""" done = False - + while not done: self.iterationNumber += 1 self.updateCentroids() @@ -152,20 +149,20 @@ def kCluster(self): # # we are done if fewer than 1% of the points change clusters # - if float(self.pointsChanged) / len(self.memberOf) < 0.01: + if float(self.pointsChanged) / len(self.memberOf) < 0.01: done = True print("Final SSE: %f" % self.sse) def showMembers(self): """Display the results""" for centroid in range(len(self.centroids)): - print ("\n\nClass %i\n========" % centroid) - for name in [self.data[0][i] for i in range(len(self.data[0])) - if self.memberOf[i] == centroid]: - print (name) - + print ("\n\nClass %i\n========" % centroid) + for name in [self.data[0][i] for i in range(len(self.data[0])) + if self.memberOf[i] == centroid]: + print (name) + ## -## RUN THE K-MEANS CLUSTERER ON THE DOG DATA USING K = 3 +# RUN THE K-MEANS CLUSTERER ON THE DOG DATA USING K = 3 ### # change the path in the following to match where dogs.csv is on your machine km = kClusterer('../../data/dogs.csv', 3) diff --git a/ch8/kmeansPlusPlus.py b/ch8/kmeansPlusPlus.py index 2105280..3ecca1a 100644 --- a/ch8/kmeansPlusPlus.py +++ b/ch8/kmeansPlusPlus.py @@ -1,5 +1,5 @@ import math -import random +import random """ @@ -9,6 +9,7 @@ """ + def getMedian(alist): """get median of list""" tmp = list(alist) @@ -18,7 +19,7 @@ def getMedian(alist): return tmp[alen // 2] else: return (tmp[alen // 2] + tmp[(alen // 2) - 1]) / 2 - + def normalizeColumn(column): """normalize the values of a column using Modified Standard Score @@ -34,7 +35,7 @@ class kClusterer: This clusterer assumes that the first column of the data is a label not used in the clustering. The other columns contain numeric data """ - + def __init__(self, filename, k): """ k is the number of clusters to make This init method: @@ -70,11 +71,11 @@ def __init__(self, filename, k): toggle = 0 for cell in range(self.cols): if toggle == 0: - self.data[cell].append(cells[cell]) - toggle = 1 + self.data[cell].append(cells[cell]) + toggle = 1 else: self.data[cell].append(float(cells[cell])) - + self.datasize = len(self.data[1]) self.memberOf = [-1 for x in range(len(self.data[1]))] # @@ -88,11 +89,10 @@ def __init__(self, filename, k): self.selectInitialCentroids() self.assignPointsToCluster() - def showData(self): for i in range(len(self.data[0])): print("%20s %8.4f %8.4f" % - (self.data[0][i], self.data[1][i], self.data[2][i])) + (self.data[0][i], self.data[1][i], self.data[2][i])) def distanceToClosestCentroid(self, point, centroidList): result = self.eDistance(point, centroidList[0]) @@ -102,7 +102,6 @@ def distanceToClosestCentroid(self, point, centroidList): result = distance return result - def selectInitialCentroids(self): """implement the k-means++ method of selecting the set of initial centroids""" @@ -115,7 +114,7 @@ def selectInitialCentroids(self): for i in range(0, self.k - 1): # for every point in the data find its distance to # the closest centroid - weights = [self.distanceToClosestCentroid(x, centroids) + weights = [self.distanceToClosestCentroid(x, centroids) for x in range(len(self.data[0]))] total = sum(weights) # instead of raw distances, convert so sum of weight = 1 @@ -130,25 +129,21 @@ def selectInitialCentroids(self): x += 1 total += weights[x] centroids.append(x) - self.centroids = [[self.data[i][r] for i in range(1, len(self.data))] - for r in centroids] - - - - + self.centroids = [[self.data[i][r] for i in range(1, len(self.data))] + for r in centroids] + def updateCentroids(self): """Using the points in the clusters, 
diff --git a/ch8/kmeansPlusPlus.py b/ch8/kmeansPlusPlus.py
index 2105280..3ecca1a 100644
--- a/ch8/kmeansPlusPlus.py
+++ b/ch8/kmeansPlusPlus.py
@@ -1,5 +1,5 @@
 import math
-import random
+import random


 """
@@ -9,6 +9,7 @@
 """

+
 def getMedian(alist):
     """get median of list"""
     tmp = list(alist)
@@ -18,7 +19,7 @@ def getMedian(alist):
         return tmp[alen // 2]
     else:
         return (tmp[alen // 2] + tmp[(alen // 2) - 1]) / 2
-
+

 def normalizeColumn(column):
     """normalize the values of a column using Modified Standard Score
@@ -34,7 +35,7 @@ class kClusterer:
     This clusterer assumes that the first column of the data is a label
     not used in the clustering. The other columns contain numeric data
     """
-
+
     def __init__(self, filename, k):
         """ k is the number of clusters to make
         This init method:
@@ -70,11 +71,11 @@ def __init__(self, filename, k):
             toggle = 0
             for cell in range(self.cols):
                 if toggle == 0:
-                   self.data[cell].append(cells[cell])
-                   toggle = 1
+                    self.data[cell].append(cells[cell])
+                    toggle = 1
                 else:
                     self.data[cell].append(float(cells[cell]))
-
+
         self.datasize = len(self.data[1])
         self.memberOf = [-1 for x in range(len(self.data[1]))]
         #
@@ -88,11 +89,10 @@ def __init__(self, filename, k):
         self.selectInitialCentroids()
         self.assignPointsToCluster()
-
     def showData(self):
         for i in range(len(self.data[0])):
             print("%20s %8.4f %8.4f" %
-                   (self.data[0][i], self.data[1][i], self.data[2][i]))
+                  (self.data[0][i], self.data[1][i], self.data[2][i]))

     def distanceToClosestCentroid(self, point, centroidList):
         result = self.eDistance(point, centroidList[0])
@@ -102,7 +102,6 @@
                 result = distance
         return result
-
     def selectInitialCentroids(self):
         """implement the k-means++ method of selecting
         the set of initial centroids"""
@@ -115,7 +114,7 @@
         for i in range(0, self.k - 1):
             # for every point in the data find its distance to
             # the closest centroid
-            weights = [self.distanceToClosestCentroid(x, centroids)
+            weights = [self.distanceToClosestCentroid(x, centroids)
                        for x in range(len(self.data[0]))]
             total = sum(weights)
             # instead of raw distances, convert so sum of weight = 1
@@ -130,25 +129,21 @@
                 x += 1
                 total += weights[x]
             centroids.append(x)
-        self.centroids = [[self.data[i][r] for i in range(1, len(self.data))]
-                           for r in centroids]
-
-
-
-
+        self.centroids = [[self.data[i][r] for i in range(1, len(self.data))]
+                          for r in centroids]
+
     def updateCentroids(self):
         """Using the points in the clusters,
         determine the centroid (mean point) of each cluster"""
         members = [self.memberOf.count(i) for i in range(len(self.centroids))]
-
+
         self.centroids = [[sum([self.data[k][i]
-                                for i in range(len(self.data[0]))
-                                if self.memberOf[i] == centroid])/members[centroid]
+                                for i in range(len(self.data[0]))
+                                if self.memberOf[i] == centroid]) /
+                           members[centroid]
                            for k in range(1, len(self.data))]
-                           for centroid in range(len(self.centroids))]
-
-
-
+                          for centroid in range(len(self.centroids))]
+
     def assignPointToCluster(self, i):
         """ assign point to cluster based on distance from centroids"""
         min = 999999
@@ -171,7 +166,6 @@
         self.sse = 0
         self.memberOf = [self.assignPointToCluster(i)
                          for i in range(len(self.data[1]))]
-

     def eDistance(self, i, j):
         """ compute distance of point i from centroid j"""
@@ -179,7 +173,7 @@
         for k in range(1, self.cols):
             sumSquares += (self.data[k][i] - self.data[k][j])**2
         return math.sqrt(sumSquares)
-
+

     def euclideanDistance(self, i, j):
         """ compute distance of point i from centroid j"""
         sumSquares = 0
@@ -192,10 +186,11 @@ def kCluster(self):
         As you can see this method repeatedly updates the centroids
         by computing the mean point of each cluster
         re-assign the points to clusters based on these new centroids
-        until the number of points that change cluster membership is less than 1%.
+        until the number of points that change cluster membership is less
+        than 1%.
         """
         done = False
-
+
         while not done:
             self.iterationNumber += 1
             self.updateCentroids()
@@ -203,20 +198,20 @@
             #
             # we are done if fewer than 1% of the points change clusters
             #
-            if float(self.pointsChanged) / len(self.memberOf) < 0.01:
+            if float(self.pointsChanged) / len(self.memberOf) < 0.01:
                 done = True
         print("Final SSE: %f" % self.sse)

     def showMembers(self):
         """Display the results"""
         for centroid in range(len(self.centroids)):
-            print ("\n\nClass %i\n========" % centroid)
-            for name in [self.data[0][i] for i in range(len(self.data[0]))
-                         if self.memberOf[i] == centroid]:
-                print (name)
-
+            print("\n\nClass %i\n========" % centroid)
+            for name in [self.data[0][i] for i in range(len(self.data[0]))
+                         if self.memberOf[i] == centroid]:
+                print(name)
+
 ##
-## RUN THE K-MEANS CLUSTERER ON THE DOG DATA USING K = 3
+# RUN THE K-MEANS CLUSTERER ON THE DOG DATA USING K = 3
 ###
 km = kClusterer('../../data/dogs.csv', 3)
 km.kCluster()
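(Since k-means converges to a local optimum from its random seeds, a common follow-up to the run above is to cluster several times and keep the lowest-SSE result. A sketch, assuming the kClusterer interface shown in this diff: the constructor path, kCluster(), the sse attribute, and showMembers().)

best = None
for run in range(5):
    km = kClusterer('../../data/dogs.csv', 3)
    km.kCluster()              # prints "Final SSE: ..." for each run
    if best is None or km.sse < best.sse:
        best = km
best.showMembers()             # report the lowest-SSE clustering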