machine learning #7

Open · wants to merge 1 commit into base: master
Binary file added Repeating_10times_04.png
25 changes: 20 additions & 5 deletions learning_curve.py
@@ -21,7 +21,7 @@ def display_digits():

def train_model():
"""Train a model on pictures of digits.

Read in 8x8 pictures of numbers and evaluate the accuracy of the model
when different percentages of the data are used as training data. This function
plots the average accuracy of the model as a function of the percent of data
@@ -39,16 +39,31 @@ def train_model():
# For consistency with the previous example use
# model = LogisticRegression(C=10**-10) for your learner

    # Average the test accuracy over several random train/test splits for
    # each training percentage, then record the mean.
    repeating_index = range(10)
    repeating_list = numpy.zeros(len(repeating_index))

    data = load_digits()
    number = 0
    for index in train_percentages:
        for repeating in repeating_index:
            X_train, X_test, y_train, y_test = train_test_split(
                data.data, data.target, train_size=index / 100)
            model = LogisticRegression(C=10**-10)
            model.fit(X_train, y_train)
            repeating_list[repeating] = model.score(X_test, y_test)
        test_accuracies[number] = sum(repeating_list) / len(repeating_list)
        number += 1

    fig = plt.figure()
    plt.plot(train_percentages, test_accuracies)
    plt.xlabel('Percentage of Data Used for Training')
    plt.ylabel('Accuracy on Test Set')
    plt.show()

    fig.savefig('Repeating_10times_05.png', dpi=600, bbox_inches='tight')

if __name__ == "__main__":
    # Feel free to comment/uncomment as needed
    # display_digits()
    train_model()
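The averaging loop in `train_model` can be condensed into a small standalone helper. This is a hypothetical sketch, not the PR's code: the function name `average_accuracy` and the `n_trials` parameter are illustrative, and it assumes scikit-learn and NumPy are installed.

```python
import numpy as np
from sklearn.datasets import load_digits
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

def average_accuracy(train_fraction, n_trials=10):
    """Mean test accuracy over n_trials random train/test splits."""
    digits = load_digits()
    scores = np.zeros(n_trials)
    for trial in range(n_trials):
        # A fresh random split each trial smooths out split-to-split noise.
        X_train, X_test, y_train, y_test = train_test_split(
            digits.data, digits.target, train_size=train_fraction)
        model = LogisticRegression(C=10**-10)
        model.fit(X_train, y_train)
        scores[trial] = model.score(X_test, y_test)
    return scores.mean()
```

Calling `average_accuracy(p / 100)` for each entry of `train_percentages` yields the same curve as the loop above, with the bookkeeping indices replaced by `np.mean` over a scores array.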
21 changes: 21 additions & 0 deletions questions.txt
@@ -0,0 +1,21 @@
"""
Subeen Kim

1. What is the general trend in the curve?

As the percentage of data used for training increases, the accuracy on the test set improves.

2. Are there parts of the curve that appear to be noisier than others? Why?

The ends of the graph (extremely small or large training fractions) are noisier than the middle part (training fraction ~ 0.5).
When the trained or tested portion is very small, a few exceptional cases can dominate the score, which adds noise to the graph.

3. How many trials do you need to get a smooth curve?

When I ran 100 trials, I was able to get a smooth curve. (image attached)

4. Try different values for C. What happens?

As C gets larger, the accuracy on the test set improves noticeably.

"""