
Commit 31e3df4

Major overhaul of p1: fixed the bug where figures 1 and 2 displayed incorrectly. The cause was that the matrix of losses was never transposed; the matrix and the plot use coordinate systems with different origins.
1 parent 3dfd35e commit 31e3df4
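The fix is easy to reproduce in isolation: np.meshgrid defaults to 'xy' indexing, so the grids it returns are indexed [bi, ai], while draw_hill fills allSSE as [ai][bi]. Because both axes are sampled 100 times, the shapes match either way, so the un-transposed plots rendered without error, just mirrored across the diagonal. A minimal standalone sketch (not part of the commit):

import numpy as np

a = np.linspace(-20, 20, 100)
b = np.linspace(-20, 20, 100)

# Filled the same way as in draw_hill: rows follow a, columns follow b.
allSSE = np.zeros((len(a), len(b)))
for ai in range(len(a)):
    for bi in range(len(b)):
        allSSE[ai][bi] = a[ai] ** 2 + b[bi]  # stand-in for the real SSE

# meshgrid's 'xy' default returns grids of shape (len(b), len(a)),
# i.e. ga[bi, ai] == a[ai] and gb[bi, ai] == b[bi].
ga, gb = np.meshgrid(a, b)

# plot_surface/contourf pair Z[bi, ai] with (ga[bi, ai], gb[bi, ai]),
# so the Z that lines up with the grids is the transpose:
assert np.isclose(allSSE.T[5, 7], a[7] ** 2 + b[5])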

3 files changed: +199 -49 lines changed

p1 gradient descent.py

+49 -41

@@ -2,13 +2,17 @@
 import matplotlib.pyplot as plt
 from mpl_toolkits.mplot3d import Axes3D
 # A minimal linear regression example, optimized with classic gradient descent
-rate = 1e-2 # learning rate
+rate = 0.2 # learning rate
 def da(y,y_p,x):
     return (y-y_p)*(-x)
 
 def db(y,y_p):
     return (y-y_p)*(-1)
-
+def calc_loss(a,b,x,y):
+    tmp = y - (a * x + b)
+    tmp = tmp ** 2 # square every element of the array
+    SSE = sum(tmp) / (2 * len(x))
+    return SSE
 def draw_hill(x,y):
     a = np.linspace(-20,20,100)
     print(a)
@@ -21,52 +25,54 @@ def draw_hill(x,y):
         for bi in range(0,len(b)):
             a0 = a[ai]
             b0 = b[bi]
-            tmp = y - (a0*x + b0)
-            tmp = tmp**2 # square every element of the array
-            SSE = sum(tmp)/2
+            SSE = calc_loss(a=a0,b=b0,x=x,y=y)
             allSSE[ai][bi] = SSE
 
     a,b = np.meshgrid(a, b)
 
     return [a,b,allSSE]
-# simulated data
+# 模拟数据 (simulated data)
 x = [30 ,35,37, 59, 70, 76, 88, 100]
 y = [1100, 1423, 1377, 1800, 2304, 2588, 3495, 4839]
 
-
 # normalize the data
 x_max = max(x)
 x_min = min(x)
 y_max = max(y)
 y_min = min(y)
-# x_mean = np.mean(x)
+
 for i in range(0,len(x)):
     x[i] = (x[i] - x_min)/(x_max - x_min)
     y[i] = (y[i] - y_min)/(y_max - y_min)
 
 [ha,hb,hallSSE] = draw_hill(x,y)
-
+hallSSE = hallSSE.T # Important: transpose all the losses. The matrix lays out its elements from the top-left corner toward the bottom-right, while the plot's origin sits at the bottom-left.
 # initialize a and b
-a = 10
-b = -20
-fig4 = plt.figure(4,figsize=(12,8))
+a = 10.0
+b = -20.0
+fig = plt.figure(1, figsize=(12, 8))
+
+# draw the surface for panel 1
+ax = fig.add_subplot(2, 2, 1, projection='3d')
+ax.set_top_view()
+ax.plot_surface(ha, hb, hallSSE, rstride=2, cstride=2, cmap='rainbow')
+
+# draw the contour plot for panel 2
 plt.subplot(2,2,2)
-plt.contourf(ha,hb,hallSSE,15,alpha=0.75,cmap=plt.cm.hot)
+ta = np.linspace(-20, 20, 100)
+tb = np.linspace(-20, 20, 100)
+plt.contourf(ha,hb,hallSSE,15,alpha=0.5,cmap=plt.cm.hot)
 C = plt.contour(ha,hb,hallSSE,15,colors='black')
 plt.clabel(C,inline=True)
 plt.xlabel('a')
 plt.ylabel('b')
-plt.xticks()
-plt.yticks()
-# plt.show()
-# plot bowl
-ax = fig4.add_subplot(2, 2, 1, projection='3d')
-ax.plot_surface(ha, hb, hallSSE, rstride=2, cstride=2, cmap='rainbow')
+
 plt.ion() # iteration on
-all_a = []
-all_b = []
+
 all_loss = []
 all_step = []
+last_a = a
+last_b = b
 for step in range(1,500):
     loss = 0
     all_da = 0
@@ -76,36 +82,38 @@
         loss = loss + (y[i] - y_p)*(y[i] - y_p)/2
         all_da = all_da + da(y[i],y_p,x[i])
         all_db = all_db + db(y[i],y_p)
+    #loss_ = calc_loss(a = a,b=b,x=np.array(x),y=np.array(y))
+    loss = loss/len(x)
 
-    a = a - rate*all_da
-    b = b - rate*all_db
-
-    all_a.append(a)
-    all_b.append(b)
-    all_loss.append(loss)
-    all_step.append(step)
-
-    # plot gradient descent point
+    # plot the loss point on panel 1
     ax.scatter(a, b, loss, color='black')
-
+    # plot the loss point on panel 2
    plt.subplot(2,2,2)
-    plt.scatter(a,b,loss,color='blue')
-
-    # plot lines
-    plt.subplot(2,2,3)
-    plt.plot(x,y)
-    plt.plot(x,y,'o')
+    plt.scatter(a,b,s=5,color='blue')
+    plt.plot([last_a,a],[last_b,b],color='aqua')
+    # draw the regression line on panel 3
+    plt.subplot(2, 2, 3)
+    plt.plot(x, y)
+    plt.plot(x, y, 'o')
     x_ = np.linspace(0, 1, 2)
     y_draw = a * x_ + b
-    plt.plot(x_,y_draw)
-
-    # plot losses
+    plt.plot(x_, y_draw)
+    # draw the loss-update curve on panel 4
+    all_loss.append(loss)
+    all_step.append(step)
     plt.subplot(2,2,4)
     plt.plot(all_step,all_loss,color='orange')
     plt.xlabel("step")
     plt.ylabel("loss")
 
-    if step%10 == 0:
+
+    # print('a = %.3f,b = %.3f' % (a,b))
+    last_a = a
+    last_b = b
+    a = a - rate*all_da
+    b = b - rate*all_db
+
+    if step%1 == 0:
        print("step: ", step, " loss: ", loss)
         plt.show()
         plt.pause(0.01)
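The commented-out loss_ = calc_loss(...) line above hints at a consistency check: with the new loss = loss/len(x) normalization, the loss accumulated by the training loop matches the calc_loss helper (and therefore the hallSSE grid the descent points are plotted against). A small self-contained verification, with made-up toy values:

import numpy as np

def calc_loss(a, b, x, y):  # as introduced in the diff above
    tmp = y - (a * x + b)
    tmp = tmp ** 2  # square every element of the array
    return sum(tmp) / (2 * len(x))

x = np.array([0.0, 0.25, 0.5, 1.0])  # made-up normalized data
y = np.array([0.1, 0.3, 0.55, 1.05])
a, b = 10.0, -20.0

loss = 0.0
for i in range(len(x)):
    y_p = a * x[i] + b
    loss = loss + (y[i] - y_p) * (y[i] - y_p) / 2
loss = loss / len(x)

assert abs(loss - calc_loss(a=a, b=b, x=x, y=y)) < 1e-9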

p4 momentum.py

+8 -8

@@ -12,7 +12,7 @@ def db(y,y_p):
     return (y-y_p)*(-1)
 
 def draw_hill(x,y):
-    a = np.linspace(-20,20,100)
+    a = np.linspace(-5,18,100)
     print(a)
     b = np.linspace(-20,20,100)
     x = np.array(x)
@@ -68,8 +68,8 @@ def get_batch_data(x,y,batch=3):
 
 # draw the contour plot
 plt.subplot(2,2,2)
-plt.contourf(ha,hb,hallSSE,15,alpha=0.75,cmap=plt.cm.hot)
-C = plt.contour(ha,hb,hallSSE,15,colors='black')
+plt.contourf(ha,hb,hallSSE,30,alpha=0.75,cmap=plt.cm.hot)
+C = plt.contour(ha,hb,hallSSE,10,colors='black')
 plt.clabel(C,inline=True)
 plt.xlabel('a')
 plt.ylabel('b')
@@ -98,7 +98,7 @@ def get_batch_data(x,y,batch=3):
 
         all_da = all_da + da(y[i],y_p,x[i])
         all_db = all_db + db(y[i],y_p)
-
+    loss = loss/len(x)
     va = gamma * last_va + rate*all_da
     vb = gamma * last_vb + rate*all_db
 
@@ -114,11 +114,11 @@ def get_batch_data(x,y,batch=3):
     all_step.append(step)
 
     # plot gradient descent point
-    ax.scatter(a, b, loss/len(x), color='black')
+    ax.scatter(a, b, loss, color='black')
 
     # plot on contour
     plt.subplot(2,2,2)
-    plt.scatter(a,b,loss/len(x),color='blue',marker='.',linewidths=0.1)
+    plt.scatter(a,b,3,color='blue',marker='.')
 
     # plot lines
     plt.subplot(2,2,3)
@@ -134,9 +134,9 @@ def get_batch_data(x,y,batch=3):
     plt.xlabel("step")
     plt.ylabel("loss")
 
-    if step%10 == 0:
+    if step%1 == 0:
         print("step: ", step, " loss: ", loss)
         plt.show()
-        plt.pause(0.01)
+        plt.pause(5)
 plt.show()
 plt.pause(99999999999)

p5 Nesterov.py

+142 -0 (new file)

@@ -0,0 +1,142 @@
+import numpy as np
+import matplotlib.pyplot as plt
+from mpl_toolkits.mplot3d import Axes3D
+import random
+# A minimal linear regression example, optimized with NAG (Nesterov accelerated gradient), which builds on momentum
+
+rate = 1e-2 # learning rate
+def da(y,y_p,x):
+    return (y-y_p)*(-x)
+
+def db(y,y_p):
+    return (y-y_p)*(-1)
+
+def draw_hill(x,y):
+    a = np.linspace(-10,15,100)
+    print(a)
+    b = np.linspace(-20,20,100)
+    x = np.array(x)
+    y = np.array(y)
+
+    allSSE = np.zeros(shape=(len(a),len(b)))
+    for ai in range(0,len(a)):
+        for bi in range(0,len(b)):
+            a0 = a[ai]
+            b0 = b[bi]
+            tmp = y - (a0*x + b0)
+            tmp = tmp**2 # square every element of the array
+            SSE = sum(tmp)/(2*len(x))
+            allSSE[ai][bi] = SSE
+
+    a,b = np.meshgrid(a, b)
+
+    return [a,b,allSSE]
+
+def shuffle_data(x,y):
+    # randomly shuffle x and y while keeping the pairs aligned
+    seed = random.random()
+    random.seed(seed)
+    random.shuffle(x)
+    random.seed(seed)
+    random.shuffle(y)
+
+def get_batch_data(x,y,batch=3):
+    shuffle_data(x,y)
+    x_new = x[0:batch]
+    y_new = y[0:batch]
+    return [x_new,y_new]
+# simulated data
+x = [30 , 35, 37, 59, 70, 76, 88, 100]
+y = [1100, 1423, 1377, 1800, 2304, 2588, 3495, 4839]
+
+
+# normalize the data
+x_max = max(x)
+x_min = min(x)
+y_max = max(y)
+y_min = min(y)
+for i in range(0,len(x)):
+    x[i] = (x[i] - x_min)/(x_max - x_min)
+    y[i] = (y[i] - y_min)/(y_max - y_min)
+
+[ha,hb,hallSSE] = draw_hill(x,y)
+
+# initialize a and b
+a = 10
+b = -20
+fig4 = plt.figure(4,figsize=(12,8))
+
+# draw the contour plot
+plt.subplot(2,2,2)
+plt.contourf(ha,hb,hallSSE,15,alpha=0.75,cmap=plt.cm.hot)
+C = plt.contour(ha,hb,hallSSE,15,colors='black')
+plt.clabel(C,inline=True)
+plt.xlabel('a')
+plt.ylabel('b')
+plt.xticks()
+plt.yticks()
+# plt.show()
+# plot bowl
+ax = fig4.add_subplot(2, 2, 1, projection='3d')
+ax.plot_surface(ha, hb, hallSSE, rstride=2, cstride=2, cmap='rainbow')
+plt.ion() # iteration on
+all_a = []
+all_b = []
+all_loss = []
+all_step = []
+last_va = 0 # momentum
+last_vb = 0
+gamma = 0.9
+for step in range(1,500):
+    loss = 0
+    all_da = 0
+    all_db = 0
+
+    for i in range(0,len(x)):
+        y_p = a*x[i] + b
+        loss = loss + (y[i] - y_p)*(y[i] - y_p)/2
+
+        all_da = all_da + da(y[i],y_p,x[i])
+        all_db = all_db + db(y[i],y_p)
+
+    va = gamma * last_va + rate*all_da
+    vb = gamma * last_vb + rate*all_db
+
+    a = a - va
+    b = b - vb
+
+    last_va = va
+    last_vb = vb
+
+    all_a.append(a)
+    all_b.append(b)
+    all_loss.append(loss)
+    all_step.append(step)
+
+    # plot gradient descent point
+    ax.scatter(a, b, loss/len(x), color='black')
+
+    # plot on contour
+    plt.subplot(2,2,2)
+    plt.scatter(a,b,loss/len(x),color='blue',marker='.',linewidths=0.1)
+
+    # plot lines
+    plt.subplot(2,2,3)
+    plt.plot(x,y)
+    plt.plot(x,y,'o')
+    x_ = np.linspace(0, 1, 2)
+    y_draw = a * x_ + b
+    plt.plot(x_,y_draw)
+
+    # plot losses
+    plt.subplot(2,2,4)
+    plt.plot(all_step,all_loss,color='orange')
+    plt.xlabel("step")
+    plt.ylabel("loss")
+
+    if step%10 == 0:
+        print("step: ", step, " loss: ", loss)
+        plt.show()
+        plt.pause(0.01)
+plt.show()
+plt.pause(99999999999)
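One caveat on the new file: inside the loop the gradient is taken at the current (a, b) before the velocity is applied, which is the same rule as the momentum update in p4. Canonical NAG instead measures the gradient at the lookahead point (a - gamma*last_va, b - gamma*last_vb). A self-contained sketch of that lookahead variant, reusing the file's da/db helpers on made-up data:

import numpy as np

def da(y, y_p, x):  # same helpers as in p5 Nesterov.py
    return (y - y_p) * (-x)

def db(y, y_p):
    return (y - y_p) * (-1)

rate, gamma = 1e-2, 0.9
x = np.array([0.0, 0.2, 0.5, 1.0])  # made-up normalized data
y = np.array([0.05, 0.25, 0.6, 1.1])
a, b = 10.0, -20.0
last_va = last_vb = 0.0

for step in range(1, 500):
    # NAG: step to the lookahead point first, then evaluate the gradient there
    a_look = a - gamma * last_va
    b_look = b - gamma * last_vb
    y_p = a_look * x + b_look
    all_da = np.sum(da(y, y_p, x))
    all_db = np.sum(db(y, y_p))

    va = gamma * last_va + rate * all_da
    vb = gamma * last_vb + rate * all_db
    a = a - va
    b = b - vb
    last_va, last_vb = va, vb

print(a, b)  # approaches the least-squares fit of the toy data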
