Commit
Add adamax
yacineMahdid committed Sep 10, 2020
1 parent ff29db4 commit dd9dee4
Showing 2 changed files with 204 additions and 96 deletions.
@@ -13,6 +13,7 @@
"- [AdaDelta](https://youtu.be/6gvh0IySNMs)\n",
"- [Adam](https://youtu.be/6nqV58NA_Ew)\n",
"- [Nesterov](https://youtu.be/6FrBXv9OcqE)\n",
"- Adamax\n",
"\n",
"## Tests\n",
"In order to demonstrate the algorithms capabilities to optimize a function we used these simple test setup:\n",
@@ -21,7 +22,7 @@
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
@@ -383,40 +384,80 @@
" \n",
" if i % 100 == 0:\n",
" print(f\"Iteration {i}\")\n",
" print(model)\n",
" \n",
" \n",
"def adamax(model, xs, ys, learning_rate = 0.1, b1 = 0.9, b2 = 0.999, max_iteration = 1000):\n",
" \"\"\"\n",
" Adamax: This is the adamax optimizer that build upong adam with L_inf norm\n",
" model: The model we want to optimize the parameter on\n",
" xs: the feature of my dataset\n",
" ys: the continous value (target)\n",
" learning_rate: the amount of learning we want to happen at each time step (default is 0.1 and will be updated by the optimizer)\n",
" b1: this is the first decaying average with proposed default value of 0.9 (deep learning purposes)\n",
" b2: this is the second decaying average with proposed default value of 0.999 (deep learning purposes)\n",
" max_iteration: the number of sgd round we want to do before stopping the optimization\n",
" \"\"\"\n",
" \n",
" \n",
" # Variable Initialization\n",
" num_param = len(model.weights)\n",
" m = [0 for _ in range(num_param)] # two m for each parameter\n",
" v = [0 for _ in range(num_param)] # two v for each parameter\n",
" g = [0 for _ in range(num_param)] # two gradient\n",
" \n",
" for t in range(1,max_iteration):\n",
" \n",
" # Calculate the gradients \n",
" x, y = stochastic_sample(xs, ys)\n",
" \n",
" # Get the partial derivatives\n",
" g = model.derivate(x, y)\n",
"\n",
" # Update the m and v parameter\n",
" m = [b1*m_i + (1 - b1)*g_i for m_i, g_i in zip(m, g)]\n",
" v = [np.maximum(b2*v_i, np.absolute(g_i)) for v_i, g_i in zip(v, g)]\n",
"\n",
" # Bias correction for m only\n",
" m_cor = [m_i / (1 - (b1**t)) for m_i in m]\n",
"\n",
" # Update the parameter\n",
" model.weights = [weight - (learning_rate / np.sqrt(v_i))*m_cor_i for weight, v_i, m_cor_i in zip(model.weights, v, m_cor)]\n",
" \n",
" if t % 100 == 0:\n",
" print(f\"Iteration {t}\")\n",
" print(model)"
]
},
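For reference, the update performed by the new `adamax` function follows the Adamax rule from the Adam paper (Kingma & Ba, 2015): Adam's raw second-moment estimate is replaced by an exponentially weighted infinity norm, and only the first moment needs bias correction because the max recursion is not pulled toward its zero initialization. A minimal single-step sketch of that rule, assuming plain NumPy and illustrative names (not the notebook's `Line` model or helpers):

```python
import numpy as np

def adamax_step(theta, grad, m, u, t, lr=0.1, b1=0.9, b2=0.999):
    """One Adamax update for a parameter vector theta (illustrative sketch)."""
    m = b1 * m + (1 - b1) * grad              # first moment: decaying average of gradients
    u = np.maximum(b2 * u, np.abs(grad))      # exponentially weighted infinity norm
    m_hat = m / (1 - b1 ** t)                 # bias correction for m only
    theta = theta - lr * m_hat / (u + 1e-8)   # u is already a norm, so no square root; the small
                                              # epsilon (not in the paper) guards against u == 0
    return theta, m, u
```

Folding the bias correction into the step size, as the paper writes it, gives the same update.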
{
"cell_type": "code",
"execution_count": 16,
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Nesterov Accelerated Gradient\n",
"Iteration 0\n",
"y = [0.89010029] + [0.35356173]*x\n",
"Adamax\n",
"Iteration 100\n",
"y = [0.3723535] + [0.9655646]*x\n",
"y = [-0.00777475] + [1.00552222]*x\n",
"Iteration 200\n",
"y = [0.01159183] + [0.9884176]*x\n",
"y = [0.00039666] + [1.00003985]*x\n",
"Iteration 300\n",
"y = [0.00013029] + [1.00053685]*x\n",
"y = [9.6917012e-06] + [0.99999651]*x\n",
"Iteration 400\n",
"y = [0.00110482] + [1.00002591]*x\n",
"y = [2.99389693e-07] + [0.99999994]*x\n",
"Iteration 500\n",
"y = [1.20330159e-05] + [0.99999153]*x\n",
"y = [-3.59743715e-09] + [1.]*x\n",
"Iteration 600\n",
"y = [4.34325924e-05] + [1.00000075]*x\n",
"y = [2.77363348e-11] + [1.]*x\n",
"Iteration 700\n",
"y = [-1.87460485e-05] + [1.00003432]*x\n",
"y = [-6.49186838e-14] + [1.]*x\n",
"Iteration 800\n",
"y = [1.26114336e-05] + [0.99998661]*x\n",
"y = [-3.5671997e-15] + [1.]*x\n",
"Iteration 900\n",
"y = [-1.84626241e-06] + [1.0000026]*x\n",
"y = [4.67870626e-06] + [0.99999889]*x\n"
"y = [6.75723728e-17] + [1.]*x\n",
"y = [4.19651576e-17] + [1.]*x\n"
]
}
],
@@ -463,57 +504,60 @@
"adadelta(model, xs, ys)\n",
"print(model)\n",
"\n",
"\n",
"# Adam\n",
"model = Line()\n",
"print(\"Adam\")\n",
"adam(model, xs, ys)\n",
"print(model)\n",
"\n",
"'''\n",
"\n",
"# Nesterov Accelerated Gradient\n",
"model = Line()\n",
"print(\"Nesterov Accelerated Gradient\")\n",
"nesterov(model, xs, ys)\n",
"print(model)\n",
"'''\n",
"\n",
"# Adamax\n",
"model = Line()\n",
"print(\"Adamax\")\n",
"adamax(model, xs, ys)\n",
"print(model)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Nesterov Accelerated Gradient\n",
"Iteration 0\n",
"y = [0.66878322] + [0.90014342]*x\n",
"Adamax\n",
"Iteration 100\n",
"y = [-0.17903726] + [2.40553571]*x\n",
"y = [0.4369314] + [1.89897329]*x\n",
"Iteration 200\n",
"y = [0.05899865] + [2.04887923]*x\n",
"y = [0.13822549] + [1.96756579]*x\n",
"Iteration 300\n",
"y = [0.00894693] + [1.97536479]*x\n",
"y = [0.03852642] + [1.99071418]*x\n",
"Iteration 400\n",
"y = [0.01374569] + [2.05300722]*x\n",
"y = [0.01035516] + [1.99740938]*x\n",
"Iteration 500\n",
"y = [0.10548793] + [1.91493233]*x\n",
"y = [0.0021898] + [1.99963751]*x\n",
"Iteration 600\n",
"y = [0.00385495] + [2.01632264]*x\n",
"y = [0.00051143] + [1.99991838]*x\n",
"Iteration 700\n",
"y = [0.05427682] + [2.13589417]*x\n",
"y = [9.66789006e-05] + [1.99998553]*x\n",
"Iteration 800\n",
"y = [0.0113579] + [2.00570216]*x\n",
"y = [1.43786098e-05] + [1.99999787]*x\n",
"Iteration 900\n",
"y = [-0.02275247] + [1.96719449]*x\n",
"y = [-0.00900316] + [2.00493823]*x\n"
"y = [2.96576946e-06] + [1.99999894]*x\n",
"y = [5.99822337e-07] + [1.99999991]*x\n"
]
}
],
"source": [
"\n",
"# Here we have a simple line with intercept = 0 and slope = 2\n",
"xs = [1,2,3,4,5,6,7]\n",
"ys = [2,4,6,8,10,12,14]\n",
@@ -560,46 +604,50 @@
"print(\"Adam\")\n",
"adam(model, xs, ys)\n",
"print(model)\n",
"'''\n",
"\n",
"# Nesterov Accelerated Gradient\n",
"model = Line()\n",
"print(\"Nesterov Accelerated Gradient\")\n",
"nesterov(model, xs, ys)\n",
"print(model)\n",
"'''\n",
"\n",
"# Adamax\n",
"model = Line()\n",
"print(\"Adamax\")\n",
"adamax(model, xs, ys)\n",
"print(model)"
]
},
{
"cell_type": "code",
"execution_count": 20,
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Nesterov Accelerated Gradient\n",
"Iteration 0\n",
"y = [0.30475578] + [0.9422567]*x\n",
"Adamax\n",
"Iteration 100\n",
"y = [-0.39695759] + [2.51820116]*x\n",
"y = [1.11731293] + [1.9747599]*x\n",
"Iteration 200\n",
"y = [-1.48096144] + [3.46703259]*x\n",
"y = [1.05362733] + [1.99324401]*x\n",
"Iteration 300\n",
"y = [1.90685378] + [2.11784826]*x\n",
"y = [1.02088689] + [1.99784093]*x\n",
"Iteration 400\n",
"y = [1.03772983] + [1.98962059]*x\n",
"y = [1.00584065] + [1.99878137]*x\n",
"Iteration 500\n",
"y = [1.00457709] + [2.05050135]*x\n",
"y = [1.00157963] + [1.99971604]*x\n",
"Iteration 600\n",
"y = [0.99465739] + [1.99190331]*x\n",
"y = [1.00050744] + [1.99986608]*x\n",
"Iteration 700\n",
"y = [1.00337418] + [1.99869932]*x\n",
"y = [1.00011891] + [1.99998385]*x\n",
"Iteration 800\n",
"y = [0.99671896] + [1.98843029]*x\n",
"y = [1.00003134] + [1.99999406]*x\n",
"Iteration 900\n",
"y = [1.00465637] + [1.99875298]*x\n",
"y = [0.99462305] + [2.00330215]*x\n"
"y = [1.00000663] + [1.99999862]*x\n",
"y = [1.00000132] + [1.99999974]*x\n"
]
}
],
@@ -649,12 +697,18 @@
"print(\"Adam\")\n",
"adam(model, xs, ys)\n",
"print(model)\n",
"'''\n",
"\n",
"# Nesterov Accelerated Gradient\n",
"model = Line()\n",
"print(\"Nesterov Accelerated Gradient\")\n",
"nesterov(model, xs, ys)\n",
"print(model)\n",
"'''\n",
"\n",
"# Adamax\n",
"model = Line()\n",
"print(\"Adamax\")\n",
"adamax(model, xs, ys)\n",
"print(model)"
]
},
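Outside the notebook, the same line-fitting check can be reproduced with a short self-contained script. Everything below (the variable names, the closed-form gradient of the squared error) is illustrative and assumes only NumPy, not the commit's `Line` class or `stochastic_sample` helper:

```python
import numpy as np

# Toy data matching the first test cell: y = x (intercept 0, slope 1)
xs = np.array([1, 2, 3, 4, 5, 6, 7], dtype=float)
ys = xs.copy()

lr, b1, b2 = 0.1, 0.9, 0.999
rng = np.random.default_rng(0)
theta = rng.random(2)              # [intercept, slope], random start
m = np.zeros(2)                    # first moment estimates
u = np.zeros(2)                    # infinity norm estimates

for t in range(1, 1001):
    i = rng.integers(len(xs))      # stochastic sample, as stochastic_sample(xs, ys) does
    x, y = xs[i], ys[i]
    err = (theta[0] + theta[1] * x) - y
    grad = np.array([2 * err, 2 * err * x])     # d(err^2)/d(intercept), d(err^2)/d(slope)
    m = b1 * m + (1 - b1) * grad
    u = np.maximum(b2 * u, np.abs(grad))
    theta = theta - lr * (m / (1 - b1 ** t)) / (u + 1e-8)

print(f"y = {theta[0]:.4f} + {theta[1]:.4f}*x")  # should approach y = 0.0000 + 1.0000*x
```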
