updated dynamical systems and created cont. optim.
1 parent 2d3f917 · commit f125c75
Showing 6 changed files with 255 additions and 1 deletion.
218 changes: 218 additions & 0 deletions
Mathematics/5th/Continuous_optimization/Continuous_optimization.tex
@@ -0,0 +1,218 @@
\documentclass[../../../main_math.tex]{subfiles}

\begin{document}
\changecolor{CO}
\begin{multicols}{2}[\section{Continuous optimization}]
\subsection{First order descent methods}
In this section we are interested in finding $\min_{x\in H} f(x)$, where $H$ is a Hilbert space and $f:H\rightarrow \mathbb{R}$ is a differentiable function.
\subsubsection{Gradient descent}
\begin{definition}[Gradient descent algorithm]
We define the \emph{gradient descent algorithm} as:
\begin{equation*}
\vf{x}_{k+1}=\vf{x}_k-\tau \grad f(\vf{x}_k)=:T_\tau(\vf{x}_k)
\end{equation*}
with $\tau>0$.
\end{definition}
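% Added illustration (not in the original notes): a minimal worked example on a
% quadratic, making the role of the step size bound explicit.
\begin{remark}
As a quick sketch, take $f(\vf{x})=\frac{1}{2}\langle \vf{A}\vf{x},\vf{x}\rangle$ with $\vf{A}$ symmetric positive definite with largest eigenvalue $L$, so that $\grad f(\vf{x})=\vf{A}\vf{x}$ and $\vf{x}_*=\vf{0}$. The iteration is then linear:
$$
\vf{x}_{k+1}=(\vf{I}-\tau\vf{A})\vf{x}_k,\qquad \rho(\vf{I}-\tau\vf{A})=\max_i |1-\tau\lambda_i|
$$
and $|1-\tau\lambda_i|<1$ for every eigenvalue $\lambda_i\in(0,L]$ exactly when $0<\tau<\frac{2}{L}$, in agreement with the step size restriction appearing below.
\end{remark}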
\begin{definition}
One can choose $\tau$ in different ways:
\begin{itemize}
\item Optimal step: $\tau_k=\argmin_{\tau>0} f(\vf{x}_k-\tau \grad f(\vf{x}_k))$
\item \emph{Armijo-type rule}: find $i\geq 0$ such that:
$$
f(\vf{x}_k-\tau\rho^i \grad f(\vf{x}_k))\leq f(\vf{x}_k)-c \tau \rho^i \norm{\grad f(\vf{x}_k)}^2
$$
with $c,\rho\in (0,1)$ fixed.
\item Gradient with fixed step $\tau>0$:
\begin{itemize}
\item We choose $\vf{x}_{k+1}$ as the minimizer of a quadratic approximation of $f$ (as checked after this definition, this recovers the fixed-step update):
\begin{multline*}
\vf{x}_{k+1}=\argmin_{\vf{x}\in H} \bigg\{ f(\vf{x}_k)+\langle \grad f(\vf{x}_k),\vf{x}-\vf{x}_k\rangle+\\+\left.\frac{1}{2\tau}\norm{\vf{x}-\vf{x}_k}^2 \right\}
\end{multline*}
\item \emph{Frank-Wolfe-type method}:
$$
\vf{x}_{k+1}=\argmin_{\vf{x}\in H_k} \{ f(\vf{x}_k)+\langle \grad f(\vf{x}_k),\vf{x}-\vf{x}_k\rangle \}
$$
with $H_k$ appropriately chosen.
\end{itemize}
\end{itemize}
\end{definition}
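% Added check (not in the original notes): the quadratic model above is minimized
% in closed form, which shows it coincides with the plain fixed-step update.
\begin{remark}
The objective of the quadratic approximation is strongly convex in $\vf{x}$; setting its gradient to zero gives
$$
\grad f(\vf{x}_k)+\frac{1}{\tau}(\vf{x}_{k+1}-\vf{x}_k)=0 \iff \vf{x}_{k+1}=\vf{x}_k-\tau\grad f(\vf{x}_k)
$$
so the \emph{gradient with fixed step} is exactly the gradient descent algorithm $T_\tau$.
\end{remark}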
\begin{proposition}
Let $f\in\mathcal{C}^1$ be bounded from below with $\grad f$ $L$-Lipschitz continuous. Then, $\displaystyle\lim_{k\rightarrow\infty} \grad f(\vf{x}_k)=0$, provided that $0<\tau<\frac{2}{L}$.
\end{proposition}
\begin{proposition}
If $f$ is convex, for any $\vf{x},\vf{y}\in H$ we have:
$$
f(\vf{y})\geq f(\vf{x})+\langle \grad f(\vf{x}),\vf{y}-\vf{x}\rangle
$$
\end{proposition}
\begin{definition}
Let $A:H\to H$ be an operator. We say that $A$ is \emph{$L$-co-coercive} if $\forall x,y\in H$:
$$
\langle Ax-Ay,x-y\rangle\geq L \norm{Ax-Ay}^2
$$
\end{definition}
\begin{definition}
Let $A:H\to H$ be an operator. We say that $A$ is \emph{firmly non-expansive} if $\forall x,y\in H$:
$$
\norm{Ax-Ay}^2+\norm{(I-A)x-(I-A)y}^2\leq \norm{x-y}^2
$$
\end{definition}
\begin{lemma}
Let $A:H\to H$ be an operator. Then, $A$ is firmly non-expansive if and only if $A$ is $1$-co-coercive.
\end{lemma}
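% Added verification (not in the original notes): expanding the squared norm
% proves the equivalence claimed in the lemma in one line.
\begin{remark}
Since $\norm{(I-A)x-(I-A)y}^2=\norm{x-y}^2-2\langle Ax-Ay,x-y\rangle+\norm{Ax-Ay}^2$, the firm non-expansiveness inequality is equivalent to
$$
\langle Ax-Ay,x-y\rangle\geq \norm{Ax-Ay}^2
$$
which is precisely $1$-co-coercivity.
\end{remark}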
\begin{theorem}[Baillon-Haddad]
If $f$ is convex and $\grad f$ is $L$-Lipschitz continuous, then for any $\vf{x},\vf{y}\in H$ we have:
$$
\langle \grad f(\vf{x})-\grad f(\vf{y}),\vf{x}-\vf{y}\rangle\geq \frac{1}{L} \norm{\grad f(\vf{x})-\grad f(\vf{y})}^2
$$
That is, $\grad f$ is $L^{-1}$-co-coercive.
\end{theorem}
\begin{lemma}
If $f$ is convex with $L$-Lipschitz continuous gradient, then the mapping $T_\tau=I-\tau \grad f$ is $1$-Lipschitz when $0\leq \tau\leq \frac{2}{L}$.
\end{lemma}
\begin{proposition}
Let $\vf{x}_*$ be a minimizer of $f$. We have that:
$$
\frac{f(\vf{x}_k)-f(\vf{x}_*)}{\norm{\vf{x}_k-\vf{x}_*}}\leq \norm{\grad f(\vf{x}_k)}
$$
Moreover, if $\Delta_k:=f(\vf{x}_k)-f(\vf{x}_*)$, we have:
$$
\Delta_{k+1}\leq \Delta_k-\frac{\kappa}{\norm{\vf{x}_0-\vf{x}_*}^2} {\Delta_k}^2
$$
with $\kappa=\tau\left( 1-\frac{\tau L}{2} \right)$.
\end{proposition}
\begin{lemma}
Let $(a_k)_{k\geq 0}$ be a sequence of non-negative numbers such that $a_{k+1}\leq a_k-c^{-1}{a_k}^2$ for some $c>0$. Then, $\forall k\geq 0$, $a_k\leq \frac{c}{k+1}$.
\end{lemma}
\begin{theorem}
The gradient descent with fixed step satisfies:
$$
\Delta_k\leq \frac{\norm{\vf{x}_0-\vf{x}_*}^2}{\kappa (k+1)}
$$
for $\kappa=\tau\left( 1-\frac{\tau L}{2} \right)$.
\end{theorem}
\begin{remark}
$\kappa$ is maximal for $\tau=\frac{1}{L}$, which gives $\kappa=\frac{1}{2L}$ and hence:
$$
\Delta_k\leq \frac{2L\norm{\vf{x}_0-\vf{x}_*}^2}{k+1}
$$
\end{remark}
\begin{definition}
We say that $f$ is \emph{strongly convex} (or \emph{$\gamma$-convex}) if $\vf{D}^2f\geq \gamma \vf{I}$ with $\gamma>0$\footnote{This means that all the eigenvalues of $\vf{D}^2f$ are greater than or equal to $\gamma$.}.
\end{definition}
\begin{proposition}
Let $f$ be strongly convex and $\vf{x}_*$ be a minimizer of $f$. Then:
$$
\norm{\vf{x}_k-\vf{x}_*}\leq q^k \norm{\vf{x}_0-\vf{x}_*}
$$
with $q=\frac{1-\gamma/L}{1+\gamma/L}$, where $\gamma/L<1$ can be thought of as the inverse condition number of the problem.
\end{proposition}
\begin{theorem}
For any $n\geq 2$, any $\vf{x}_0\in\RR^n$, $L>0$ and $k<n$, there exists a convex $\mathcal{C}^1$ function $f$ with $L$-Lipschitz continuous gradient such that for any first-order method we have:
$$
f(\vf{x}_k)-f(\vf{x}_*)\geq \frac{L\norm{\vf{x}_0-\vf{x}_*}^2}{8{(k+1)}^2}
$$
where $\vf{x}_*$ is a minimizer of $f$.
\end{theorem}
\begin{theorem}
For any $\vf{x}_0\in\RR^\infty\simeq \ell_2(\NN)$ and $\gamma,L>0$, there exists a $\gamma$-strongly convex $\mathcal{C}^1$ function $f$ with $L$-Lipschitz continuous gradient such that for any first-order method we have:
\begin{align*}
f(\vf{x}_k)-f(\vf{x}_*) & \geq \frac{\gamma}{2}q^{2k}\norm{\vf{x}_0-\vf{x}_*}^2 \\
\norm{\vf{x}_k-\vf{x}_*} & \geq q^k \norm{\vf{x}_0-\vf{x}_*}
\end{align*}
where $\vf{x}_*$ is a minimizer of $f$ and $q=\frac{\sqrt{Q}-1}{\sqrt{Q}+1}$, with $Q=\frac{L}{\gamma}\geq 1$ the condition number of the problem.
\end{theorem}
\subsubsection{Higher order methods}
\begin{definition}[Newton method]
We define the \emph{Newton method} as:
\begin{multline*}
\vf{x}_{k+1}=\argmin_{\vf{x}\in H} \bigg\{ f(\vf{x}_k)+\langle \grad f(\vf{x}_k),\vf{x}-\vf{x}_k\rangle+\\
+\left.\frac{1}{2}\langle \vf{D}^2f(\vf{x}_k)(\vf{x}-\vf{x}_k),\vf{x}-\vf{x}_k\rangle \right\}
\end{multline*}
\end{definition}
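% Added closed form (not in the original notes): the quadratic model is solved
% explicitly, giving the usual Newton update.
\begin{remark}
Setting the gradient of the model to zero gives $\grad f(\vf{x}_k)+\vf{D}^2f(\vf{x}_k)(\vf{x}_{k+1}-\vf{x}_k)=0$, so whenever $\vf{D}^2f(\vf{x}_k)$ is invertible:
$$
\vf{x}_{k+1}=\vf{x}_k-{\vf{D}^2f(\vf{x}_k)}^{-1}\grad f(\vf{x}_k)
$$
that is, a gradient step preconditioned by the inverse Hessian.
\end{remark}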
\begin{theorem}
Let $f\in\mathcal{C}^2$ and $\vf{x}_*$ be a minimizer of $f$. Suppose $\vf{D}^2f$ is $M$-Lipschitz and $\vf{D}^2f\geq \gamma\vf{I}$, define $q:=\frac{M}{2\gamma^2}\norm{\grad f(\vf{x}_0)}$, and assume $\vf{x}_0$ is close enough to $\vf{x}_*$ so that $q<1$. Then:
$$
\norm{\vf{x}_k-\vf{x}_*}\leq \frac{2\gamma}{M}q^{2^k}
$$
\end{theorem}
\begin{definition}
A \emph{multistep method} has the general form:
$$
\vf{x}_{k+1}=\vf{x}_k-\alpha \grad f(\vf{x}_k)+\beta(\vf{x}_k-\vf{x}_{k-1})
$$
\end{definition}
\begin{theorem}[Heavy ball method]
Let $\vf{x}_*$ be a local minimizer of $f$ such that $\gamma \vf{I}\leq \vf{D}^2f(\vf{x}_*)\leq L\vf{I}$ and choose $\alpha$, $\beta$ with $0\leq \beta <1$ and $0<\alpha<\frac{2(1+\beta)}{L}$. There exists $q<1$ such that if $q<\tilde{q}<1$ and if $\vf{x}_0$, $\vf{x}_1$ are close enough to $\vf{x}_*$, we have:
$$
\norm{\vf{x}_k-\vf{x}_*}\leq c(\tilde{q}){\tilde{q}}^k
$$
Moreover, this is almost optimal: if
$$
\alpha=\frac{4}{{(\sqrt{L}+\sqrt{\gamma})}^2}\qquad \beta={\left( \frac{\sqrt{L}-\sqrt{\gamma}}{\sqrt{L}+\sqrt{\gamma}} \right)}^2
$$
then:
$$
q=\frac{\sqrt{L}-\sqrt{\gamma}}{\sqrt{L}+\sqrt{\gamma}}=\frac{\sqrt{Q}-1}{\sqrt{Q}+1}
$$
\end{theorem}
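% Added numerical comparison (not in the original notes): a concrete condition
% number makes the gain over plain gradient descent visible.
\begin{remark}
For $Q=100$, gradient descent contracts with $q=\frac{1-1/Q}{1+1/Q}=\frac{99}{101}\approx 0.98$ per step, while the heavy ball method with the optimal parameters gives $q=\frac{\sqrt{Q}-1}{\sqrt{Q}+1}=\frac{9}{11}\approx 0.82$: reaching accuracy $\varepsilon$ thus takes $O\left(Q\log\frac{1}{\varepsilon}\right)$ versus $O\left(\sqrt{Q}\log\frac{1}{\varepsilon}\right)$ iterations.
\end{remark}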
\begin{lemma}
Let $\vf{A}\in\mathcal{M}_n(\RR)$ with $\rho(\vf{A})\leq \rho$. Then, $\forall \tilde{\rho}>\rho$, there exists a norm $\norm{\cdot}_*$ in $\CC^n$ such that $\norm{\vf{A}}_*\leq \tilde{\rho}$.
\end{lemma}
\begin{definition}[Conjugate gradient]
We define the \emph{conjugate gradient method} as:
\begin{equation*}
\vf{x}_{k+1}=\vf{x}_k-\alpha_k \grad f(\vf{x}_k)+\beta_k(\vf{x}_k-\vf{x}_{k-1})
\end{equation*}
where $\alpha_k$ and $\beta_k$ are the minimizers of:
$$
\alpha_k,\beta_k=\argmin_{\alpha,\beta\in\RR} f(\vf{x}_k-\alpha \grad f(\vf{x}_k)+\beta(\vf{x}_k-\vf{x}_{k-1}))
$$
\end{definition}
\begin{lemma}
For a quadratic function $f$, the gradients $\vf{p}_k:=\grad f(\vf{x}_k)$ are pairwise orthogonal.
\end{lemma}
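% Added reasoning step (not in the original notes): how orthogonality yields the
% finite termination claimed in the corollaries below.
\begin{remark}
In $\RR^n$ there are at most $n$ pairwise orthogonal nonzero vectors, so the lemma forces $\grad f(\vf{x}_k)=0$ for some $k\leq n$; for a convex quadratic $f(\vf{x})=\frac{1}{2}\langle \vf{A}\vf{x},\vf{x}\rangle-\langle \vf{b},\vf{x}\rangle$ (the setting assumed here), a vanishing gradient already means that $\vf{x}_k$ is a minimizer, and counting the dimensions of the subspace actually explored refines the bound to $\rank\vf{A}$ iterations.
\end{remark}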
\begin{corollary}
For a quadratic function, the conjugate gradient is the \textit{best} first order method.
\end{corollary}
\begin{corollary}
The conjugate gradient method finds a solution in at most $k=\rank \vf{A}$ iterations.
\end{corollary}
\begin{definition}[Nesterov's accelerated gradient method]
Let $\vf{x}_0=\vf{x}_{-1}$ be given. We define \emph{Nesterov's method} as:
$$
\begin{cases}
\vf{y}_k=\vf{x}_k+\frac{t_k-1}{t_{k+1}}(\vf{x}_k-\vf{x}_{k-1}) \\
\vf{x}_{k+1}=\vf{y}_k-\tau \grad f(\vf{y}_k)
\end{cases}
$$
where $\tau=L^{-1}$ and, for instance, $t_k=1+\frac{k}{2}$. Moreover, we have:
$$
f(\vf{x}_k)-f(\vf{x}_*)\leq \frac{2L\norm{\vf{x}_0-\vf{x}_*}^2}{(k+1)^2}
$$
\end{definition}
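% Added computation (not in the original notes): the momentum coefficient for the
% suggested choice of t_k, showing how it approaches 1.
\begin{remark}
With $t_k=1+\frac{k}{2}$ the overrelaxation coefficient is
$$
\frac{t_k-1}{t_{k+1}}=\frac{k/2}{(k+3)/2}=\frac{k}{k+3}=1-\frac{3}{k+3}
$$
so the momentum tends to $1$ as $k\to\infty$, in contrast with the fixed $\beta<1$ of the heavy ball method.
\end{remark}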
\subsubsection{Non-smooth problems}
\begin{definition}
We define the \emph{subgradient descent method} as:
$$
\vf{x}_{k+1}=\vf{x}_k-h_k\frac{\grad f(\vf{x}_k)}{\norm{\grad f(\vf{x}_k)}}
$$
where $h_k>0$ is a step size and $\grad f(\vf{x}_k)$ denotes any element of the subdifferential of $f$ at $\vf{x}_k$.
\end{definition}
\begin{proposition}
If $f$ is convex and $M$-Lipschitz, then the subgradient descent method satisfies:
$$
\min_{0\leq i\leq k} \{ f(\vf{x}_i)-f(\vf{x}_*) \}\leq M\frac{\norm{\vf{x}_0-\vf{x}_*}^2 + \sum_{i=0}^k {h_i}^2}{2\sum_{i=0}^k h_i}
$$
and choosing $h_i=\frac{C}{\sqrt{k+1}}$ for $k$ iterations, we have:
$$
\min_{0\leq i\leq k} \{ f(\vf{x}_i)-f(\vf{x}_*) \}\leq M\frac{\norm{\vf{x}_0-\vf{x}_*}^2+C^2}{2C\sqrt{k+1}}
$$
\end{proposition}
\begin{definition}
We define the \emph{implicit descent} as:
$$
\vf{x}_{k+1}=\vf{x}_k-\tau \grad f(\vf{x}_{k+1})
$$
\end{definition}
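% Added interpretation (not in the original notes): the implicit step solved as a
% minimization problem, i.e. the proximal point iteration.
\begin{remark}
The implicit update is the first-order optimality condition of
$$
\vf{x}_{k+1}=\argmin_{\vf{x}\in H} \left\{ f(\vf{x})+\frac{1}{2\tau}\norm{\vf{x}-\vf{x}_k}^2 \right\}
$$
since the gradient of this objective at $\vf{x}_{k+1}$ is $\grad f(\vf{x}_{k+1})+\frac{1}{\tau}(\vf{x}_{k+1}-\vf{x}_k)=0$; this scheme is usually called the \emph{proximal point method}.
\end{remark}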
\end{multicols}
\end{document}