diff --git a/.github/workflows/buildpdf.yml b/.github/workflows/buildpdf.yml
index d9eacc2..8ca41fb 100644
--- a/.github/workflows/buildpdf.yml
+++ b/.github/workflows/buildpdf.yml
@@ -177,6 +177,11 @@ jobs:
         with:
           root_file: Advanced_topics_in_functional_analysis_and_PDEs.tex
           working_directory: Mathematics/5th/Advanced_topics_in_functional_analysis_and_PDEs/
+      - name: Compile - CO
+        uses: xu-cheng/latex-action@v2
+        with:
+          root_file: Continuous_optimization.tex
+          working_directory: Mathematics/5th/Continuous_optimization/
       # - name: Compile - IEPDE
       #   uses: xu-cheng/latex-action@v2
       #   with:
@@ -278,6 +283,7 @@ jobs:
             Mathematics/5th/Advanced_dynamical_systems/Advanced_dynamical_systems.pdf
             Mathematics/5th/Advanced_probability/Advanced_probability.pdf
             Mathematics/5th/Advanced_topics_in_functional_analysis_and_PDEs/Advanced_topics_in_functional_analysis_and_PDEs.pdf
+            Mathematics/5th/Continuous_optimization/Continuous_optimization.pdf
             Mathematics/5th/Introduction_to_nonlinear_elliptic_PDEs/Introduction_to_nonlinear_elliptic_PDEs.pdf
             Mathematics/5th/Montecarlo_methods/Montecarlo_methods.pdf
             Mathematics/5th/Stochastic_calculus/Stochastic_calculus.pdf
diff --git a/Mathematics/5th/Advanced_dynamical_systems/Advanced_dynamical_systems.tex b/Mathematics/5th/Advanced_dynamical_systems/Advanced_dynamical_systems.tex
index e29e40f..fc73245 100644
--- a/Mathematics/5th/Advanced_dynamical_systems/Advanced_dynamical_systems.tex
+++ b/Mathematics/5th/Advanced_dynamical_systems/Advanced_dynamical_systems.tex
@@ -3,7 +3,7 @@
 \begin{document}
 \changecolor{ADS}
 \begin{multicols}{2}[\section{Advanced dynamical sytems}]
-  \subsection{Introduction}
+  \subsection{Discrete maps}
   \subsubsection{Maps in \texorpdfstring{$\S^1$}{S1}}
   \begin{proposition}
     Let $\alpha=\frac{p}{q}\in\QQ$ and let $R_\alpha:\S^1\to \S^1$ be the rotation of angle $\alpha$. Then, all the points of $\S^1$ are periodic for $R_\alpha$ with period $q$.
@@ -196,5 +196,30 @@
   \begin{remark}
     The Lyapunov exponent measures the exponential growth rate of tangent vectors along orbits. It can rarely be computed explicitly, but if we can show that $\chi(x,\vf{v})>0$ for some $\vf{v}$, then we know that the system is \emph{chaotic}.
   \end{remark}
+  \subsection{Hamiltonian systems}
+  \subsubsection{Introduction}
+  \begin{definition}
+    Let $U\subseteq \RR^n\times \RR^n$ be open and $H:U\rightarrow \RR$ be a $\mathcal{C}^1$ function. We define the \emph{Hamiltonian system} associated to $H$ as:
+    \begin{equation}\label{ADS:ham_system}
+      \begin{cases}
+        \displaystyle\dot{\vf{x}}=\pdv{H}{\vf{p}}\vspace{0.1cm} \\
+        \displaystyle\dot{\vf{p}}=-\pdv{H}{\vf{x}}
+      \end{cases}
+    \end{equation}
+    The vector field $\vf{X}_H:=\left(\pdv{H}{\vf{p}},-\pdv{H}{\vf{x}}\right)$ is called the \emph{Hamiltonian vector field} associated to $H$.
+  \end{definition}
+  \begin{remark}
+    Recall that $H$ is a first integral of the system \mcref{ADS:ham_system}.
+  \end{remark}
+  \begin{lemma}
+    Let $H:U\rightarrow \RR$ be a $\mathcal{C}^2$ function and let $W\subseteq U$ be a bounded measurable set. Then, the volume of $W$ is preserved under the flow of \mcref{ADS:ham_system}.
+  \end{lemma}
+  \begin{proof}
+    Let $W_t:=\vf{\phi}_t(W)$, where $\vf{\phi}_t$ is the flow of \mcref{ADS:ham_system}. By the group property of the flow, it suffices to check that the derivative of the volume vanishes at $t=0$. Changing variables, and using that $\vf{D\phi}_0=\vf{I}$ and that the differential of $\det$ at the identity is the trace, we get:
+    \begin{multline*}
+      \dv{}{t}\vol(W_t)\bigg|_{t=0}=\dv{}{t}\int_W\det \vf{D\phi}_t\dd{\vf{x}}\bigg|_{t=0}=\\
+      =\int_W\trace\left(\dv{}{t}\vf{D\phi}_t\bigg|_{t=0}\right)\dd{\vf{x}}=\int_W\div \vf{X}_H\dd{\vf{x}}
+    \end{multline*}
+    But an easy computation shows that $\div \vf{X}_H=0$.
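+    Indeed, since $H\in\mathcal{C}^2$, the mixed second partial derivatives of $H$ commute, and therefore:
+    $$
+    \div \vf{X}_H=\sum_{i=1}^n\left[\pdv{}{x_i}\left(\pdv{H}{p_i}\right)+\pdv{}{p_i}\left(-\pdv{H}{x_i}\right)\right]=0
+    $$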
+  \end{proof}
 \end{multicols}
 \end{document}
\ No newline at end of file
diff --git a/Mathematics/5th/Continuous_optimization/Continuous_optimization.tex b/Mathematics/5th/Continuous_optimization/Continuous_optimization.tex
new file mode 100644
index 0000000..84288f5
--- /dev/null
+++ b/Mathematics/5th/Continuous_optimization/Continuous_optimization.tex
@@ -0,0 +1,218 @@
+\documentclass[../../../main_math.tex]{subfiles}
+
+\begin{document}
+\changecolor{CO}
+\begin{multicols}{2}[\section{Continuous optimization}]
+  \subsection{First order descent methods}
+  In this section we are interested in finding $\min_{\vf{x}\in H} f(\vf{x})$, where $H$ is a Hilbert space and $f:H\rightarrow \RR$ is a differentiable function.
+  \subsubsection{Gradient descent}
+  \begin{definition}[Gradient descent algorithm]
+    We define the \emph{gradient descent algorithm} as:
+    \begin{equation*}
+      \vf{x}_{k+1}=\vf{x}_k-\tau \grad f(\vf{x}_k)=:T_\tau(\vf{x}_k)
+    \end{equation*}
+    with $\tau>0$.
+  \end{definition}
+  \begin{definition}
+    One can choose $\tau$ in different ways:
+    \begin{itemize}
+      \item Optimal step: $\tau_k=\argmin_{\tau>0} f(\vf{x}_k-\tau \grad f(\vf{x}_k))$.
+      \item \emph{Armijo-type rule}: find $i\geq 0$ such that:
+            $$
+            f(\vf{x}_k-\tau\rho^i \grad f(\vf{x}_k))\leq f(\vf{x}_k)-c \tau \rho^i \norm{\grad f(\vf{x}_k)}^2
+            $$
+            with $c,\rho\in (0,1)$ fixed.
+      \item Gradient with fixed step $\tau>0$:
+            \begin{itemize}
+              \item We choose $\vf{x}_{k+1}$ as the minimizer of a quadratic approximation of $f$:
+                    \begin{multline*}
+                      \vf{x}_{k+1}=\argmin_{\vf{x}\in H} \bigg\{ f(\vf{x}_k)+\langle \grad f(\vf{x}_k),\vf{x}-\vf{x}_k\rangle+\\
+                      +\left.\frac{1}{2\tau}\norm{\vf{x}-\vf{x}_k}^2 \right\}
+                    \end{multline*}
+              \item \emph{Frank-Wolfe-type method}:
+                    $$
+                    \vf{x}_{k+1}=\argmin_{\vf{x}\in H_k} \{ f(\vf{x}_k)+\langle \grad f(\vf{x}_k),\vf{x}-\vf{x}_k\rangle \}
+                    $$
+                    with $H_k$ appropriately chosen.
+            \end{itemize}
+    \end{itemize}
+  \end{definition}
+  \begin{proposition}
+    Let $f\in\mathcal{C}^1$ be bounded from below with $\grad f$ $L$-Lipschitz continuous. Then, $\displaystyle\lim_{k\rightarrow\infty} \grad f(\vf{x}_k)=0$, provided that $0<\tau<\frac{2}{L}$.
+  \end{proposition}
+  \begin{proposition}
+    If $f$ is convex, for any $\vf{x},\vf{y}\in H$ we have:
+    $$
+    f(\vf{y})\geq f(\vf{x})+\langle \grad f(\vf{x}),\vf{y}-\vf{x}\rangle
+    $$
+  \end{proposition}
+  \begin{definition}
+    Let $A:H\to H$ be an operator. We say that $A$ is \emph{$L$-co-coercive} if $\forall x,y\in H$:
+    $$
+    \langle Ax-Ay,x-y\rangle\geq L \norm{Ax-Ay}^2
+    $$
+  \end{definition}
+  \begin{definition}
+    Let $A:H\to H$ be an operator. We say that $A$ is \emph{firmly non-expansive} if $\forall x,y\in H$:
+    $$
+    \norm{Ax-Ay}^2+\norm{(I-A)x-(I-A)y}^2\leq \norm{x-y}^2
+    $$
+  \end{definition}
+  \begin{lemma}
+    Let $A:H\to H$ be an operator. Then, $A$ is firmly non-expansive if and only if $A$ is $1$-co-coercive.
+  \end{lemma}
+  \begin{theorem}[Baillon-Haddad]
+    If $f$ is convex and $\grad f$ is $L$-Lipschitz continuous, then for any $\vf{x},\vf{y}\in H$ we have:
+    $$
+    \langle \grad f(\vf{x})-\grad f(\vf{y}),\vf{x}-\vf{y}\rangle\geq \frac{1}{L} \norm{\grad f(\vf{x})-\grad f(\vf{y})}^2
+    $$
+    That is, $\grad f$ is $L^{-1}$-co-coercive.
+  \end{theorem}
+  \begin{lemma}
+    If $f$ is convex with $L$-Lipschitz continuous gradient, then the mapping $T_\tau=I-\tau \grad f$ is 1-Lipschitz (i.e.\ non-expansive) when $0\leq \tau\leq \frac{2}{L}$.
+  \end{lemma}
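+  \begin{remark}
+    The role of the condition $0<\tau<\frac{2}{L}$ comes from the following standard estimate: if $\grad f$ is $L$-Lipschitz continuous, then for any $\vf{x},\vf{y}\in H$:
+    $$
+    f(\vf{y})\leq f(\vf{x})+\langle \grad f(\vf{x}),\vf{y}-\vf{x}\rangle+\frac{L}{2}\norm{\vf{y}-\vf{x}}^2
+    $$
+    Taking $\vf{x}=\vf{x}_k$ and $\vf{y}=\vf{x}_{k+1}=\vf{x}_k-\tau\grad f(\vf{x}_k)$ gives:
+    $$
+    f(\vf{x}_{k+1})\leq f(\vf{x}_k)-\tau\left(1-\frac{\tau L}{2}\right)\norm{\grad f(\vf{x}_k)}^2
+    $$
+    so each gradient step decreases $f$ by at least $\kappa\norm{\grad f(\vf{x}_k)}^2$, where $\kappa=\tau\left(1-\frac{\tau L}{2}\right)$ is positive precisely when $0<\tau<\frac{2}{L}$. This is the constant $\kappa$ appearing in the results below.
+  \end{remark}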
+  \begin{proposition}
+    Let $\vf{x}_*$ be a minimizer of $f$. We have that:
+    $$
+    \frac{f(\vf{x}_k)-f(\vf{x}_*)}{\norm{\vf{x}_k-\vf{x}_*}}\leq \norm{\grad f(\vf{x}_k)}
+    $$
+    Moreover if $\Delta_k:=f(\vf{x}_k)-f(\vf{x}_*)$, we have:
+    $$
+    \Delta_{k+1}\leq \Delta_k-\frac{\kappa}{\norm{\vf{x}_0-\vf{x}_*}^2} {\Delta_k}^2
+    $$
+    with $\kappa=\tau\left( 1-\frac{\tau L}{2} \right)$.
+  \end{proposition}
+  \begin{lemma}
+    Let $(a_k)\geq 0$ be a sequence such that $a_{k+1}\leq a_k-c^{-1}{a_k}^2$ for some $c>0$. Then, $\forall k\geq 0$, $a_k\leq \frac{c}{k+1}$.
+  \end{lemma}
+  \begin{theorem}
+    The gradient descent with fixed step satisfies:
+    $$
+    \Delta_k\leq \frac{\norm{\vf{x}_0-\vf{x}_*}^2}{\kappa (k+1)}
+    $$
+    for $\kappa=\tau\left( 1-\frac{\tau L}{2} \right)$.
+  \end{theorem}
+  \begin{remark}
+    $\kappa$ is maximal for $\tau=\frac{1}{L}$, which gives:
+    $$
+    \Delta_k\leq \frac{2L\norm{\vf{x}_0-\vf{x}_*}^2}{k+1}
+    $$
+  \end{remark}
+  \begin{definition}
+    We say that $f$ is \emph{strongly convex} (or \emph{$\gamma$-convex}) if $\vf{D}^2f\geq \gamma \vf{I}$ with $\gamma>0$\footnote{This means that all the eigenvalues of $\vf{D}^2f$ are greater than or equal to $\gamma$.}.
+  \end{definition}
+  \begin{proposition}
+    Let $f$ be strongly convex and $\vf{x}_*$ be a minimizer of $f$. Then:
+    $$
+    \norm{\vf{x}_k-\vf{x}_*}\leq q^k \norm{\vf{x}_0-\vf{x}_*}
+    $$
+    with $q=\frac{1-\gamma/L}{1+\gamma/L}$, and $\gamma/L<1$ can be thought of as the inverse condition number of the problem.
+  \end{proposition}
+  \begin{theorem}
+    For any $n\geq 2$, any $\vf{x}_0\in\RR^n$, any $L\geq\gamma>0$ and any $k\geq 0$, there exists a $\gamma$-strongly convex $\mathcal{C}^1$ function $f$ with $L$-Lipschitz continuous gradient such that for any first-order method we have:
+    \begin{align*}
+      f(\vf{x}_k)-f(\vf{x}_*) & \geq \frac{\gamma}{2}q^{2k}\norm{\vf{x}_0-\vf{x}_*}^2 \\
+      \norm{\vf{x}_k-\vf{x}_*} & \geq q^k \norm{\vf{x}_0-\vf{x}_*}
+    \end{align*}
+    where $\vf{x}_*$ is a minimizer of $f$ and $q=\frac{\sqrt{Q}-1}{\sqrt{Q}+1}$, with $Q=\frac{L}{\gamma}\geq 1$ the condition number of the problem.
+  \end{theorem}
+  \subsubsection{Higher order methods}
+  \begin{definition}[Newton method]
+    We define the \emph{Newton method} as:
+    \begin{multline*}
+      \vf{x}_{k+1}=\argmin_{\vf{x}\in H} \bigg\{ f(\vf{x}_k)+\langle \grad f(\vf{x}_k),\vf{x}-\vf{x}_k\rangle+\\
+      +\left.\frac{1}{2}\langle \vf{D}^2f(\vf{x}_k)(\vf{x}-\vf{x}_k),\vf{x}-\vf{x}_k\rangle \right\}
+    \end{multline*}
+  \end{definition}
+  \begin{theorem}
+    Let $f\in\mathcal{C}^2$ and $\vf{x}_*$ be a minimizer of $f$. Suppose $\vf{D}^2f$ is $M$-Lipschitz, $\vf{D}^2f\geq \gamma\vf{I}$, $q:=\frac{M}{2\gamma^2}\norm{\grad f(\vf{x}_0)}$ and assume $\vf{x}_0$ is close enough to $\vf{x}_*$ so that $q<1$. Then:
+    $$
+    \norm{\vf{x}_k-\vf{x}_*}\leq \frac{2\gamma}{M}q^{2^k}
+    $$
+  \end{theorem}
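+  \begin{remark}
+    When $\vf{D}^2f(\vf{x}_k)$ is positive definite, the quadratic model above has a unique minimizer and the Newton iteration takes the familiar explicit form:
+    $$
+    \vf{x}_{k+1}=\vf{x}_k-{\left(\vf{D}^2f(\vf{x}_k)\right)}^{-1}\grad f(\vf{x}_k)
+    $$
+  \end{remark}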
+  \begin{definition}
+    A \emph{multistep method} has the general form:
+    $$
+    \vf{x}_{k+1}=\vf{x}_k-\alpha \grad f(\vf{x}_k)+\beta(\vf{x}_k-\vf{x}_{k-1})
+    $$
+  \end{definition}
+  \begin{theorem}[Heavy ball method]
+    Let $\vf{x}_*$ be a local minimizer of $f$ such that $\gamma \vf{I}\leq \vf{D}^2f(\vf{x}_*)\leq L\vf{I}$ and choose $\alpha$, $\beta$ with $0\leq \beta <1$ and $0<\alpha<\frac{2(1+\beta)}{L}$. There exists $q<1$ such that if $q<\tilde{q}<1$ and if $\vf{x}_0$, $\vf{x}_1$ are close enough to $\vf{x}_*$, we have:
+    $$
+    \norm{\vf{x}_k-\vf{x}_*}\leq c(\tilde{q}){\tilde{q}}^k
+    $$
+    Moreover, this is almost optimal: if
+    $$
+    \alpha=\frac{4}{{(\sqrt{L}+\sqrt{\gamma})}^2}\qquad \beta={\left( \frac{\sqrt{L}-\sqrt{\gamma}}{\sqrt{L}+\sqrt{\gamma}} \right)}^2
+    $$
+    then:
+    $$
+    q=\frac{\sqrt{L}-\sqrt{\gamma}}{\sqrt{L}+\sqrt{\gamma}}=\frac{\sqrt{Q}-1}{\sqrt{Q}+1}
+    $$
+  \end{theorem}
+  \begin{lemma}
+    Let $\vf{A}\in\mathcal{M}_n(\RR)$ with $\rho(\vf{A})\leq \rho$. Then, $\forall \tilde{\rho}>\rho$, there exists a norm $\norm{\cdot}_*$ on $\CC^n$ whose induced operator norm satisfies $\norm{\vf{A}}_*\leq \tilde{\rho}$.
+  \end{lemma}
+  \begin{definition}[Conjugate gradient]
+    We define the \emph{conjugate gradient method} as:
+    \begin{equation*}
+      \vf{x}_{k+1}=\vf{x}_k-\alpha_k \grad f(\vf{x}_k)+\beta_k(\vf{x}_k-\vf{x}_{k-1})
+    \end{equation*}
+    where at each step $\alpha_k$ and $\beta_k$ are chosen optimally:
+    $$
+    (\alpha_k,\beta_k)=\argmin_{\alpha,\beta\in\RR} f(\vf{x}_k-\alpha \grad f(\vf{x}_k)+\beta(\vf{x}_k-\vf{x}_{k-1}))
+    $$
+  \end{definition}
+  \begin{lemma}
+    If $f$ is a quadratic function, the gradients $\vf{p}_k:=\grad f(\vf{x}_k)$ are pairwise orthogonal.
+  \end{lemma}
+  \begin{corollary}
+    For a quadratic function, the conjugate gradient is the \textit{best} first order method.
+  \end{corollary}
+  \begin{corollary}
+    For a quadratic function with Hessian $\vf{A}$, the conjugate gradient method finds a minimizer in at most $k=\rank \vf{A}$ iterations.
+  \end{corollary}
+  \begin{definition}[Nesterov's accelerated gradient method]
+    Let $\vf{x}_0=\vf{x}_{-1}$ be given. We define \emph{Nesterov's method} as:
+    $$
+    \begin{cases}
+      \vf{y}_k=\vf{x}_k+\frac{t_k-1}{t_{k+1}}(\vf{x}_k-\vf{x}_{k-1}) \\
+      \vf{x}_{k+1}=\vf{y}_k-\tau \grad f(\vf{y}_k)
+    \end{cases}
+    $$
+    where $\tau=L^{-1}$ and for instance $t_k=1+\frac{k}{2}$. Moreover, we have:
+    $$
+    f(\vf{x}_k)-f(\vf{x}_*)\leq \frac{2L\norm{\vf{x}_0-\vf{x}_*}^2}{(k+1)^2}
+    $$
+  \end{definition}
+  \subsubsection{Non-smooth problems}
+  \begin{definition}
+    We define the \emph{subgradient descent method} as:
+    $$
+    \vf{x}_{k+1}=\vf{x}_k-h_k\frac{\vf{g}_k}{\norm{\vf{g}_k}}
+    $$
+    where $\vf{g}_k\in\partial f(\vf{x}_k)$ is a subgradient of $f$ at $\vf{x}_k$ and $h_k>0$ is a step size.
+  \end{definition}
+  \begin{proposition}
+    If $f$ is convex and $M$-Lipschitz, then the subgradient descent method satisfies:
+    $$
+    \min_{0\leq i\leq k} \{ f(\vf{x}_i)-f(\vf{x}_*) \}\leq M\frac{\norm{\vf{x}_0-\vf{x}_*}^2 + \sum_{i=0}^k {h_i}^2}{2\sum_{i=0}^k h_i}
+    $$
+    and choosing $h_i=\frac{C}{\sqrt{k+1}}$ for $k$ iterations, we have:
+    $$
+    \min_{0\leq i\leq k} \{ f(\vf{x}_i)-f(\vf{x}_*) \}\leq \frac{M\left(\norm{\vf{x}_0-\vf{x}_*}^2+C^2\right)}{2C\sqrt{k+1}}
+    $$
+  \end{proposition}
+  \begin{definition}
+    We define the \emph{implicit descent method} as:
+    $$
+    \vf{x}_{k+1}=\vf{x}_k-\tau \grad f(\vf{x}_{k+1})
+    $$
+  \end{definition}
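+  \begin{remark}
+    If $f$ is convex, the implicit step is precisely the optimality condition of:
+    $$
+    \vf{x}_{k+1}=\argmin_{\vf{x}\in H}\left\{ f(\vf{x})+\frac{1}{2\tau}\norm{\vf{x}-\vf{x}_k}^2 \right\}
+    $$
+    so, in contrast with the explicit scheme, the iteration is well defined and decreases $f$ for every step size $\tau>0$.
+  \end{remark}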

+\end{multicols}
+\end{document}
\ No newline at end of file
diff --git a/index.html b/index.html
index d9afac0..9c4225c 100644
--- a/index.html
+++ b/index.html
@@ -73,6 +73,7 @@
   Mathematics
+  •
   •
diff --git a/main_math.tex b/main_math.tex
index c58d895..7a7d18f 100644
--- a/main_math.tex
+++ b/main_math.tex
@@ -110,6 +110,9 @@ \chapter{Fifth year}
 \subfile{Mathematics/5th/Advanced_topics_in_functional_analysis_and_PDEs/Advanced_topics_in_functional_analysis_and_PDEs.tex}
 \cleardoublepage
 
+\subfile{Mathematics/5th/Continuous_optimization/Continuous_optimization.tex}
+\cleardoublepage
+
 \subfile{Mathematics/5th/Introduction_to_nonlinear_elliptic_PDEs/Introduction_to_nonlinear_elliptic_PDEs.tex}
 \cleardoublepage
diff --git a/preamble_formulas.sty b/preamble_formulas.sty
index 342821e..12bd0b6 100644
--- a/preamble_formulas.sty
+++ b/preamble_formulas.sty
@@ -98,6 +98,7 @@
     {SC}{\sta} % stochastic calculus
     {INLP}{\phy} % Instabilities and nonlinear phenomena
     {MM}{\sta} % Montecarlo methods
+    {CO}{\ana} % Continuous optimization
   }{\col}%
 }
 \ExplSyntaxOff