From fd965c5da62ca887e9ba44208061a7ed6138e3fd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?V=C3=ADctor?=
Date: Thu, 12 Oct 2023 19:14:46 +0200
Subject: [PATCH] updated montecarlo

---
 Mathematics/3rd/Probability/Probability.tex   |   2 +-
 .../Montecarlo_methods/Montecarlo_methods.tex | 162 +++++++++++++++++-
 2 files changed, 161 insertions(+), 3 deletions(-)

diff --git a/Mathematics/3rd/Probability/Probability.tex b/Mathematics/3rd/Probability/Probability.tex
index ce3e25e..7b3ccc7 100644
--- a/Mathematics/3rd/Probability/Probability.tex
+++ b/Mathematics/3rd/Probability/Probability.tex
@@ -786,7 +786,7 @@
 \begin{proposition}
     Let $(\Omega,\mathcal{A},\Prob)$ be a probability space, $X$, $Y$ be random variables such that they have finite $2k$-th moment. Then, $XY$ has finite $k$-th moment.
 \end{proposition}
-\begin{theorem}[Cauchy-Schwarz inequality]
+\begin{theorem}[Cauchy-Schwarz inequality]\label{P:cauchy-schwarz}
     Let $(\Omega,\mathcal{A},\Prob)$ be a probability space and $X$, $Y$ be two random variables such that $\Exp(X^2)<\infty$. Then: $$\Exp(|XY|)\leq {\left(\Exp(X^2)\Exp(Y^2)\right)}^{1/2}$$
 \end{theorem}
 \begin{definition}[Variance]
diff --git a/Mathematics/5th/Montecarlo_methods/Montecarlo_methods.tex b/Mathematics/5th/Montecarlo_methods/Montecarlo_methods.tex
index f564089..a263d18 100644
--- a/Mathematics/5th/Montecarlo_methods/Montecarlo_methods.tex
+++ b/Mathematics/5th/Montecarlo_methods/Montecarlo_methods.tex
@@ -109,8 +109,15 @@
     $$
     X:=\sqrt{-2\log(U)}\cos(2\pi V)\quad Y:=\sqrt{-2\log(U)}\sin(2\pi V)
     $$
-    Then, $X$, $Y$ are \iid $N(0,1)$.
+    Then, $X$ and $Y$ are \iid $N(0,1)$.
 \end{proposition}
+\begin{proof}
+    Let $\varphi:\RR^2\to\RR$ be bounded and measurable. Then:
+    \begin{multline*}
+        \Exp(\varphi(X,Y))=\\=\!\!\!\int_{{(0,1)}^2}\!\!\!\!\varphi\left( \sqrt{-2\log u} \cos(2\pi v), \sqrt{-2\log u} \sin(2\pi v)\right)\dd{u}\dd{v}=\\=\int_{\RR^2}\varphi(x,y) \frac{1}{2\pi}\exp\left(-\frac{x^2+y^2}{2}\right)\dd{x}\dd{y}
+    \end{multline*}
+    by the change of variable formula: in polar coordinates $x=r\cos\theta$, $y=r\sin\theta$, the substitution $u=\exp\left(-\frac{r^2}{2}\right)$, $v=\frac{\theta}{2\pi}$ gives $\dd{u}\dd{v}=\frac{1}{2\pi}\exp\left(-\frac{r^2}{2}\right)r\dd{r}\dd{\theta}=\frac{1}{2\pi}\exp\left(-\frac{x^2+y^2}{2}\right)\dd{x}\dd{y}$. Thus, $X$ and $Y$ are \iid $N(0,1)$.
+\end{proof}
 \begin{proposition}[Polar method]
     Let $U$, $V$ be \iid $U(\DD)$, where $\DD\subset \RR^2$ is the open unit disk. Let $R^2=U^2+V^2$ and set:
     $$
@@ -119,7 +126,158 @@
     Then, $X$, $Y$ are \iid $N(0,1)$.
 \end{proposition}
 \begin{proposition}
-    Let $\vf{X}\in N_d(0,\vf{I}_d)$, $\vf\mu\in\RR^d$ and $\vf{A}\in\mathcal{M}_d(\RR)$. Then, $\vf\mu+\vf{AX}\sim N_d(\vf\mu,\transpose{\vf{AA}})$.
+    Let $\vf{X}\in N_d(0,\vf{I}_d)$, $\vf\mu\in\RR^d$ and $\vf{A}\in\mathcal{M}_d(\RR)$. Then, $\vf\mu+\vf{AX}\sim N_d(\vf\mu,\vf{A}\transpose{\vf{A}})$.
+\end{proposition}
+\begin{remark}
+    To simulate $\vf{Y}\sim N_d(\vf\mu,\vf\Sigma)$, we proceed as follows:
+    \begin{enumerate}
+        \item Find $\vf{A}\in\mathcal{M}_d(\RR)$ such that $\vf\Sigma=\vf{A}\transpose{\vf{A}}$ (e.g.\ by Cholesky decomposition).
+        \item Simulate $\vf{X}\sim N_d(0,\vf{I}_d)$.
+        \item Set $\vf{Y}=\vf\mu+\vf{AX}$.
+    \end{enumerate}
+\end{remark}
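For concreteness, a small worked instance of this recipe in the notation of the notes; the covariance matrix below is an arbitrary illustrative choice, used only as a sketch:

Take $d=2$, $\vf\mu=\transpose{(1,0)}$ and $\vf\Sigma=\begin{pmatrix}4 & 2\\ 2 & 3\end{pmatrix}$. A Cholesky factor is
$$
\vf{A}=\begin{pmatrix}2 & 0\\ 1 & \sqrt{2}\end{pmatrix},\qquad
\vf{A}\transpose{\vf{A}}=\begin{pmatrix}4 & 2\\ 2 & 3\end{pmatrix}=\vf\Sigma,
$$
so if $X_1$, $X_2$ are \iid $N(0,1)$, then
$$
\vf{Y}=\vf\mu+\vf{A}\begin{pmatrix}X_1\\ X_2\end{pmatrix}=\begin{pmatrix}1+2X_1\\ X_1+\sqrt{2}X_2\end{pmatrix}\sim N_2(\vf\mu,\vf\Sigma).
$$
Any factorization $\vf\Sigma=\vf{A}\transpose{\vf{A}}$ works here; the Cholesky factorization is simply a convenient standard choice.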
+\subsection{Variance reduction techniques}
+\subsubsection{Antithetic control}
+\begin{definition}
+    Let $Y=g(X)$ be a random variable with $X\sim N(0,\sigma^2)$. The \emph{antithetic method} consists in using the estimator:
+    $$
+    \overline{Y}_n^{\text{A}}:=\frac{1}{n} \sum_{i=1}^n \frac{g(X_i)+g(-X_i)}{2}
+    $$
+    where ${(X_i)}_{1\leq i\leq n}$ are \iid $N(0,\sigma^2)$.
+\end{definition}
+\begin{lemma}
+    The antithetic estimator $\overline{Y}_n^{\text{A}}$ is an unbiased and consistent estimator of $\Exp(Y)$. Furthermore:
+    $$
+    \sqrt{n}\left(\overline{Y}_n^{\text{A}}-\Exp(Y)\right)\overset{\text{d}}{\longrightarrow}N\left(0,\Var\left(\frac{g(X)+g(-X)}{2}\right)\right)
+    $$
+\end{lemma}
+\begin{remark}
+    In terms of computational cost, $\overline{Y}_n^{\text{A}}$ is more expensive than $\overline{Y}_n$ but cheaper than $\overline{Y}_{2n}$.
+\end{remark}
+\begin{proposition}
+    We have that $\Var(\overline{Y}_n^{\text{A}})\leq \Var(\overline{Y}_n)$. Moreover, if
+    $$
+    \cov(g(X),g(-X))\leq 0
+    $$
+    then $\Var(\overline{Y}_n^{\text{A}})\leq \Var(\overline{Y}_{2n})$.
+\end{proposition}
+\begin{proof}
+    \begin{align*}
+        \Var(\overline{Y}_n^{\text{A}}) & =\frac{1}{2n}\left(\Var(g(X))+\cov(g(X),g(-X))\right) \\
+                                        & \leq \frac{\Var(g(X))}{n}=\Var(\overline{Y}_n)
+    \end{align*}
+    where the inequality follows from $\cov(g(X),g(-X))\leq\Var(g(X))$, a consequence of the Cauchy-Schwarz inequality. And if $\cov(g(X),g(-X))\leq 0$, then $\Var(\overline{Y}_n^{\text{A}})\leq \frac{\Var(g(X))}{2n}=\Var(\overline{Y}_{2n})$.
+\end{proof}
+\begin{proposition}
+    If $g$ is monotone, then:
+    $$
+    \cov(g(X),g(-X))\leq 0
+    $$
+\end{proposition}
+\begin{proof}
+    Let $X_1$, $X_2$ be \iid copies of $X$. Then, since $g$ is monotone:
+    $$
+    (g(X_1)-g(X_2))(g(-X_1)-g(-X_2))\leq 0
+    $$
+    Now taking expectations:
+    \begin{multline*}
+        0\geq\Exp((g(X_1)-g(X_2))(g(-X_1)-g(-X_2)))=\\=2 \cov(g(X),g(-X))
+    \end{multline*}
+\end{proof}
+\subsubsection{Control variate}
+\begin{definition}
+    The principle of the \emph{control variate} is to find a real-valued random variable $X$ such that $\Exp(X)$ is known, and a constant $b\in\RR$ such that:
+    $$
+    \Var(Y+b(X-\Exp(X)))\ll \Var(Y)
+    $$
+    This suggests the following estimator:
+    $$
+    \overline{Y}_n(b):=\frac{1}{n}\sum_{i=1}^n [Y_i+b(X_i-\Exp(X))]
+    $$
+    where ${(Y_i)}_{1\leq i\leq n}$ are \iid copies of $Y$ and ${(X_i)}_{1\leq i\leq n}$ are \iid copies of $X$.
+\end{definition}
+\begin{lemma}
+    The control variate estimator $\overline{Y}_n(b)$ is unbiased and consistent. Furthermore:
+    $$
+    \sqrt{n}\left(\overline{Y}_n(b)-\Exp(Y)\right)\overset{\text{d}}{\longrightarrow}N\left(0,\Var(Y(b))\right)
+    $$
+    where $Y(b):=Y+b(X-\Exp(X))$.
+\end{lemma}
+\begin{remark}
+    If $b=0$, the control variate estimator $\overline{Y}_n(b)$ coincides with the classical estimator $\overline{Y}_n$. Otherwise, the computational cost of $\overline{Y}_n(b)$ is higher than that of $\overline{Y}_n$, but it does not depend on the choice of $b\neq 0$.
+\end{remark}
+\begin{proposition}
+    The minimum of $\Var(Y(b))$ is attained for $$\hat{b}=-\frac{\cov(Y,X)}{\Var(X)}$$ and in that case, $\Var(Y(\hat{b}))=\Var(Y)(1-{\rho_{XY}}^2)$, where $\rho_{XY}$ is the correlation between $X$ and $Y$.
+\end{proposition}
+\begin{remark}
+    Usually, $\hat{b}$ is unknown, but we can use an estimator of it, such as:
+    $$
+    \hat{b}_n:=-\frac{\sum_{i=1}^n (Y_i-\overline{Y}_n)(X_i-\overline{X}_n)}{\sum_{i=1}^n {(X_i-\overline{X}_n)}^2}
+    $$
+\end{remark}
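To get a sense of the size of the gain, a small illustrative computation; the particular choice of $X$ and $Y$ below is arbitrary, picked only because everything can be computed in closed form:

Take $X\sim N(0,1)$ as control variate, with $\Exp(X)=0$ known, and let $Y=e^X$. Then
$$
\cov(Y,X)=\Exp(Xe^X)=\sqrt{e},\qquad \Var(X)=1,\qquad \Var(Y)=e^2-e,
$$
so that
$$
\hat{b}=-\sqrt{e}\approx -1.65,\qquad {\rho_{XY}}^2=\frac{e}{e^2-e}=\frac{1}{e-1}\approx 0.58,
$$
and $\Var(Y(\hat{b}))=\Var(Y)(1-{\rho_{XY}}^2)\approx 0.42\,\Var(Y)$: the control variate removes roughly $58\%$ of the variance of the crude estimator $\overline{Y}_n$.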
+\begin{definition}
+    Let $\vf{X}$ be a random vector such that $\Exp(\vf{X})$ is known, and $\vf{b}\in \RR^d$. We define the \emph{multiple control variate estimator} as:
+    $$
+    \overline{Y}_n(\vf{b}):=\frac{1}{n}\sum_{i=1}^n [Y_i+\transpose{\vf{b}}(\vf{X}_i-\Exp(\vf{X}))]
+    $$
+    where ${(Y_i)}_{1\leq i\leq n}$ are \iid copies of $Y$ and ${(\vf{X}_i)}_{1\leq i\leq n}$ are \iid copies of $\vf{X}$.
+    We also define $Y(\vf{b}):= Y+\transpose{\vf{b}}(\vf{X}-\Exp(\vf{X}))$.
+\end{definition}
+\begin{lemma}
+    The multiple control variate estimator $\overline{Y}_n(\vf{b})$ is unbiased and consistent. Furthermore:
+    $$
+    \sqrt{n}\left(\overline{Y}_n(\vf{b})-\Exp(Y)\right)\overset{\text{d}}{\longrightarrow}N\left(0,\Var(Y(\vf{b}))\right)
+    $$
+\end{lemma}
+\begin{proposition}
+    The minimum of $\Var(Y(\vf{b}))$ is attained for
+    $$
+    \hat{\vf{b}}=-\Var(\vf{X})^{-1}\cov(\vf{X},Y)
+    $$
+    and in that case, $\Var(Y(\hat{\vf{b}}))=\Var(Y)(1-R^2)$, where:
+    $$
+    R^2:=\frac{\transpose{\cov(\vf{X},Y)}\Var(\vf{X})^{-1}\cov(\vf{X},Y)}{\Var(Y)}
+    $$
+\end{proposition}
+\subsubsection{Importance sampling}
+\begin{definition}
+    Let $(\Omega, \mathcal{F}, \Prob)$ be a probability space. We say that a probability measure $\QQ$ is \emph{equivalent} to $\Prob$ if:
+    $$
+    \{A\in \mathcal{F} : \Prob(A)=0\}=\{A\in \mathcal{F} : \QQ(A)=0\}
+    $$
+\end{definition}
+\begin{lemma}
+    Let $\QQ$ be a probability measure equivalent to $\Prob$. Then, there exists a random variable $L>0$ such that $\forall A\in \mathcal{F}$, we have $\QQ(A)=\Exp_\Prob(L\indi{A})$. Furthermore, for all $X$ bounded:
+    $$
+    \Exp_\QQ(X)=\Exp_\Prob(L X)\qquad \Exp_\Prob(X)=\Exp_\QQ\left(\frac{X}{L}\right)
+    $$
+    Conversely, if we have a random variable $L>0$ such that $\Exp_\Prob(L)=1$, then
+    $$
+    \function{\QQ}{\mathcal{F}}{[0,1]}{A}{\Exp_\Prob(L\indi{A})}
+    $$
+    is a probability measure equivalent to $\Prob$.
+\end{lemma}
+\begin{remark}
+    The principle of the importance sampling method is to change the probability measure in order to give more weight to important outcomes.
+\end{remark}
+\begin{definition}
+    Let $(\QQ,L)$ be a pair as in the previous lemma, i.e.\ $\QQ$ is a probability measure equivalent to $\Prob$ with $\QQ(A)=\Exp_\Prob(L\indi{A})$ for all $A\in\mathcal{F}$. The \emph{importance sampling estimator} is defined as:
+    $$
+    \overline{Y}_n^{\QQ}:=\frac{1}{n}\sum_{i=1}^n {L_i}^{-1}Y_i
+    $$
+    where $(L_i,Y_i)$ are \iid copies of $(L,Y)$ (under $\QQ$).
+\end{definition}
+\begin{lemma}
+    The importance sampling estimator $\overline{Y}_n^{\QQ}$ is unbiased and consistent. Furthermore:
+    $$
+    \sqrt{n}\left(\overline{Y}_n^{\QQ}-\Exp_\Prob(Y)\right)\overset{\text{d}}{\longrightarrow}N\left(0,\Var_{\QQ}(L^{-1}Y)\right)
+    $$
+\end{lemma}
+\begin{remark}
+    In terms of computational cost, the importance sampling estimator $\overline{Y}_n^{\QQ}$ is more expensive than the classical estimator $\overline{Y}_n$. In terms of precision, the best estimator is the one with the smallest variance, so we have to compare $\Var(Y)$ and $\Var_{\QQ}(L^{-1}Y)$. It holds:
+    $$
+    \Var_{\QQ}(L^{-1}Y)=\Exp_\Prob\left(L^{-1}Y^2\right)-{\Exp_\Prob(Y)}^2
+    $$
+    This quantity can be larger or smaller than $\Var(Y)$ depending on the choice of $L$, and thus the success of importance sampling relies on the choice of an effective change of probability measure.
+\end{remark}
 \end{multicols}
 \end{document}
\ No newline at end of file