diff options
author | Urbain Vaes <urbain@vaes.uk> | 2022-10-12 14:05:03 +0200 |
---|---|---|
committer | Urbain Vaes <urbain@vaes.uk> | 2022-10-12 14:05:03 +0200 |
commit | d416ad78744d6e7c2a7a07fefda797a216efd25b (patch) | |
tree | 90f7476f3bc3426d2e245a186c8f66aa29a6cd54 /main.tex | |
parent | cf2a2c6d28328f087de67efdba22230c2beaff72 (diff) |
Minor change
Diffstat (limited to 'main.tex')
-rwxr-xr-x | main.tex | 227 |
1 file changed, 206 insertions, 21 deletions
@@ -110,7 +110,12 @@ \section{Mobility estimation for Langevin dynamics using control variates} \begin{frame} - {Collaborators and reference} + % {Part I: Mobility estimation for Langevin dynamics using control variates} + \begin{center} + \Large + \color{blue} + Part I: Mobility estimation for Langevin dynamics + \end{center} \begin{figure} \centering \begin{minipage}[t]{.2\linewidth} @@ -161,8 +166,8 @@ \begin{frame}<beamer> % \frametitle{Outline for section \thesection} \frametitle{Outline} - \tableofcontents[currentsubsection,sectionstyle=show/shaded,subsectionstyle=show/shaded/hide] - % \tableofcontents[currentsubsection] + % \tableofcontents[currentsubsection,sectionstyle=show/shaded,subsectionstyle=show/shaded/hide] + \tableofcontents[currentsubsection] \end{frame} } @@ -336,13 +341,13 @@ \begin{frame} {Mathematical expression for the effective diffusion (dimension 1)} \vspace{.2cm} - \begin{exampleblock}{Expression of $D$ in terms of the solution to a Poisson equation} + \begin{block}{Expression of $D$ in terms of the solution to a Poisson equation} The effective diffusion coefficient is given by $D = \emph{ \ip{\phi}{p}}$, where $\phi$ is the solution to \[ \emph{- \mathcal L \phi = p}, \qquad \phi \in L^2_0(\mu) := \bigl\{ u \in L^2(\mu): \ip{u}{1} = 0 \bigr\}. \] - \end{exampleblock} + \end{block} \textbf{Key idea of the proof:} Apply It\^o's formula to $\phi$ \begin{align*} \d \phi(q_s, p_s) @@ -629,13 +634,13 @@ \qquad \leadsto \text{\emph{independent} of $\gamma$}.
\] - \begin{exampleblock}{Scaling of the mean square error when using $J$ realizations} + \begin{block}{Scaling of the mean square error when using $J$ realizations} Assuming an asymptotic scaling as $\gamma^{-\sigma}$ of $D_{\vect e}$, we have \[ \forall \gamma \in (0, 1), \qquad \frac{\rm MSE}{D_{\vect e}^2} \leq \frac{C}{\gamma^{4-2 \sigma} T^2} + \frac{2}{J} \] - \end{exampleblock} + \end{block} \end{frame} % \subsection{Variance reduction using control variates} @@ -786,13 +791,38 @@ \section{Optimal importance sampling for overdamped Langevin dynamics} +% \begin{frame} +% \begin{center} +% \huge Part II: Optimal importance sampling for overdamped Langevin dynamics +% \end{center} +% \end{frame} + \begin{frame} - {Collaborators} + \begin{center} + \Large + \color{blue} + Part II: importance sampling for overdamped Langevin dynamics + \end{center} + \begin{figure} \centering \begin{minipage}[t]{.2\linewidth} \centering \raisebox{\dimexpr-\height+\ht\strutbox}{% + \includegraphics[height=\linewidth]{figures/collaborators/martin.jpg} + } + \end{minipage}\hspace{.03\linewidth}% + \begin{minipage}[t]{.21\linewidth} + Martin Chak + \vspace{0.2cm} + + \includegraphics[height=1.2cm,width=\linewidth,keepaspectratio]{figures/collaborators/sorbonne.png} + \flushleft \scriptsize + Sorbonne Université + \end{minipage}\hspace{.1\linewidth}%% + \begin{minipage}[t]{.2\linewidth} + \centering + \raisebox{\dimexpr-\height+\ht\strutbox}{% \includegraphics[height=\linewidth]{figures/collaborators/tony.jpg} } \end{minipage}\hspace{.03\linewidth}% @@ -804,6 +834,8 @@ \flushleft \scriptsize CERMICS \& Inria \end{minipage}\hspace{.1\linewidth}%% + + \vspace{.5cm} \begin{minipage}[t]{.2\linewidth} \centering \raisebox{\dimexpr-\height+\ht\strutbox}{% @@ -819,11 +851,6 @@ CERMICS \& Inria \end{minipage} \end{figure} - - \vspace{.7cm} - \textbf{Outline:} - \vspace{.2cm} - \tableofcontents \end{frame} \subsection{Background and problem statement} @@ -831,7 +858,7 @@ \begin{frame} {The 
sampling problem} - \begin{exampleblock} + \begin{block} {Objective of the sampling problem} Calculate averages with respect to \[ @@ -839,7 +866,7 @@ \qquad Z = \int_{\torus^d} \e^{-V}. \] \vspace{-.4cm} - \end{exampleblock} + \end{block} \vspace{-.2cm} \textbf{Often in applications}: @@ -851,7 +878,7 @@ \textbf{Markov chain Monte Carlo (MCMC) approach}: \[ - \mu(f) \approx \mu^T (f) := \frac{1}{T} \int_{0}^{T} f(Y_t) \, \d t + I := \mu(f) \approx \mu^T (f) := \frac{1}{T} \int_{0}^{T} f(Y_t) \, \d t \] for a Markov process $(Y_t)_{t\geq 0}$ that is \emph{ergodic} with respect to~$\mu$. @@ -869,7 +896,7 @@ \mu_{U} = \frac{\e^{-V - U}}{Z_U}, \qquad Z_U = \int_{\torus^d} \e^{-V-U}, \] - then $\mu(f)$ may be approximated by + then $I = \mu(f)$ may be approximated by \begin{equation*} \label{eq:estimator} \mu^T_U(f) := @@ -878,24 +905,182 @@ {\displaystyle \frac{1}{T} \int_0^T(\e^U)(X_t) \, \d t}. \end{equation*} + \textbf{Markov process}: \emph{overdamped Langevin} dynamics + \[ + \d X_t = -\nabla (V+U)(X_t) \, \d t + \sqrt{2} \, \d W_t, + \qquad X_0 = x_0. + \] + \textbf{Asymptotic variance}: Under appropriate conditions, it holds that \[ - \sqrt{T} \bigl( \mu^T_U(f) - \mu(f)\bigr) + \sqrt{T} \bigl( \mu^T_U(f) - I \bigr) \xrightarrow[T \to \infty]{\rm Law} \mathcal N\bigl(0, \sigma^2_f[U]\bigr). \] - \begin{exampleblock} + \begin{block} {Objective} Find $U$ such that the asymptotic variance $\sigma^2_f[U]$ is minimized. 
- \end{exampleblock} + \end{block} +\end{frame} + +\begin{frame} + {Background: importance sampling in the i.i.d.\ setting (1/2)} + Given i.i.d.\ samples $\{X^1, X^2, \dotsc\}$ from $\mu_U$, + we define + \[ + \mu_U^N(f) := + \displaystyle \frac + {\sum_{n=1}^{N} (f \e^U)(X^{n})} + {\sum_{n=1}^{N} (\e^U)(X^{n})} + = I + \displaystyle \frac + {\frac{1}{N} \sum_{n=1}^{N} \left((f-I) \e^U\right)(X^{n})} + {\frac{1}{N} \sum_{n=1}^{N} (\e^U)(X^{n})}. + \] + + \textbf{Numerator:} by the \emph{central limit theorem}, + \[ + \frac{1}{\sqrt{N}} \sum_{n=1}^{N} \left((f-I) \e^U\right) (X^{n}) + \xrightarrow[N \to \infty]{\rm Law} \mathcal N\left(0, \int_{\torus^d} \abs*{(f-I) \e^U}^2 \, \d \mu_{U}\right) + \] + + \textbf{Denominator:} by the strong law of large numbers, + \[ + \frac{1}{N} \sum_{n=1}^{N} \left(\e^U\right)\left(X^{n}\right) \xrightarrow[N \to \infty]{\rm a.s.} + \frac{Z}{Z_U}. + \] + + \textbf{Therefore}, by Slutsky's theorem, + \[ + \sqrt{N} \bigl( \mu^N_U(f) - I\bigr) + \xrightarrow[N \to \infty]{\rm Law} \mathcal N\bigl(0, s^2_f[U]\bigr), + \qquad + s^2_f[U] := \frac{2 Z_U^2}{Z^2} \int_{\torus^d} \bigl\lvert (f-I) \e^U \bigr\rvert^2 \, \d \mu_{U}. + \] +\end{frame} + +\begin{frame} + {Background: importance sampling in the i.i.d.\ setting (2/2)} + By the Cauchy--Schwarz inequality, + it holds that + \[ + s^2_f[U] + \geq \frac{2Z_U^2}{Z^2} \left( \int_{\torus^d} \abs{f-I} \e^U \, \d \mu_{U} \right)^2, + \] + with equality when $\abs{f-I} \e^U$ is constant. + + \begin{block} + {Optimal importance distribution} + The \emph{optimal $\mu_U$} in the i.i.d.\ setting is + \[ + \mu_{U} \propto \abs{f-I} \e^{-V} + \] + \end{block} + + \textbf{Objectives}: + \begin{itemize} + \item Is there a counterpart of this formula in the \emph{MCMC setting}? + \item If not, can we approximate the optimal distribution numerically?
+ \end{itemize} +\end{frame} + +\subsection{Minimizing the asymptotic variance for one observable} +\begin{frame} + {Formula for the asymptotic variance} + Let $\mathcal L_U$ denote the generator of the Markov semigroup associated with the modified potential: + \[ + \mathcal L_U = - \nabla (V + U) \cdot \nabla + \Delta. + \] + \begin{block} + {Limit theorem} + Under appropriate conditions, + it holds that + \[ + \sqrt{T} \bigl( \mu^T_U(f) - I\bigr) + \xrightarrow[T \to \infty]{\rm Law} \mathcal N\bigl(0, \sigma^2_f[U]\bigr). + \] + The \emph{asymptotic variance} is given by + \[ + \sigma^2_f[U] + = \frac{2Z_U^2}{Z^2}\int_{\torus^d} \phi_U (f-I) \, \e^U \, \d\mu_{U}, + \] + where $\phi_U$ is the unique solution in~$H^1(\mu_{U}) \cap L^2_0(\mu_{U})$ to + \[ + -\mathcal L_U \phi_{U} = (f- I) \e^U. + \] + \end{block} + \textbf{Main ideas of the proof:} central limit theorem for martingales, Slutsky's theorem. \end{frame} \begin{frame} - {Background} + {Explicit optimal $U$ in dimension 1} + In \emph{dimension one}, it holds that + \begin{equation} + \label{eq:lower_bound_asymvar} + \sigma^2_f[U] \geq \frac{2}{Z^2} \inf_{A \in \real} \bigg(\int_{\torus} \bigl\lvert F(x) + A \bigr\rvert \d x \bigg)^2, + \end{equation} + where + \[ + F(x) := \int_0^x \bigl( f(\xi)-I \bigr) \e^{-V(\xi)}\d \xi. + \] + This inequality~\eqref{eq:lower_bound_asymvar} is an equality for + \[ + U(x) = U_*(x) = - V(x) -\ln\abs*{F(x) + A_*}, + \] + where $A_*$ is the constant achieving the infimum in~\eqref{eq:lower_bound_asymvar}. + + \begin{itemize} + \item The potential $U_*$ is generally \alert{singular}: impractical for numerics\dots + \item The lower bound in~\eqref{eq:lower_bound_asymvar} can be approached by a smooth~$U$. + \end{itemize} \end{frame} +\begin{frame} + {Example (1/2)} + Assume that $V = 0$ and $f(x) = \cos(x)$.
+ \begin{figure}[ht] + \centering + \includegraphics[width=0.8\linewidth]{figures/driftopt/1d_optimal_cosine.pdf} + \label{fig:optimal_perturbation_potential} + \end{figure} + $\rightsquigarrow$ The optimal potential ``divides'' the domain into two parts. +\end{frame} + +\begin{frame} + {Example (2/2)} + Assume that $V(x) = 5\cos(2 x)$ and~$f(x) = \sin(x)$. + The target measure is \alert{multimodal}. + \begin{figure}[ht] + \centering + \includegraphics[width=0.8\linewidth]{figures/driftopt/1d_optimal_metastable.pdf} + \label{fig:optimal_perturbation_potential_1d_metastable} + \end{figure} + \emph{Variance reduction} by a factor $> 1000$! +\end{frame} + +\begin{frame} + {Finding the optimal $U$ in the multidimensional setting} + In \emph{dimension one}, it holds that + \begin{equation} + \label{eq:lower_bound_asymvar_multidim} + \sigma^2_f[U] \geq \frac{2}{Z^2} \inf_{A \in \real} \bigg(\int_{\torus} \bigl\lvert F(x) + A \bigr\rvert \d x \bigg)^2, + \end{equation} + where + \[ + F(x) := \int_0^x \bigl( f(\xi)-I \bigr) \e^{-V(\xi)}\d \xi. + \] + This inequality~\eqref{eq:lower_bound_asymvar_multidim} is an equality for + \[ + U(x) = U_*(x) = - V(x) -\ln\abs*{F(x) + A_*}, + \] + where $A_*$ is the constant achieving the infimum in~\eqref{eq:lower_bound_asymvar_multidim}. + + \begin{itemize} + \item The potential $U_*$ is generally \alert{singular}: impractical for numerics\dots + \item The lower bound in~\eqref{eq:lower_bound_asymvar_multidim} can be approached by a smooth~$U$. + \end{itemize} +\end{frame} \appendix |