author    Urbain Vaes <urbain@vaes.uk>  2022-10-12 14:05:03 +0200
committer Urbain Vaes <urbain@vaes.uk>  2022-10-12 14:05:03 +0200
commit    d416ad78744d6e7c2a7a07fefda797a216efd25b (patch)
tree      90f7476f3bc3426d2e245a186c8f66aa29a6cd54 /main.tex
parent    cf2a2c6d28328f087de67efdba22230c2beaff72 (diff)
Minor change
Diffstat (limited to 'main.tex')
-rwxr-xr-x  main.tex  227
1 file changed, 206 insertions, 21 deletions
diff --git a/main.tex b/main.tex
index fb194c7..e1fbc9e 100755
--- a/main.tex
+++ b/main.tex
@@ -110,7 +110,12 @@
\section{Mobility estimation for Langevin dynamics using control variates}
\begin{frame}
- {Collaborators and reference}
+ % {Part I: Mobility estimation for Langevin dynamics using control variates}
+ \begin{center}
+ \Large
+ \color{blue}
+ Part I: Mobility estimation for Langevin dynamics
+ \end{center}
\begin{figure}
\centering
\begin{minipage}[t]{.2\linewidth}
@@ -161,8 +166,8 @@
\begin{frame}<beamer>
% \frametitle{Outline for section \thesection}
\frametitle{Outline}
- \tableofcontents[currentsubsection,sectionstyle=show/shaded,subsectionstyle=show/shaded/hide]
- % \tableofcontents[currentsubsection]
+ % \tableofcontents[currentsubsection,sectionstyle=show/shaded,subsectionstyle=show/shaded/hide]
+ \tableofcontents[currentsubsection]
\end{frame}
}
@@ -336,13 +341,13 @@
\begin{frame}
{Mathematical expression for the effective diffusion (dimension 1)}
\vspace{.2cm}
- \begin{exampleblock}{Expression of $D$ in terms of the solution to a Poisson equation}
+ \begin{block}{Expression of $D$ in terms of the solution to a Poisson equation}
The effective diffusion coefficient is given by $D = \emph{\ip{\phi}{p}}$, where $\phi$ is the solution to
\[
\emph{- \mathcal L \phi = p},
\qquad \phi \in L^2_0(\mu) := \bigl\{ u \in L^2(\mu): \ip{u}{1} = 0 \bigr\}.
\]
- \end{exampleblock}
+ \end{block}
\textbf{Key idea of the proof:} Apply It\^o's formula to $\phi$
\begin{align*}
\d \phi(q_s, p_s)
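A hedged sketch of how this Itô argument concludes, assuming the standard underdamped convention $\d q_s = p_s \, \d s$, $\d p_s = -V'(q_s) \, \d s - \gamma p_s \, \d s + \sqrt{2\gamma\beta^{-1}} \, \d W_s$ (the normalization used in the slides may differ):

    \[
      \d \phi(q_s, p_s)
      = \mathcal L \phi(q_s, p_s) \, \d s + \sqrt{2\gamma\beta^{-1}} \, \partial_p \phi(q_s, p_s) \, \d W_s
      = - p_s \, \d s + \sqrt{2\gamma\beta^{-1}} \, \partial_p \phi(q_s, p_s) \, \d W_s,
    \]
    so integrating and using $\d q_s = p_s \, \d s$,
    \[
      q_T - q_0 = \phi(q_0, p_0) - \phi(q_T, p_T)
      + \sqrt{2\gamma\beta^{-1}} \int_0^T \partial_p \phi(q_s, p_s) \, \d W_s.
    \]
    The boundary terms are bounded in $L^2(\mu)$, so the martingale dominates and
    \[
      \frac{1}{2T} \, \mathbb{E} \abs{q_T - q_0}^2
      \xrightarrow[T \to \infty]{} \gamma \beta^{-1} \int \abs{\partial_p \phi}^2 \, \d \mu
      = \ip{\phi}{p} = D,
    \]
    where the last step uses $-\mathcal L \phi = p$ and an integration by parts in $p$.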
@@ -629,13 +634,13 @@
\qquad \leadsto \text{\emph{independent} of $\gamma$}.
\]
- \begin{exampleblock}{Scaling of the mean square error when using $J$ realizations}
+ \begin{block}{Scaling of the mean square error when using $J$ realizations}
Assuming that $D_{\vect e}$ scales asymptotically as $\gamma^{-\sigma}$, we have
\[
\forall \gamma \in (0, 1), \qquad
\frac{\rm MSE}{D_{\vect e}^2} \leq \frac{C}{\gamma^{4-2 \sigma} T^2} + \frac{2}{J}
\]
- \end{exampleblock}
+ \end{block}
\end{frame}
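A minimal Monte Carlo sketch of the $2/J$ floor in this bound. All numerical choices are assumptions: we take $V = 0$ and $\beta = 1$, so the effective diffusion is exactly $D = 1/\gamma$, and we average the squared-displacement estimator $\abs{q_T - q_0}^2 / (2T)$ over $J$ independent realizations:

    import numpy as np

    # Free underdamped Langevin (V = 0, beta = 1): dq = p dt, dp = -gamma p dt + sqrt(2 gamma) dW.
    # The effective diffusion is then exactly D = 1/gamma, so the relative MSE of the
    # squared-displacement estimator, averaged over J realizations, can be checked directly.
    rng = np.random.default_rng(0)
    gamma, T, dt, J = 1.0, 400.0, 1e-2, 100
    n_steps = int(T / dt)
    D_exact = 1.0 / gamma

    p = rng.normal(0.0, 1.0, size=J)   # momenta started at equilibrium
    q = np.zeros(J)
    for _ in range(n_steps):
        q += p * dt
        p += -gamma * p * dt + np.sqrt(2.0 * gamma * dt) * rng.normal(size=J)

    D_hat_runs = q**2 / (2.0 * T)      # one estimate per realization
    print("D_hat =", D_hat_runs.mean(), " (exact:", D_exact, ")")
    print("rel. variance per run:", D_hat_runs.var() / D_exact**2, " (CLT predicts ~ 2)")
    print("=> MSE/D^2 floor ~ 2/J =", 2.0 / J)

Since $q_T \approx \mathcal N(0, 2DT)$ for large $T$, each per-run estimate is close to $D \chi^2_1$ in law, whose relative variance is $2$; averaging over $J$ runs then reproduces the $2/J$ term of the bound.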
% \subsection{Variance reduction using control variates}
@@ -786,13 +791,38 @@
\section{Optimal importance sampling for overdamped Langevin dynamics}
+% \begin{frame}
+% \begin{center}
+% \huge Part II: Optimal importance sampling for overdamped Langevin dynamics
+% \end{center}
+% \end{frame}
+
\begin{frame}
- {Collaborators}
+ \begin{center}
+ \Large
+ \color{blue}
+ Part II: Importance sampling for overdamped Langevin dynamics
+ \end{center}
+
\begin{figure}
\centering
\begin{minipage}[t]{.2\linewidth}
\centering
\raisebox{\dimexpr-\height+\ht\strutbox}{%
+ \includegraphics[height=\linewidth]{figures/collaborators/martin.jpg}
+ }
+ \end{minipage}\hspace{.03\linewidth}%
+ \begin{minipage}[t]{.21\linewidth}
+ Martin Chak
+ \vspace{0.2cm}
+
+ \includegraphics[height=1.2cm,width=\linewidth,keepaspectratio]{figures/collaborators/sorbonne.png}
+ \flushleft \scriptsize
+ Sorbonne Université
+ \end{minipage}\hspace{.1\linewidth}%%
+ \begin{minipage}[t]{.2\linewidth}
+ \centering
+ \raisebox{\dimexpr-\height+\ht\strutbox}{%
\includegraphics[height=\linewidth]{figures/collaborators/tony.jpg}
}
\end{minipage}\hspace{.03\linewidth}%
@@ -804,6 +834,8 @@
\flushleft \scriptsize
CERMICS \& Inria
\end{minipage}\hspace{.1\linewidth}%%
+
+ \vspace{.5cm}
\begin{minipage}[t]{.2\linewidth}
\centering
\raisebox{\dimexpr-\height+\ht\strutbox}{%
@@ -819,11 +851,6 @@
CERMICS \& Inria
\end{minipage}
\end{figure}
-
- \vspace{.7cm}
- \textbf{Outline:}
- \vspace{.2cm}
- \tableofcontents
\end{frame}
\subsection{Background and problem statement}
@@ -831,7 +858,7 @@
\begin{frame}
{The sampling problem}
- \begin{exampleblock}
+ \begin{block}
{Objective of the sampling problem}
Calculate averages with respect to
\[
@@ -839,7 +866,7 @@
\qquad Z = \int_{\torus^d} \e^{-V}.
\]
\vspace{-.4cm}
- \end{exampleblock}
+ \end{block}
\vspace{-.2cm}
\textbf{Often in applications}:
@@ -851,7 +878,7 @@
\textbf{Markov chain Monte Carlo (MCMC) approach}:
\[
- \mu(f) \approx \mu^T (f) := \frac{1}{T} \int_{0}^{T} f(Y_t) \, \d t
+ I := \mu(f) \approx \mu^T (f) := \frac{1}{T} \int_{0}^{T} f(Y_t) \, \d t
\]
for a Markov process $(Y_t)_{t\geq 0}$ that is \emph{ergodic} with respect to~$\mu$.
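A minimal sketch of this time-average estimator, assuming an Euler--Maruyama discretization of the overdamped Langevin dynamics $\d Y_t = -\nabla V(Y_t) \, \d t + \sqrt{2} \, \d W_t$ on the torus; the step size and the choices $V(x) = 5\cos(2x)$, $f(x) = \sin(x)$ (borrowed from a later example) are illustrative:

    import numpy as np

    # Euler--Maruyama sketch of mu^T(f) = (1/T) \int_0^T f(Y_t) dt for the overdamped
    # Langevin dynamics dY = -V'(Y) dt + sqrt(2) dW on the torus [0, 2*pi).
    rng = np.random.default_rng(1)
    dt, T = 1e-3, 200.0
    n_steps = int(T / dt)

    V_prime = lambda y: -10.0 * np.sin(2.0 * y)   # V(y) = 5 cos(2y), multimodal
    f = lambda y: np.sin(y)                       # mu(f) = 0 by symmetry

    y, acc = 0.0, 0.0
    for _ in range(n_steps):
        acc += f(y) * dt
        y = (y - V_prime(y) * dt + np.sqrt(2.0 * dt) * rng.normal()) % (2.0 * np.pi)

    # Convergence is slow here: the sampler rarely hops between the two modes of mu.
    print("mu^T(f) =", acc / T)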
@@ -869,7 +896,7 @@
\mu_{U} = \frac{\e^{-V - U}}{Z_U},
\qquad Z_U = \int_{\torus^d} \e^{-V-U},
\]
- then $\mu(f)$ may be approximated by
+ then $I = \mu(f)$ may be approximated by
\begin{equation*}
\label{eq:estimator}
\mu^T_U(f) :=
@@ -878,24 +905,182 @@
{\displaystyle \frac{1}{T} \int_0^T(\e^U)(X_t) \, \d t}.
\end{equation*}
+ \textbf{Markov process}: \emph{overdamped Langevin} dynamics
+ \[
+ \d X_t = -\nabla (V+U)(X_t) \, \d t + \sqrt{2} \, \d W_t,
+ \qquad X_0 = x_0.
+ \]
+
\textbf{Asymptotic variance}:
Under appropriate conditions,
it holds that
\[
- \sqrt{T} \bigl( \mu^T_U(f) - \mu(f)\bigr)
+ \sqrt{T} \bigl( \mu^T_U(f) - I \bigr)
\xrightarrow[T \to \infty]{\rm Law} \mathcal N\bigl(0, \sigma^2_f[U]\bigr).
\]
- \begin{exampleblock}
+ \begin{block}
{Objective}
Find $U$ such that the asymptotic variance $\sigma^2_f[U]$ is minimized.
- \end{exampleblock}
+ \end{block}
+\end{frame}
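The reweighted estimator $\mu^T_U(f)$ can be sketched the same way: simulate under $V + U$ and accumulate the weights $\e^{U(X_t)}$. The choice $U = -V$ below, which makes the modified landscape flat, is purely illustrative and not claimed to be optimal:

    import numpy as np

    # Importance-sampled ergodic average: simulate under V + U, reweight by e^U.
    rng = np.random.default_rng(2)
    dt, T = 1e-3, 200.0
    n_steps = int(T / dt)

    V_prime = lambda x: -10.0 * np.sin(2.0 * x)   # V(x) = 5 cos(2x)
    U = lambda x: -5.0 * np.cos(2.0 * x)          # illustrative choice: U = -V
    U_prime = lambda x: 10.0 * np.sin(2.0 * x)
    f = lambda x: np.sin(x)

    x, num, den = 0.0, 0.0, 0.0
    for _ in range(n_steps):
        w = np.exp(U(x))
        num += f(x) * w * dt
        den += w * dt
        drift = -(V_prime(x) + U_prime(x))        # -grad(V + U), identically 0 here
        x = (x + drift * dt + np.sqrt(2.0 * dt) * rng.normal()) % (2.0 * np.pi)

    print("mu^T_U(f) =", num / den)   # consistent for any U; only the variance depends on U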
+
+\begin{frame}
+ {Background: importance sampling in the i.i.d.\ setting (1/2)}
+ Given i.i.d.\ samples $\{X^1, X^2, \dotsc\}$ from $\mu_U$,
+ we define
+ \[
+ \mu_U^N(f) :=
+ \displaystyle \frac
+ {\sum_{n=1}^{N} (f \e^U)(X^{n})}
+ {\sum_{n=1}^{N} (\e^U)(X^{n})}
+ = I + \displaystyle \frac
+ {\frac{1}{N} \sum_{n=1}^{N} \left((f-I) \e^U\right)(X^{n})}
+ {\frac{1}{N} \sum_{n=1}^{N} (\e^U)(X^{n})}.
+ \]
+
+ \textbf{Numerator:} by the \emph{central limit theorem},
+ \[
+ \frac{1}{\sqrt{N}} \sum_{n=1}^{N} \left((f-I) \e^U\right) (X^{n})
+ \xrightarrow[N \to \infty]{\rm Law} \mathcal N\left(0, \int_{\torus^d} \abs*{(f-I) \e^U}^2 \, \d \mu_{U}\right)
+ \]
+
+ \textbf{Denominator:} by the strong law of large numbers,
+ \[
+ \frac{1}{N} \sum_{n=1}^{N} \left(\e^U\right)\left(X^{n}\right) \xrightarrow[N \to \infty]{\rm a.s.}
+ \frac{Z}{Z_U}.
+ \]
+
+ \textbf{Therefore}, by Slutsky's theorem,
+ \[
+ \sqrt{N} \bigl( \mu^N_U(f) - I\bigr)
+ \xrightarrow[N \to \infty]{\rm Law} \mathcal N\bigl(0, s^2_f[U]\bigr),
+ \qquad
+ s^2_f[U] := \frac{2 Z_U^2}{Z^2} \int_{\torus^d} \bigl\lvert (f-I) \e^U \bigr\rvert^2 \, \d \mu_{U}.
+ \]
+\end{frame}
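A minimal sketch of this i.i.d.\ estimator, assuming rejection sampling is used to draw from $\mu_U$; the concrete choices ($V = 0$, $U = \tfrac{1}{2}\cos x$, $f = \cos x$, so $I = 0$) are illustrative:

    import numpy as np

    # i.i.d. self-normalized importance sampling on the torus [0, 2*pi).
    rng = np.random.default_rng(3)
    V = lambda x: np.zeros_like(x)
    U = lambda x: 0.5 * np.cos(x)
    f = np.cos

    def sample_mu_U(N):
        # Rejection sampling from mu_U \propto exp(-V - U); here sup exp(-V - U) = exp(0.5).
        xs = np.empty(0)
        while xs.size < N:
            x = rng.uniform(0.0, 2.0 * np.pi, size=2 * N)
            u = rng.uniform(size=2 * N)
            xs = np.concatenate([xs, x[u < np.exp(-V(x) - U(x) - 0.5)]])
        return xs[:N]

    N = 100_000
    X = sample_mu_U(N)
    w = np.exp(U(X))
    print("mu^N_U(f) =", np.sum(f(X) * w) / np.sum(w))   # ~ I = 0, fluctuations O(N^{-1/2})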
+
+\begin{frame}
+ {Background: importance sampling in the i.i.d.\ setting (2/2)}
+ By the Cauchy--Schwarz inequality,
+ it holds that
+ \[
+ s^2_f[U]
+ \geq \frac{2Z_U^2}{Z^2} \left( \int_{\torus^d} \abs{f-I} \e^U \, \d \mu_{U} \right)^2,
+ \]
+ with equality when $\abs{f-I} \e^U$ is constant.
+
+ \begin{block}
+ {Optimal importance distribution}
+ The \emph{optimal $\mu_U$} in the i.i.d.\ setting is
+ \[
+ \mu_{U} \propto \abs{f-I} \e^{-V}
+ \]
+ \end{block}
+
+ \textbf{Objectives}:
+ \begin{itemize}
+ \item Is there a counterpart of this formula in the \emph{MCMC setting}?
+ \item If not, can we approximate the optimal distribution numerically?
+ \end{itemize}
+\end{frame}
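A quadrature check of this optimality on a uniform grid. The identity $s^2_f[U] = (2 Z_U / Z^2) \int \abs{f-I}^2 \e^{U - V} \, \d x$ used below follows by writing out the density of $\mu_U$; the test potential, observable, and the regularization of $\log\abs{f - I}$ near its zeros are implementation choices:

    import numpy as np

    # Check that mu_U \propto |f - I| e^{-V}, i.e. U_* = -log|f - I|, minimizes s^2_f[U].
    m = 100_000
    x = np.linspace(0.0, 2.0 * np.pi, m, endpoint=False)
    dx = x[1] - x[0]
    V = 5.0 * np.cos(2.0 * x)
    f = np.sin(x)

    Z = np.sum(np.exp(-V)) * dx
    I = np.sum(f * np.exp(-V)) * dx / Z

    def s2(U):
        Z_U = np.sum(np.exp(-V - U)) * dx
        return 2.0 * Z_U / Z**2 * np.sum((f - I)**2 * np.exp(U - V)) * dx

    U_opt = -np.log(np.abs(f - I) + 1e-12)   # regularized near the zeros of f - I
    bound = 2.0 / Z**2 * (np.sum(np.abs(f - I) * np.exp(-V)) * dx)**2
    print("s2[U = 0]   =", s2(np.zeros(m)))
    print("s2[U = U_*] =", s2(U_opt))        # ~ lower bound
    print("lower bound =", bound)

Note that the lower bound itself is independent of $U$, since $\int \abs{f - I} \e^U \, \d\mu_U = Z_U^{-1} \int \abs{f - I} \e^{-V} \, \d x$.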
+
+\subsection{Minimizing the asymptotic variance for one observable}
+\begin{frame}
+ {Formula for the asymptotic variance}
+ Let $\mathcal L_U$ denote the generator of the Markov semigroup associated with the modified potential:
+ \[
+ \mathcal L_U = - \nabla (V + U) \cdot \nabla + \Delta.
+ \]
+ \begin{block}
+ {Limit theorem}
+ Under appropriate conditions,
+ it holds that
+ \[
+ \sqrt{T} \bigl( \mu^T_U(f) - I\bigr)
+ \xrightarrow[T \to \infty]{\rm Law} \mathcal N\bigl(0, \sigma^2_f[U]\bigr).
+ \]
+ The \emph{asymptotic variance} is given by
+ \[
+ \sigma^2_f[U]
+ = \frac{2Z_U^2}{Z^2}\int_{\torus^d} \phi_U (f-I) \, \e^U \, \d\mu_{U},
+ \]
+ where $\phi_U$ is the unique solution in~$H^1(\mu_{U}) \cap L^2_0(\mu_{U})$ to
+ \[
+ -\mathcal L_U \phi_{U} = (f- I) \e^U.
+ \]
+ \end{block}
+ \textbf{Main ideas of the proof:} central limit theorem for martingales, Slutsky's theorem.
\end{frame}
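A hedged finite-difference sketch of this computation in dimension one; the grid size, the least-squares treatment of the singular operator, and the baseline $U = 0$ are all illustrative choices:

    import numpy as np

    # Periodic finite-difference solve of -L_U phi = (f - I) e^U in dimension one,
    # with L_U = -(V + U)' d/dx + d^2/dx^2, then the asymptotic-variance formula.
    m = 1000
    x = np.linspace(0.0, 2.0 * np.pi, m, endpoint=False)
    dx = x[1] - x[0]

    V, dV = 5.0 * np.cos(2.0 * x), -10.0 * np.sin(2.0 * x)
    U, dU = np.zeros(m), np.zeros(m)               # baseline U = 0
    f = np.sin(x)

    Z = np.sum(np.exp(-V)) * dx
    Z_U = np.sum(np.exp(-V - U)) * dx
    I = np.sum(f * np.exp(-V)) * dx / Z

    e = np.ones(m - 1)
    D1 = (np.diag(e, 1) - np.diag(e, -1)) / (2.0 * dx)      # periodic centered d/dx
    D1[0, -1], D1[-1, 0] = -1.0 / (2.0 * dx), 1.0 / (2.0 * dx)
    D2 = (np.diag(e, 1) + np.diag(e, -1) - 2.0 * np.eye(m)) / dx**2
    D2[0, -1] = D2[-1, 0] = 1.0 / dx**2
    L_U = -np.diag(dV + dU) @ D1 + D2

    rhs = (f - I) * np.exp(U)
    # -L_U is singular (constants are in its kernel); least squares picks a solution,
    # which is then shifted to have mean zero against mu_U, as in L^2_0(mu_U).
    phi = np.linalg.lstsq(-L_U, rhs, rcond=None)[0]
    mu_U = np.exp(-V - U) / Z_U
    phi -= np.sum(phi * mu_U) * dx

    sigma2 = 2.0 * Z_U**2 / Z**2 * np.sum(phi * rhs * mu_U) * dx
    print("sigma^2_f[U = 0] =", sigma2)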
\begin{frame}
- {Background}
+ {Explicit optimal $U$ in dimension 1}
+ In \emph{dimension one}, it holds that
+ \begin{equation}
+ \label{eq:lower_bound_asymvar}
+ \sigma^2_f[U] \geq \frac{2}{Z^2} \inf_{A \in \real} \bigg(\int_{\torus} \bigl\lvert F(x) + A \bigr\rvert \d x \bigg)^2,
+ \end{equation}
+ where
+ \[
+ F(x) := \int_0^x \bigl( f(\xi)-I \bigr) \e^{-V(\xi)}\d \xi.
+ \]
+ This inequality~\eqref{eq:lower_bound_asymvar} is an equality for
+ \[
+ U(x) = U_*(x) = - V(x) -\ln\abs*{F(x) + A_*},
+ \]
+ where $A_*$ is the constant achieving the infimum in~\eqref{eq:lower_bound_asymvar}.
+
+ \begin{itemize}
+ \item The potential $U_*$ is generally \alert{singular}: impractical for numerics\dots
+ \item The lower bound in~\eqref{eq:lower_bound_asymvar} can be approached by a smooth~$U$.
+ \end{itemize}
\end{frame}
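A sketch of how $U_*$ and the lower bound can be computed numerically. The reduction of the infimum over $A$ to a median is the standard $L^1$ location argument; the grid and the regularization near the zeros of $F + A_*$ are implementation choices:

    import numpy as np

    # 1d optimizer: F(x) = \int_0^x (f - I) e^{-V}, A_* = argmin_A \int |F + A| dx
    # (attained at A = -median(F) on a uniform grid), U_* = -V - log|F + A_*|.
    m = 100_000
    x = np.linspace(0.0, 2.0 * np.pi, m, endpoint=False)
    dx = x[1] - x[0]
    V = 5.0 * np.cos(2.0 * x)
    f = np.sin(x)

    w = np.exp(-V)
    Z = np.sum(w) * dx
    I = np.sum(f * w) * dx / Z

    F = np.cumsum((f - I) * w) * dx                    # cumulative quadrature for F
    A_star = -np.median(F)                             # minimizes the L^1 cost
    U_star = -V - np.log(np.abs(F + A_star) + 1e-12)   # regularized near zeros of F + A_*
    bound = 2.0 / Z**2 * (np.sum(np.abs(F + A_star)) * dx)**2
    print("A_* =", A_star, " lower bound on sigma^2_f =", bound)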
+\begin{frame}
+ {Example (1/2)}
+ Assume that $V = 0$ and $f(x) = \cos(x)$.
+ \begin{figure}[ht]
+ \centering
+ \includegraphics[width=0.8\linewidth]{figures/driftopt/1d_optimal_cosine.pdf}
+ \label{fig:optimal_perturbation_potential}
+ \end{figure}
+ $\rightsquigarrow$ The optimal potential ``divides'' the domain into two parts.
+\end{frame}
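For this example the optimum is available in closed form; a short worked check (not on the slide), using $I = 0$, $Z = 2\pi$, and $A_* = 0$, all by symmetry:

    \[
      F(x) = \int_0^x \cos(\xi) \, \d \xi = \sin(x),
      \qquad
      U_*(x) = - \ln \abs{\sin(x)},
    \]
    so $U_*$ is singular at $x = 0$ and $x = \pi$, which is exactly the two-part
    division of the domain seen in the figure, and the lower bound evaluates to
    \[
      \sigma^2_f[U] \geq \frac{2}{(2\pi)^2}
      \left( \int_0^{2\pi} \abs{\sin(x)} \, \d x \right)^2
      = \frac{2 \cdot 16}{4 \pi^2} = \frac{8}{\pi^2}.
    \]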
+
+\begin{frame}
+ {Example (2/2)}
+ Assume that $V(x) = 5\cos(2 x)$ and~$f(x) = \sin(x)$.
+ The target measure is \alert{multimodal}.
+ \begin{figure}[ht]
+ \centering
+ \includegraphics[width=0.8\linewidth]{figures/driftopt/1d_optimal_metastable.pdf}
+ \label{fig:optimal_perturbation_potential_1d_metastable}
+ \end{figure}
+ \emph{Variance reduction} by a factor $> 1000$!
+\end{frame}
+
+\begin{frame}
+ {Finding the optimal $U$ in the multidimensional setting}
+ Recall the one-dimensional lower bound~\eqref{eq:lower_bound_asymvar},
+ \[
+ \sigma^2_f[U] \geq \frac{2}{Z^2} \inf_{A \in \real} \bigg(\int_{\torus} \bigl\lvert F(x) + A \bigr\rvert \d x \bigg)^2,
+ \qquad
+ F(x) := \int_0^x \bigl( f(\xi)-I \bigr) \e^{-V(\xi)}\d \xi,
+ \]
+ which is approached by smooth approximations of
+ \[
+ U_*(x) = - V(x) -\ln\abs*{F(x) + A_*}.
+ \]
+
+ \begin{itemize}
+ \item The derivation of $U_*$ relies on solving the Poisson equation explicitly, which is possible only in dimension one.
+ \item In the multidimensional setting, the optimal distribution must instead be approximated numerically.
+ \end{itemize}
+\end{frame}
\appendix