Minor change

author: Urbain Vaes <urbain@vaes.uk> 2022-10-12 14:05:03 +0200
committer: Urbain Vaes <urbain@vaes.uk> 2022-10-12 14:05:03 +0200
commit: d416ad78744d6e7c2a7a07fefda797a216efd25b (patch)
tree: 90f7476f3bc3426d2e245a186c8f66aa29a6cd54
parent: cf2a2c6d28328f087de67efdba22230c2beaff72 (diff)
1 files changed, 206 insertions, 21 deletions
diff --git a/main.tex b/main.tex
index fb194c7..e1fbc9e 100755
--- a/main.tex
+++ b/main.tex
@@ -110,7 +110,12 @@
 
 \section{Mobility estimation for Langevin dynamics using control variates}
 \begin{frame}
-    {Collaborators and reference}
+    % {Part I:  Mobility estimation for Langevin dynamics using control variates}
+    \begin{center}
+      \Large
+      \color{blue}
+      Part I: Mobility estimation for Langevin dynamics
+    \end{center}
     \begin{figure}
         \centering
         \begin{minipage}[t]{.2\linewidth}
@@ -161,8 +166,8 @@
   \begin{frame}<beamer>
     % \frametitle{Outline for section \thesection}
     \frametitle{Outline}
-    \tableofcontents[currentsubsection,sectionstyle=show/shaded,subsectionstyle=show/shaded/hide]
-    % \tableofcontents[currentsubsection]
+    % \tableofcontents[currentsubsection,sectionstyle=show/shaded,subsectionstyle=show/shaded/hide]
+    \tableofcontents[currentsubsection]
   \end{frame}
 }
 
@@ -336,13 +341,13 @@
 \begin{frame}
   {Mathematical expression for the effective diffusion (dimension 1)}
   \vspace{.2cm}
-  \begin{exampleblock}{Expression of $D$ in terms of the solution to a Poisson equation}
+  \begin{block}{Expression of $D$ in terms of the solution to a Poisson equation}
  The effective diffusion coefficient is given by $D = \emph{ \ip{\phi}{p}}$, where $\phi$ is the solution to
   \[
     \emph{- \mathcal L \phi = p},
     \qquad \phi \in L^2_0(\mu) := \bigl\{ u \in L^2(\mu): \ip{u}{1} = 0 \bigr\}.
   \]
-  \end{exampleblock}
+  \end{block}
   \textbf{Key idea of the proof:} Apply It\^o's formula to $\phi$
   \begin{align*}
     \d \phi(q_s, p_s)
@@ -629,13 +634,13 @@
     \qquad \leadsto \text{\emph{independent} of $\gamma$}.
   \]
 
-  \begin{exampleblock}{Scaling of the  mean square error when using $J$ realizations}
+  \begin{block}{Scaling of the  mean square error when using $J$ realizations}
     Assuming an asymptotic scaling as $\gamma^{-\sigma}$ of $D_{\vect e}$, we have
     \[
       \forall \gamma \in (0, 1), \qquad
       \frac{\rm MSE}{D_{\vect e}^2} \leq \frac{C}{\gamma^{4-2 \sigma} T^2} + \frac{2}{J}
     \]
-  \end{exampleblock}
+  \end{block}
 \end{frame}
 
 % \subsection{Variance reduction using control variates}
@@ -786,13 +791,38 @@
 
 \section{Optimal importance sampling for overdamped Langevin dynamics}
 
+% \begin{frame}
+%   \begin{center}
+%     \huge Part II: Optimal importance sampling for overdamped Langevin dynamics
+%   \end{center}
+% \end{frame}
+
 \begin{frame}
-    {Collaborators}
+    \begin{center}
+      \Large
+      \color{blue}
+      Part II: Importance sampling for overdamped Langevin dynamics
+    \end{center}
+
     \begin{figure}
         \centering
         \begin{minipage}[t]{.2\linewidth}
             \centering
             \raisebox{\dimexpr-\height+\ht\strutbox}{%
+              \includegraphics[height=\linewidth]{figures/collaborators/martin.jpg}
+            }
+        \end{minipage}\hspace{.03\linewidth}%
+        \begin{minipage}[t]{.21\linewidth}
+          Martin Chak
+          \vspace{0.2cm}
+
+          \includegraphics[height=1.2cm,width=\linewidth,keepaspectratio]{figures/collaborators/sorbonne.png}
+          \flushleft \scriptsize
+          Sorbonne Université
+        \end{minipage}\hspace{.1\linewidth}%%
+        \begin{minipage}[t]{.2\linewidth}
+            \centering
+            \raisebox{\dimexpr-\height+\ht\strutbox}{%
               \includegraphics[height=\linewidth]{figures/collaborators/tony.jpg}
             }
         \end{minipage}\hspace{.03\linewidth}%
@@ -804,6 +834,8 @@
           \flushleft \scriptsize
           CERMICS \& Inria
         \end{minipage}\hspace{.1\linewidth}%%
+
+        \vspace{.5cm}
         \begin{minipage}[t]{.2\linewidth}
             \centering
             \raisebox{\dimexpr-\height+\ht\strutbox}{%
@@ -819,11 +851,6 @@
           CERMICS \& Inria
         \end{minipage}
     \end{figure}
-
-    \vspace{.7cm}
-    \textbf{Outline:}
-    \vspace{.2cm}
-    \tableofcontents
 \end{frame}
 
 \subsection{Background and problem statement}
@@ -831,7 +858,7 @@
 \begin{frame}
   {The sampling problem}
 
-  \begin{exampleblock}
+  \begin{block}
     {Objective of the sampling problem}
     Calculate averages with respect to
     \[
@@ -839,7 +866,7 @@
       \qquad Z = \int_{\torus^d} \e^{-V}.
     \]
     \vspace{-.4cm}
-  \end{exampleblock}
+  \end{block}
 
   \vspace{-.2cm}
   \textbf{Often in applications}:
@@ -851,7 +878,7 @@
 
   \textbf{Markov chain Monte Carlo (MCMC) approach}:
   \[
-    \mu(f) \approx \mu^T (f) := \frac{1}{T} \int_{0}^{T} f(Y_t) \, \d t
+      I := \mu(f) \approx \mu^T (f) := \frac{1}{T} \int_{0}^{T} f(Y_t) \, \d t
   \]
   for a Markov process $(Y_t)_{t\geq 0}$ that is \emph{ergodic} with respect to~$\mu$.
 
@@ -869,7 +896,7 @@
       \mu_{U} = \frac{\e^{-V - U}}{Z_U},
       \qquad Z_U = \int_{\torus^d} \e^{-V-U},
   \]
-  then $\mu(f)$ may be approximated by
+  then $I = \mu(f)$ may be approximated by
   \begin{equation*}
       \label{eq:estimator}
       \mu^T_U(f) :=
@@ -878,24 +905,182 @@
       {\displaystyle \frac{1}{T} \int_0^T(\e^U)(X_t) \, \d t}.
   \end{equation*}
 
+  \textbf{Markov process}: \emph{overdamped Langevin} dynamics
+  \[
+      \d X_t = -\nabla (V+U)(X_t) \, \d t + \sqrt{2} \, \d W_t,
+      \qquad X_0 = x_0.
+  \]
+
   \textbf{Asymptotic variance}:
   Under appropriate conditions,
   it holds that
   \[
-      \sqrt{T} \bigl( \mu^T_U(f) - \mu(f)\bigr)
+      \sqrt{T} \bigl( \mu^T_U(f) - I \bigr)
       \xrightarrow[T \to \infty]{\rm Law} \mathcal N\bigl(0, \sigma^2_f[U]\bigr).
   \]
 
-  \begin{exampleblock}
+  \begin{block}
     {Objective}
     Find $U$ such that the asymptotic variance $\sigma^2_f[U]$ is minimized.
-  \end{exampleblock}
+  \end{block}
+\end{frame}
+
+\begin{frame}
+  {Background: importance sampling in the i.i.d.\ setting (1/2)}
+  Given i.i.d.\ samples $\{X^1, X^2, \dotsc\}$ from $\mu_U$,
+  we define
+  \[
+      \mu_U^N(f) :=
+      \displaystyle \frac
+      {\sum_{n=1}^{N} (f \e^U)(X^{n})}
+      {\sum_{n=1}^{N} (\e^U)(X^{n})}
+      = I + \displaystyle \frac
+      {\frac{1}{N} \sum_{n=1}^{N} \left((f-I) \e^U\right)(X^{n})}
+      {\frac{1}{N} \sum_{n=1}^{N} (\e^U)(X^{n})}.
+  \]
+
+  \textbf{Numerator:} by the \emph{central limit theorem},
+  \[
+    \frac{1}{\sqrt{N}} \sum_{n=1}^{N} \left((f-I) \e^U\right) (X^{n})
+    \xrightarrow[N \to \infty]{\rm Law} \mathcal N\left(0, \int_{\torus^d} \abs*{(f-I) \e^U}^2 \, \d \mu_{U}\right).
+  \]
+
+  \textbf{Denominator:} by the strong law of large numbers,
+  \[
+    \frac{1}{N} \sum_{n=1}^{N} \left(\e^U\right)\left(X^{n}\right) \xrightarrow[N \to \infty]{\rm a.s.}
+    \frac{Z}{Z_U}.
+  \]
+
+  \textbf{Therefore}, by Slutsky's theorem,
+  \[
+      \sqrt{N} \bigl( \mu^N_U(f) - I\bigr)
+      \xrightarrow[N \to \infty]{\rm Law} \mathcal N\bigl(0, s^2_f[U]\bigr),
+      \qquad
+      s^2_f[U] := \frac{2 Z_U^2}{Z^2} \int_{\torus^d} \bigl\lvert (f-I) \e^U \bigr\rvert^2 \, \d \mu_{U}.
+  \]
+\end{frame}
+
+\begin{frame}
+  {Background: importance sampling in the i.i.d.\ setting (2/2)}
+  By the Cauchy--Schwarz inequality,
+  it holds that
+  \[
+      s^2_f[U]
+      \geq \frac{2Z_U^2}{Z^2} \left( \int_{\torus^d} \abs{f-I} \e^U \, \d \mu_{U} \right)^2,
+  \]
+  with equality when $\abs{f-I} \e^U$ is constant.
+
+  \begin{block}
+    {Optimal importance distribution}
+    The \emph{optimal $\mu_U$} in the i.i.d.\ setting is
+    \[
+        \mu_{U} \propto \abs{f-I} \e^{-V}.
+    \]
+  \end{block}
+
+  \textbf{Objectives}:
+  \begin{itemize}
+    \item Is there a counterpart of this formula in the \emph{MCMC setting}?
+    \item If not, can we approximate the optimal distribution numerically?
+  \end{itemize}
+\end{frame}
+
+\subsection{Minimizing the asymptotic variance for one observable}
+\begin{frame}
+  {Formula for the asymptotic variance}
+  Let $\mathcal L_U$ denote the generator of the Markov semigroup associated to the modified potential:
+  \[
+      \mathcal L_U = - \nabla (V + U) \cdot \nabla + \Delta.
+  \]
+  \begin{block}
+    {Limit theorem}
+    Under appropriate conditions,
+    it holds that
+    \[
+      \sqrt{T} \bigl( \mu^T_U(f) - I\bigr)
+      \xrightarrow[T \to \infty]{\rm Law} \mathcal N\bigl(0, \sigma^2_f[U]\bigr).
+    \]
+    The \emph{asymptotic variance} is given by
+    \[
+      \sigma^2_f[U]
+      = \frac{2Z_U^2}{Z^2}\int_{\torus^d} \phi_U (f-I) \, \e^U \, \d\mu_{U},
+    \]
+    where $\phi_U$ is the unique solution in~$H^1(\mu_{U}) \cap L^2_0(\mu_{U})$ to
+    \[
+      -\mathcal L_U \phi_{U} = (f- I) \e^U.
+    \]
+  \end{block}
+  \textbf{Main ideas of the proof:} central limit theorem for martingales, Slutsky's theorem.
 \end{frame}
 
 \begin{frame}
-  {Background}
+  {Explicit optimal $U$ in dimension 1}
+  In \emph{dimension one}, it holds that
+  \begin{equation}
+    \label{eq:lower_bound_asymvar}
+    \sigma^2_f[U] \geq \frac{2}{Z^2}  \inf_{A \in \real} \bigg(\int_{\torus} \bigl\lvert F(x) + A \bigr\rvert \d x \bigg)^2,
+  \end{equation}
+  where 
+  \[
+    F(x) := \int_0^x \bigl( f(\xi)-I \bigr) \e^{-V(\xi)}\d \xi.
+  \]
+  This inequality~\eqref{eq:lower_bound_asymvar} is an equality for
+  \[
+    U(x) = U_*(x) = - V(x) -\ln\abs*{F(x) + A_*},
+  \]
+  where $A_*$ is the constant achieving the infimum in~\eqref{eq:lower_bound_asymvar}.
+
+  \begin{itemize}
+    \item The potential $U_*$ is generally \alert{singular}: impractical for numerics\dots
+    \item The lower bound in~\eqref{eq:lower_bound_asymvar} can be approached by a smooth~$U$.
+  \end{itemize}
 \end{frame}
 
+\begin{frame}
+  {Example (1/2)}
+  Assume that $V = 0$ and $f(x) = \cos(x)$.
+  \begin{figure}[ht]
+    \centering
+    \includegraphics[width=0.8\linewidth]{figures/driftopt/1d_optimal_cosine.pdf}
+    \label{fig:optimal_perturbation_potential}
+  \end{figure}
+  $\rightsquigarrow$ The optimal potential ``divides'' the domain into two parts.
+\end{frame}
+
+\begin{frame}
+  {Example (2/2)}
+  Assume that $V(x) = 5\cos(2 x)$ and~$f(x) = \sin(x)$.
+  The target measure is \alert{multimodal}.
+  \begin{figure}[ht]
+      \centering
+      \includegraphics[width=0.8\linewidth]{figures/driftopt/1d_optimal_metastable.pdf}
+      \label{fig:optimal_perturbation_potential_1d_metastable}
+  \end{figure}
+  \emph{Variance reduction} by a factor $> 1000$!
+\end{frame}
+
+\begin{frame}
+  {Finding the optimal $U$ in the multidimensional setting}
+  In \emph{dimension one}, it holds that
+  \begin{equation}
+    \label{eq:lower_bound_asymvar_multidim}
+    \sigma^2_f[U] \geq \frac{2}{Z^2}  \inf_{A \in \real} \bigg(\int_{\torus} \bigl\lvert F(x) + A \bigr\rvert \d x \bigg)^2,
+  \end{equation}
+  where
+  \[
+    F(x) := \int_0^x \bigl( f(\xi)-I \bigr) \e^{-V(\xi)}\d \xi.
+  \]
+  This inequality~\eqref{eq:lower_bound_asymvar_multidim} is an equality for
+  \[
+    U(x) = U_*(x) = - V(x) -\ln\abs*{F(x) + A_*},
+  \]
+  where $A_*$ is the constant achieving the infimum in~\eqref{eq:lower_bound_asymvar_multidim}.
+
+  \begin{itemize}
+    \item The potential $U_*$ is generally \alert{singular}: impractical for numerics\dots
+    \item The lower bound in~\eqref{eq:lower_bound_asymvar_multidim} can be approached by a smooth~$U$.
+  \end{itemize}
+\end{frame}
 
 \appendix
author	Urbain Vaes <urbain@vaes.uk>	2022-10-12 14:05:03 +0200
committer	Urbain Vaes <urbain@vaes.uk>	2022-10-12 14:05:03 +0200
commit	d416ad78744d6e7c2a7a07fefda797a216efd25b (patch)
tree	90f7476f3bc3426d2e245a186c8f66aa29a6cd54
parent	cf2a2c6d28328f087de67efdba22230c2beaff72 (diff)