\documentclass[9pt]{beamer}
\renewcommand{\emph}[1]{\textcolor{blue}{#1}}
\newif\iflong
\longfalse
\setbeamerfont{footnote}{size=\scriptsize}
\input{header}
\input{macros}
\newcommand{\highlight}[2]{%
  \colorbox{#1!20}{$\displaystyle#2$}}
\newcommand{\hiat}[4]{%
  \only<#1>{\highlight{#3}{#4}}%
  \only<#2>{\highlight{white}{#4}}%
}
\graphicspath{{figures/}}
\AtEveryCitekey{\clearfield{pages}}
\AtEveryCitekey{\clearfield{eprint}}
\AtEveryCitekey{\clearfield{volume}}
\AtEveryCitekey{\clearfield{number}}
\AtEveryCitekey{\clearfield{month}}
\addbibresource{main.bib}
\title{Mobility estimation for Langevin dynamics using control variates\\[.3cm]
  \small \textcolor{yellow}{AMMP Seminar}
}
\author{%
  Urbain Vaes
  \texorpdfstring{\\\texttt{urbain.vaes@inria.fr}}{}
}
\institute{%
  MATHERIALS -- Inria Paris
  \textcolor{blue}{\&}
  CERMICS -- École des Ponts ParisTech
}
\date{October 2022}
\begin{document}
\begin{frame}[plain]
  \begin{figure}[ht]
    \centering
    % \includegraphics[height=1.5cm]{figures/logo_matherials.png}
    % \hspace{.5cm}
    \includegraphics[height=1.2cm]{figures/logo_inria.png}
    \hspace{.5cm}
    \includegraphics[height=1.5cm]{figures/logo_ponts.png}
    \hspace{.5cm}
    \includegraphics[height=1.5cm]{figures/logo_ERC.jpg}
    \hspace{.5cm}
    \includegraphics[height=1.5cm]{figures/logo_EMC2.png}
  \end{figure}
  \titlepage
\end{frame}
\begin{frame}
  {Outline}
  \tableofcontents
\end{frame}
% \section{Some background material on fast/slow systems of SDEs}%
% \label{sec:numerical_solution_of_multiscale_sdes}
% \begin{frame}
%   {Homogenization result}
%   \begin{itemize}
%     \item Effective drift:
%       \[
%         \vect F(x) = \int_{\torus^n} \left(\vect f \, \cdot \, \grad_x \right) \vect \Phi(x,y) \, \rho^{\infty}(y;x) \, \d y.
%       \]
%     \item Effective diffusion:
%       \begin{align*}
%         & \mat A(x) \, \mat A(x)^T = \frac12 \left(\mat A_0(x) + \mat A_0(x)^T\right), \\
%         & \text{with } \mat A_0(x) := 2 \int_{\real^n} \vect f(x,y) \, \otimes \, \vect \Phi(x,y) \, \rho^{\infty}(y;x) \, \d y.
%       \end{align*}
%   \end{itemize}
%   \begin{example}
%     Multiscale system:
%     \begin{alignat*}{2}
%       & \d X^{\varepsilon}_t = \frac{1}{\varepsilon} X^{\varepsilon}_t \, Y^{\varepsilon}_t \, \d t, \quad & X^{\varepsilon}_0 = 1, \\
%       & \d Y^{\varepsilon}_t = - \frac{1}{\varepsilon^2} \, Y_t^{\varepsilon} \, \d t + \frac{\sqrt 2}{\varepsilon} \,\d W_{y}(t), \quad & Y^{\varepsilon}_0 = 0.
%     \end{alignat*}
%     Effective equation:
%     \[
%       \d X_t = X_t \, \d t + \, \sqrt{2} \, X_t \, \d W_{y} (t).
%     \]
%   \end{example}
% \end{frame}
% \begin{frame}
%   {Example: Stratonovich correction}
%   \begin{figure}[ht]
%     \centering
%     \href{run:videos/spectral/slow.avi?autostart&loop}%
%     {\includegraphics[width=0.8\textwidth]{videos/spectral/slow.png}}%
%     \href{run:videos/spectral/fast.avi?autostart&loop}%
%     {\includegraphics[width=0.8\textwidth]{videos/spectral/fast.png}}%
%     \caption{%
%       Convergence to the solution of the effective equation as $\varepsilon \to 0$.
%     }
%   \end{figure}
% \end{frame}
\section{Mobility estimation for Langevin dynamics using control variates}
\begin{frame}
  % {Part I: Mobility estimation for Langevin dynamics using control variates}
  \begin{center}
    \Large \color{blue} Part I: Mobility estimation for Langevin dynamics
  \end{center}
  \begin{figure}
    \centering
    \begin{minipage}[t]{.2\linewidth}
      \centering
      \raisebox{\dimexpr-\height+\ht\strutbox}{%
        \includegraphics[height=\linewidth]{figures/collaborators/greg.jpg}
      }
    \end{minipage}\hspace{.01\linewidth}%
    \begin{minipage}[t]{.24\linewidth}
      Grigorios Pavliotis
      \vspace{0.2cm}
      \includegraphics[height=1cm,width=\linewidth,keepaspectratio]{figures/collaborators/imperial.pdf}
      \flushleft \scriptsize Department of Mathematics
    \end{minipage}\hspace{.1\linewidth}%%
    \begin{minipage}[t]{.2\linewidth}
      \centering
      \raisebox{\dimexpr-\height+\ht\strutbox}{%
        \includegraphics[height=\linewidth]{figures/collaborators/gabriel.jpg}
      }
    \end{minipage}\hspace{.01\linewidth}%
    \begin{minipage}[t]{.24\linewidth}
      Gabriel Stoltz
      \vspace{0.2cm}
      \includegraphics[height=1.2cm,width=\linewidth,keepaspectratio]{figures/collaborators/enpc.png}
      \flushleft \scriptsize CERMICS
    \end{minipage}
  \end{figure}
  \vspace{.7cm}
  \textbf{Reference:} \fullcite{2022arXiv220609781P}
\end{frame}
% \begin{frame}[plain]
%   \frametitle{Outline}
%   \tableofcontents[subsectionstyle=show]
% \end{frame}
\subsection{Background and problem statement}%
\AtBeginSubsection[]
{
  \begin{frame}
    % \frametitle{Outline for section \thesection}
    \frametitle{Outline}
    % \tableofcontents[currentsubsection,sectionstyle=show/shaded,subsectionstyle=show/shaded/hide]
    \tableofcontents[currentsubsection]
  \end{frame}
}
\begin{frame}
  {Goals of molecular dynamics}
  {\large $\bullet$} Computation of \emph{macroscopic properties} from Newtonian atomistic models:
  \vspace{-.1cm}
  \begin{minipage}{.51\textwidth}
    \vspace{-.7cm}
    \begin{itemize}
      \item Static properties, such as
        \begin{itemize}
          \item the heat capacity and
          \item the equation of state $P = P(\rho, T)$.
        \end{itemize}
        \vspace{.2cm}
      \item Dynamical properties, such as \emph{transport coefficients}:
        % mobility,
        % shear viscosity;
        % thermal conductivity.
        \begin{itemize}
          \item the viscosity;
          \item the thermal conductivity;
          \item the \emph{mobility} of ions in solution.
        \end{itemize}
    \end{itemize}
  \end{minipage}
  \hspace{.5cm}
  \begin{minipage}{.4\textwidth}
    \begin{figure}[ht]
      \centering
      \includegraphics[width=.8\linewidth, angle=270]{figures/loi_argon-crop.pdf}
      \caption*{\hspace{1.2cm}%
        Equation of state of argon at 300\,K.
        \tiny\hspace{1.2cm}$\bullet$ `+': molecular simulation;
        \hspace{1.2cm}$\bullet$ Solid line: experimental measurements\footnotemark.
      }
    \end{figure}
  \end{minipage}
  \footnotetext{\url{https://webbook.nist.gov/chemistry/fluid/}}
  \vspace{.2cm}
  {\large $\bullet$} \emph{Numerical microscope}: used in physics, biology, chemistry.
\end{frame}
\begin{frame}
  {Some background material on the Langevin equation}
  Consider the (one-particle) Langevin equation
  \[
    \left\{
    \begin{aligned}
      & \d \vect q_t = \textcolor{blue}{\vect p_t \, \d t}, \\
      & \d \vect p_t = \textcolor{blue}{- \grad V(\vect q_t) \, \d t} \, \textcolor{red}{- {\color{black}\gamma} \vect p_t \, \d t + \sqrt{2 {\color{black}\gamma} \beta^{-1}} \, \d \vect W_t},
    \end{aligned}
    \right.
    \qquad (\vect q_0, \vect p_0) \sim \mu,
  \]
  where $\gamma$ is the friction coefficient, $V$ is a \emph{periodic} potential, and $\beta = \frac{1}{k_{\rm B} T}$.
  \begin{itemize}
    % \item The dynamics is composed of a \textcolor{blue}{Hamiltonian} part and a \textcolor{red}{fluctuation/dissipation} part;
    \item The invariant probability measure is
      \[
        \mu(\vect q, \vect p) = \frac{1}{Z} \e^{-\beta H(\vect q, \vect p)} = \frac{1}{Z} \e^{-\beta \left(V(\vect q) + \frac{\abs{\vect p}^2}{2}\right)},
        \quad \text{on}~ \emph{\torus^d} \times \real^d.
      \]
    \item The generator of the associated Markov semigroup
      \[
        \left (\e^{\mathcal L t} \varphi\right) (\vect q, \vect p) = \expect \bigl(\varphi(\vect q_t, \vect p_t) \big| (\vect q_0, \vect p_0) = (\vect q, \vect p) \bigr)
      \]
      is the following operator:
      \begin{align*}
        \mathcal L &= \textcolor{blue}{\left(\vect p \cdot \grad_{\vect q} - \grad V(\vect q) \cdot \grad_{\vect p} \right)}
        + \gamma \, \textcolor{red}{\left( - \vect p \cdot \grad_{\vect p} + \beta^{-1} \laplacian_{\vect p} \right)}
        =: \textcolor{blue}{\mathcal L_{\textrm{ham}}} + \gamma \, \textcolor{red}{\mathcal L_{\textrm{FD}}}.
      \end{align*}
  \end{itemize}
  We denote by $\norm{\cdot}$ and $\ip{\cdot}{\cdot}$ the norm and inner product of~$L^2(\mu)$, and
  \[
    L^2_0(\mu) = \Bigl\{\varphi \in L^2(\mu) : \ip{\varphi}{1} = \expect_{\mu} \varphi = 0 \Bigr\}.
  \]
\end{frame}
% \begin{frame}
%   {Common models in molecular simulation}
%   We consider the following hierarchy of models:
%   \begin{align}
%     \label{eq:gle:model:overdamped} \tag{OL}
%     \dot {\vect q} &= - \grad V(\vect q) + \sqrt{2 \, \beta^{-1}} \, \dot {\vect W}, \\
%     \label{eq:gle:model:langevin} \tag{L}
%     \ddot {\vect q} &= - \grad V(\vect q) - \gamma \, \dot {\vect q} + \sqrt{2 \gamma \, \beta^{-1}} \, \dot {\vect W}, \\
%     \label{eq:gle:model:generalized} \tag{GLE}
%     \ddot {\vect q} &= -\grad V(\vect q) - \int_{0}^{t} \widehat \gamma(t-s) \, \dot {\vect q}(s) \, \d s + \vect F(t).
%   \end{align}
%   where
%   \begin{itemize}
%     \item $V$ is a potential, in this talk \emph{periodic};
%     \item $\gamma$ is the friction coefficient;
%     \item $\widehat \gamma(\cdot)$ is the memory kernel;
%     \item $\vect F$ is a stationary non-Markovian noise process.
%   \end{itemize}
%   \vspace{.2cm}
%   The kernel $\widehat \gamma(\cdot)$ and the noise $F$ are related by the \emph{fluctuation/dissipation} relation:
%   \[
%     \expect\bigl[\vect F(t) \otimes \vect F(s)\bigr] = \beta^{-1} \, \widehat \gamma(t-s) \mat I_d.
%   \]
% \end{frame}
% \subsection{Mobility and effective diffusion}
\begin{frame}
  {Definition of the mobility}
  Consider Langevin dynamics with additional forcing in a direction $\vect e$:
  % \[
  %   \ddot {\vect q} = - \grad V(\vect q) + \alert{\eta \vect e} - \gamma \, \dot {\vect q} + \sqrt{2 \, \gamma} \, \beta^{-1} \, \dot {\vect W}.
  % \]
  % This equation may be rewritten as a system for the position and momentum:
  \[
    \left\{
    \begin{aligned}
      & \d \vect q_t = \vect p_t \, \d t, \\
      & \d \vect p_t = - \grad V(\vect q_t) \, \d t + \alert{\eta \vect e} \, \d t - \gamma \vect p_t \, \d t + \sqrt{2 \gamma \beta^{-1}} \, \d \vect W_t.
    \end{aligned}
    \right.
  \]
  This dynamics admits a unique invariant probability distribution $\mu_{\alert{\eta}} \in \mathcal P(\emph{\torus^d} \times \real^d)$.
  \begin{definition}
    [Mobility]
    The mobility in direction $\vect e$ is defined mathematically as
    \[
      M_{\vect e} = \lim_{\alert{\eta} \to 0} \frac{1}{\alert{\eta}}\expect_{\mu_{\alert{\eta}}} [\vect e^\t \vect p]
    \]
    $\approx$ factor relating the mean momentum to the strength of the applied external force.
  \end{definition}
  \begin{itemize}
    \item There is a symmetric mobility tensor $\mat M$ such that $M_{\vect e} = \vect e^\t \mat M \vect e$.
    \item \textbf{Einstein's relation:}
      \( \mat D = \beta^{-1} \mat M, \)
      with $\mat D$ the \emph{effective diffusion coefficient}.
  \end{itemize}
\end{frame}
\begin{frame}
  {Effective diffusion}
  It is possible to show a \emph{functional central limit theorem} for the Langevin dynamics\footfullcite{MR663900}:
  \begin{equation*}
    \varepsilon \vect q_{s/\varepsilon^2} \xrightarrow[\varepsilon \to 0]{} \sqrt{2 \mat D} \, \vect W_s \qquad \text{weakly on } C([0, \infty)).
  \end{equation*}
  In particular, $\vect q_t/\sqrt{t} \xrightarrow[t \to \infty]{} \mathcal N(0, 2 \mat D)$ weakly.
  \vspace{-.25cm}
  \begin{figure}[ht]
    \centering
    \href{run:videos/gle/effective-diffusion.webm?autostart&loop}%
    {\includegraphics[width=0.75\textwidth]{videos/gle/effective-diffusion.png}}%
    \caption{Histogram of $q_t/\sqrt{t}$. The potential $V(q) = - \cos(q) / 2$ is illustrated in the background.}
  \end{figure}
\end{frame}
\begin{frame}
  {Mathematical expression for the effective diffusion (dimension 1)}
  \vspace{.2cm}
  \begin{block}{Expression of $D$ in terms of the solution to a Poisson equation}
    The effective diffusion coefficient is given by $D = \emph{\ip{\phi}{p}}$, where $\phi$ is the solution to
    \[
      \emph{- \mathcal L \phi = p}, \qquad \phi \in L^2_0(\mu) := \bigl\{ u \in L^2(\mu): \ip{u}{1} = 0 \bigr\}.
    \]
  \end{block}
  \textbf{Key idea of the proof:} Apply It\^o's formula to $\phi$:
  \begin{align*}
    \d \phi(q_s, p_s)
    % &= \frac{1}{\varepsilon^2} \mathcal L_{L} \phi (q_t, p_t) + \frac{1}{\varepsilon} \, \sqrt{2 \gamma \beta^{-1}} \, \derivative{1}[\phi]{p}(q_t, p_t) \, \d W_t, \\
    &= - p_s \, \d s + \sqrt{2 \gamma \beta^{-1}} \, \derivative{1}[\phi]{p}(q_s, p_s) \, \d W_s
  \end{align*}
  and then rearrange:
  \begin{align*}
    \alert\varepsilon (q_{t/\alert\varepsilon^2} - q_{0})
    &= \alert\varepsilon \int_{0}^{t/\alert\varepsilon^2} p_s \, \d s \\
    &= \underbrace{\alert\varepsilon \bigl(\phi(q_0, p_0) - \phi(q_{t/\alert\varepsilon^2}, p_{t/\alert\varepsilon^2})\bigr)}_{\to 0
    % ~\text{in $L^p(\Omega, C([0, T], \real))$}
    }
    + \underbrace{\sqrt{2 \gamma \beta^{-1}} \alert\varepsilon \int_{0}^{t/\alert\varepsilon^2} \derivative{1}[\phi]{p}(q_s, p_s) \, \d W_s}_{\to \sqrt{2 D} W_t~\text{weakly by MCLT}}.
  \end{align*}
  % where
  % \begin{align*}
  %   D &= \gamma \beta^{-1} \, \int \abs{\textstyle \derivative{1}[\phi]{p}(q, p)}^2 \, \mu(\d q \, \d p)
  %   = - \int \phi (\mathcal L \phi) \, \d \mu
  %   = \ip{\phi}{p}.
  % \end{align*}
  \vspace{.3cm}
  \textbf{In the multidimensional setting}, $D_{\vect e} = \ip{\phi_{\vect e}}{\vect e^\t \vect p}$ with $- \mathcal L \phi_{\vect e} = \vect e^\t \vect p$.
\end{frame}
\begin{frame}
  {Open question: surface diffusion when $\gamma \ll 1$\footnote{Source of the video: \url{https://en.wikipedia.org/wiki/Surface_diffusion}}}
  \begin{figure}[ht]
    \centering
    \href{run:videos/surface_diffusion.webm?autostart&loop}%
    {\includegraphics[width=0.4\linewidth]{videos/surface_diffusion.png}}
    \hspace{1cm}
    % \href{run:videos/diffusion.webm?autostart&loop}%
    % {\includegraphics[width=0.4\linewidth]{figures/mean_square.pdf}}
  \end{figure}
  \vspace{-.3cm}
  Applications:
  \begin{itemize}
    \item integrated circuits;
    \item catalysis.
  \end{itemize}
  \textbf{Open question}: behavior of the effective diffusion coefficient when $\gamma \ll 1$?
  \[
    D = \lim_{t \to \infty} \frac{\langle \abs{\vect q(t)}^2 \rangle}{4 t} \sim \gamma^{-\alert{\sigma}}, \qquad \alert{\sigma} =\, ???
  \]
  % \vspace{-.3cm}
  % \textbf{Difficulty}: slow convergence of Monte Carlo methods when $\gamma$ is small.
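  \vspace{.2cm}
  \textbf{Known case:} in dimension 1, $\lim_{\gamma \to 0} \gamma D^{\gamma} = D_{\rm und}$ (see below), i.e.\ $\alert{\sigma} = 1$; the multidimensional case is the question addressed in this talk.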
  % \vspace{.3cm}
\end{frame}
% \subsection{Some background material on the Langevin equation}
\begin{frame}{Langevin dynamics: \textcolor{yellow}{underdamped} and \textcolor{yellow}{overdamped} regimes\footfullcite{MR2394704}}
  \begin{figure}[ht]
    \centering
    \href{run:videos/particles_underdamped.webm?autostart&loop}%
    {\includegraphics[width=0.49\textwidth]{videos/particles_underdamped.png}}%
    \href{run:videos/particles_overdamped.webm?autostart&loop}%
    {\includegraphics[width=0.49\textwidth]{videos/particles_overdamped.png}}%
    \caption{Langevin dynamics with friction $\gamma = 0.1$ (left) and $\gamma = 10$ (right).}
  \end{figure}
  \vspace{-.3cm}
  \begin{itemize}
    \item The \alert{underdamped} limit as $\gamma \to 0$ is well understood in dimension 1 but not in the \alert{multidimensional setting}.
    \item \emph{Overdamped} limit: as $\gamma \to \infty$, the rescaled process $t \mapsto q_{\gamma t}$ converges weakly to the solution of the \emph{overdamped Langevin equation}:
      \[
        \dot {\vect q} = - \grad V(\vect q) + \sqrt{2 \, \beta^{-1}} \, \dot {\vect W}.
      \]
  \end{itemize}
\end{frame}
\begin{frame}
  {The \textcolor{yellow}{underdamped} limit in \textcolor{yellow}{dimension 1}}
  As \emph{$\gamma \to 0$}, the Hamiltonian of the rescaled process
  \begin{equation*}
    \left\{
    \begin{aligned}
      q_{\gamma}(t) = q(t/\gamma), \\
      p_{\gamma}(t) = p(t/\gamma),
    \end{aligned}
    \right.
  \end{equation*}
  converges weakly to a diffusion process on a graph.
  \vspace{-.6cm}
  \begin{figure}[ht!]
    % \centering
    % #1f77b4', u'#ff7f0e', u'#2ca02c
    \definecolor{c1}{RGB}{31,119,180}
    \definecolor{c2}{RGB}{255,127,14}
    \definecolor{c3}{RGB}{44,160,44}
    \begin{tikzpicture}%
      \node[anchor=south west,inner sep=0] at (0,0) {%
        \includegraphics[width=.7\textwidth]{figures/separatrix.eps}
      };
      \coordinate (origin) at (10,0);
      \coordinate (Emin) at ($ (origin) + (0,.5) $);
      \coordinate (E0) at ($ (origin) + (0,2) $);
      \coordinate (E1) at ($ (origin) + (-1,4) $);
      \coordinate (E2) at ($ (origin) + (1,4) $);
      \node at ($ (Emin) + (.7,0) $) {$E_{\min}$};
      \node[color=red] at ($ (E0) + (.5,0) $) {$E_{0}$};
      \node at ($ (E1) + (0,.3) $) {$p < 0$};
      \node at ($ (E2) + (0,.3) $) {$p > 0$};
      \draw[thick,color=c2] (Emin) -- (E0) node [color=black, midway, right] {};
      \draw[thick,color=c1] (E0) -- (E1) node [color=black, midway, left] {};
      \draw[thick,color=c3] (E0) -- (E2) node [color=black, midway, right] {};
      \node at (E0) [circle,fill,inner sep=1.5pt,color=red]{};
      \node at (Emin) [circle,fill,inner sep=1.5pt]{};
    \end{tikzpicture}%
  \end{figure}
  \vspace{-.5cm}
  In this limit, it holds that
  \[
    % \norm{\mathcal L^{-1}}_{\mathcal B\left(L^2_0(\mu)\right)} = \mathcal O \left( \alert{\gamma^{-1}} \right),
    % \qquad
    \phi = - \mathcal L^{-1} p = \alert{\gamma^{-1}} \phi_{\rm und} + \mathcal O(\gamma^{-1/2}).
  \]
  % The limiting function $\phi_{\rm und}$ is continuous but \alert{not in $H^1(\mu)$}.
\end{frame}
\begin{frame}
  {Scaling of the effective diffusion coefficient for \textcolor{yellow}{Langevin} dynamics\footfullcite{MR2427108}}
  In \alert{dimension 1},
  \( \lim_{\gamma \to 0} \gamma D^{\gamma} = D_{\rm und} \)
  and
  \( \lim_{\gamma \to \infty} \gamma D^{\gamma} = D_{\rm ovd}. \)
  \begin{figure}[ht]
    \centering
    \includegraphics[width=0.5\linewidth,height=0.33\linewidth]{figures/scaling_diffusion_langevin.png}
  \end{figure}
  \textbf{\emph{Our aims in this work:}}
  \begin{itemize}
    \item How can we efficiently estimate the effective diffusion coefficient when \alert{$\gamma \ll 1$}?
    \item How does the mobility scale as \alert{$\gamma \to 0$} in the multidimensional setting?
  \end{itemize}
\end{frame}
\subsection{Efficient mobility estimation}%
\begin{frame}
  {Brief literature review}
  % Consider the Langevin dynamics with $(\vect q_t, \vect p_t) \in (\real^{\alert{d}} \times \real^{\alert{d}})$:
  % \begin{equation*}
  %   \left\{
  %   \begin{aligned}
  %     & \d \vect q_t = \vect p_t \,\d t, \\
  %     & \d \vect p_t = - \grad V (\vect q_t) \, \d t - \gamma \vect p_t \, \d t + \sqrt{2 \gamma \beta^{-1}} \, \d \vect W_t.
  %   \end{aligned}
  %   \right.
  % \end{equation*}
  In dimension $> 1$, it \alert{does not hold} that $\gamma D^{\gamma}_{\vect e} \xrightarrow[\gamma \to 0]{} D_{\rm und}$ when $V$ is \alert{non-separable}, e.g.
  \[
    V(\vect q) = - \frac{1}{2} \Big( \cos(q_1) + \cos(q_2) \Big) - \alert{\delta} \cos(q_1) \cos(q_2).
  \]
  \textbf{Open question:} how does $D^\gamma_{\vect e}$ behave when $\gamma \ll 1$ and \alert{$d = 2$}?
  % \begin{block}
  %   {Open question: how does $D^\gamma_{\vect e}$ behave when $\gamma \ll 1$ and \alert{$d = 2$}?}
  Various answers are given in the literature:
  \begin{itemize}
    \item $D^{\gamma}_{\vect e} \propto \gamma^{-1/2}$ for specific potentials\footfullcite{chen1996surface};
    \item $D^{\gamma}_{\vect e} \propto \gamma^{-1/3}$ for specific potentials~\footfullcite{Braun02};
    \item $D^{\gamma}_{\vect e} \propto \gamma^{-\sigma}$ with $\sigma$ depending on the potential~\footfullcite{roussel_thesis}.
  \end{itemize}
  % \end{block}
  \vspace{.5cm}
\end{frame}
\begin{frame}[label=continue]
  {Numerical approaches for calculating the effective diffusion coefficient}
  \begin{itemize}
    \itemsep.5cm
    \item \emph{Linear response approach}:
      \begin{equation*}
        D_{\vect e} = \lim_{\eta \to 0} \frac{1}{\beta \alert{\eta}} \expect_{\alert{\mu_\eta}} \, (\vect e^\t \vect p),
      \end{equation*}
      where $\mu_{\eta}$ is the invariant distribution of the system with external forcing.
    \item \emph{Green--Kubo formula}: Since $-\mathcal L^{-1} = \int_{0}^{\infty} \e^{t \mathcal L} \, \d t$,
      \begin{align*}
        D_{\vect e}
        &= \int - \mathcal L^{-1}(\vect e^\t \vect p) \, (\vect e^\t \vect p) \, \d \mu
        = \int_{0}^{\infty} \! \! \! \int \e^{t \mathcal L} (\vect e^\t \vect p) (\vect e^\t \vect p) \, \d \mu \, \d t \\
        &= \int_{0}^{\infty} \expect_{\mu}\bigl((\vect e^\t \vect p_0) (\vect e^\t \vect p_t)\bigr) \, \d t.
      \end{align*}
    \item \emph{Einstein's relation}:
      \[
        D_{\vect e} = \lim_{t \to \infty} \frac{1}{2t} \expect_{\mu} \Bigl[ \bigl|\vect e^\t (\vect q_t - \vect q_0)\bigr|^2 \Bigr].
      \]
    \item Deterministic methods, e.g.\ \emph{Fourier/Hermite Galerkin}, for the Poisson equation
      \[
        - \mathcal L \phi_{\vect e} = \vect e^\t \vect p, \qquad D_{\vect e} = \ip{\phi_{\vect e}}{\vect e^\t \vect p}.
      \]
  \end{itemize}
\end{frame}
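\begin{frame}
  {Computing the Einstein estimator in practice: a minimal sketch}
  \textbf{A minimal sketch}, assuming a plain Euler--Maruyama discretization with time step $\Delta t$
  (this particular scheme is only an illustrative choice; any consistent integrator for the Langevin dynamics could be used instead):
  \[
    \left\{
    \begin{aligned}
      & \vect q_{n+1} = \vect q_n + \Delta t \, \vect p_n, \\
      & \vect p_{n+1} = \vect p_n - \Delta t \, \grad V(\vect q_n) - \Delta t \, \gamma \, \vect p_n
      + \sqrt{2 \gamma \beta^{-1} \Delta t} \, \vect G_n, \qquad \vect G_n \sim \mathcal N(0, \mat I_d).
    \end{aligned}
    \right.
  \]
  Given $J$ independent trajectories of length $T = N \Delta t$ with $(\vect q^{(j)}_0, \vect p^{(j)}_0) \sim \mu$,
  Einstein's relation suggests the estimator
  \[
    D_{\vect e} \approx \frac{1}{J} \sum_{j=1}^{J} \frac{\abs*{\vect e^\t \bigl(\vect q^{(j)}_N - \vect q^{(j)}_0\bigr)}^2}{2 N \Delta t}.
  \]
  The next slides quantify the bias and the variance of estimators of this type.
\end{frame}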
% \begin{frame}
%   {Fourier/Hermite Galerkin method for one-dimensional Langevin dynamics}
%
%   Saddle-point formulation\footfullcite{roussel2018spectral}:
%   find $(\Phi_N, \alpha_N) \in V_N \times \real$ such that
%   \begin{align}
%     \notag
%     - \Pi_N \, \mathcal L \, \Pi_N \alert{\Phi_N} + \alert{\alpha_N} u_N &= \Pi_N p, \\
%     \label{eq:constraint}
%     \ip{\Phi_N}{u_N} &= 0,
%   \end{align}
%   where
%   \begin{itemize}
%     \item $\Pi_N$ is the $L^2(\mu)$ projection operator on a finite-dimensional subspace $V_N$,
%     \item $u_N = \Pi_N 1 / \norm{\Pi_N 1}$.
%       Eq.~\eqref{eq:constraint} ensures that the system is \emph{well-conditioned}.
%   \end{itemize}
%
%   \vspace{.2cm}
%   For $V_N$, we use the following basis functions:
%   \[
%     e_{i,j} = {\left( Z \, \e^{\beta \left( H(q,p) + |z|^2 \right)} \right)}^{\frac{1}{2}} \, G_i(q) \, H_j(p), \qquad 0 \leq i,j \leq N,
%   \]
%   where $(G_i)_{i \geq 0}$ are \emph{trigonometric functions} and $(H_j)_{i \geq 0}$ are \emph{Hermite polynomials}.
%
%   $\rightarrow$ \alert{Impractical} in two or more spatial dimensions.
% \end{frame}
\begin{frame}
  {Estimation of the effective diffusion coefficient from Einstein's relation}
  Consider the following estimator of the effective diffusion coefficient $D_{\vect e}$:
  \[
    \emph{u(T) = \frac{\abs{\vect e^\t (\vect q_T - \vect q_0)}^2}{2T}},
    \qquad (\vect q_0, \vect p_0) \sim \mu.
  \]
  \textbf{Bias of this estimator:}
  \begin{align*}
    \notag
    \expect \bigl[u(T)\bigr]
    % &= \int_{0}^{\infty} \ip{\e^{t \mathcal L}(\vect e^\t \vect p)}{\vect e^\t \vect p} \d t
    % - \int_{0}^{\infty} \ip{\e^{t \mathcal L} (\vect e^\t \vect p)}{\vect e^\t \vect p} \min\left\{1, \frac{t}{T}\right\} \, \d t \\
    &= D_{\vect e} - \int_{0}^{\infty} \ip{\e^{t \mathcal L} (\vect e^\t \vect p)}{\vect e^\t \vect p} \min\left\{1, \frac{t}{T}\right\} \, \d t.
  \end{align*}
  Using the decay estimate for the semigroup\footfullcite{roussel2018spectral}
  \[
    \norm{\e^{t \mathcal L}}_{\mathcal B\left(L^2_0(\mu)\right)} \leq L \e^{- \ell \min\{\gamma, \gamma^{-1}\}t},
  \]
  we deduce
  \[
    \left\lvert \expect[u(T)] - D_{\vect e} \right\rvert \leq \frac{C \textcolor{red}{\max\{\gamma^2, \gamma^{-2}\}}}{T}.
  \]
\end{frame}
\begin{frame}
  {Variance of the estimator $u(T)$ for large $T$}
  For $T \gg 1$, it holds approximately that
  \[
    \frac{\vect e^\t (\vect q_T - \vect q_0)}{\sqrt{2T}} \sim \mathcal N(0, D_{\vect e})
    \qquad \leadsto \qquad
    \frac{u(T)}{D_{\vect e}} = \frac{\abs{\vect e^\t (\vect q_T - \vect q_0)}^2}{2 D_{\vect e} T} \sim \chi^2 (1).
  \]
  Therefore, we deduce
  \[
    \lim_{T \to \infty} \var \bigl[u(T)\bigr] = 2 D_{\vect e}^2.
  \]
  The relative standard deviation (asymptotically as $T \to \infty$) is therefore
  \[
    \lim_{T \to \infty} \frac{\sqrt{\var \bigl[u(T)\bigr]}}{\expect \bigl[u(T)\bigr]} = \sqrt{2}
    \qquad \leadsto \text{\emph{independent} of $\gamma$}.
  \]
  \begin{block}{Scaling of the mean square error when using $J$ realizations}
    Assuming an asymptotic scaling as $\gamma^{-\sigma}$ of $D_{\vect e}$, we have
    \[
      \forall \gamma \in (0, 1), \qquad
      \frac{\rm MSE}{D_{\vect e}^2} \leq \frac{C}{\gamma^{4-2 \sigma} T^2} + \frac{2}{J}.
    \]
  \end{block}
\end{frame}
% \subsection{Variance reduction using control variates}
\begin{frame}
  {Variance reduction using \textcolor{yellow}{control variates}}
  Let $\phi_{\vect e}$ denote the solution to the \emph{Poisson equation}
  \[
    - \mathcal L \phi_{\vect e}(\vect q, \vect p) = \vect e^\t \vect p, \qquad \phi_{\vect e} \in L^2_0(\mu),
  \]
  and let $\psi_{\vect e}$ denote an approximation of $\phi_{\vect e}$.
  By It\^o's formula, we obtain
  \[
    \phi_{\vect e}(\vect q_T, \vect p_T) - \phi_{\vect e}(\vect q_0, \vect p_0)
    = - \int_{0}^{T} \vect e^\t \vect p_t \, \d t + \sqrt{2 \gamma \beta^{-1}} \int_{0}^{T} \grad_{\vect p} \phi_{\vect e}(\vect q_t, \vect p_t) \cdot \d \vect W_t.
  \]
  Therefore
  \begin{align*}
    \vect e^\t (\vect q_T - \vect q_0)
    &= \int_{0}^{T} \vect e^\t \vect p_t \, \d t \\
    &\approx - \psi_{\vect e}(\vect q_T, \vect p_T) + \psi_{\vect e}(\vect q_0, \vect p_0)
    + \sqrt{2 \gamma \beta^{-1}} \int_{0}^{T} \grad_{\vect p} \psi_{\vect e}(\vect q_t, \vect p_t) \cdot \d \vect W_t
    =: \emph{\xi_T},
  \end{align*}
  which suggests the \emph{improved estimator}
  \[
    v(T) = \frac{\abs{\vect e^\t (\vect q_T - \vect q_0)}^2}{2T}
    - \left( \frac{\abs{\xi_T}^2}{2T} - \lim_{T\to \infty}\expect \left[ \frac{\abs{\xi_T}^2}{2T} \right] \right).
  \]
\end{frame}
\begin{frame}
  {Properties of the improved estimator}
  \textbf{Smaller bias} if $-\mathcal L \psi_{\vect e} \approx \vect e^\t \vect p$:
  \begin{align*}
    \label{eq:basic_bound_bias}
    \abs{\expect \bigl[ v(T) \bigr] - D^{\gamma}_{\vect e}}
    &\leq \frac{L \max\{\gamma^2, \gamma^{-2}\}}{T \ell^2 } \, \emph{\norm{\vect e^\t \vect p + \mathcal L \psi_{\vect e}}} \left(\beta^{-1/2} + \norm{\mathcal L \psi_{\vect e}} \right).
  \end{align*}
  \textbf{Smaller variance}:
  \begin{equation*}
    \begin{aligned}[b]
      \var \bigl[v(T)\bigr] \leq C
      &\left( T^{-1} \emph{\norm{\phi_{\vect e} - \psi_{\vect e}}[L^4(\mu)]}^2 + \gamma \emph{\norm{\grad_{\vect p} \phi_{\vect e} - \grad_{\vect p} \psi_{\vect e}}[L^4(\mu)]}^2 \right) \\
      &\quad \times \left( T^{-1} \norm{\phi_{\vect e} + \psi_{\vect e}}[L^4(\mu)]^2 + \gamma \norm{\grad_{\vect p} \phi_{\vect e} + \grad_{\vect p} \psi_{\vect e}}[L^4(\mu)]^2 \right).
    \end{aligned}
  \end{equation*}
  \textbf{Construction of $\psi_{\vect e}$ in the \alert{one-dimensional setting}}. We consider two approaches:
  \begin{itemize}
    \item Approximate the solution to the Poisson equation by a Galerkin method.
    \item Use the asymptotic result for the Poisson equation:
      \[
        \gamma \phi \xrightarrow[\gamma \to 0]{L^{2}(\mu)} \phi_{\rm und},
      \]
      which suggests letting $\psi = \phi_{\rm und} / \gamma$.
  \end{itemize}
\end{frame}
\begin{frame}
  {Construction of the approximate solution $\psi_{\vect e}$ \textcolor{yellow}{in dimension 2}}
  We consider the potential
  \[
    V(\vect q) = - \frac{1}{2} \Big( \cos(q_1) + \cos(q_2) \Big) - \alert{\delta} \cos(q_1) \cos(q_2).
  \]
  \begin{itemize}
    \item For this potential, $\mat D$ is isotropic $\leadsto$ sufficient to consider $\vect e = (1, 0)$,
      \[
        D_{(1,0)} = \ip{\phi_{(1, 0)}}{p_1}, \qquad - \mathcal L \phi_{(1,0)}(\vect q, \vect p) = p_1.
      \]
    \item If \emph{$\delta = 0$}, then the solution is $\phi_{(1, 0)}(\vect q, \vect p) = \phi_{\rm 1D} (q_1, p_1)$, where $\phi_{\rm 1D}$ solves
      \[
        - \mathcal L_{\rm 1D} \phi_{\rm 1D}(q, p) = p, \qquad V_{\rm 1D}(q) = - \frac{1}{2} \cos (q).
      \]
    \item We take $\emph{\psi_{(1,0)}(\vect q, \vect p) = \psi_{\rm 1D}(q_1, p_1)}$, where $\psi_{\rm 1D} \approx \phi_{\rm 1D}$.
  \end{itemize}
\end{frame}
\subsection{Numerical experiments}%
\begin{frame}
  {Numerical experiments for the one-dimensional case (1/2)}
  \begin{figure}[ht]
    \centering
    \includegraphics[width=0.99\linewidth]{figures/underdamped_1d.pdf}
  \end{figure}
\end{frame}
\begin{frame}
  {Numerical experiments for the one-dimensional case (2/2)}
  \begin{figure}[ht]
    \centering
    \includegraphics[width=0.99\linewidth]{figures/time.pdf}
    \caption{Evolution of the sample mean and standard deviation, estimated from $J = 5000$ realizations for $\gamma = 10^{-3}$.}
  \end{figure}
\end{frame}
\begin{frame}
  {Performance of the control variates approach in dimension 2}
  \begin{figure}[ht]
    \centering
    \includegraphics[width=0.49\linewidth]{figures/var-delta-galerkin.pdf}
    \includegraphics[width=0.49\linewidth]{figures/var-delta-underdamped.pdf}
    \label{fig:time_bias_deviation_2d}
  \end{figure}
  \begin{itemize}
    \item Variance reduction is possible if $\abs{\delta}/\gamma \ll 1$;
    \item Control variates are \alert{not very useful} when $\gamma \ll 1$ and $\delta$ is fixed.
  \end{itemize}
\end{frame}
\begin{frame}
  {Scaling of the mobility in dimension 2}
  \begin{figure}[ht]
    \centering
    \includegraphics[width=0.9\linewidth]{figures/diffusion.pdf}
    \label{fig:time_bias_variance_2d}
  \end{figure}
\end{frame}
\begin{frame}{Summary and perspectives for future work}
  In this talk, we presented
  \begin{itemize}
    \item a variance reduction approach for efficiently estimating the mobility;
    \item numerical results showing that the scaling of the mobility is \emph{not universal}.
  \end{itemize}
  \textbf{Perspectives for future work:}
  \begin{itemize}
    \item Use alternative methods (PINNs, Gaussian processes) to solve the Poisson equation;
    \item Improve and study variance reduction approaches for other transport coefficients.
  \end{itemize}
  \vspace{1cm}
  \begin{center}
    Thank you for your attention!
  \end{center}
\end{frame}
\section{Optimal importance sampling for overdamped Langevin dynamics}
% \begin{frame}
%   \begin{center}
%     \huge Part II: Optimal importance sampling for overdamped Langevin dynamics
%   \end{center}
% \end{frame}
\begin{frame}
  \begin{center}
    \Large \color{blue} Part II: Importance sampling for overdamped Langevin dynamics
  \end{center}
  \begin{figure}
    \centering
    \begin{minipage}[t]{.2\linewidth}
      \centering
      \raisebox{\dimexpr-\height+\ht\strutbox}{%
        \includegraphics[height=\linewidth]{figures/collaborators/martin.jpg}
      }
    \end{minipage}\hspace{.03\linewidth}%
    \begin{minipage}[t]{.21\linewidth}
      Martin Chak
      \vspace{0.2cm}
      \includegraphics[height=1.2cm,width=\linewidth,keepaspectratio]{figures/collaborators/sorbonne.png}
      \flushleft \scriptsize Sorbonne Université
    \end{minipage}\hspace{.1\linewidth}%%
    \begin{minipage}[t]{.2\linewidth}
      \centering
      \raisebox{\dimexpr-\height+\ht\strutbox}{%
        \includegraphics[height=\linewidth]{figures/collaborators/tony.jpg}
      }
    \end{minipage}\hspace{.03\linewidth}%
    \begin{minipage}[t]{.21\linewidth}
      Tony Lelièvre
      \vspace{0.2cm}
      \includegraphics[height=1.2cm,width=\linewidth,keepaspectratio]{figures/collaborators/enpc.png}
      \flushleft \scriptsize CERMICS \& Inria
    \end{minipage}\hspace{.1\linewidth}%%
    \vspace{.5cm}
    \begin{minipage}[t]{.2\linewidth}
      \centering
      \raisebox{\dimexpr-\height+\ht\strutbox}{%
        \includegraphics[height=\linewidth]{figures/collaborators/gabriel.jpg}
      }
    \end{minipage}\hspace{.01\linewidth}%
    \begin{minipage}[t]{.24\linewidth}
      Gabriel Stoltz
      \vspace{0.2cm}
      \includegraphics[height=1.2cm,width=\linewidth,keepaspectratio]{figures/collaborators/enpc.png}
      \flushleft \scriptsize CERMICS \& Inria
    \end{minipage}
  \end{figure}
\end{frame}
\subsection{Background and problem statement}
\begin{frame}
  {The sampling problem}
  \begin{block}
    {Objective of the sampling problem}
    Calculate averages with respect to
    \[
      \mu = \frac{\e^{-V}}{Z}, \qquad Z = \int_{\torus^d} \e^{-V}.
    \]
    \vspace{-.4cm}
  \end{block}
  \vspace{-.2cm}
  \textbf{Often in applications}:
  \begin{itemize}
    \item The dimension $d$ is large;
    \item The normalization constant $Z$ is unknown;
    \item We cannot generate i.i.d.\ samples from~$\mu$.
  \end{itemize}
  \textbf{Markov chain Monte Carlo (MCMC) approach}:
  \[
    I := \mu(f) \approx \mu^T (f) := \frac{1}{T} \int_{0}^{T} f(Y_t) \, \d t
  \]
  for a Markov process $(Y_t)_{t\geq 0}$ that is \emph{ergodic} with respect to~$\mu$.
  \textbf{Example}: \emph{overdamped Langevin} dynamics
  \[
    \d Y_t = -\nabla V(Y_t) \, \d t + \sqrt{2} \, \d W_t, \qquad Y_0 = y_0.
  \]
\end{frame}
\begin{frame}
  {Importance sampling in the MCMC context}
  If $(X_t)_{t \geq 0}$ is a Markov process ergodic with respect to
  \[
    \mu_{U} = \frac{\e^{-V - U}}{Z_U}, \qquad Z_U = \int_{\torus^d} \e^{-V-U},
  \]
  then $I = \mu(f)$ may be approximated by
  \begin{equation*}
    \label{eq:estimator}
    \mu^T_U(f) :=
    \frac {\displaystyle \frac{1}{T} \int_0^T (f \e^U)(X_t) \, \d t}
    {\displaystyle \frac{1}{T} \int_0^T(\e^U)(X_t) \, \d t}.
  \end{equation*}
  \textbf{Markov process}: \emph{overdamped Langevin} dynamics
  \[
    \d X_t = -\nabla (V+U)(X_t) \, \d t + \sqrt{2} \, \d W_t, \qquad X_0 = x_0.
  \]
  \textbf{Asymptotic variance}: Under appropriate conditions, it holds that
  \[
    \sqrt{T} \bigl( \mu^T_U(f) - I \bigr) \xrightarrow[T \to \infty]{\rm Law} \mathcal N\bigl(0, \sigma^2_f[U]\bigr).
  \]
  \begin{block}
    {Objective}
    Find $U$ such that the asymptotic variance $\sigma^2_f[U]$ is minimized.
  \end{block}
\end{frame}
\begin{frame}
  {Background: importance sampling in the i.i.d.\ setting (1/2)}
  Given i.i.d.\ samples $\{X^1, X^2, \dotsc\}$ from $\mu_U$, we define
  \[
    \mu_U^N(f) :=
    \displaystyle \frac {\sum_{n=1}^{N} (f \e^U)(X^{n})} {\sum_{n=1}^{N} (\e^U)(X^{n})}
    = I + \displaystyle \frac {\frac{1}{N} \sum_{n=1}^{N} \left((f-I) \e^U\right)(X^{n})} {\frac{1}{N} \sum_{n=1}^{N} (\e^U)(X^{n})}.
  \]
  \textbf{Numerator:} by the \emph{central limit theorem},
  \[
    \frac{1}{\sqrt{N}} \sum_{n=1}^{N} \left((f-I) \e^U\right) (X^{n})
    \xrightarrow[N \to \infty]{\rm Law} \mathcal N\left(0, \int_{\torus^d} \abs*{(f-I) \e^U}^2 \, \d \mu_{U}\right).
  \]
  \textbf{Denominator:} by the strong law of large numbers,
  \[
    \frac{1}{N} \sum_{n=1}^{N} \left(\e^U\right)\left(X^{n}\right) \xrightarrow[N \to \infty]{\rm a.s.} \frac{Z}{Z_U}.
  \]
  \textbf{Therefore}, by Slutsky's theorem,
  \[
    \sqrt{N} \bigl( \mu^N_U(f) - I\bigr) \xrightarrow[N \to \infty]{\rm Law} \mathcal N\bigl(0, s^2_f[U]\bigr),
    \qquad
    s^2_f[U] := \frac{2 Z_U^2}{Z^2} \int_{\torus^d} \bigl\lvert (f-I) \e^U \bigr\rvert^2 \, \d \mu_{U}.
  \]
\end{frame}
\begin{frame}
  {Background: importance sampling in the i.i.d.\ setting (2/2)}
  By the Cauchy--Schwarz inequality, it holds that
  \[
    s^2_f[U] \geq \frac{2Z_U^2}{Z^2} \left( \int_{\torus^d} \abs{f-I} \e^U \, \d \mu_{U} \right)^2,
  \]
  with equality when $\abs{f-I} \e^U$ is constant.
  \begin{block}
    {Optimal importance distribution}
    The \emph{optimal $\mu_U$} in the i.i.d.\ setting is
    \[
      \mu_{U} \propto \abs{f-I} \e^{-V}.
    \]
  \end{block}
  \textbf{Objectives}:
  \begin{itemize}
    \item Is there a counterpart of this formula in the \emph{MCMC setting}?
    \item If not, can we approximate the optimal distribution numerically?
  \end{itemize}
\end{frame}
\subsection{Minimizing the asymptotic variance for one observable}
\begin{frame}
  {Formula for the asymptotic variance}
  Let $\mathcal L_U$ denote the generator of the Markov semigroup associated with the modified potential:
  \[
    \mathcal L_U = - \nabla (V + U) \cdot \nabla + \Delta.
  \]
  \begin{block}
    {Limit theorem}
    Under appropriate conditions, it holds that
    \[
      \sqrt{T} \bigl( \mu^T_U(f) - I\bigr) \xrightarrow[T \to \infty]{\rm Law} \mathcal N\bigl(0, \sigma^2_f[U]\bigr).
    \]
    The \emph{asymptotic variance} is given by
    \[
      \sigma^2_f[U] = \frac{2Z_U^2}{Z^2}\int_{\torus^d} \phi_U (f-I) \, \e^U \, \d\mu_{U},
    \]
    where $\phi_U$ is the unique solution in~$H^1(\mu_{U}) \cap L^2_0(\mu_{U})$ to
    \[
      -\mathcal L_U \phi_{U} = (f- I) \e^U.
    \]
  \end{block}
  \textbf{Main ideas of the proof:} central limit theorem for martingales, Slutsky's theorem.
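  \vspace{.2cm}
  \textbf{Sanity check:} for $U = 0$ the formula reduces to the standard asymptotic variance of the unmodified dynamics,
  \[
    \sigma^2_f[0] = 2 \int_{\torus^d} \phi_0 \, (f - I) \, \d \mu, \qquad - \mathcal L_0 \phi_0 = f - I,
  \]
  with $\mathcal L_0 = - \nabla V \cdot \nabla + \Delta$.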
\end{frame}
\begin{frame}
  {Explicit optimal $U$ in dimension 1}
  In \emph{dimension one}, it holds that
  \begin{equation}
    \label{eq:lower_bound_asymvar}
    \sigma^2_f[U] \geq \frac{2}{Z^2} \inf_{A \in \real} \bigg(\int_{\torus} \bigl\lvert F(x) + A \bigr\rvert \d x \bigg)^2,
  \end{equation}
  where
  \[
    F(x) := \int_0^x \bigl( f(\xi)-I \bigr) \e^{-V(\xi)}\d \xi.
  \]
  This inequality~\eqref{eq:lower_bound_asymvar} is an equality for
  \[
    U(x) = U_*(x) = - V(x) -\ln\abs*{F(x) + A_*},
  \]
  where $A_*$ is the constant achieving the infimum in~\eqref{eq:lower_bound_asymvar}.
  \begin{itemize}
    \item The potential $U_*$ is generally \alert{singular}: impractical for numerics\dots
    \item The lower bound in~\eqref{eq:lower_bound_asymvar} can be approached by a smooth~$U$.
  \end{itemize}
\end{frame}
\begin{frame}
  {Example (1/2)}
  Assume that $V = 0$ and $f(x) = \cos(x)$.
  \begin{figure}[ht]
    \centering
    \includegraphics[width=0.8\linewidth]{figures/driftopt/1d_optimal_cosine.pdf}
    \label{fig:optimal_perturbation_potential}
  \end{figure}
  $\rightsquigarrow$ The optimal potential ``divides'' the domain into two parts.
\end{frame}
\begin{frame}
  {Example (2/2)}
  Assume that $V(x) = 5\cos(2 x)$ and~$f(x) = \sin(x)$.
  The target measure is \alert{multimodal}.
  \begin{figure}[ht]
    \centering
    \includegraphics[width=0.8\linewidth]{figures/driftopt/1d_optimal_metastable.pdf}
    \label{fig:optimal_perturbation_potential_1d_metastable}
  \end{figure}
  \emph{Variance reduction} by a factor $> 1000$!
\end{frame}
\begin{frame}
  {Finding the optimal $U$ in the multidimensional setting}
  \textbf{Recall} that in \emph{dimension one}, it holds that
  \begin{equation*}
    \sigma^2_f[U] \geq \frac{2}{Z^2} \inf_{A \in \real} \bigg(\int_{\torus} \bigl\lvert F(x) + A \bigr\rvert \d x \bigg)^2,
  \end{equation*}
  where
  \[
    F(x) := \int_0^x \bigl( f(\xi)-I \bigr) \e^{-V(\xi)}\d \xi.
  \]
  This inequality is an equality for
  \[
    U(x) = U_*(x) = - V(x) -\ln\abs*{F(x) + A_*},
  \]
  where $A_*$ is the constant achieving the infimum.
  \begin{itemize}
    \item The potential $U_*$ is generally \alert{singular}: impractical for numerics\dots
    \item The lower bound can be approached by a smooth~$U$.
  \end{itemize}
\end{frame}
\appendix
\begin{frame}[noframenumbering,plain]
  {Connection with the asymptotic variance of MCMC estimators}
  \textbf{Ergodic theorem\footfullcite{MR885138}}: for an observable $\varphi \in L^1(\mu)$,
  \[
    \widehat \varphi_t = \frac{1}{t} \int_{0}^{t} \varphi(\vect q_s, \vect p_s) \, \d s \xrightarrow[t \to \infty]{\rm a.s.} \expect_{\mu} \varphi.
  \]
  \textbf{Central limit theorem\footfullcite{MR663900}}:
  If the following \emph{Poisson equation} has a solution $\phi \in L^2(\mu)$,
  \[
    - \mathcal L \phi = \varphi - \expect_{\mu} \varphi,
  \]
  then a central limit theorem holds:
  \[
    \sqrt{t} \bigl(\widehat \varphi_t - \expect_{\mu}\varphi\bigr) \xrightarrow[t \to \infty]{\rm Law} \mathcal N(0, \sigma^2_{\varphi}),
    \qquad \sigma^2_{\varphi} = 2 \ip{\phi}{\varphi - \expect_{\mu} \varphi}.
  \]
  \textbf{Connection with effective diffusion}: Apply this result with $\varphi(\vect q, \vect p) = \vect e^\t \vect p$.
\end{frame}
\end{document}

% vim: ts=2 sw=2