%%
%% Original author:
%% Sean Bone
%% http://weblog.zumguy.com
%%
% Use extarticle for the smaller font size:
% https://tex.stackexchange.com/questions/5339/how-to-specify-font-size-less-than-10pt-or-more-than-12pt
\documentclass[a4paper, 8pt]{extarticle}
\usepackage{multicol}
\usepackage[margin=0.5cm]{geometry}
\usepackage{enumitem}
% Change list margins - https://tex.stackexchange.com/questions/10684/vertical-space-in-lists
\setlist{leftmargin=3mm, nosep}
% or \setlist{noitemsep} to leave space around whole list
\usepackage{amsmath}
\usepackage{mathtools}
\usepackage{amssymb}
\usepackage{graphicx}
\title{Dynamic Programming and Optimal Control HS18}
\author{Sean Bone \\ http://weblog.zumguy.com/}
% Expected value
\DeclareMathOperator*{\E}{\mathbb{E}}
% argmin
\DeclareMathOperator*{\argmin}{arg\,min}
% argmax
\DeclareMathOperator*{\argmax}{arg\,max}
\begin{document}
\begin{multicols*}{3}
\maketitle
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{General problem}
\subsubsection*{Dynamics}
$$x_{k+1} = f_k(x_k, u_k, w_k), \quad k = 0,...,N-1$$
\begin{itemize}
\item $k$: discrete time index, or stage;
\item $N$: given time horizon;
\item $x_k \in \mathcal{S}_k$: system state vector at time $k$;
\item $u_k \in \mathcal{U}_k$: control input vector at time $k$;
\item $w_k$: random disturbance vector at time $k$, conditionally independent of all prior $x_l, u_l, w_l, l < k$. The conditional probability distribution of $w_k$ is known given $x_k, u_k$;
\item $f_k(\cdot,\cdot,\cdot)$: function capturing system evolution at time $k$.
\end{itemize}
\subsubsection*{Cost function}
$$\underbrace{g_N(x_N)}_{\text{terminal cost}} + \underbrace{\sum_{k=0}^{N-1}\underbrace{g_k(x_k,u_k,w_k)}_\text{stage cost}}_\text{accumulated cost}$$
\subsection{Control strategies}
\subsubsection*{Open-loop}
Given an initial state $x_0$, find a \emph{fixed} sequence of control inputs $U = (u_0,...,u_{N-1})$ that minimizes the expected cost:
$$\E_{(X_1,W_0|x_0)} \left[ g_N(x_N) + \sum_{k=0}^{N-1}g_k(x_k,u_k,w_k) \right]$$
\subsubsection*{Closed-loop}
Define
\begin{align*}
u_k = &\mu_k(x_k), \quad u_k\in\mathcal{U}_k, k = 0, ..., N-1,\\
\pi \coloneqq &(\mu_0(\cdot), ..., \mu_{N-1}(\cdot)),
\end{align*}
\noindent where $\pi$ is called \emph{admissible policy}. Given an initial state $x_0$, the expected cost is now:
\begin{multline*}
J_\pi \coloneqq \E_{(X_1,W_0|x_0)} \\
\left[ g_N(x_N) + \sum_{k=0}^{N-1}g_k(x_k,\mu_k(x_k),w_k) \right]
\end{multline*}
Let $\Pi$ be the set of all admissible policies. We seek the \emph{optimal policy} $\pi^*$ with the \emph{optimal cost}:
$$J^* \coloneqq J_{\pi^*}(x) \leq J_\pi(x) \quad \forall \pi\in\Pi, \forall x\in\mathcal{S}_0.$$
\subsection{Discrete states}
If $x_k$ takes on discrete values or is finite, we usually express the dynamics in terms of \emph{transition probabilities}:
\begin{multline*}
P_{ij}(u,k) \coloneqq \text{Pr}(x_{k+1} = j | x_k=i,u_k=u)\\
= p_{x_{k+1} | x_k,u_k}(j|i,u),
\end{multline*}
where $p_{x_{k+1} | x_k,u_k}(\cdot|\cdot,\cdot)$ denotes the PDF of $x_{k+1}$ given $x_k$ and $u_k$. This is equivalent to the dynamics:
$$x_{k+1} = w_k$$
where $w_k$ has the following probability distribution:
$$p_{w_k | x_k,u_k}(j|i,u) = P_{ij}(u,k)$$
Conversely, given $x_{k+1} = f_k(x_k,u_k,w_k)$ and $p_{w_k | x_k,u_k}(\cdot|\cdot,\cdot)$, then
$$P_{ij}(u,k) = \sum_{\{\bar w_k | f_k(i,u,\bar w_k) = j\}} p_{w_k | x_k,u_k}(\bar w_k |i,u),$$
that is, $P_{ij}(u,k)$ is equal to the sum over the probabilities of all possible disturbances $\bar w_k$ that get us to state $j$ from state $i$ with control input $u$ at time $k$.
\subsection{DPA}
\subsubsection*{Principle of optimality}
If $\pi^* = (\mu_0^*(\cdot), ..., \mu_{N-1}^*(\cdot))$ is an optimal policy, then the truncated policy $\pi = (\mu_i(\cdot), ..., \mu_{N-1}(\cdot))$ minimizes the cost from stage $i$ to $N$.
\noindent This provides the intuition as to why the control inputs selected by the following algorithm constitute the optimal policy:
\begin{enumerate}
\item Initialization
$$J_N(x) = g_N(x), \quad \forall x \in \mathcal{S}_N$$
\item Recursion
\end{enumerate}
\begin{multline*}
J_k(x) \coloneqq \min_{u\in\mathcal{U}_k(x)} \E_{(w_k | x_k=x,u_k=u)} \\
\left[ g_k(x_k,u_k,w_k) + J_{k+1}(f_k(x_k,u_k,w_k)) \right],\\
\forall x \in \mathcal{S}_k, k=N-1,...,0.
\end{multline*}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Infinite Horizon Problems}
Consider the same problem as before, but with time-invariant state evolution and stage costs:
\begin{align*}
x_{k+1} &= f(x_k,u_k,w_k), \forall x_k\in \mathcal{S}, u_k\in \mathcal{U},\\
\text{cost } &= \sum_{k=0}^{N-1}g(x_k,u_k,w_k).
\end{align*}
Assuming the cost converges as $N \to \infty$, the problem simplifies to finding the optimal \emph{stationary} policy by means of the \emph{Bellman Equation} (BE):
\begin{multline*}
J(x) = \min_{u\in\mathcal{U}(x)} \E_{(w|x=x,u=u)}\\
\left[ g(x,u,w) + J(f(x,u,w))\right], \quad \forall x\in \mathcal{S}
\end{multline*}
\subsection{SSP}
The Stochastic Shortest Path problem is a class of problems for which solving the BE yields the optimal cost-to-go and stationary policy.
\subsubsection*{Dynamics}
Given a finite set $\mathcal{S}$ and $\mathcal{U}(x)$ for all $x \in \mathcal{S}$, consider the finite state, time-invariant system:
\begin{align*}
&x_{k+1} = w_k, \quad x_k \in \mathcal{S},\\
&\text{Pr}(w_k = j | x_k=i,u_k=u) = P_{ij}(u),\; u\in\mathcal{U}(i)
\end{align*}
\subsubsection*{Terminal state}
In order for the cost to be meaningful, there must be a \emph{terminal state}, designated as $0$, with:
$$P_{00}(u) = 1 \text{ and } g(0,u,0) = 0, \quad \forall u \in \mathcal{U}(0).$$
\noindent We assume there is at least one proper policy, and that improper policies will have infinite cost for at least one starting state. For a policy to be \emph{proper}, there must be at least one $m$ for which
$$\text{Pr}(x_m = 0 | x_0 = i) > 0, \quad \forall i \in \mathcal{S}.$$
\subsection{Discounted problems}
Consider an SSP with no explicit termination state and a cost discount factor $\alpha \in ]0,1[$:
$$\tilde J_{\tilde\pi}(i) = \E_{(\tilde X_1,\tilde W_0 | \tilde x_0=i)} \left[ \sum_{k=0}^{N-1} \alpha^k \tilde g(\tilde x_k,\tilde \mu_k(\tilde x_k),\tilde w_k) \right]$$
We can convert this problem to an SSP by adding a virtual termination state.
\noindent\textbf{State:} $x_k \in \mathcal{S} = \mathcal{\tilde S} \cup \{0\}$\\
\textbf{Control:} $\mathcal U(x_k) = \tilde{\mathcal U}(x_k),\, \mathcal U(0) = \{\texttt{stay}\}$\\
\textbf{Dynamics:} $x_{k+1} = w_k$ where $\forall u,\forall i,j$:
\begin{align*}
p_{w|x,u}(j|i,u) &= P_{ij}(u) = \alpha \tilde P_{ij}(u),\\
p_{w|x,u}(0|i,u) &= P_{i0}(u) = 1-\alpha,\\
p_{w|x,u}(j|0,\texttt{stay}) &= P_{0j}(\texttt{stay}) = 0,\\
p_{w|x,u}(0|0,\texttt{stay}) &= P_{00}(\texttt{stay}) = 1.
\end{align*}
\noindent\textbf{Cost:} $\forall u_k,\forall x_k,\forall w_k \neq 0$
\begin{align*}
g(x_k,u_k,w_k) &= \frac{1}{\alpha}\tilde g(x_k,u_k,w_k),\\
g(x_k,u_k,0) &= 0,\\
g(0,\texttt{stay},0) &= 0.
\end{align*}
From this we can derive the Bellman Equation for the original problem:
$$\mkern-36mu
J^*(i) = \min_{u\in\tilde{\mathcal{U}}(i)}\left( q(i,u) + \alpha\sum_{j=1}^{n} \tilde P_{ij}(u)J^*(j) \right),\, \forall i \in \tilde{\mathcal{S}}$$
$$q(i,u) = \sum_{j=1}^n\tilde P_{ij}(u)\tilde g(i, u, j)$$
\subsection{Solving the BE}
\subsubsection*{Value Iteration (VI)}
Given any initial conditions $V_0(\cdot)$, the following sequence converges to the optimal cost $J^*(\cdot)$ which uniquely solves the BE, and the corresponding $u$ for each $i$ constitute the optimal policy:
\begin{multline*}
V_{l+1}(i) = \min_{u\in\mathcal{U}(i)} \left[ q(i,u) + \sum_{j=1}^n P_{ij}(u)V_l(j) \right] \\
\forall i \in \mathcal{S}^+ = \mathcal{S} \setminus \{0\}
\end{multline*}
$$q(i,u) \coloneqq \E_{(w|x=i,u=u)} [g(i,u,w)]$$
\subsubsection*{Policy Iteration (PI)}
\textbf{0. Initialize} with a proper policy.\\
\textbf{1. Policy evaluation:} $\forall i \in \mathcal{S}^+$,
$$J_{\mu^h}(i) = q(i,\mu^h(i)) + \sum_{j=1}^nP_{ij}(\mu^h(i))J_{\mu^h}(j)$$
\noindent \textbf{2. Policy improvement:} $\forall i \in \mathcal{S}^+$,
$$\mu^{h+1}(i) = \argmin_{u \in\mathcal{U}(i)}
\left( q(i,u) + \sum_{j=1}^n P_{ij}(u)J_{\mu^h}(j) \right)$$
\noindent Repeat 1 and 2 until $J_{\mu^{h+1}}(i) = J_{\mu^h}(i) \; \forall i$
\subsubsection*{Linear Programming (LP)}
The solution to the following optimization program solves the BE:
\begin{align*}
\text{maximize}& \; \sum_{i\in\mathcal{S}^+} V(i) \\
\text{subject to}& \; V(i) \leq q(i,u) + \sum_{j=1}^n P_{ij}(u)V(j),\\
&\; \forall u\in\mathcal{U}(i), \, \forall i\in\mathcal{S}^+
\end{align*}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Shortest paths}
\subsubsection*{SP problem}
Consider a graph with vertices $\mathcal V$ and weighted edges $\mathcal C$. $\mathbb{Q}_{s,t}$ is the set of possible $s,t$ paths, and a path $Q$ has cost $J_Q$. We seek the path with lowest cost $Q^* = \argmin_{Q\in\mathbb Q_{s,t}} J_Q$. For this problem to have a solution the graph may not contain negative cycles: $\forall i\in\mathcal V, \forall Q\in\mathbb Q_{i,i}: J_Q \geq 0$.
\subsubsection*{DFS problem}
A Deterministic Finite State (DFS) problem is like the general case, but without $w_k$ and where each $\mathcal{S}_k$ is finite. A closed-loop approach offers no advantage in a deterministic problem, but we can still solve it with the DPA.
\subsubsection*{DFS to SP}
A DFS problem can be converted to an SP problem by creating a ``layered'' graph with layers $k = 0,...,N+1$. The first layer only contains the node $(0,x_0)$ and is the starting position. Layers $k = 1,...,N$ contain nodes $(k,x_k),\, x_k\in\mathcal S_k$. Connections are between consecutive layers $k\rightarrow k+1$ and have weights
$$c = \min_{u\in\mathcal U_k(x_k) | x_{k+1}=f_k(x_k,u)} g_k(x_k,u)$$
The final layer contains just a virtual termination state $t$, and connections $N\rightarrow N+1$ are weighted with terminal costs $g_N(x_N)$.
\subsubsection*{SP to DFS}
Since there are no negative cycles, an optimal $s,t$ path will have at most $|\mathcal V|$ elements. We set $c_{i,i} = 0$ and formulate the problem as an $N = |\mathcal V|-1$ stage DFS:
\begin{itemize}
\item $\mathcal S_0 = \{s\},\, \mathcal S_k = \mathcal V \setminus \{t\},\, \mathcal S_N = \{t\}$
\item $\mathcal U_k = \mathcal V\setminus \{t\},\, \mathcal U_{N-1} = \{t\}$
\item $x_{k+1} = u_k,\, u_k\in \mathcal U_k,\, k=0,...,N-1$
\item $g_k(x_k,u_k) = c_{x_k,u_k}, \, g_N(t) = 0$
\end{itemize}
\subsection{LCA}
The SP problem can be solved more efficiently with the Label Correcting Algorithm for a single starting node.
\begin{enumerate}
\setcounter{enumi}{-1}
\item Place node $s$ in \texttt{OPEN}, set $d_s = 0,\, d_j=\infty \, \forall j \in \mathcal V \setminus \{s\}$.
\item Remove node $i$ from \texttt{OPEN} and run step 2 for every child $j$ of $i$.
\item If $d_i + c_{i,j} < d_j$ and $d_i + c_{i,j} < d_t$ set $d_j = d_i + c_{i,j}$ and set $i$ as the parent of $j$. If $j \neq t$, put $j$ in \texttt{OPEN}.
\item Repeat from step 1 while $\texttt{OPEN} \neq \emptyset$.
\end{enumerate}
\subsubsection*{Traversal order}
\begin{itemize}
\item Depth-first: always remove the newest element of \texttt{OPEN};
\item Breadth-first: always remove the oldest element of \texttt{OPEN};
\item Best-first: always remove the element with lowest cost $d_i$.
\end{itemize}
\subsection{A* algorithm}
Perform LCA, and at each step 2, formulate some lower bound $h_j \geq 0$ of the $j,t$ distance. Then change the condition $d_i + c_{i,j} < d_t$ to $d_i + c_{i,j} + h_j < d_t$.
\subsection{HMMs and Viterbi algorithm}
Consider a \emph{Markov chain}: $\mathcal S = \{1, ..., n\}$ finite and $p_{w_k|x_k}$ given,
\begin{align*}
x_{k+1} = w_k,\quad x_k \in \mathcal S,\\
P_{ij} \coloneqq p_{w|x}(j|i), \quad \forall i,j\in \mathcal S.
\end{align*}
When a state transition occurs, we obtain measurements
$$M_{ij}(z) \coloneqq p_{z|x,w}(z|i,j),\quad z \in \mathcal Z.$$
Note that, given $x_k$ and $x_{k-1}$, $z_k$ is independent of all prior variables.
Defining $X_i = (x_i, ..., x_N)$ and $Z_i = (z_i, ..., z_N)$, we wish to find:
\begin{align*}
\hat X_0 = \argmax_{X_0}p(X_0|Z_1)\\
p(X_0, Z_1) = ... = p(x_0) \prod_{k=1}^N P_{x_{k-1}x_k}M_{x_{k-1}x_k}(z_k)
\end{align*}
This problem is solved by the SP problem:
$$\min_{X_0} \left( c_{(\texttt{s},x_0)} + \sum_{k = 1}^N c_{(k-1,x_{k-1}),(k,x_k)} \right)$$
where
\begin{align*}
c_{(\texttt{s},x_0)} &= \left\{ \begin{matrix}
-\ln(p(x_0)) & \text{if } p(x_0) > 0 \\
\infty & \text{if } p(x_0) = 0
\end{matrix} \right. \\
c_{(k-1,x_{k-1}),(k,x_k)} &=
\left\{ \begin{matrix}
-\ln(P_{x_{k-1}x_k}M_{x_{k-1}x_k}(z_k)) \\
\text{if } P_{x_{k-1}x_k}M_{x_{k-1}x_k}(z_k) > 0 \\
\infty \quad \text{otherwise}
\end{matrix} \right.
\end{align*}
This is a ``layered'' graph where each node represents a state $x$ at time $k$. An artificial terminal node is added, to which all nodes in layer $k = N$ connect at zero cost.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Deterministic continuous time}
$$\dot x(t) = f(x(t), u(t)), \quad 0 \leq t \leq T$$
$$u(t) = \mu(\text x, t), \quad u(t)\in\mathcal U, \forall t \in [0,T], \forall \text x\in\mathcal S$$
$$J_{\mu}(t,\text x) = h(x(T)) + \int_t^T g(x(\tau), u(\tau)) \text d\tau$$
\textbf{Assumption:} for any admissible control law $\mu$, initial time $t$ and initial condition $x(t)\in\mathcal S$, there exists a unique state trajectory $x(\tau)$ that satisfies
$$\dot x(\tau) = f(x(\tau), u(\tau)), \quad t\leq\tau\leq T$$
\subsection{HJB Equation}
The Hamilton-Jacobi-Bellman equation is a sufficient but not necessary condition for optimality. If $V(t,x)$ is a solution to
\begin{multline*}
\min_{u\in\mathcal U} \left[ g(x,u) + \frac{\partial V}{\partial t} + \frac{\partial V}{\partial x}f(x,u)\right] = 0 \\
\forall x\in\mathcal S, 0\leq t\leq T \\
\text{s.t. } V(T,x) = h(x) \quad \forall x\in\mathcal S
\end{multline*}
then it is the optimal cost-to-go function, and the minimizing $\mu(t,x)$ is an optimal feedback law.
\subsection{Minimum principle}
\textbf{Assumption:} $f,g,h \in C^1$ in $\text x$
\noindent \textbf{Pontryagin's minimum principle:} for a given i.c. $x(0) = \text x \in \mathcal{S}$, let $u(t)$ be an optimal control trajectory with associated state trajectory $x(t)$. Then there exists a trajectory $p(t)$ such that with $H(\text{x, u, p}) \coloneqq g(\text{x, u}) + \text p^\top f(\text{x, u})$:
\vspace{-1em}
\begin{multline*}
\dot p(t) = \left. -\frac{\partial H}{\partial x} \right|_{x(t),u(t),p(t)}^\top , \, p(T) = \left. \frac{\partial h(\text x)}{\partial x}\right|_{x(T)}^\top \\
u(t) = \argmin_{\text u\in\mathcal U} H(x(t),\text u, p(t)) \\
H(x(t),u(t),p(t)) = \text{const.} \, \forall t\in[0,T]
\end{multline*}
\textbf{Fixed terminal state:} Replace $p(T) = \left. \frac{\partial h(\text x)}{\partial x}\right|_{x(T)}^\top$ with $x(T) = x_T$.
\textbf{Free initial state:} instead of $x(0) = x_0$, a cost $l(x(0))$ is given. Add the condition: $p(0) = \left. \frac{\partial l}{\partial x} \right|_{x(0)}^\top$.
\textbf{Free terminal time:} we get $H(x(t),u(t),p(t)) = 0 \, \forall t\in[0,T]$
\textbf{Time-varying system and cost:} if $f$ and/or $g$ depend on $t$, we lose that $H = \text{const.}$
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Non-standard problems}
Some problems are not in the general (discrete-time) form, but can be reformulated as such.
\subsection{Time lags}
If the dynamics have a similar form: $x_{k+1} = f_k(x_k, x_{k-1}, u_k, u_{k-1}, w_k)$ we can construct a state vector $\tilde x_k = (x_k, y_k, s_k)$ and modify the dynamics:
$$\tilde x_{k+1} = \tilde f_k(\tilde x_k, u_k, w_k) \coloneqq \left[\begin{matrix}
f_k(x_k, y_k, u_k, s_k, w_k)\\
x_k\\
u_k
\end{matrix}\right]$$
\subsection{Correlated Disturbances}
Disturbances $w_k$ correlated over time can sometimes be modeled as
$$w_k = C_ky_{k+1}, \quad y_{k+1} = A_ky_k + \xi_k$$
Where $A_k$ and $C_k$ are given and $\xi_k, \, k = 0,...,N-1$ are independent random variables. Then we can augment the state as $\tilde x_k \coloneqq (x_k, y_k)$ and update the dynamics:
$$\tilde x_{k+1} = \tilde f_k(\tilde x_k, u_k, \xi_k) \coloneqq \begin{bmatrix}
f_k(x_k, u_k, C_k(A_ky_k + \xi_k))\\
A_ky_k + \xi_k
\end{bmatrix}$$
\subsection{Forecasts}
$w_k$ is independent of $x_k$ and $u_k$, and we get a forecast $y_k$ that $w_k$ will attain a distribution from a given family $\{p_{w_k|y_k}(\cdot | 1),...p_{w_k|y_k}(\cdot | m)\}$. If $y_k = i$, then $w_k \sim p_{w_k|y_k}(\cdot | i)$. The forecast itself has a given \emph{a priori} distribution:
$$y_{k+1} = \xi_k$$
where $\xi_k \sim p_{\xi_k}(i)$ are independent random variables.
Augmented state vector: $\tilde x_k \coloneqq (x_k, y_k)$, disturbance $\tilde w_k \coloneqq (w_k, \xi_k)$ with distribution
$$p(\tilde w_k|\tilde x_k, u_k) = p(w_k|y_k)p(\xi_k)$$
Dynamics:
$$\tilde x_{k+1} = \tilde f_k(\tilde x_k, u_k, \tilde w_k) \coloneqq \begin{bmatrix}
f_k(x_k, u_k, w_k)\\
\xi_k
\end{bmatrix}$$
The DPA then becomes:
\begin{align*}
J_N(\tilde{\text x}) &= J_N(\text x, \text y) = g_N(\text x), \quad \text x\in\mathcal{S}_N, \text y \in \{1, ..., m\}\\
J_k(\tilde{\text x}) &= J_k(\text x, \text y) = \min_{\text u\in\mathcal{U}_k(x_k)} \E_{(w_k | y_k = \text y)} \\
&\left[ g_k(\text x, \text u, w_k) + \sum_{i=1}^m p_{\xi_k}(i) J_{k+1}(f_k(\text x, \text u, w_k), i) \right] \\
&\forall \text x \in \mathcal S_k, \, \forall \text y \in \{1, ..., m\}, \, \forall k = N-1, ..., 0.
\end{align*}
\end{multicols*}
\end{document}