\documentclass[10pt]{article}
\usepackage{amsfonts,amsthm,amsmath,amssymb}
\usepackage{array}
\usepackage{graphicx} % epsfig is obsolete; graphicx is its modern replacement
\usepackage{fullpage}
\usepackage{color}
\usepackage[colorlinks=true,citecolor=blue]{hyperref}
\usepackage{tikz}
\usepackage{subfig}
%%%%%%%%%%%%%% VINOD's DEFS %%%%%%%%%%%%%%%%%%%%
\newcommand{\eqdef}{\stackrel{\mathsf{def}}{=}}
\newcommand{\lat}{\mathcal{L}}
\newcommand{\Span}{\mathsf{Span}}
\newcommand{\ppd}{\mathcal{P}}
\newcommand{\Z}{\mathbb{Z}}
\newcommand{\Q}{\mathbb{Q}}
\newcommand{\R}{\mathbb{R}}
\newcommand{\N}{\mathbb{N}}
\newcommand{\matB}{\mathbf{B}}
\newcommand{\matBt}{\tilde{\matB}}
\newcommand{\matU}{\mathbf{U}}
\newcommand{\matV}{\mathbf{V}}
\newcommand{\vecbt}{\tilde{\vecb}}
% Inner product
\newcommand{\inner}[1]{\langle {#1} \rangle}
% Red text
\newcommand{\red}[1]{\textcolor{red}{#1}}
\begin{document}
\input{preamble.tex}
\lecture{1}{September 13, 2011}{Vinod Vaikuntanathan}{Vinod Vaikuntanathan}
%%%% body goes in here %%%%
Lattices are amazing mathematical objects with applications all over mathematics and theoretical computer science. Examples include
\begin{itemize}
\item \textbf{Sphere Packing:} A classical problem called the ``Sphere Packing Problem'' asks for a way to pack the largest number of spheres of equal volume in $3$-dimensional space (in an asymptotic sense, as the volume of the available space goes to infinity). The so-called {\em Kepler's Conjecture}, recently turned into a theorem by Hales, states that the face-centered cubic lattice offers the optimal packing of spheres in $3$ dimensions.
The optimal sphere packing in $2$ and $3$ dimensions are lattice packings -- could this be the case in higher dimensions as well? This remains a mystery.
\item \textbf{Error Correcting Codes:} Generalizing to $n$ dimensions, the sphere packing problem and friends have applications to constructing {\em error-correcting codes} with the optimal rate.
\item \textbf{Number Theory:} In mathematics, the study of lattices is called the ``Geometry of Numbers'', a term coined by Hermann Minkowski. Minkowski's Theorem and subsequent developments have had an enormous impact on Number Theory, Functional Analysis and Convex Geometry. Lattices have been used to test various number theoretic conjectures, the most famous being a disproof of Merten's Conjecture by Odlyzko and te Riele in 1985.
\end{itemize}
\medskip \noindent
Lattices have also been quite influential in Theoretical Computer Science:
\begin{itemize}
\item In {\bf Algorithms:} The famed Lenstra-Lenstra-Lov\'{a}sz algorithm for the shortest vector problem has generated a treasure-trove of algorithmic applications. Lattices have been used to construct an Integer Linear Programming algorithm in constant dimensions, in factoring polynomials over the rationals, and algorithms to find small solutions to systems of polynomial equations.
\item In {\bf Complexity Theory:} Lattices provide one of the most striking sources of problems with a worst-case to average-case connection. NP-hard problems are widely believed to be hard in the worst case, but are they hard on typical or average instances? For many problems and many average-case distributions, we know that this is not the case. In contrast, for the (approximate) shortest vector problem, we can show that finding a solution in a ``random lattice'' chosen from a certain easily sampleable distribution is as hard as finding a solution in the worst case, namely for arbitrary lattices.
\item In {\bf Cryptography:} The first applications of lattices in Cryptography have been in breaking cryptosystems, for example, variants of the knapsack cryptosystem, the NTRU cryptosystem and special cases of the RSA function. More recently, however, lattices have been used quite successfully in constructing secure cryptographic algorithms that achieve highly expressive functionalities such as fully homomorphic encryption.
\end{itemize}
In this course, we will study lattices from the point of view of theoretical computer science, first the mathematics of lattices, then the algorithms and complexity theory and finally lattice-based cryptography.
\paragraph{Notation.} We will denote the natural numbers by $\N$, integers by $\Z$, rationals by $\Q$ and the reals by $\R$.
\section{Lattices}
\begin{definition}[Lattices]\label{def:lattices}
Given $n$ {\em linearly independent} vectors $\vecb_1,\ldots,\vecb_n \in \mathbb{R}^m$, the lattice generated by them is defined as
\[ \lat(\vecb_1,\ldots,\vecb_n) \eqdef \bigg\{ \sum_{i=1}^n x_i \vecb_i \ | \ x_i \in \Z \bigg\} \]
\end{definition}
We call $\vecb_1,\ldots,\vecb_n$ a {\em basis} of the lattice.
Note that the definition requires $\vecb_1,\ldots,\vecb_n$ to be linearly independent over $\R$ (and not over $\Z$).
We call $n$ the {\em rank} of the lattice, and $m$ the {\em dimension} of the lattice. In general, $n \leq m$. When $n=m$, we call the lattice a {\em full-rank} lattice. Throughout this course, we will focus on full-rank lattices -- most results we prove can be generalized to the non full-rank case.
We will use a notational short-hand when dealing with bases, denoting them by a matrix $\matB$ whose columns are the basis vectors $\vecb_1,\ldots,\vecb_n$. That is, we will write
\[ \matB = \left( \begin{array}{ccc} \vert & & \vert \\
\vecb_1 & \ldots & \vecb_n \\
\vert & & \vert \end{array} \right)
\]
and thus, in this notation,
\[ \lat(\matB) \eqdef \{\matB \vecx \ |\ \vecx\in\Z^n\}\]
In general, we treat all vectors as column vectors unless otherwise specified. For a matrix $\matB$ (resp. row vector $\vecv$), $\matB^T$ (resp. $\vecv^T$) denotes the transpose of $\matB$ (resp. $\vecv$).
\begin{center}
\begin{figure}
%%%% Figure 1
\subfloat[The lattice $\Z^2$ with basis vectors $(0,1)$ and $(1,0)$.]{
\begin{tikzpicture}[scale=3]
\clip (-0.9,-1.2) rectangle (1.75,1.55);
\draw[step=.4cm,gray,very thin] (-0.9,-1.1) grid (1.4,1.4);
\draw[->] (-1.4,0) -- (1.4,0);
\draw[->] (0,-1.1) -- (0,1.5);
% (0:30:3mm) -- cycle;  % stray path fragment, not a valid standalone TikZ command
\draw[->,red,very thick] (30:1cm) (0,0) -- ++(0,0.4) node[anchor=west] {$b_2$};
\draw[->,blue,very thick] (30:1cm) (0,0) -- ++(0.4,0) node[anchor=south] {$b_1$};
\foreach \x in {-0.8,-0.4,0,0.4,0.8,1.2}
\foreach \y in {-0.8,-0.4,0,0.4,0.8,1.2}
\filldraw (\x,\y) circle (0.5pt);
\end{tikzpicture}
}
%%%% Figure 2
\subfloat[The lattice $\Z^2$ with a different basis consisting of vectors $(1,2)$ and $(2,3)$. In fact, any lattice has infinitely many bases.]{
\begin{tikzpicture}[scale=3]
\clip (-1.0,-1.2) rectangle (1.6,1.55);
\draw[step=.4cm,gray,very thin] (-0.9,-1.1) grid (1.4,1.4);
\draw[->] (-1.0,0) -- (1.4,0);
\draw[->] (0,-1.1) -- (0,1.5);
% (0:30:3mm) -- cycle;  % stray path fragment, not a valid standalone TikZ command
\draw[->,blue,very thick] (30:1cm) (0,0) -- ++(0.8,1.2) node[anchor=west] {$b_1$};
\draw[->,red,very thick] (30:1cm) (0,0) -- ++(0.4,0.8) node[anchor=south] {$b_2$};
\foreach \x in {-0.8,-0.4,0,0.4,0.8,1.2}
\foreach \y in {-0.8,-0.4,0,0.4,0.8,1.2}
\filldraw (\x,\y) circle (0.5pt);
\end{tikzpicture}
}
\newline
%%%% Figure 3
\subfloat[A full-rank lattice generated by the basis vectors $(1,1)$ and $(2,0)$. Note that this is a sub-lattice of $\Z^2$.]{
\begin{tikzpicture}[scale=3]
\clip (-0.9,-1.2) rectangle (1.75,1.55);
\draw[step=.4cm,gray,very thin] (-0.9,-1.1) grid (1.4,1.4);
\draw[->] (-1.4,0) -- (1.4,0);
\draw[->] (0,-1.1) -- (0,1.5);
% (0:30:3mm) -- cycle;  % stray path fragment, not a valid standalone TikZ command
\draw[->,blue,very thick] (30:1cm) (0,0) -- ++(0.8,0) node[anchor=west] {$b_1$};
\draw[->,red,very thick] (30:1cm) (0,0) -- ++(0.4,0.4) node[anchor=south] {$b_2$};
\foreach \x in {-0.8,0,0.8}
\foreach \y in {-0.8,0,0.8}
\filldraw (\x,\y) circle (0.5pt);
\foreach \x in {-0.4,0.4,1.2}
\foreach \y in {-0.4,0.4,1.2}
\filldraw (\x,\y) circle (0.5pt);
\end{tikzpicture}
}
%%%% Figure 4
\subfloat[A {\em non full-rank} lattice with basis vector $(1,1)$]{
\begin{tikzpicture}[scale=3]
\clip (-1.0,-1.2) rectangle (1.6,1.55);
\draw[step=.4cm,gray,very thin] (-0.9,-1.1) grid (1.4,1.4);
\draw[->] (-1.4,0) -- (1.4,0);
\draw[->] (0,-1.1) -- (0,1.5);
% (0:30:3mm) -- cycle;  % stray path fragment, not a valid standalone TikZ command
\draw[->,blue,very thick] (30:1cm) (0,0) -- ++(0.4,0.4) node[anchor=west] {$b_1$};
%\draw[->,red,very thick] (30:1cm) (0,0) -- ++(0.4,0.8) node[anchor=south] {$b_2$};
\foreach \x in {-0.8,-0.4,0,0.4,0.8,1.2}
\filldraw (\x,\x) circle (0.5pt);
\end{tikzpicture}
}
\caption{Various lattices and their bases.}
\label{fig:lat} % label must follow \caption (inside the figure), else \ref resolves to the section
\end{figure}
\end{center}
\paragraph{Examples of Lattices.}
\begin{enumerate}
\item Figure~\ref{fig:lat}\red{(a)} shows the lattice in $2$ dimensions generated by the vectors $(1,0)^T$ and $(0,1)^T$. This lattice is the set of all points in $\R^2$ with integer coordinates.
This can be generalized to $n$ dimensions, where the lattice $\Z^n$ is called the {\em integer lattice}.
\item Figure~\ref{fig:lat}\red{(b)} shows a different basis for the same lattice, namely the basis consisting of the vectors $(1,2)^T$ and $(2,3)^T$.
\item Figure~\ref{fig:lat}\red{(c)} shows a different lattice in $2$ dimensions, generated by the basis vectors $(2,0)^T$ and $(1,1)^T$. Note that this is a sub-lattice of $\Z^2$, namely a subset of $\Z^2$ which is also a lattice. (We will formally define sublattices later in the course).
\item In one dimension, all lattices are multiples of a single number. For example, the lattice generated by $(2)$ is the set of all even numbers.
\item All the examples we saw so far are full-rank lattices. Figure~\ref{fig:lat}\red{(d)} shows a lattice in $2$ dimensions generated by the vector $(1,1)^T$ -- this lattice has rank $1$. We will not deal with non full-rank lattices in this course.
\item The set of points generated by $(1)$ and $(\sqrt{2})$ in one dimension is not a lattice. First, this example does not conform to Definition~\ref{def:lattices} since $1$ and $\sqrt{2}$ are {\em linearly dependent} over $\R$. Secondly, any $n$-dimensional lattice is a discrete subset of $\R^n$ (see Lecture $2$ for why this is the case). However, the set generated by $(1)$ and $(\sqrt{2})$ is not a discrete subset of $\R$ since one can generate arbitrarily small numbers as integer linear combinations of $1$ and $\sqrt{2}$.
\end{enumerate}
It is instructive to compare the definition of a lattice generated by $n$ linearly independent vectors $\vecb_1,\ldots,\vecb_n$ to the definition of the span of these vectors.
\begin{definition}[Span]
Given $n$ {\em linearly independent} vectors $\vecb_1,\ldots,\vecb_n \in \mathbb{R}^m$, their {\em span} is defined as
\[ \Span(\vecb_1,\ldots,\vecb_n) \eqdef \bigg\{ \sum_{i=1}^n x_i \vecb_i \ | \ x_i \in \R \bigg\} \]
\end{definition}
Note the difference between Definition~\ref{def:lattices} of a lattice generated by a set of vectors -- which consists of all of its {\em integer} linear combinations -- and the above definition of the span of a set of vectors -- which consists of all of its linear combinations with {\em real} coefficients. The crucial power of lattices comes from the fact that it is a discrete set (which the span is not).
Clearly, $\Span(\vecb_1,\ldots,\vecb_n) \supset \lat(\vecb_1,\ldots,\vecb_n)$.
\section{Same Lattice, Many Bases}
We already saw from the examples above (Figure~\ref{fig:lat}\red{(a)} and Figure~\ref{fig:lat}\red{(b)}) that the same lattice can have many different bases. For example, it turns out that all the bases given below generate the same lattice, namely $\Z^2$:
\[ \matB_1 = \left( \begin{array}{cc} 1 & 0 \\ 0 & 1 \end{array} \right)
\hspace{0.2in} \mbox{and} \hspace{0.2in}
\matB_2 = \left( \begin{array}{cc} 2 & 1 \\ 1 & 1 \end{array} \right)
\hspace{0.2in} \mbox{and} \hspace{0.2in}
\matB_3 = \left( \begin{array}{cc} 647 & 64 \\ 91 & 9 \end{array} \right)
\]
but the following basis {\em does not} generate $\Z^2$, but only a proper {\em sub-lattice} of $\Z^2$.
\[
\matB_4 = \left( \begin{array}{cc} 42 & 41 \\ 9 & 8 \end{array} \right)
\]
In fact, any lattice has infinitely many bases. In particular, the bases can have arbitrarily large coefficients.
\medskip \noindent
A natural question to ask is: {\em how can we efficiently tell if two given bases $\matB$ and $\matB'$ generate the same lattice?} We will give two answers to this question -- an algebraic answer and a geometric answer.
\subsection{An Algebraic Characterization using Unimodular Matrices}
Our first characterization provides an efficient algorithm to determine if two bases generate the same lattice. In order to present the characterization, we first need to define the notion of a {\em unimodular matrix}.
\paragraph{Notation.} For any $x \in \R$, we will let $|x|$ represent the absolute value of $x$.
\begin{definition}\label{def:unimodular}
A matrix $\matU \in \Z^{n\times n}$ is unimodular if $|\det(\matU)| = 1$.
\end{definition}
\noindent
Here, $\det(\matU)$ denotes the determinant of the (square) matrix $\matU$, and $|\cdot|$ denotes the absolute value.
For example, the matrix $\bigl(\begin{smallmatrix} 2&1\\ 1&1 \end{smallmatrix} \bigr)$ is unimodular, and so is $\bigl(\begin{smallmatrix} 647&64\\ 91&9 \end{smallmatrix} \bigr)$, but not $\bigl(\begin{smallmatrix} 42&41\\ 9&8 \end{smallmatrix} \bigr)$.
\begin{proposition}\label{fact:unimodular}
If $\matU$ is unimodular, so is $\matU^{-1}$.
\end{proposition}
\begin{proof}
This follows from the way inverses are computed. In particular,
\begin{itemize}
\item Each entry in $\matU^{-1}$ is the ratio of the determinant of a {\em minor} of $\matU$ to the determinant of $\matU$ itself. Since the determinant of any minor of $\matU$ is an integer, and the determinant of $\matU$ is $\pm 1$, each entry of $\matU^{-1}$ is an integer. Thus, $\matU^{-1} \in \Z^{n\times n}$.
\item $\det(\matU^{-1}) = 1/\det(\matU) = \pm 1$. Thus, $|\det(\matU^{-1})| = 1$.
\end{itemize}
Together, these two observations mean that $\matU^{-1}$ is unimodular.
\end{proof}
We can now state the characterization of equivalent bases.
\begin{theorem}\label{thm:unimodular}
Given two full-rank bases $\matB \in \R^{n\times n}$ and $\matB' \in \R^{n\times n}$,
the following two conditions are equivalent:
\begin{itemize}
\item $\lat(\matB) = \lat(\matB')$
\item There exists a unimodular matrix $\matU$ such that $\matB' = \matB \matU$.
\end{itemize}
\end{theorem}
\begin{proof}
(``$\Rightarrow$'') First, assume that $\lat(\matB) = \lat(\matB')$. Then,
there are integer matrices $\matV$ and $\matV'$ such that
\[ \matB' = \matB\matV \hspace{0.3in} \mbox{and} \hspace{0.3in} \matB = \matB'\matV' \]
It suffices to show that $|\det(\matV)| = |\det(\matV')| = 1$.
Putting these two equations together, we have $\matB' = \matB \matV = \matB' (\matV'\matV)$.
Since $\matB'$ is non-singular (remember: $\matB$ is a full-rank matrix, and so is $\matB'$) we can
multiply both sides of the equation by $(\matB')^{-1}$ and we get
\begin{eqnarray} \matV' \matV = \mathbf{1}_{n} \end{eqnarray}
where $\mathbf{1}_n$ denotes the $n$-by-$n$ identity matrix.
Since determinant is multiplicative, we get $\det(\matV')\det(\matV) = 1$. Since $\matV$ and
$\matV'$ are integer matrices, their determinant is also an integer.
Putting these two facts together, we see that the only two choices are:
\begin{itemize}
\item $\det(\matV) = \det(\matV') = 1$, or
\item $\det(\matV) = \det(\matV') = -1$
\end{itemize}
In either case, $|\det(\matV)| = |\det(\matV')| = 1$, and we are done.
\medskip\noindent
(``$\Leftarrow$'') For the other direction, assume that there is a unimodular matrix
$\matU$ such that $\matB' = \matB \matU$. Then, since $\matU$ is an integer matrix,
\[ \lat(\matB') \subseteq \lat(\matB) \]
This is because each vector (column) of $\matB'$ can be written as a linear combination of
vectors in $\matB$. Thus, the set of all integer linear combinations of vectors in $\matB'$ is
contained in the set of all integer linear combinations of vectors in $\matB$.
Now, $\matB = \matB' (\matU^{-1})$ where $\matU^{-1}$ is also unimodular by Proposition~\ref{fact:unimodular}.
This shows that \[ \lat(\matB) \subseteq \lat(\matB') \]
by the same argument as above. Together, we have $\lat(\matB) = \lat(\matB')$.
\end{proof}
\subsection{A Geometric Characterization using the Fundamental Parallelepiped}
We need the notion of a fundamental parallelepiped of a basis $\vecb_1,\ldots,\vecb_n$.
\begin{definition}[Fundamental Parallelepiped]
Given $n$ {\em linearly independent} vectors $\vecb_1,\ldots,\vecb_n \in \mathbb{R}^m$, their {\em fundamental parallelepiped} is defined as
\[ \ppd(\vecb_1,\ldots,\vecb_n) \eqdef \bigg\{ \sum_{i=1}^n x_i \vecb_i \ | \ x_i \in \R, 0 \leq x_i < 1 \bigg\} \]
\end{definition}
Thus, pictorially, a fundamental parallelepiped is the (half-open) region enclosed by the vectors $\vecb_1,\ldots,\vecb_n$. Clearly, different bases of the same lattice generate different fundamental parallelepipeds. See Figure~\ref{fig:ppd}\red{(a)} and \ref{fig:ppd}\red{(b)}.
\begin{center}
\begin{figure} % label removed here: \label{fig:ppd} is correctly placed after the \caption below
%%%% Figure 1
\subfloat[The lattice $\Z^2$ with basis vectors $(0,1)$ and $(1,0)$ and\newline the associated fundamental parallelepiped.]{
\begin{tikzpicture}[scale=3]
\clip (-0.9,-1.2) rectangle (2,1.55);
\draw[step=.4cm,gray,very thin] (-0.9,-1.1) grid (1.4,1.4);
\filldraw[fill=green!20!white, draw=green!50!white] (0,0) -- (0,0.399) -- (0.399,0.399) -- (0.399,0) -- (0,0);
\draw[->] (-1.4,0) -- (1.4,0);
\draw[->] (0,-1.1) -- (0,1.5);
% (0:30:3mm) -- cycle;  % stray path fragment, not a valid standalone TikZ command
\draw[->,red,very thick] (30:1cm) (0,0) -- ++(0,0.4) node[anchor=east] {$b_2$};
\draw[->,blue,very thick] (30:1cm) (0,0) -- ++(0.4,0) node[anchor=north] {$b_1$};
\draw[green,dotted, very thick] (30:1cm) (0,0.4) -- (0.4,0.4);
\draw[green,dotted, very thick] (30:1cm) (0.4,0) -- (0.4,0.4);
\foreach \x in {-0.8,-0.4,0,0.4,0.8,1.2}
\foreach \y in {-0.8,-0.4,0,0.4,0.8,1.2}
\filldraw (\x,\y) circle (0.5pt);
\end{tikzpicture}
}
%%%% Figure 2
\subfloat[The lattice $\Z^2$ with a different basis consisting of vectors $(1,1)$ and $(2,1)$, and the associated fundamental parallelepiped.]{
\begin{tikzpicture}[scale=3]
\clip (-1.0,-1.2) rectangle (1.6,1.55);
\draw[step=.4cm,gray,very thin] (-0.9,-1.1) grid (1.4,1.4);
\filldraw[fill=green!20!white, draw=green!50!white] (0,0) -- (0.399,0.399) -- (1.199,0.799) -- (0.799,0.399) -- (0,0);
\draw[->] (-1.0,0) -- (1.4,0);
\draw[->] (0,-1.1) -- (0,1.5);
% (0:30:3mm) -- cycle;  % stray path fragment, not a valid standalone TikZ command
\draw[->,blue,very thick] (30:1cm) (0,0) -- ++(0.8,0.4) node[anchor=west] {$b_1$};
\draw[->,red,very thick] (30:1cm) (0,0) -- ++(0.4,0.4) node[anchor=south] {$b_2$};
\draw[green,dotted, very thick] (30:1cm) (0.4,0.4) -- (1.2,0.8);
\draw[green,dotted, very thick] (30:1cm) (0.8,0.4) -- (1.2,0.8);
\foreach \x in {-0.8,-0.4,0,0.4,0.8,1.2}
\foreach \y in {-0.8,-0.4,0,0.4,0.8,1.2}
\filldraw (\x,\y) circle (0.5pt);
\end{tikzpicture}
}
\caption{Parallelepipeds for various bases of the lattice $\Z^2$. Note that the parallelepipeds in either case do not contain any non-zero lattice point.}
\label{fig:ppd}
\end{figure}
\end{center}
\begin{center}
\begin{figure}
\hspace{1.75in}
\begin{tikzpicture}[scale=3]
\clip (-1.0,-1.2) rectangle (1.6,1.55);
\draw[step=.4cm,gray,very thin] (-0.9,-1.1) grid (1.4,1.4);
\filldraw[fill=green!20!white, draw=green!50!white] (0,0) -- (0.399,0.399) -- (0.799,0) -- (0.399,-0.399) -- (0,0);
\draw[->] (-1.0,0) -- (1.4,0);
\draw[->] (0,-1.1) -- (0,1.5);
% (0:30:3mm) -- cycle;  % stray path fragment, not a valid standalone TikZ command
\draw[->,blue,very thick] (30:1cm) (0,0) -- ++(0.4,-0.4) node[anchor=west] {$b_1$};
\draw[->,red,very thick] (30:1cm) (0,0) -- ++(0.4,0.4) node[anchor=south] {$b_2$};
\draw[green,dotted, very thick] (30:1cm) (0.4,0.4) -- (0.8,0);
\draw[green,dotted, very thick] (30:1cm) (0.4,-0.4) -- (0.8,0);
\foreach \x in {-0.8,-0.4,0,0.4,0.8,1.2}
\foreach \y in {-0.8,-0.4,0,0.4,0.8,1.2}
\filldraw (\x,\y) circle (0.5pt);
\end{tikzpicture}
\caption{$\vecb_1$ and $\vecb_2$ do not form a basis of $\Z^2$. Note that the parallelepiped of $\vecb_1$ and $\vecb_2$ contains a non-zero lattice point, namely $(1,0)$.}
\label{fig:ppd2}
\end{figure}
\end{center}
Note that in Figures~\ref{fig:ppd}\red{(a)} and \ref{fig:ppd}\red{(b)}, the vectors $\vecb_1$ and $\vecb_2$ form a basis of the lattice, and the parallelepiped associated to the basis does not contain any lattice point other than $\mathbf{0}$. On the other hand, in Figure~\ref{fig:ppd2}, the vectors $\vecb_1$ and $\vecb_2$ do not form a basis of the lattice, and the parallelepiped associated to the basis contains a non-zero lattice point. In fact, this is not a coincidence as our next theorem shows.
\begin{theorem}\label{thm:ppd}
Let $\lat$ be a full-rank $n$-dimensional lattice, and let $\vecb_1,\ldots,\vecb_n \in \mathbb{R}^n$ denote {\em linearly independent} vectors in $\lat$.
Then, $\vecb_1,\ldots,\vecb_n$ form a basis of $\lat$ {\em if and only if} $\ppd(\vecb_1,\ldots,\vecb_n) \cap \lat = \{\mathbf{0}\}$.
\end{theorem}
\begin{proof}
(``$\Rightarrow$'') Suppose that $\vecb_1,\ldots,\vecb_n$ is a basis of $\lat$. Let \[ \veca = \sum_{i=1}^n x_i \vecb_i \in \lat(\vecb_1,\ldots,\vecb_n) \cap \ppd(\vecb_1,\ldots,\vecb_n) \] We will show that $\veca = \mathbf{0}$.
Since $\veca \in \lat(\vecb_1,\ldots,\vecb_n)$, $x_i \in \Z$ for all $i$. Since $\veca \in \ppd(\vecb_1,\ldots,\vecb_n)$, $x_i \in [0,1)$ for all $i$. Together, this means that $x_i = 0$ for all $i$, and thus, $\veca = \mathbf{0}$.
\medskip \noindent
(``$\Leftarrow$'') Suppose that $\ppd(\vecb_1,\ldots,\vecb_n) \cap \lat = \{\mathbf{0}\}$. We would like to show that $\vecb_1,\ldots,\vecb_n$ form a basis of $\lat$.
The vectors $\vecb_1,\ldots,\vecb_n$ are linearly independent. Since they belong to $\lat$, $\lat(\vecb_1,\ldots,\vecb_n) \subseteq \lat$. What remains is to show that $\lat \subseteq \lat(\vecb_1,\ldots,\vecb_n)$. Pick any vector $\veca \in \lat$ and write it as
\[ \veca = \sum_{i=1}^n x_i\vecb_i \hspace{0.2in} \mbox{where } x_i \in \R \]
Consider now the vector
\[ \veca' = \sum_{i=1}^n \floor{x_i}\vecb_i \hspace{0.2in} \in \lat(\vecb_1,\ldots,\vecb_n) \]
which is clearly in the lattice $\lat(\vecb_1,\ldots,\vecb_n)$ since the coefficients $\floor{x_i}$ are integers. Therefore, the vector $\veca - \veca'$ is in $\lat(\vecb_1,\ldots,\vecb_n)$ as well.
Now,
\[ \veca - \veca' = \sum_{i=1}^n (x_i - \floor{x_i}) \vecb_i \hspace{0.2in} \in \ppd(\vecb_1,\ldots,\vecb_n) \]
is in the parallelepiped of $\vecb_1,\ldots,\vecb_n$ since $0 \leq x_i - \floor{x_i} < 1$ for all $i$.
Since $\veca - \veca' \in \lat(\vecb_1,\ldots,\vecb_n) \cap \ppd(\vecb_1,\ldots,\vecb_n)$, it must be the case that $\veca-\veca' = 0$ by assumption.
Since the vectors $\vecb_1,\ldots,\vecb_n$ are linearly independent, this means that $x_i - \floor{x_i} = 0$ for all $i$ which in turn means that
$x_i \in \Z$ for all $i$.
Thus, $\veca \in \lat(\vecb_1,\ldots,\vecb_n)$, showing us that $\lat \subseteq \lat(\vecb_1,\ldots,\vecb_n)$.
\end{proof}
\subsection{Determinant of a Lattice}
Another quantity associated to a lattice is its determinant, denoted $\det(\lat)$. The determinant of a lattice is the $n$-dimensional volume of its fundamental parallelepiped, computed as the absolute value of the determinant of its basis matrix $\matB$. A couple of facts about the determinant of a lattice are worth noting:
\begin{enumerate}
\item The parallelepipeds associated with different bases of a lattice have the same volume. Thus, {\em the determinant is a lattice invariant}.
This is easy to see using our characterization of equivalent bases from Theorem~\ref{thm:unimodular}.
Let $\matB$ and $\matB'$ be any two lattice bases. By Theorem~\ref{thm:unimodular}, there is a unimodular matrix $\matU$ such that $\matB'=\matB \matU$. Thus, $|\det(\matB')| = |\det(\matB)| \cdot |\det(\matU)| = |\det(\matB)|$ since $|\det(\matU)| = 1$.
\item Intuitively, the determinant of a lattice is inversely proportional to its ``density''. The larger the determinant, the sparser the lattice.
\end{enumerate}
\section{Gram-Schmidt Orthogonalization}
Gram-Schmidt orthogonalization is a procedure in linear algebra that transforms a set of vectors $\vecb_1,\ldots,\vecb_n$ into a set of
orthogonal vectors $\vecbt_1,\ldots,\vecbt_n$. In two dimensions, this proceeds as follows:
\begin{itemize}
\item The first Gram-Schmidt vector $\vecbt_1$ is $\vecb_1$ itself.
\item The second Gram-Schmidt vector $\vecbt_2$ is the component of $\vecb_2$ that is orthogonal to $\Span(\vecbt_1)$. This can be
computed as
\[ \vecbt_2 = \vecb_2 - \bigg( \frac{\inner{\vecb_2,\vecbt_1}}{\inner{\vecbt_1,\vecbt_1}} \bigg) \vecbt_1 \]
See Figure~\ref{fig:GS} for an illustration of this process.
\end{itemize}
In general, the Gram-Schmidt vectors are obtained by projecting each vector successively on the space orthogonal to the span of all the previous vectors.
\begin{definition}[Gram-Schmidt Orthogonalization]\label{def:GS}
For a sequence of $n$ linearly independent vectors $\vecb_1,\ldots,\vecb_n \in \R^n$, we define their {\em Gram-Schmidt orthogonalization} as the sequence of vectors $\vecbt_1,\ldots,\vecbt_n$ defined as follows:
\[ \vecbt_i = \vecb_i - \sum_{j=1}^{i-1} \mu_{i,j} \vecbt_j \hspace{0.2in} \mbox{where } \mu_{i,j} = \frac{\inner{\vecb_i,\vecbt_j}}{\inner{\vecbt_j,\vecbt_j}} \]
Thus, $\vecbt_i$ is the component of $\vecb_i$ that is orthogonal to $\vecbt_1,\ldots,\vecbt_{i-1}$. The coefficients $\mu_{i,j}$ are called the Gram-Schmidt coefficients.
\end{definition}
\begin{center}
\begin{figure}
%%%% Figure 1
\subfloat[Gram-Schmidt orthogonalization of the vectors $b_1$ and $b_2$ \newline in that order.]{
\begin{tikzpicture}[scale=3]
\clip (-0.9,-1.2) rectangle (2,1.55);
\draw[step=.4cm,gray,very thin] (-0.9,-1.1) grid (1.4,1.4);
\draw[->] (-1.0,0) -- (1.4,0);
\draw[->] (0,-1.1) -- (0,1.5);
% (0:30:3mm) -- cycle;  % stray path fragment, not a valid standalone TikZ command
\draw[->,blue,very thick] (30:1cm) (0,0) -- ++(0.8,0.4) node[anchor=west] {\small{$\tilde{b}_1=b_1$}};
\draw[->,red,very thick] (30:1cm) (0,0) -- ++(0.4,0.4) node[anchor=south] {$b_2$};
\draw[->,green,very thick] (30:1cm) (0,0) -- ++(-0.08,0.16) node[anchor=south] {$\tilde{b}_2$};
\draw[black,dotted] (30:1cm) (-0.88,-0.24) -- (1.52,0.96);
\foreach \x in {-0.8,-0.4,0,0.4,0.8,1.2}
\foreach \y in {-0.8,-0.4,0,0.4,0.8,1.2}
\filldraw (\x,\y) circle (0.5pt);
\end{tikzpicture}
}
%%% Figure 2
\subfloat[Gram-Schmidt orthogonalization of the same vectors, \newline but in the opposite order.]{
\begin{tikzpicture}[scale=3]
\clip (-0.9,-1.2) rectangle (2,1.55);
\draw[step=.4cm,gray,very thin] (-0.9,-1.1) grid (1.4,1.4);
\draw[->] (-1.0,0) -- (1.4,0);
\draw[->] (0,-1.1) -- (0,1.5);
% (0:30:3mm) -- cycle;  % stray path fragment, not a valid standalone TikZ command
\draw[->,blue,very thick] (30:1cm) (0,0) -- ++(0.8,0.4) node[anchor=west] {$b_2$};
\draw[->,red,very thick] (30:1cm) (0,0) -- ++(0.4,0.4) node[anchor=south] {\small{$\tilde{b}_1=b_1$}};
\draw[->,green,very thick] (30:1cm) (0,0) -- (0.2,-0.2) node[anchor=south] {$\tilde{b}_2$};
\draw[black,dotted] (30:1cm) (-0.4,-0.8) -- (1.4,1.0);
\foreach \x in {-0.8,-0.4,0,0.4,0.8,1.2}
\foreach \y in {-0.8,-0.4,0,0.4,0.8,1.2}
\filldraw (\x,\y) circle (0.5pt);
\end{tikzpicture}
}
\caption{Gram-Schmidt Orthogonalization.}
\label{fig:GS}
\end{figure}
\end{center}
\paragraph{Remarks.}
\begin{enumerate}
\item True to its name, the different Gram-Schmidt vectors $\vecbt_1,\ldots,\vecbt_n$ are orthogonal to each other. That is, for each $i\neq j$, $\inner{\vecbt_i, \vecbt_j} = 0$. This is an easy consequence of Definition~\ref{def:GS}.
\item The span of $\vecbt_1,\ldots,\vecbt_i$ is the same as the span of $\vecb_1,\ldots,\vecb_i$ for all $1 \leq i \leq n$.
\item The vectors $\vecbt_1,\ldots,\vecbt_n$ do not form a lattice basis. In fact, the Gram-Schmidt vectors are not necessarily in the lattice. See Figure~\ref{fig:GS} for example.
\item The (Euclidean) length of the Gram-Schmidt vector $\vecbt_i$ is at most the length of the basis vector $\vecb_i$. Namely, $\|\vecbt_i\| \leq \| \vecb_i \|$.
\item Clearly, as seen in Figure~\ref{fig:GS}, the Gram-Schmidt vectors depend on the order in which the vectors $\vecb_1,\ldots,\vecb_n$ are processed.
\end{enumerate}
Let $\vecbt_1/\|\vecbt_1\|, \ldots, \vecbt_n/\|\vecbt_n\|$ denote the unit vectors in the direction of the Gram-Schmidt vectors. Then, the Gram-Schmidt orthogonalization process can be written in matrix form as
\begin{eqnarray*}
\left( \begin{array}{ccc} \vert & & \vert \\
\vecb_1 & \ldots & \vecb_n \\
\vert & & \vert \end{array} \right) & = &
\left( \begin{array}{ccc} \vert & & \vert \\
\vecbt_1 & \ldots & \vecbt_n \\
\vert & & \vert \end{array} \right) \cdot
\left( \begin{array}{ccccc}
1 & \mu_{2,1} & \mu_{3,1} & \ldots & \mu_{n,1} \\
0 & 1 & \mu_{3,2} & \ldots & \mu_{n,2} \\
0 & 0 & 1 & \ldots & \mu_{n,3} \\
\vdots & \vdots & \vdots & \vdots & \vdots \\
0 & 0 & 0 & 0 & 1 \end{array} \right) \\
& = &
\left( \begin{array}{ccc} \vert & & \vert \\
\frac{\vecbt_1}{\|\vecbt_1\|} & \ldots & \frac{\vecbt_n}{\|\vecbt_n\|} \\
\vert & & \vert \end{array} \right) \cdot
\left( \begin{array}{ccccc}
\|\vecbt_1\| & \mu_{2,1}\|\vecbt_1\| & \mu_{3,1}\|\vecbt_1\| & \ldots & \mu_{n,1}\|\vecbt_1\| \\
0 & \|\vecbt_2\| & \mu_{3,2}\|\vecbt_2\| & \ldots & \mu_{n,2}\|\vecbt_2\| \\
0 & 0 & \|\vecbt_3\| & \ldots & \mu_{n,3}\|\vecbt_3\| \\
\vdots & \vdots & \vdots & \vdots & \vdots \\
0 & 0 & 0 & 0 & \|\vecbt_n\| \end{array} \right)
\end{eqnarray*}
Since the vectors $\frac{\vecbt_i}{\|\vecbt_i\|}$ are orthonormal, the determinant of the matrix
with columns $\frac{\vecbt_i}{\|\vecbt_i\|}$ is $1$.
Thus, we have
\[ \det(\lat(\matB)) = \prod_{i=1}^n \|\vecbt_i \| \]
In other words, the Gram-Schmidt orthogonalization process is a volume-preserving transformation that results in a
set of orthogonal vectors $\vecbt_1,\ldots,\vecbt_n$, whose enclosing parallelepiped is rectangular and
generates a volume of $\prod_{i=1}^n \|\vecbt_i \|$.
\section{Successive Minima of a Lattice}
A basic parameter of the lattice is the length of the shortest non-zero vector in the lattice (since any lattice contains the zero vector which has norm zero, we have to ask for a non-zero vector). This parameter is also called the {\em first successive minimum} of the lattice, and is denoted $\lambda_1(\lat)$. When we speak of length, we mean the Euclidean norm defined as follows: for a vector $\vecx = (x_1,\ldots,x_n) \in \R^n$, the Euclidean norm of $\vecx$, denoted $\|\vecx\|_2$ (or simply $\|\vecx\|$), is defined as
\[ \|\vecx\| = \sqrt{\sum_{i=1}^n x_i^2} \]
The Euclidean norm is also frequently referred to as the $\ell_2$ norm.
We can speak of other norms such as the $\ell_1$ norm -- $\|\vecx\|_1 = \sum_{i=1}^n |x_i|$ -- and the $\ell_{\infty}$ norm -- $\|\vecx\|_{\infty} = \max_{i=1}^n |x_i|$, but we will stick to the Euclidean norm for most of this course.
Figure~\ref{fig:SH} shows a shortest vector in the lattice generated by $(1,1)$ and $(2,0)$. The shortest vector is not unique in general. There could be many, even exponentially many, shortest vectors. Clearly, there are at least two -- if $\vecv$ is a shortest vector in a lattice, then so is $-\vecv$.
\begin{center}
\begin{figure}
%%%% Figure 3
\hspace{1.75in}
\begin{tikzpicture}[scale=3]
\clip (-0.9,-1.2) rectangle (1.75,1.55);
\filldraw[fill=green!20!white, draw=green!50!black] (0,0) circle (0.5656); % circle of radius $\sqrt{2}\cdot 0.4$ centered at the origin
\draw[step=.4cm,gray,very thin] (-0.9,-1.1) grid (1.4,1.4);
\draw[->] (-1.4,0) -- (1.4,0);
\draw[->] (0,-1.1) -- (0,1.5);
% (0:30:3mm) -- cycle;  % stray path fragment, not a valid standalone TikZ command
\draw[->,blue,very thick] (30:1cm) (0,0) -- ++(0.8,0) node[anchor=west] {$b_1$};
\draw[->,red,very thick] (30:1cm) (0,0) -- ++(0.4,0.4) node[anchor=south] {$b_2$};
\foreach \x in {-0.8,0,0.8}
\foreach \y in {-0.8,0,0.8}
\filldraw (\x,\y) circle (0.5pt);
\foreach \x in {-0.4,0.4,1.2}
\foreach \y in {-0.4,0.4,1.2}
\filldraw (\x,\y) circle (0.5pt);
\end{tikzpicture}
\caption{The shortest vector in the lattice generated by $(1,1)$ and $(2,0)$. $\lambda_1(\lat) = \sqrt{2}$.}
\label{fig:SH}
\end{figure}
\end{center}
We will be interested in lower and upper bounds on $\lambda_1$. We first show a lower bound on $\lambda_1$ using Gram-Schmidt orthogonalization. In the next lecture, we will prove Minkowski's theorem which provides an upper bound on $\lambda_1$ in terms of the determinant of the lattice.
\paragraph{Lower Bound on $\lambda_1$.} We show the following theorem. Roughly speaking, the theorem says that the shortest non-zero vector in a lattice is at least as long as the shortest Gram-Schmidt vector of a basis of the lattice. To see why, observe that a lattice can be partitioned into many hyperplanes perpendicular to its Gram-Schmidt vector $\vecbt_n$. See Figure~\ref{fig:hyper} for an illustration in two dimensions.
Now, there are two possibilities:
\begin{itemize}
\item There is a shortest non-zero vector in one of the hyper-planes not passing through the origin. In that case, the vector has to have length at least $\|\vecbt_n\| \geq \min_{j} \|\vecbt_j\|$ since the $i^{th}$ such hyper-plane is at a distance of $i\cdot\|\vecbt_n\|$ from the origin.
\item The shortest non-zero vector lives in the hyper-plane that passes through the origin, in which case, repeat the same argument in dimension $n-1$ with the $(n-1)$-dimensional sublattice partitioned into hyper-planes perpendicular to $\vecbt_{n-1}$.
\end{itemize}
Eventually, if the argument reaches dimension $1$, the shortest non-zero vector has to have length at least $\|\vecb_1\| = \|\vecbt_1\| \geq \min_j \|\vecbt_j\|$.
\medskip\noindent
The formal statement and proof of the theorem follows.
\begin{figure}
\centering
\begin{tikzpicture}[scale=3]
\clip (-0.9,-1.2) rectangle (2,1.55);
\draw[step=.4cm,gray,very thin] (-0.9,-1.1) grid (1.4,1.4);
\draw[->] (-1.0,0) -- (1.4,0);
\draw[->] (0,-1.1) -- (0,1.5);
\draw[->,blue,very thick] (0,0) -- ++(0.8,0.4) node[anchor=west] {\small{$\tilde{b}_1=b_1$}};
\draw[->,red,very thick] (0,0) -- ++(0.4,0.4) node[anchor=south] {$b_2$};
\draw[->,green,very thick] (0,0) -- ++(-0.08,0.16) node[anchor=south] {$\tilde{b}_2$};
\draw[black,dotted] (-1.12,0.24) -- (1.28,1.46);
\draw[black,dotted] (-1.04,0.08) -- (1.36,1.30);
\draw[black,dotted] (-0.96,-0.08) -- (1.44,1.14);
\draw[black,dotted] (-0.88,-0.24) -- (1.52,0.96);
\draw[black,dotted] (-0.8,-0.4) -- (1.6,0.8);
\draw[black,dotted] (-0.72,-0.56) -- (1.68,0.64);
\draw[black,dotted] (-0.64,-0.72) -- (1.76,0.48);
\foreach \x in {-0.8,-0.4,0,0.4,0.8,1.2}
\foreach \y in {-0.8,-0.4,0,0.4,0.8,1.2}
\filldraw (\x,\y) circle (0.5pt);
\end{tikzpicture}
\caption{The lattice is partitioned into many parallel hyperplanes perpendicular to $\vecbt_2$. Either the shortest vector lives in a hyperplane that does not pass through the origin, in which case its length is at least $\|\vecbt_2\|$, or it lives in the hyperplane that passes through the origin, in which case its length is at least $\|\vecbt_1\|$. In general, in two dimensions, $\lambda_1(\lat) \geq \min\{\|\vecbt_1\|,\|\vecbt_2\|\}$. This argument can be generalized to $n$ dimensions.}
\label{fig:hyper}
\end{figure}
\begin{theorem}
Let $\matB$ be a rank-$n$ lattice basis, and $\matBt$ be its Gram-Schmidt orthogonalization. Then,
\[ \lambda_1(\lat(\matB)) \geq \min_{i=1,\ldots,n} \|\vecbt_i\| > 0 \]
\end{theorem}
\begin{proof}
Let $\vecx \in \Z^n$ be {\em any} non-zero integer vector. We would like to show that the lattice vector $\matB\vecx \in \lat(\matB)$ has length at least $\min_{i} \|\vecbt_i\|$.
The proof follows by calculating the quantity $|\inner{\matB\vecx, \vecbt_j}|$ in two different ways.
\paragraph{$1$.}
Let $j \in \{1,\ldots,n\}$ be the largest index such that $x_j \neq 0$. Then,
\begin{equation}\label{eqn:lb1}
|\inner{\matB\vecx, \vecbt_j}| = |\inner{\sum_{i=1}^n x_i\vecb_i, \vecbt_j}| =
|\sum_{i=1}^n x_i \inner{\vecb_i, \vecbt_j}| = |x_j|\inner{\vecbt_j,\vecbt_j} = |x_j|\cdot \|\vecbt_j\|^2
\end{equation}
where the first equality follows by rewriting $\matB\vecx$ as $\sum_{i=1}^n x_i \vecb_i$, the second follows by the linearity of the inner product, and the third because
\begin{itemize}
\item for $i < j$, $\inner{\vecb_i, \vecbt_j} = 0$, since $\vecbt_j$ is orthogonal to $\Span(\vecb_1,\ldots,\vecb_{j-1})$
\item for $i > j$, $x_i=0$ by the choice of $j$ as the largest index with a non-zero coordinate.
\end{itemize}
The fourth equality follows by the definition of $\|\vecbt_j\|^2 = \inner{\vecbt_j,\vecbt_j}$.
\paragraph{$2$.} On the other hand,
\begin{equation}\label{eqn:lb2}
|\inner{\matB\vecx, \vecbt_j}| \leq \|\matB\vecx\| \cdot \|\vecbt_j\|
\end{equation}
by the Cauchy-Schwarz inequality.
\medskip\noindent
Putting together Equations~\ref{eqn:lb1} and \ref{eqn:lb2}, we get
\[ \|\matB\vecx\| \geq \frac{|\inner{\matB\vecx, \vecbt_j}|}{\|\vecbt_j\|} = |x_j| \cdot \|\vecbt_j\| \geq \|\vecbt_j\| \geq \min_{i=1\ldots n} \|\vecbt_i\|
\]
where the second inequality follows from the fact that $x_j$ is a non-zero integer, so that $|x_j| \geq 1$. Since the length of any non-zero lattice vector is at least $\min_{i} \|\vecbt_i\|$,
\[ \lambda_1(\lat(\matB)) \geq \min_{i=1\ldots n} \|\vecbt_i\| \]
Since $\vecb_1,\ldots,\vecb_n$ are linearly independent, this quantity is strictly positive.
\end{proof}
A corollary of this theorem is that a lattice is a {\em discrete set}. In other words, lattice points cannot be arbitrarily close to one another. Formally:
\begin{corollary}
For every lattice $\lat$, there is an $\epsilon = \epsilon(\lat) > 0$ such that
$\| \vecx - \vecy \| \geq \epsilon$ for any two unequal lattice points $\vecx, \vecy \in \lat$.
\end{corollary}
\begin{proof}
For any two $\vecx \neq \vecy \in \lat$, $\vecx - \vecy \in \lat$. Then, $\|\vecx - \vecy \| \geq \lambda_1(\lat) > 0$. In particular, set $\epsilon = \lambda_1(\lat)$ to obtain the statement of the corollary.
\end{proof}
In fact, this leads us to a {\em basis-independent} characterization of a lattice. Namely, every nonempty discrete subset of $\R^n$ that is closed under subtraction (that is, every discrete additive subgroup of $\R^n$) is a lattice.
\end{document}