\documentclass[10pt]{article}
\newtheorem{define}{Definition}
%\newcommand{\Z}{{\mathbb{Z}}}
%\usepackage{psfig}
\usepackage{amsfonts}

\oddsidemargin=0.15in
\evensidemargin=0.15in
\topmargin=-.5in
\textheight=9in
\textwidth=6.25in

\newcommand{\pmone}{\{-1,1\}}
\newcommand{\dist}{\mathop{\rm dist}}

\begin{document}
\input{preamble.tex}

%{lecture number}{lecture date}{Ronitt Rubinfeld}{Your name}
\lecture{3}{February  13, 2008}{Ronitt Rubinfeld}{Megumi Ando}

%%%% body goes in here %%%%
% \section*{Announcements}
% \begin{itemize}
% \item HW1 posted
% \item Sign up to scribe
% \end{itemize}

\section*{Last Time}
In the previous lecture, we made a notational switch from using Boolean functions of the form $f:\{0,1\}^n \rightarrow \{0,1\}$ to functions of the form $f:\pmone^n \rightarrow \pmone$. We defined linearity over this form, and what it meant to be $\epsilon$-close to linear. 

\begin{definition}$f: \pmone^n \rightarrow \pmone$ is \emph{linear} if  \thinspace$\forall x, y \in \pmone^n$, $f(x) f(y) f(x \cdot y) = 1$, \\
where $x\cdot y = (x_1... x_n) \cdot (y_1 ... y_n) = (x_1y_1, x_2y_2, ... , x_ny_n)$.
\end{definition}

There are $2^n$ linear functions over $\pmone^n \rightarrow \pmone$. Each of them can be written as
$\chi_S(x) = \prod_{i\in S} x_i$, where $S \subseteq \{1, ..., n\}$.

\begin{definition} A function $f$ is \emph{$\epsilon$-close to linear} if \thinspace$\exists$ linear $g$ such that $\mbox{Pr}_x[f(x)\not=g(x)] \leq \epsilon$. Otherwise, $f$ is \emph{$\epsilon$-far}. 
\end{definition}

Finally, we proposed the following linearity tester:
\begin{itemize}
\item Repeat $O\left(\frac{1}{\rho} \log \frac{1}{\beta}\right)$ times:
\begin{itemize}
\item Pick $x, y \in \pmone^n$ independently and uniformly at random.
\item If $f(x)f(y)f(x\cdot y) \not= 1$, output ``FAIL" and halt.
\end{itemize}
\item Output ``PASS."
\end{itemize}
The rejection probability of one pass through the loop is $\delta \equiv E_{x,y} \left[ \frac{1-f(x)f(y)f(x\cdot y)}{2} \right]$.


\section{Fourier Analysis (Basics)}

We will use a few times the following simple fact about linear functions.

\begin{fact}\label{fact0}
$$\chi_S(x) \cdot \chi_T(x) = \prod_{i\in S} x_i  \cdot \prod_{j\in T} x_j
= \prod_{i\in S\triangle T} x_i,$$
where $S\triangle T$ is the symmetric difference of $S$ and $T$, i.e., the set of elements that appear in exactly one of the sets $S$ and $T$.
\end{fact}

% \subsection{Definitions}
% 
% 
% % For two Boolean functions $f,g: \pmone^n \to \pmone$, we define their \emph{inner product} as
% % $$\langle f,g \rangle \equiv \frac{1}{2^n}\sum_{x\in\pmone^n} f(x)g(x).$$
% 
% Furthermore, for any function
% $f: \pmone^n \to \pmone$ and any subset $S\subset \{1,\ldots,n\}$, we define $\hat{f}(S)$ as $$\hat f(S) \equiv 
% %\langle f,\chi_S \rangle = 
% \frac{1}{2^n}\sum_{x\in\pmone^n}f(x)\chi_S(x).$$


% \subsection{Some Facts}
% 
% The following Fourier analysis facts were presented in this lecture.
% 
% 
% 
% 
% 
% \begin{fact}\label{fact4}
% $E[f] = \hat{f} (\emptyset)$ 
% \end{fact}

\subsection{Vector Space of Functions $g:\pmone^n \rightarrow \mathbb R$}

The set $G = \{g  |  g:\pmone^n \rightarrow \mathbb R\}$ is a vector space of dimension $2^n$.

\begin{definition}
\emph{Indicator functions} are functions of the form: If $x=a$, then $e_a(x) =  1$. Otherwise, $e_a(x) = 0$. 
\end{definition}

Note that the indicator functions are basis functions of $G$. However, we will not be using them. Instead we will be using the parity functions, $\{\chi_S\}_{S\subseteq [n]}$, described in the previous lecture. 

\begin{definition}
For $f,g :  \pmone^n \rightarrow \pmone$, 
the ``inner product" $$\langle f,g\rangle = \frac{1}{2^n} \sum_{x\in \pmone^n} f(x)g(x),$$ 
\end{definition}
where the sum $\sum f(x)g(x)$ is the ``correlation," a measure of how often $f$ and $g$ agree. \\

\noindent Note that:
\begin{enumerate}
\item $\langle\chi_S, \chi_S\rangle = \frac{1}{2^n} \sum_{x\in\pmone^n} \chi_S^2 (x) = 1.$ (Absolute correlation.) 
\item If $S\not= T$, 
\end{enumerate}
\begin{eqnarray*}
\langle\chi_S, \chi_T\rangle &=& \frac{1}{2^n} \sum_{x\in\pmone^n} \chi_S (x) \chi_T (x) \\
&=& \frac{1}{2^n} \sum_{x\in\pmone^n} \prod_{i\in S} x_i \prod_{j\in T}x_j \mbox{\qquad(by definition)} \\
&=& \frac{1}{2^n} \sum_{x\in\pmone^n} \prod_{i \in S\triangle T} x_i \mbox{\qquad(by Fact \ref{fact0})}  
\end{eqnarray*}
where $S\triangle T$ is non-empty. Therefore, there exists a $j \in S\triangle T$. Let $x^{\oplus j}$ denote $x$ with the $j$-th bit flipped. Pairing each $x$ with $x^{\oplus j}$, we continue the calculation:

\begin{eqnarray*}
&=& \frac{1}{2^n} \sum_{\mbox{pairs } (x, x^{\oplus j})} \left( x_j \prod_{i \in S\triangle T \setminus\{j\}} x_i + (-x_j) \prod_{i \in S\triangle T \setminus\{j\}} x_i \right) \\
&=& 0
\end{eqnarray*}
From Notes 1 and 2 above, we see that every parity function $\chi_S$ has unit norm and is orthogonal to all the others. Since there are $2^n$ parity functions and $G$ has dimension $2^n$, the parity functions form an orthonormal basis of $G$. 
\bigskip

% \subsection{Digression: Matrix Multiplication Tester (Freivald's Algorithm)}
% For one of the non-handin problems, we were asked to figure out a randomized matrix multiplication tester that runs in $O(n^2)$ time. If matrix $C$ is equivalent to the product of matrices $A$ and $B$, then the tester should always output ``$A\cdot B \equiv C$." Otherwise, the tester should correctly output ``$A\cdot B \not\equiv C$" at least three-fourths of the time. \\
% 
% \noindent The answer is Freivald's Algorithm. \\
% On input $<A, B, C>$, where $A, B, C$ are $n$-by-$n$ matrices in $Z_2$: 
% \begin{enumerate}
% \item Let $r \in_R \{0,1\}^n$. 
% \item Compute $r' \leftarrow A\times (B\times r) - C\times r \mbox{ (mod }2)$.
% \item If $r '= \vec{0}$, repeat Steps 1-2. Else, output ``$A\times B \not\equiv C$.''
% \item If $r '= \vec{0}$ again, output ``$A\times B \equiv C$.'' Else, output ``$A\times B \not\equiv C$.''
% \end{enumerate}
% 
% Running Time. Instead of multiplying $A$ and $B$ first, we multiply $B$ and $r$ first. So the total running time is $O(n^2)$ rather than $O(n^3)$. \\
% 
% \noindent Correctness: \\
% Case 1: $A\times B \equiv C$.  When $A\times B \equiv C$, $[A\times (B\times r)] - (C\times r)$ is equivalent to zero because of matrix associativity. Thus, the algorithm always correctly outputs ``$A\times B \equiv C$." \\
% 
% \noindent Case 2: $A\times B \not\equiv C$. 
% \begin{eqnarray}
% && \mbox{Let } D = (A\times B) - C \mbox{ (mod $2$)} \nonumber\\
% && \mbox{Let } D_{i,j} \mbox{ be the element of $D$ in the $i$-th row, $j$-th column.} \nonumber\\
% && A\times B \not\equiv C \nonumber \nonumber\\
% &\longrightarrow& D \not= \matrix{0} \nonumber\\
% &\longrightarrow& \exists D_{i,j}\not= 0 \nonumber
% \end{eqnarray}
% Let $r_j$ be the $j$-th element of $r$. If the $r_j$ is one, then $a_i$ will not equal $c_i$, and $a$ will not equal $c$. However, if $r_j$ is zero, then $a$ could still be $c$ (if no other column in $D$ contains a non-zero element). Therefore, the probability that $a$ will not equal $b$ is bounded below by $\frac{1}{2}$, the probability that randomly picked $r_j$ is one. \\
% 
% \noindent Since the algorithm constructs two independent $r$'s, the probability of correctly outputting ``$A\times B\not\equiv C$'' is bounded below by $\frac{3}{4}$. 

\subsection{Fourier Coefficients}

The following corollary follows.

\begin{corollary}
$$\forall f \mbox{, } f(x) = \sum_{S \subseteq [n]} \hat{f}(S) \chi_S(x),$$
where $\hat{f}(S)$ is the Fourier coefficient, which can be calculated as follows:
\begin{eqnarray*}
\hat{f}(S) &=& \langle f, \chi_S \rangle \\
&=& \frac{1}{2^n} \sum_{x\in \pmone^n} f(x) \chi_S(x)
\end{eqnarray*}
\end{corollary}

In particular, a parity function has all but one of its Fourier coefficients equal to zero.

\begin{fact}[Fourier Coefficients of Parity Functions $\chi_T$]\label{fact1}
\begin{eqnarray*}
f = \chi_T &\Longleftrightarrow& \hat{f}(T) = 1. \\
&& \mbox{Furthermore, } \forall S\not=T, \hat{f}(S) = 0. 
\end{eqnarray*}
\end{fact}


\noindent \textbf{A few more examples of Fourier coefficients:}  \\ \\
\begin{tabular}{| l | l |} \hline
\textbf{Function} & \textbf{Fourier Representation} \\ \hline \hline
$f(x) = 1$ & $1 \cdot \chi_\emptyset$  \\ \hline
$f(x) = x_i$ & $1 \cdot \chi_{\{i\}}$ \\ \hline  
and$(x_1, x_2)$ & $\frac{1}{2}\chi_{\emptyset} + \frac{1}{2}\chi_{\{1\}} + \frac{1}{2}\chi_{\{2\}} - \frac{1}{2}\chi_{\{1,2\}}$ \\ \hline
maj$(x_1, x_2, x_3)$ & $\frac{1}{2}\chi_{\{1\}} + \frac{1}{2}\chi_{\{2\}} + \frac{1}{2}\chi_{\{3\}} - \frac{1}{2}\chi_{\{1,2,3\}}$ \\ \hline
\end{tabular} 
\bigskip

\subsection{Fourier Coefficients and Distance to Linearity}

Let $\dist(f,g)$ denote the fraction of inputs on which two Boolean functions $f,g : \pmone^n \to \pmone$
disagree. That is, $\dist(f,g) = \Pr_{x \in \pmone^n}[f(x) \ne g(x)]$. For instance, the distance between two different parity functions is $1/2$.

\begin{fact}\label{fact3}
For $S \not=T$, $\dist(\chi_S, \chi_T) = \frac{1}{2}$. \\
\end{fact}

It turns out that Fourier coefficients can be used to express the distance of a function to a given linear function.


\begin{fact}[Agreement of $f$ with Linear Functions]\label{fact2}
For $f: \pmone^n \rightarrow \pmone$,
$$\hat{f}(S) = 1 - 2 \dist(f, \chi_S).$$ 
\end{fact}


\begin{proof}
\begin{eqnarray*}
2^n \hat{f}(S) &=& \sum_x f(x)\chi_S(x) \\
&=& \sum_{\mbox{$x$ s.t. $f(x) = \chi_S(x)$}} f(x) \chi_S(x) + \sum_{\mbox{$x$ s.t. $f(x) \not= \chi_S(x)$}} f(x) \chi_S(x) \\
&=& 2^n - 2|\{x| f(x) \not= \chi_S(x)\}| \\
&=& 2^n \left(1 - 2\frac{|\{x| f(x) \not= \chi_S(x)\}|}{2^n} \right) \\
\hat{f}(S) &=&1 - 2 \dist(f, \chi_S)
\end{eqnarray*} 
\end{proof}

\subsection{Plancherel's Theorem}

The following simple theorem holds.

\begin{theorem}[Plancherel's Theorem]
For $f,g: \pmone^n \rightarrow \mathbb R$, 
$$\langle f,g \rangle = E_x[f(x)\cdot g(x)] = \sum_{S\subseteq [n]} \hat{f}(S) \hat{g}(S).$$
\end{theorem}

\begin{proof}
\begin{eqnarray*}
\langle f,g \rangle &=& \left\langle\sum_S \hat{f} (S) \chi_S (x), \sum_T \hat{g}(T) \chi_T (x)\right\rangle \\
&=& \sum_S \sum_T \hat{f} (S) \hat{g}(T) \left\langle\chi_S(x), \chi_T(x) \right\rangle \\
&=& \sum_{S=T} \hat{f}(S) \hat{g}(T) \cdot 1 = \sum_{S} \hat{f}(S) \hat{g}(S)
\end{eqnarray*}
\end{proof}

The theorem yields multiple useful properties.

\begin{corollary}[Parseval's identity]
For $f: \pmone^n \rightarrow \mathbb R$, $\langle f,f\rangle = \sum_{S\subseteq [n]} \hat{f}^2 (S)$.
\end{corollary}

\begin{corollary} 
For $f: \pmone^n \rightarrow \pmone$, $\sum_{S\subseteq [n]} \hat{f}^2 (S) = \langle f,f\rangle =1$.
\end{corollary}

\begin{corollary} 
\begin{eqnarray*}
E_x[\chi_S(x)] =  \left \{ \begin{array}{ll}
1 & \mbox{ if $S = \emptyset$, } \\
0, & \mbox{ otherwise. } \end{array} \right. 
\end{eqnarray*}
\end{corollary}

\section{Analysis of the Proposed Linearity Tester}

Recall that $\delta$ is the probability that a single pass through the loop detects that the input function $f$ is not linear, and it can be expressed as
$$\delta = E_{x,y} \left[ \frac{1-f(x)f(y)f(x\cdot y)}{2} \right].$$

\begin{lemma}[Main Lemma]
$1-\delta = \frac{1}{2} + \frac{1}{2} \sum_{S\subseteq [n]} \hat{f}^3 (S)$ \\
\end{lemma}

\begin{proof}
\begin{eqnarray*}
1 - \delta &=& E_{x,y} \left[ \frac{1 + f(x)f(y)f(xy)}{2} \right] \\
&=&  \frac{1}{2} + \frac{1}{2} E_{x,y} [f(x)f(y)f(xy)]  \\ \\
E_{x,y} [f(x)f(y)f(xy)] &=& E_{x,y}[(\sum_S \hat{f} (S) \chi_S (x)) (\sum_T \hat{f}(T) \chi_T (y))(\sum_U \hat{f}(U) \chi_U(x\cdot y))] \\
&=& \sum_{S,T,U} \hat{f}(S) \hat{f}(T) \hat{f}(U) E_{x,y}[\chi_S(x) \chi_T(y) \chi_U (x\cdot y)] \\ \\
E_{x,y}[\chi_S(x) \chi_T(y) \chi_U (x\cdot y)]  &=& E_{x,y}[\prod_{i \in S} x_i \prod_{j\in T} y_j \prod_{k \in U} x_ky_k] \\
&=& E_{x,y}[\prod_{i \in S\triangle U} x_i \prod_{j \in T\triangle U} y_j] \\
&=& E_x[\chi_{S\triangle U} (x)] E_y[\chi_{T\triangle U} (y)] \\
E_x[\chi_{S\triangle U} (x)]  &=& \left \{ \begin{array}{ll}
1 & \mbox{ if $S = U$,} \\
0, & \mbox{otherwise } \end{array} \right. \\
E_y[\chi_{T\triangle U} (y)]  &=& \left \{ \begin{array}{ll}
1 & \mbox{ if $T = U$,} \\
0, & \mbox{ otherwise } \end{array} \right.
\end{eqnarray*}

So, $E_{x,y}[\chi_S(x) \chi_T(y) \chi_U (x\cdot y)]$ is non-zero if and only if $S=T=U$.
If $S=T=U$, then the expectation is 1.
Hence,
$$E_{x,y} [f(x)f(y)f(xy)] = \sum_{S,T,U} \hat{f}(S) \hat{f}(T) \hat{f}(U) E_{x,y}[\chi_S(x) \chi_T(y) \chi_U (x\cdot y)] = \sum_S \hat{f}^3 (S)$$
and
$$1-\delta = \frac{1}{2} + \frac{1}{2} E_{x,y} [f(x)f(y)f(xy)] = \frac{1}{2} + \frac{1}{2}\sum_S \hat{f}^3 (S).$$
\end{proof}

\begin{theorem}
If $f $ is $\epsilon$-far from linear, then $\delta=\Pr_{x,y}[f(x)f(y)f(x\cdot y) \not= 1] \geq \epsilon$. 
\end{theorem}

\begin{proof}\\
We will prove the theorem by proving its contrapositive; we will assume that $\delta < \epsilon$,
and demonstrate that this assumption implies that $f$ is $\epsilon$-close. \\

The Main Lemma implies that
\begin{eqnarray*}
1 - \delta &=& \frac{1}{2} + \frac{1}{2} \sum_S \hat{f}^3 (S) \\
1 - 2\delta &=& \sum_S \hat{f}^3 (S) \\
&\leq& \left( \max_S \hat{f}(S) \right) \sum_S \hat{f}^2 (S) = \max_S \hat{f}(S),
\end{eqnarray*}
Let $T = \mathop{\arg\max}\limits_S \hat{f}(S)$. 
We have
$$1 - 2\delta \le \hat{f}(T),$$
and by Fact \ref{fact2},
\begin{eqnarray*}
\dist(f,\chi_T) = \frac{1}{2} - \frac{1}{2}\hat{f}(T) \le \frac{1}{2} - \frac{1}{2}(1 - 2\delta) = \delta < \epsilon.
\end{eqnarray*}
Therefore, $f$ is $\epsilon$-close to the linear function $\chi_T$, which establishes the contrapositive.
\end{proof}

\end{document}