\documentclass[10pt]{article}
\usepackage{amssymb}

\newtheorem{define}{Definition}
%\newcommand{\Z}{{\mathbb{Z}}}
%\usepackage{psfig}

\oddsidemargin=0.15in
\evensidemargin=0.15in
\topmargin=-.5in
\textheight=9in
\textwidth=6.25in

\newcommand{\plurality}{{\rm plurality}}

\begin{document}
\input{preamble.tex}

%{lecture number}{lecture date}{Ronitt Rubinfeld}{Your name}
\lecture{3}{February 15, 2006}{Ronitt Rubinfeld}{David Shin}

%%%% body goes in here %%%%

\section{Linearity testing (continued)}

We begin with some definitions and notation from last time.  Throughout these notes,
$G$ will always represent a finite group, and $F$ will represent a (not necessarily finite)
group.  
\begin{definition}
A function $f: G \rightarrow F$ is {\em linear} if $f(x)+f(y)=f(x+y)$ $\forall x,y \in G$
(also referred to as a {\em homomorphism}).  
\end{definition}

\begin{definition}
The {\em probability of group law failure} of a function $f: G \rightarrow F$ is
$\Pr_{x,y}[f(x)+f(y)\neq f(x+y)]$.  We use $\delta_f$ to denote
this probability.  
\end{definition}

\begin{definition}
A function $f: G \rightarrow F$ is {\em $\epsilon$-linear} if 
there exists a linear function $g : G \rightarrow F$ s.t. 
$\Pr_x[f(x)=g(x)] \geq 1-\epsilon$.  Let $\epsilon_f$ denote the smallest value of
$\epsilon$ for which the given inequality holds.  
\end{definition}
Recall the $\epsilon$-linear testing problem: we wish to 
determine whether a function $f: G \rightarrow F$ is $\epsilon$-linear.  The algorithm
should adhere to the following specifications:
\begin{enumerate}
\item If $f$ is linear, it should output $pass$ with probability 1.
\item If $f$ is not $\epsilon$-linear, it should output $fail$ with high probability.
\end{enumerate}
The proposed test from last time was the following:

\medskip

\noindent
\textbf{Algorithm $A$} \\
\DO~$r$ times:\\
\indent pick $x,y\in_R G$\\
\indent \IF~$f(x)+f(y)\neq f(x+y)$ output $fail$\\
output $pass$

\medskip

It is straightforward to show by the sampling theorem that we referred to 
last time as the ``gap bound'' that 
if we set $r=\frac{c}{\delta}\log\frac{1}{\beta}$, algorithm $A$ outputs $pass$ with
probability 1 if $f$ is linear, and outputs $fail$ with probability at least
$1-\beta$ if $\delta_f \geq \delta$.\footnote{Using Chernoff bounds, one can actually
distinguish the case when $f$ is $\epsilon'$-linear from the case when $f$ is
not even $\epsilon''$-linear for constants $\epsilon'<\epsilon''$ in a similar
number of samples.}
To complete the analysis of this algorithm, then, we must simply establish some relationship between
$\delta_f$ and $\epsilon_f$.  Such a relationship, however,
is not as straightforward as one might expect.  To illustrate, Coppersmith's
example from last time shows that $\epsilon_f$ can be as big as $1/3$ when
$\delta_f = 2/9$, while the following theorem, which we will not prove,
implies that $\epsilon_f$ must be smaller than $1/9$ if $\delta_f < 2/9$:
\begin{theorem}[Coppersmith] 
If $\delta_f < 2/9$, then $f$ is $\delta/2$-linear.
\end{theorem}
One might capture this unusual phenomenon by stating that the function 
$$\alpha(\delta) = \sup_{\delta_f \leq \delta} \epsilon_f$$ is discontinuous, where the
supremum is taken over all groups $G$ and all functions $f$ with domain $G$.  

Instead of proving Coppersmith's stronger result, we prove the following:
\begin{theorem}
If $\delta_f < 1/16$, then $f$ is $2\delta_f$-linear.
\end{theorem}
\begin{remark}
Professor Rubinfeld points out that the first proof of this kind applied only to
infinite groups.  As more proofs came along (using a variety of techniques), they 
increased in generality with respect
to the domains that they applied to -
next came finite cyclic groups, then finite abelian groups, and finally
finite non-abelian groups.  Later results decreased in generality with respect
to the domains, but applied to more general ranges of $\delta$.
\end{remark}

\begin{proof}
We introduce some further definitions and notation.  For any function $q: G \rightarrow F$, 
let $\plurality_{s}[q(s)]$ denote the element of $F$ that occurs most often in the
multiset obtained by applying $q$ to each element of $G$.  Ties are broken arbitrarily. 
Then, define the function $g : G \rightarrow F$ by $g(x)=\plurality_y[f(x+y)-f(y)]$.
Finally, call an element $x\in G$ {\em $\rho$-good} if
$\Pr_y[g(x)=f(x+y)-f(y)] > 1-\rho$.

We now give some claims.
\begin{quote}
\begin{claim}\label{claim1}
If $\rho<1/2$, then $\Pr_x[x \hbox{ is } \rho\hbox{-good and } g(x)=f(x)] > 1-\delta_f/\rho$.
\end{claim}

\begin{claim}\label{claim2}
If $\rho<1/4$ and $x,y$ are $\rho$-good,
\begin{enumerate}
\item $x+y$ is $2\rho$-good
\item $g(x)+g(y)=g(x+y)$.  
\end{enumerate}
\end{claim}

\begin{claim}\label{claim3}
If $\delta_f <1/16$, then $x$ is $4\delta_f$-good $\forall x$.  
\end{claim}

Before we prove these claims, let us see how we can use them to prove the theorem.
If $\delta_f<1/16$, Claim \ref{claim3} implies that $x$ is $4\delta_f$-good
for all $x \in G$.  If we set $\rho$ to any number between $4\delta_f < 1/4$
and $1/2$, then, Claim \ref{claim1} implies that $\Pr_x[g(x)=f(x)] > 1-\delta_f/\rho$.  
In particular, setting $\rho$ arbitrarily close to $1/2$ yields 
$\Pr_x[g(x)=f(x)] > 1-2\delta_f$.  Finally, since $4\delta_f<1/4$, Claim \ref{claim2}
implies that $g$ is in fact linear.  These last two facts imply that $f$ is $2\delta_f$-linear
as desired.  

We now proceed with the proofs of the claims.  

\begin{proofof}{Claim \ref{claim1}}
Suppose $\rho<1/2$.  
Let $\alpha_x = \Pr_y[f(x)\neq f(x+y)-f(y)]$.  Note that if $\alpha_x < 1/2$, then
$f(x)=f(x+y)-f(y)$ with probability strictly greater than 1/2; i.e.,
$f(x) = \plurality_y[f(x+y)-f(y)] = g(x)$.  If $\alpha_x < \rho$, then, we 
have that $x$ is $\rho$-good.  It suffices to show that 
$\Pr[\alpha_x \geq \rho] < \delta_f/\rho$.  But this follows directly from
Markov's inequality\footnote{Markov's inequality states that for any positive  random variable $Y$
and constant $a>0$, $\Pr[Y\geq a\cdot \E[Y]] \leq 1/a$.}, since
\begin{eqnarray*}
\E[\alpha_x] &=& \frac{1}{|G|} \sum_{x\in G} \Pr_y[f(x)\neq f(x+y)-f(y)] \\
&=& Pr_{x,y} [f(x)\neq f(x+y)-f(y)] \\
&=& \delta_f.
\end{eqnarray*}
\end{proofof}

\begin{proofof}{Claim \ref{claim2}}
Assume that $\rho<1/4$ and that $x,y$ are $\rho$-good.  
This means that
\begin{eqnarray}
\label{eq1}\Pr_z[g(y)\neq f(y+z)-f(z)] &\leq& \rho \\
\label{eq2}\Pr_z[g(x)\neq f(x+y+z)-f(y+z)] &\leq& \rho
\end{eqnarray}
The latter equation simply follows from the fact that for any fixed $y$,
the distribution of $z+y$ over uniformly chosen $z$ is again uniform.  
Now let $h = g(x)+g(y)$.
With some basic manipulation, we find:
\begin{eqnarray}
\nonumber && \Pr_z[h \neq f(x+y+z)-f(z)] \\
\nonumber &=& \Pr_z[g(x)+g(y) \neq (f(x+y+z)-f(y+z))+(f(y+z)-f(z))] \\
\label{union}&\leq& \Pr_z[g(x) \neq f(x+y+z)-f(y+z)] + \Pr_z[g(y) \neq f(y+z)-f(z)] \\
\label{use12}&\leq& 2\rho.
\end{eqnarray}
Here, (\ref{union}) follows from the union bound, and (\ref{use12}) follows from
(\ref{eq1}) and (\ref{eq2}).  Thus, we have that 
$\Pr_z[h = f(x+y+z)-f(z)] \geq 1-2\rho > 1/2$, implying that 
$g(x+y)=h=g(x)+g(y)$ and that $x+y$ is $2\rho$-good.  
\end{proofof}        

\begin{proofof}{Claim \ref{claim3}}
Let $x$ be fixed.  Claim \ref{claim2} implies that if there exists some $y$ such that
$y$ and $x-y$ are both $2\delta_f$-good, then $x$ is $4\delta_f$-good.  It suffices to 
show that such a $y$ must exist.  We use the probabilistic method.  Applying Claim 
\ref{claim1} twice with $\rho=2\delta_f$ with the union bound, we find
$$\Pr_y[y \hbox{ and } x-y \hbox{ are } 2\delta_f\hbox{-good}] > 
1-\frac{\delta_f}{2\delta_f} - \frac{\delta_f}{2\delta_f} = 0.$$
Here, we are again making use of the fact that for any fixed $y$,
the distribution of $z+y$ over uniformly chosen $z$ is again uniform.  
\end{proofof}        
\end{quote}        
This proves the three claims, and hence proves the theorem. 
\end{proof}        

\paragraph{Testing properties of convolutions of distributions}
As an aside, observe that if we let $\mathcal{F}$ denote the distribution
induced by selecting $x\in_R G$ and outputting $f(x)$, then the linearity condition
($f(x+y)=f(x)+f(y)$) can be expressed as $\mathcal{F} = \mathcal{F} * \mathcal{F}$;
i.e., $\mathcal{F}$ is equal to its own self-convolution.  It can be shown
that the only distributions that satisfy this relationship are
distributions that are uniform over a subgroup of $G$
(including the 
distribution that puts all weight on zero and the uniform distribution).
It can also be shown that 
tests for $\epsilon$-linearity are intimately related to the problem of
testing whether a distribution $\mathcal{F}$
is {\em approximately} equal to its self convolution (according to some
well chosen metric).  

\section{Fourier Representations}

In this setting, we focus on functions $f:G\rightarrow F$ with $G=\{\pm1\}^n$
and $F=\{\pm1\}$.  Our group operator will be component-wise multiplication 
(represented by $\cdot$).  
%In addition, we define the {\em inner product} of $x$ and $y$, notated 
%$\langle x,y\rangle$, as $\prod_i x_iy_i$.  

It turns out that there are exactly $2^n$ linear functions from $G$ to $F$.  Namely,
for each subset $S \subseteq [1\ldots n]$, the function $\chi_S(x) = \prod_{i\in S} x_i$
is linear.  One can easily check that these functions are in fact linear.

What happens if we apply a single iteration of algorithm $A$ to test the linearity of a function $f$ 
in this setting?  It is easy to see that 
$$f(x)f(y)f(x\cdot y) = \cases{1 & \hbox{if test passes} \cr -1 & \hbox{if test fails}}$$
This suggests the introduction of an indicator function:
$$f'(x,y) = \frac{1-f(x)f(y)f(x\cdot y)}{2} = 
\cases{0 & \hbox{if test passes} \cr 1 & \hbox{if test fails,}}$$
giving us the nice relation
$$\Pr[\hbox{algorithm $A$ fails}] = E_{xy}[f'(x,y)].$$
At this point, we must turn to the tools of Fourier analysis.  For now, we lay
out some of the basic concepts.

We begin with widening our current framework to include functions $f:G \rightarrow F$
with $F=\mathbb{R}$ ($G$, as before, is $\{\pm1\}^n$).  If
we let $\mathbb{G}$ represent the set of all such linear functions, it turns
out that $\mathbb{G}$ is a vector space with dimension $2^n$.  We can 
characterize this vector space with the obvious basis $\{e_a(x)\}_{a\in G}$, where
$$e_a(x) = \cases{1 & \hbox{if $x=a$} \cr 0 & \hbox{else}}.$$
A less obvious, but more useful basis, is $\{\chi_S\ |\ S \subseteq [1\ldots n]\}$.
We conclude these notes by proving orthnormality of this proposed basis (but not the required
spanning property).  First, we need some notion of an inner product.  For this,
we define the inner product of $f$ and $g$ to be 
$$\langle f,g \rangle = \frac{1}{2^n}\sum_{x\in\{\pm1\}^n} f(x)g(x).$$
One can easily verify that this satisfies the required properties of an inner product
(namely, linearity, symmetry, and positive definiteness).  
\begin{quote}
\begin{claim}
$\langle \chi_S,\chi_S\rangle = 1$ for all $S \subseteq [1\ldots n]$.  
\end{claim}
\begin{proof}
Note that $\langle \chi_S,\chi_S\rangle = \frac{1}{2^n}\sum_x \chi_S^2(x)$.
Since $\chi_S^2 \equiv 1$, the sum reduces to $\sum_x 1 = 2^n$, which gives the
desired result.  
\end{proof}

\begin{claim}
$\langle \chi_S,\chi_T\rangle = 0$ for all $S,T \subseteq [1\ldots n]$ with $S \neq T$.
\end{claim}
\begin{proof}
Note that 
\begin{eqnarray*}
\langle \chi_S,\chi_T\rangle &=& \frac{1}{2^n}\sum_x \chi_S(x)\chi_T(x) \\
&=& \frac{1}{2^n}\sum_x \prod_{i\in S} x_i \prod_{j\in T} x_j
\end{eqnarray*}
Now, we may write $S \cup T$ as the disjoint union $S \cup T = (S\cap T) \cup \Delta$,
where $\Delta \neq \emptyset$.  Additionally, if we define the {\em $\Delta$-conjugate} of
$x$, $\Delta(x)$, to be the
element of $G$ taken by switching the $\min(\Delta)$-th coordinate of $x$, 
we may note that summing over all $x$
is equivalent to summing over all $\Delta(x)$.  This is because $G$ can be partitioned
into $\Delta$-conjugate pairs.  The sum then becomes
\begin{eqnarray*}
&& \frac{1}{2^n}\sum_x \prod_{i\in S \cap T} x_i^2 \prod_{j\in \Delta} x_j \\
&=& \frac{1}{2^n}\sum_x \prod_{j\in \Delta} x_j \\
&=& \frac{1}{2^{n+1}}\bigg(\sum_x \prod_{j\in \Delta} x_j + \sum_x \prod_{j\in \Delta} \Delta(x)_j \bigg) \\
&=& \frac{1}{2^{n+1}}\sum_x (1+(-1))\prod_{j\in \Delta \atop j \neq \min(\Delta)} 2x_j \\
&=& 0.
\end{eqnarray*}
\end{proof}
\end{quote}

\end{document}