
\documentclass[10pt]{article}
\usepackage{amssymb}
\newtheorem{define}{Definition}
%\newcommand{\Z}{{\mathbb{Z}}}
%\usepackage{psfig}

\oddsidemargin=0.15in
\evensidemargin=0.15in
\topmargin=-.5in
\textheight=9in
\textwidth=6.25in

\begin{document}
\input{preamble.tex}

%{lecture number}{lecture date}{Ronitt Rubinfeld}{Your name}
\lecture{0}{May 8, 2007}{Ronitt Rubinfeld}{Erdong Chen}

%%%% body goes in here %%%%
\section{Linear Test}

Last time, we showed: over some other domain, $\exists f$ s.t.
$\delta_f = 2/9$, but $f$ is $2/3$-far from linear.

This time, we will consider boolean functions, and analyze it via
fourier analysis.

For boolean functions $f: \left\{\pm 1\right\}^n \rightarrow
\left\{\pm 1\right\}$ , we define $\delta_f = Pr[f(x)f(y)\ne
f(x\odot y)]$, where $\odot$ is coordinate-wise multiplication.

The multiplication rules:

\begin{tabular}{|l|c|c|} \hline
   X            &  1 & -1\\ \hline
   1 &  1 & -1  \\ \hline
   -1 &  -1 & 1  \\ \hline
\end{tabular}

Then we will define \textbf{linear function}: $\chi_S(x) =
\sum_{i\in S}x_i$ for any $S\subset \left\{1..n\right\}$.

The following analysis are based on the above domains and functions.

It is easy to see the following,

\begin{equation}
f(x)f(y)f(x\odot y) =  \left\{ \begin{array}{ll}
1 \textrm{ if test accepts on }x,y \\
-1 \textrm{ if test fails on }x,y \\
\end{array} \right.
\end{equation}

Equivalently, we have
\begin{equation}
\frac{1- f(x)f(y)f(x\odot y)}{2} =  \left\{ \begin{array}{ll}
0 \textrm{ if test accepts on }x,y \\
1 \textrm{ if test fails on }x,y \\
\end{array} \right.
\end{equation}

We will prove that this is an indicator variable, so we have the
rejection probability: $\delta_f = E[\frac{1- f(x)f(y)f(x\odot
y)}{2}]$.

\section{Fourier analysis on hypercube}
Let's consider the first class of functions, vector space of all
$n$-bit functions mapping to $\Re$, $G = \left\{g|g:\left\{\pm
1\right\}^n\rightarrow \Re \right\}$.

We also have $\textrm{dim}(G) = 2^n$, and we also know that all
functions can be written in combination of $2^n$ basis functions(We
will not prove this in the class).

Let's see some examples.

\textbf{Example 1}

indicator function:
\begin{equation}
e_a(x) =  \left\{ \begin{array}{ll}
1  \textrm{ if  }x = a \\
0 \textrm{ o.w. }\\
\end{array} \right.
\end{equation}

A function  $g$(can be represented by a $2^n$-bit vector) is a
weighted combination of basis functions, i.e., $g=\sum_{a}
e_a(x)g(a)$, where $g(a)$ is a scalar.

\textbf{Example 1}

Inner product: $<f,g> = \frac{1}{2^n} \sum_{x\in\left\{\pm
1\right\}^n} f(x)g(x)$.

Basis $\chi_S$ is orthonogonal w.r.t. inner product.

First, $<\chi_S,\chi_S> = \frac{1}{2^n} \sum_{x\in\left\{\pm
1\right\}^n} \chi_S(x)\chi_S(x) = 1$.

Secondly, if $S\ne T$, we have
\[<\chi_S,\chi_T> = \frac{1}{2^n} \sum_{x\in\left\{\pm 1\right\}^n}
\chi_S(x)\chi_T(x) = \frac{1}{2^n} \sum_{x\in\left\{\pm 1\right\}^n}
\prod_{i\in S} x_i \prod_{i\in T} x_i \]\[ = \frac{1}{2^n}
\sum_{x\in\left\{\pm 1\right\}^n} \prod_{i\in S\Delta T} x_i (*)
\]

If $i\in S \cap T$, $x_i^2 = 1$, so drop out. The symmetric
difference $S \Delta T$ is non-empty, because $S\ne T$.

pick $j\in S\Delta T$, we have
\[(*) = \frac{1}{2^n} \sum_{x\in\left\{\pm 1\right\}^n}
\prod_{j \in S\Delta T} (x_j^{(1)} + x_j^{(-1)})\], where
\[x_j^{(1)} = (x_1,x_2,\ldots,x_{j-1},1,x_{j+1},\ldots,x_n)\]\[x_j^{(-1)} =
(x_1,x_2,\ldots,x_{j-1},-1,x_{j+1},\ldots,x_n)\]

Thus, we have \[(*) = \frac{1}{2^n} \sum_{\textrm{ pairs
}x^{(1)},x^{(-1)} w.r.t. j} (1 * \prod_{i \in S\Delta T \setminus
\left\{j\right\}} x_i^{(1)} + (-1) * \prod_{i \in S\Delta T
\setminus \left\{j\right\}} x_i^{(-1)})\].

Because $  x_i^{(1)} = x_i^{(-1)}$ for$i \in S\Delta T \setminus
\left\{j\right\}$,we have $(*) = 0$.

Thus, any function $f$ is expressible as linear combination of
${\chi_S}$, a.k.a. "Fourier coefficients".

Def: $\hat{f}(S)\equiv <f,\chi_S> = \frac{1}{2^n} \sum_{x} f(x)
\chi_S(x)$.

\begin{theorem}
$f(x) = \sum_{S} \hat{f}(S) \chi_S(x)$
\end{theorem}

We first consider the Fourier coefficients of linear functions. $f$
is linear $\leftrightarrow \exists S \subseteq [n]$ s.t. $\hat{f}(S)
= 1$ and $\forall T \ne S, \hat{f}(T) = 0$.

\begin{lemma}
$forall S \subseteq [n], \hat{f}(S) = 1- 2 \textrm{dist}(f,\chi_S)$,
 where $\textrm{dist}(f,\chi_S) = \textrm{Pr}_x[f(x)\ne \chi_S(x)]$.
\end{lemma}
\begin{proof}
$2^n \hat{f}(S) = \sum f(x) \chi_S(x) = \sum_{x \textrm{ s.t. }f(x)
= \chi_S(x)} 1 + \sum_{x \textrm{ s.t. } f(x) \ne \chi_S(x)} (-1) =
2^n (1 - \textrm{dist}(f,\chi_S(x))) - 2^n \textrm{dist}(f,\chi_S) =
2^n(1-2 \textrm{dist}(f,\chi_S(x)))$.
\end{proof}

\textbf{Example 2}

$ \forall x, f(x) = -1$

$\forall S \ne \emptyset$, for $j \in S$, $f(x_1,
x_2,\ldots,x_{j-1},1,x_{j+1},\ldots,x_n)=a$, and $f(x_1,
x_2,\ldots,x_{j-1},-1,x_{j+1},\ldots,x_n)=-a$.

thus $\textrm{dist}(f,\chi_S) = 1/2$, so $\hat{f}(S) = 0$.

On the other hand, $\textrm{dist}(f,\chi_\emptyset) = 1,
\hat{f}(\emptyset) = -1$.

\begin{lemma}
Two distinct linear functions differ on exactly half points
\end{lemma}
\begin{proof}
for $S\ne T, 0 = <\chi_S,\chi_T> = 1 - 2
\textrm{dist}(\chi_S,\chi_T)$, so $\textrm{dist}(\chi_S,\chi_T) =
1/2$
\end{proof}

$<f,g> = <\sum \hat{f}(S) \chi_S, \sum \hat{g}(T), \chi_T> =
\sum_{S,T} \hat{f}(S) \hat{g}(T) <\chi_S,\chi_T> = \sum_S
\hat{f}(S)\hat{g}(S)$, it is called "\textbf{Plancherel}".

$<f,f> = \sum_S \hat{f}(S)^2$, it is called "\textbf{Parseval's
Identity}".

$f$ is boolean(i.e. $f(x) \in \left\{\pm 1\right\},<f,f> =
\frac{1}{2^n} \sum \hat{f}(x)^2 = 1$. We'll call it "\textbf{Boolean
Parseval's}"



\begin{theorem}
$f$ is $\delta_f$-close to some linear function.
\end{theorem}
\begin{proof}

If $S=T=U,\chi_S(x) \chi_T(y) \chi_U(x\odot y) = \prod_{i\in S} x_i
y_i (x\odot y)_i = \prod_{i\in S} x_i y_i x_i y_i = 1$.

If $\neg (S=T=U)$, then either $S \ne U$ or $T \ne U$.

$ E_{x,y} [\chi_S(x) \chi_T(y) \chi_U(x \odot y)] = E_{x,y}
[\prod_{i\in S} x_i \prod_{j \in T} y_j, \prod_{k \in U} x_k \prod{l
\in U} y_l] = E_{x,y} [\prod_{i \in S \Delta U} x_i \prod_{j \in
T\Delta U} y_j] = E_x[\prod_{i \in S \Delta U} x_i] E_y[\prod_{j \in
T \Delta U} y_j] (**)$

If $S \ne U$, then $S\Delta U \ne \emptyset$, so $E_x[\chi_{S \Delta
U}(x)] = 0$. If $T \ne U$, then $E_y[\chi_{T \Delta U}(y)] = 0$.

Thus, $(**) = 0$ because either $S \ne U$ or $T \ne U$.

\[E_{x,y}[f(x) f(y) f(x\odot y)] = E_{x,y}[\sum_{S} \hat{f}(S) \chi_S(x) \sum_{T} \hat{f}(T)\chi_T(x) \sum_{U} \hat{f}(U) \chi_U(x\odot
y)]\]
\[= E_{x,y}[\sum_{S,T,U} \hat{f}(S) \hat{f}(T) \hat{f}(U) \chi_S(x) \chi_T{x} \chi_U(x\odot
y)]\]
\[= \sum_{S,T,U} \hat{f}(S) \hat{f}(T) \hat{f}(U) E_{x,y}[\chi_S(x) \chi_T{x} \chi_U(x\odot
y)]\] because the proof above
\[= \sum_{S} \hat{f}(S) ^3\]
\[\le (\max_S \hat{f}(S)) \sum_{S'} \hat{f}(S')^2 \] by Boolean Parseval's, we have $\hat{f}(S')^2 =
1$,
\[=\max_S \hat{f}(S)\]
\[=1 - 2 \min_S \textrm{dist} (f,\chi_S)\]

so $\delta_f \ge 1/2 - 1/2(1 - 2 \max_S \textrm{dist}  (f, \chi_S))
= \min_S \textrm{dist}(f,\chi_S)$.
\end{proof}


\end{document}
