\documentclass[10pt]{article}
\newtheorem{define}{Definition}
\usepackage{newalg}
%\newcommand{\Z}{{\mathbb{Z}}}
%\usepackage{psfig}

\oddsidemargin=0.15in
\evensidemargin=0.15in
\topmargin=-.5in
\textheight=9in
\textwidth=6.25in

\begin{document}
\input{../preamble.tex}

\lecture{4}{February 15, 2007}{Ronitt Rubinfeld}{Huy Nguyen}

\section{Testing uniformity of a monotone distribution over a totally ordered set}
{\definition Let $D = {1, 2, .., n} = [n]$. D is \underline{monotone} if $p_1 \geq p_2 \geq \cdots \geq p_n$.}

We want to test whether a monotone distribution over a totally ordered set is uniform or far from uniform. Since all totally ordered sets are equivalent, for simplicity, we assume that the domain is [n]. The key idea of the algorithm is to take samples and check if the number of sample in the left half is close to the number of sample in the right half.

{\lemma If $p(\{1\cdots k\}) \leq (1+\epsilon)p(\{k+1\cdots 2k\})$ then
\begin{displaymath}
|p-U_{[n]}| \leq \epsilon
\end{displaymath}}

{\proof $\delta_i \leftarrow |p_i - \frac{1}{n}|$\\
$j \leftarrow$ largest i s.t. $p_i \geq \frac{1}{2k} = \frac{1}{n}$

We only consider the case when $j \leq k$. The case when $j > k$ is similar. 

We define $A_1, A_2, A_3$ as follow:
\begin{displaymath}
A_1 = \sum_{i \leq j} \delta_i
\end{displaymath}
\begin{displaymath}
A_2 = \sum_{j < i \leq k} \delta_i
\end{displaymath}
\begin{displaymath}
A_3 = \sum_{n \geq i > k} \delta_i
\end{displaymath}

$A_1 = A_2 + A_3$ because the sum of distribution p and the uniform distribution are both 1.

\begin{eqnarray}
  p(\{1\cdots k\}) & = & \frac{1}{2} + A_1 - A_2\\
  p(\{k+1 \cdots n\}) & = & \frac{1}{2} - A_3
\end{eqnarray}

Substitute (1) and (2) into the assumption of the lemma, we get:

\begin{displaymath}
1+\epsilon \geq \frac{\frac{1}{2}+A_1-A_2}{\frac{1}{2}-A_3} = \frac{\frac{1}{2}+A_3}{\frac{1}{2}-A_3}
\end{displaymath}

Solve for $A_3$, we get:

\begin{displaymath}
A_3 \leq \frac{\epsilon}{4+2\epsilon}
\end{displaymath}

$A_2$ summed over fewer elements than $A_3$ ($<k$ vs $=k$). 
Since $\delta_{j+1}\leq \delta_{j+2} \leq \cdots $, each element in $A_2$ is smaller then any element in $A_3$. Therefore, 
\begin{eqnarray*}
  A_2 \leq A_3 \leq \frac{\epsilon}{4+2\epsilon} \\
  A_1 = A_2 + A_3 \leq \frac{\epsilon}{2+\epsilon}
\end{eqnarray*}

Hence,
\begin{displaymath}
  |p-U_{[n]}| = A_1+A_2+A_3 \leq \frac{\epsilon}{1+\frac{\epsilon}{2}} \leq \epsilon
\end{displaymath}\qed}

Using the lemma, we have the following algorithm for testing uniformity:

\begin{algorithm}{Test-Uniformity}{p}
  \text{Pick m samples from p,} m = \Theta(\epsilon^{-2})\\
  l \= \text{number of samples in }1\cdots k\\
  r \= \text{number of samples in }k+1\cdots n\\
  \begin{IF}{l \geq (1+\frac{\epsilon}{8})r}
    \RETURN fail
    \ELSE
    \RETURN pass
  \end{IF}
\end{algorithm}

Now we analyze the algorithm. We need to show that it return the correct result both when P is uniform and when P is far from uniform.

\underline{Case 1}: P is uniform. $E[l] = E[r] = m/2$.

\underline{Case 2}: P is $\epsilon$ far from uniform.

\begin{eqnarray*}
  p(\{1\cdots k\}) & \geq & p(\{k+1\cdots n\})(1+\epsilon)\\
  E[l] & \geq & m(\frac{1}{2}+\frac{\epsilon}{4+2\epsilon})
\end{eqnarray*}

The rest of the proof is just computing the variance and applying Chernoff bound to get the desired results.

\section{Testing uniformity of a monotone distribution on Boolean cube}
In the previous section, we had an algorithm that can, in constant time, test the uniformity of a monotone distribution on a totally ordered set. It is desirable to generalize the problem to partially ordered set. In this section, we will consider the special case of that general problem. The partially ordered set we will look at is a Boolean cube.

{\definition A Boolean cube is the set $\{\pm1\}^n$ for some n.}

{\definition $x, y \in \{\pm1\}^n$. 
x is \underline{less than or equal to} y iff $~\forall i~ x_i \leq y_i$.}

{\definition A distribution P is \underline{monotone} iff $~\forall x\leq y, p_x \leq p_y$}

{\noindent \bf Notation.}
\begin{eqnarray*}
  bias(x) & = & \sum_i x_i\\
  \delta(x) & = & 2^n p(x) - 1\\
  \epsilon & = & |p-U|_1
\end{eqnarray*}
Note $\frac{\delta(x)}{2^n} = p(x) - \frac{1}{2^n}$

{\observation $\delta$ is monotone.}
{\observation $\sum_x \delta(x) = \sum_x (2^n p(x)-1) = 2^n(\sum_x p(x))-2^n = 0$.}
{\observation $\sum_x |\delta(x)| = 2^n |p-U|_1 = 2^n\epsilon$}

Now we compute the expectation of the bias:
\begin{eqnarray*}
  E_p[bias] & = &\sum_x p(x)\sum_{i=1}^n x_i\\
  & = & \sum_x \sum_{i=1}^n (\frac{\delta(x)+1}{2^n}x_i\\
\end{eqnarray*}
Since $\sum_{i=1}^n\sum_x x_i = 0$, we have:
\begin{eqnarray*}
  E_p[bias] & = & \sum_x \sum_{i=1}^n \frac{\delta(x)}{2^n}x_i\\
  & = & \underbrace{\frac{1}{2^n} \sum_x \sum_{i=1}^n \delta(x)x_i}_{*}
\end{eqnarray*}
{\lemma $E_p[bias] \geq \epsilon$.}

\proof Let us define a few quantities:
\begin{eqnarray*}
  P & = & \{x|\delta(x) \geq 0\}\\
  Q & = & \{x|\delta(x) < 0\}
\end{eqnarray*}

Proof will be continued next time.


\end{document}