\documentclass[10pt]{article}
\newtheorem{define}{Definition}
\usepackage{amssymb, amsmath}
%\newcommand{\Z}{{\mathbb{Z}}}
%\usepackage{psfig}
\oddsidemargin=0.15in
\evensidemargin=0.15in
\topmargin=-.5in
\textheight=9in
\textwidth=6.25in
\begin{document}
\input{preamble.tex}
\lecture{16}{April 5, 2007}{Ronitt Rubinfeld}{Adriana Lopez}
Recall from last lecture:
\begin{definition}
The {\bf density} between two disjoint non-empty sets $A, B \subseteq V$ is given by the formula $d(A,B) = \frac{e(A,B)}{|A||B|}$, where $e(A,B)$ is the number of edges between $A$ and $B$.
\end{definition}
\begin{definition}
We say $A,B \subseteq V$ are {\bf $\gamma$-regular} if $\forall A' \subseteq A, B' \subseteq B$ such that $|A'| \geq \gamma|A|, |B'| \geq \gamma|B|$, the following inequality holds: $|d(A',B') - d(A,B)| < \gamma$.
\end{definition}
\begin{lemma} \label{SRL} {\bf (Szemeredi's Regularity Lemma)} For all $m$ and for any $\epsilon > 0$, there exists $T=T(m, \epsilon)$ such that if $G = (V,E)$ with $|V| > T$, and if $A$ is an equipartition of $V$ into $m$ sets, then there exists an equipartition $B$, a refinement of $A$, with $k$ sets such that $m \leq k \leq T$ and at most $\epsilon {k \choose 2}$ set pairs are not $\epsilon$-regular.
\end{lemma}
This lemma says that any large enough graph can in some sense have a constant-size description that makes it look random with respect to the densities of set pairs. The constant $T$, however, is very large:
$$T \approx 2^{2^{2^{2^{\cdots}}}} \big\} {\mbox {\tiny $\frac{1}{\epsilon^5}$ times}}$$ We will use the regularity lemma to prove the existence of a constant-time tester for triangle freeness, and therefore the running time of the tester will just as large (constant, but large).
\section{Testing Triangle Freeness}
Recall the following lemma from last lecture. We will use this lemma, along with theorem \ref{tri_thm}, to create a constant-time tester for triangle freeness.
\begin{lemma} \label{find_tri}
For all $\eta > 0$, there exist $\gamma^\triangle = \gamma^\triangle(\eta)$ and $\delta^\triangle= \delta^\triangle(\eta)$ such that if $A,B,C$ are disjoint subsets of $V$ such that each pair is $\gamma^\triangle$-regular and has density at least $\eta$, then $G$ contains at least $\delta^\triangle|A||B||C|$ distinct triangles (each triangle having one vertex in each of $A,B,C$).
\end{lemma}
\begin{theorem} \label{tri_thm}
For all $\epsilon$, there exists $\delta = \delta(\epsilon)$ such that $\forall G=(V,E)$ such that $|V|=n$ and $G$ is $\epsilon$-far from triangle-free in the adjacency matrix model (ie. we must remove at least $\epsilon n^2$ edges to make $G$ triangle-free), then $G$ has at least $\delta {n \choose 3}$ triangles.
\end{theorem}
\begin{corollary}
The exists a one-sided property tester $T$ for triangle-freeness whose running time is independent of the size of the graph $G$.
\end{corollary}
\begin{proof}
Consider the following tester $T$:
\begin{itemize}
\item Do $O(\delta(\epsilon)^{-1})$ times:
\begin{itemize}
\item Pick $v_1, v_2, v_3$ independently and uniformly at random from $V$.
\item If the chosen vertices form a triangle, reject and halt.
\end{itemize}
\item If tester hasn't halted, pass.
\end{itemize}
If $G$ is triangle-free then no matter what vertices are chosen, they will never form a triangle and thus, the $T$ will output pass. If, however, $G$ is $\epsilon$-far from being triangle-free, then by theorem \ref{tri_thm} we know that $G$ has at least $\delta {n \choose 3}$ distinct triangles. Thus,
$$\Pr[\mbox{pick a triangle in one round}] \geq \frac{\delta {n \choose 3}}{{n \choose 3}} \geq \delta$$
Let $c$ be the constant implied by the expression $O(\delta(\epsilon)^{-1})$, so that
$$\Pr[\mbox{$T$ outputs pass}] = \Pr[\mbox{never see a triangle}] \leq (1 - \delta)^\frac{c}{\delta} \tilde e^{-c}< \frac{1}{4}$$
(the last inequality follows from choosing $c$ sufficiently large).
\end{proof}
\begin{proofof}{Theorem \ref{tri_thm}} Take an arbitrary partition of $G$ into $\frac{5}{\epsilon}$ sets, and use Szemeredi's regularity lemma (lemma \ref{SRL}) to find a partition $\{V_1, V_2, \cdots, V_k\}$ such that $\frac{5}{\epsilon} \leq k \leq T(\frac{5}{\epsilon}, \epsilon')$ for $\epsilon' = \min\{\frac{\epsilon}{5}, \gamma^\triangle(\frac{\epsilon}{5})\}$ and such that at most $\epsilon' {k \choose 2}$ pairs are not $\epsilon$-regular.
First notice that the number of nodes in each set is roughly $\frac{n}{k}$ (we will assume from now on that it is exactly $\frac{n}{k}$). Thus,
\begin{equation} \label{size_ineq}
\frac{\epsilon n}{5} \geq \frac{n}{k} \geq \frac{n}{T(\frac{5}{\epsilon}, \epsilon')}
\end{equation}
Now construct a graph $G'$ from $G$ as follows:
\begin{enumerate}
\item delete edges of $G$ internal to any $V_i$ \\
(\# edges deleted in step 1 $\leq n\frac{n}{k} \leq \frac{\epsilon n^2}{5}$)
\item delete edges between non $\epsilon'$-regular pairs $V_i,V_j$ \\
(\# edges deleted in step 2 $ \leq \epsilon' {k \choose 2} (\frac{n}{k})^2 \leq \frac{\epsilon'n^2}{2} \leq \frac{\epsilon n^2}{10} $. This is the reason why we want $\epsilon' \leq \frac{\epsilon}{5}$)
\item delete edges between pairs whose density is less than $\frac{\epsilon}{5}$ ("low density" pairs) \\
(\# edges deleted in step 3 $ \leq \displaystyle\sum_{\substack{\mbox{\scriptsize low density} \\ \mbox{\scriptsize pairs}}} \Big(\frac{\epsilon}{5}\Big) \Big(\frac{n}{k}\Big)^2 \leq \Big(\frac{\epsilon}{5}\Big) {n \choose 2}$. The last inequality follows from the fact that $\sum_{\mbox{\scriptsize all pairs}} (\frac{n}{k})^2 \leq {n \choose 2}$).
\end{enumerate}
The total number of edges we delete is therefore at most $\frac{3\epsilon n^2}{5} + (\frac{\epsilon}{5}){n \choose 2} < \epsilon n^2$. Since $G$ is $\epsilon$-far from being triangle-free, this tells us that $G'$ has at least one triangle. We will prove that in fact, $G'$ has many triangles. Let $x,y,z$ be the vertices of such triangle. Then there exist $i,j,l$ such that $x \in V_i, y \in V_j, z \in V_l$. Furthermore, it must be the case that $i,j,l$ are all distinct since we deleted all edges internal to any $V_i$ (step 1). We also know that all possible pairs between $V_i, V_j, V_l$ have density at least $\frac{\epsilon}{5}$ because we deleted all low density pairs (step 3), and they are $\epsilon'$-regular because we deleted all non $\epsilon'$-regular pairs (step 2). We can therefore use lemma \ref{find_tri} with $\eta = \frac{\epsilon}{5}$. Thus,
\begin{equation*}
\mbox{\# triangles in $G$} \geq \delta^\triangle\Big(\frac{\epsilon}{5}\Big)|V_i||V_j||V_l| \geq \delta^\triangle\Big(\frac{n}{T(\frac{5}{\epsilon, \epsilon'})}\Big)^3 \geq \delta' n^3
\end{equation*}
where $\delta'$ is a function of $\epsilon$ but not $n$ (such function exists). The second inequality follows from equation \ref{size_ineq}.
\end{proofof}
We have shown that we can test triangle freeness in constant time. However, this constant is very large since it depends on the ``tower" constant of the regularity lemma. It is still open whether we can test triangle freeness in ``reasonable" constant time.
\section{Testing in Constant Time}
In the adjacency matrix model, what can be tested in constant time with one-sided error? The answer is all {\it hereditary graph properties}, those that are closed under vertex removal but not edge removal (although vertex removal implies the removal of all edges adjacent to it). Examples of these properties are:
\begin{itemize}
\item all monotone graph properties
\begin{itemize}
\item triangle freeness
\item bipartiteness
\item planarity
\item $k$-colorability
\end{itemize}
\item perfect graph
\item interval graph
\end{itemize}
We also want to know, what can we test in constant time with two-sided error?
In this case, we can test properties that can
be reduced to one of finitely many Szemeredi-type partitions.
%not sure about this vocabulary
For example, there exists a constant-time algorithm that answers the following question: ``given a graph $G$, a constant $c$, and densities $\{d_{ij}\}$, is it possible to partition $G$ into $c$ parts $V_1, V_2, \ldots, V_c$ such that for $i \neq j$, $d(V_i, V_j) = d_{ij}$?
\end{document}