\documentclass[10pt]{article}
\usepackage{amsfonts}
\usepackage{url}
\usepackage{graphicx}
\usepackage{amsfonts}
\usepackage{amssymb}
\usepackage{amsmath}
\usepackage{latexsym}
\usepackage{subfigure}
\def\pcp{\text{PCP}}
\def\st{\text{ s.t. }}
\def\acc{\text{ accepts }}
\def\rej{\text{ rejects }}
\def\log{\text{log}}
\def\np{\text{NP}}
\def\yes{\text{Yes}}
\def\no{\text{No}}
\def\sat{\text{SAT}}
\def\max{\text{MAX}}
\def\e{\epsilon}
\def\true{\text{TRUE}}
\def\false{\text{FALSE}}
\def\unsat{\text{UNSAT}}
\def\min{\text{min}}
\def\ggc{\text{GGC}}


\input{preamble.tex}
%\input{orourkemac.tex}
%\usepackage{epsfig}

\begin{document}


\lecture{18}{April 18, 2007}{Madhu Sudan}{Nadia Benbernou}
\section{Probabilistically Checkable Proofs (PCP)}
 The goal of a probabilistically checkable proof is to  verify a proof by looking at only a small number of bits, and probabilistically decide whether to accept or reject.  The two resources which PCPs rely on are randomness and queries.
A Restricted $(r(n),q(n),a(n))$ PCP verifier is a probabilistic polynomial time verifier with oracle access to a proof $\pi$ that 
\begin{enumerate}
\item uses at most $r(n)$ random bits.
\item makes $q(n)$ queries to $\pi$.
\item expects answers of size $a(n)$.
\end{enumerate}

\begin{definition}
A language $L$ is in \emph{$\pcp_{s}[r(n),q(n),a(n)]$} if there exists a restricted $(r(n),q(n),a(n))-$\emph{PCP} verifier $V$ with soundness parameter $s$ such that:
\begin{align*}
&x\in L \Rightarrow \exists\,\pi\st \Pr[V^\pi(x) \acc]=1\\
&x\not\in L \Rightarrow \forall\, \pi,\,\,\,\,\,\,\,\,\,\,\, \Pr[V^\pi(x) \acc]< s.
\end{align*}  
\end{definition}
\begin{theorem}[PCP Theorem]
There exists a global constant $Q$ such that $\forall L\in\np$ there is a constant $c$ such that $L\in\,$\emph{ PCP}$_{1/2}[c\,\log\ n,Q,2]$
\end{theorem}
The PCP Theorem was first proved by Arora, Safra, Arora, Lund, Motwani, Sudan, and Szegedy, then later by Dinur.  

It is easy to see that 
$\np = \cup_{c\in\N}\pcp_{0}[0,n^c,2]$,
since the proof of membership to a language in $\np$ is polynomial in size, so can just query entire proof and then accept or reject.  
We also have $\np = \cup_{c\in\N}\pcp_{1/2}[c\,\log\, n,n^c,2]$,
to see the forward inclusion that $\np\subseteq \cup_{c\in\N}\pcp_{1/2}[c\,\log\, n,n^c,2]$ can simulate $\log\,n$ bits of randomness (just enumerate over all random strings, count the number of accepting and rejecting configurations, and then output decision). 
\section{MAX-k-SAT}
\begin{definition}
The promise problem \emph{MAX-k-SAT} is given by:
\begin{align*}
\Pi_{\yes} &= \{\phi\,|\,\phi \text{ is a $k$-cnf formula and all clauses can be simulaneously satisfied}\}\\
\Pi_{\no} &= \{\phi\,|\,\phi \text{ is a $k$-cnf formula and any assignment satisfies $<1-\epsilon$ fraction of the clauses}\}
\end{align*}
\end{definition} 
\begin{corollary}
There are constants $k,\epsilon>0$ such that $\max-k-\sat$ is NP-hard to approximate within $1-\epsilon$.
\end{corollary}

Applying the PCP-theorem to SAT.  There is a proof system $\pi$ such that verifier can query this string in several locations, and if it says one all the time, then $\phi$ is satisfiable, and if it says zero at least $1/2$ the time, then $\phi$ is unsatisfiable.  
Given a formula $\phi$ as input, we'd like to output a $\max-k-\sat$ instance that is in $\Pi_\yes$ if $\phi$ is satisfiable and is in $\Pi_\no$ if the formula is unsatisfiable.  Enumerate all random strings of length $r(n)$:
$(r_1,\dots,r_{2^{r(n)}})$.
On random string $r_i$, the verifier queries some locations of $\pi$ and computes some function $f_i$ of them, and outputs $0$ or $1$. The function $f_i$ depends only on a constant number of variables $Q$ (the number of queries to $\pi$).
% These functions have the property that if $x$ is in the language, then there is a proof $\pi$ which satisfies all $f_i$ simultaneously.  If $x$ is not in the language, for any proof $\pi$, at least half of the $f_i$ will output 0.

For each $i$, take $f_i$ and write it as a $Q$-cnf formula $\psi_{f_i}$.  $f_i$ is a function of $Q$ variables, so there are at most $2^Q$ clauses in $\psi_{f_i}$.  Combining these formulae over all $i$ into the expanded boolean formula $\psi=\psi_{f_1}\wedge\dots\wedge\psi_{f_{2^{r(n)}}}$, there are at most $2^{r(n)}\cdot 2^Q$ clauses in $\psi$.  These clauses are in the variables of the proof.        
If $\phi$ is satisfialbe, then there is a proof $\pi$ for which all of the $f_i$'s will accept, and hence each clause in $\psi$ is satisfied.  
And if $\phi$ is not satisfiable then for all proofs $\pi$, at least $1/2$ of the $f_i$'s will reject.  Hence for at least half of the $f_i$, there is at least one clause out of the $2^Q$ clauses in $\phi_{f_i}$ which is false.  Hence $\forall \pi$, at least $\frac{1}{2}\cdot\frac{1}{2^Q}$ fraction of the clauses of $\psi$ are unsatisfied.   Thus we have an algorithm which given a formula $\phi$ as input outputs a $\max-Q-\sat$ instance.  


\section{Generalized Graph Coloring (GGC)}
A $GGC$ instance is a $4$-tuple $(V,E,\Sigma,\{c_e\}_{e\in E})$ where $V$ is set of vertices, $E$ is set of edges, $\Sigma$ is set of colors, and $c_e:\Sigma \times \Sigma \rightarrow \{\true,\false\}$ is a constraint on edge $e$.
For example, in a $3$-coloring of a graph, the constraint $c_e$ on each edge $e=(v_i,v_j)$ would just be $A(v_i)\neq A(v_j)$ where $A:V\rightarrow\{0,1,2\}$ is the color assignment to the vertices.

Let $G$ be a GGC instance.  $G$ is satisfiable if $\exists A: V\,\rightarrow\,\Sigma$ such that $\forall e=(v_i,v_j)\in E,\,c_e(A(v_i),A(v_j))=\true.$
The interesting parameter for these graphs is the unsatisfiability of $G$.
Define $$\unsat(G):=\min_{A:V\rightarrow\Sigma}\frac{\# \text{ of edges }(v_i,v_j) \st c_e(A(v_i),A(v_j))=\false }{|E|}.$$
Suppose exists a polynomial transformation $T$ that takes Boolean formulae to $\ggc(a)$ instances (where $a$ is the number of colors) such that:  
\begin{itemize}
\item $\phi\in\sat\,\Rightarrow\,T(\phi)$ is satisfiable (i.e. $\unsat(T(\phi))=0$)
\item  $\phi\not\in\sat\,\Rightarrow\,\unsat(T(\phi))>\e$
\end{itemize}
 Then $\np\subseteq\cup_{c\in\N}\pcp_{1-\e}(c\,\log\,n,2,a)$.  To see why this is true:  the proof is the coloring $A$.  The verifier picks a random edge $(v_i,v_j)$ and queries the $2$ elements $A(v_i)$ and $A(v_j)$, then checks whether this assignment $(A(v_i),A(v_j))$ satisfies the constraint $c_{(v_i,v_j)}$.  

\section{Dinur's Main Theorem}
\begin{theorem}
There is a polynomial time transformation $T: \ggc(16)\rightarrow \ggc(16)$ and an $\alpha>0$ such that:
\begin{itemize}
\item If $G$ is satisfiable, then $T(G)$ is satisfiable.
\item If $\unsat(G)<\alpha$, then $\unsat(T(G))>2\cdot\unsat(G)$.
\item Size$(T(G))=O(|G|)$.
\end{itemize}
\end{theorem}
Initially $G$ may have $\unsat(G)\in{0,1/n^2}$ (i.e. there is a coloring $A$ for which all constraints are satisfied, or for all colorings $A$ there is at least one constraint out of the $n^2$ constraints which is not satisfied--note that $n^2$ is an upper bound on the number of constraints since number of edges is at most $n^2$).  Apply $T$ to $G$ once we amplify this gap to $\unsat(T(G))\in\{0,2/n^2\}$. Apply $T$ a second time to get $\unsat(T(T(G)))\in\{0,4/n^2\}$, a third time to get $\unsat(T\circ T\circ T(G))\in\{0,8/n^2\}$, ..., a logarithmic number of times to get $\unsat(T\circ\dots\circ T(G))\in\{0,\e\}$ for a constant $\e$.  

\begin{definition}
A hypergraph is \emph{$q$-uniform} if each hyperedge involves exactly $q$ vertices.  
\end{definition}
\begin{lemma}
There exists a transformation $$T_1: \ggc(c \text{ colors},q-\text{uniform})\rightarrow \ggc(2 \text{ colors},[\log\,c]\cdot q-\text{uniform})$$ such that $$\unsat(T_1(G))=\unsat(G).$$
\end{lemma}
\begin{proof}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%Figure Begin
\begin{figure}[htbp]
\centering
\includegraphics[width=0.75\linewidth]{Scribe2.eps}
\caption{}
\figlab{T1}
\end{figure}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%Figure End
For a vertex $v_i$ of a hyperedge in $G$ with $A(v_i)\in\{1,2,\dots,c\}$, map it to $x$ vertices $(v'_{i_1},\dots,v'_{i_x})$where  $x=x_1\dots x_{\log\,A(v_i)})$ is the binary representation the color $A(v_i)$ (hence $x$ is at most $\log\,c$) and assign each vertex $v'_{i_j}$ the color corresponding to its bit $x_j$ in the binary representation of $c$.  See Fig.~\figref{T1}.  Hence a $q$-hyperedge which is $c$-colored maps to a hyperedge containing $o(\log\,c)\cdot q$ vertices which are $2$-colored. 
\end{proof}

\begin{lemma}
There exists a transformation 
$$T_2: \ggc(2 \text{ colors},q-\text{uniform})\rightarrow \ggc(2 \text{ colors},3-\text{uniform})$$ such that:
$$\unsat(G)=0\,\Rightarrow\,\unsat(T_2(G))=0,$$
$$\unsat(T_2(G))>\frac{1}{2^{q+2}}\cdot\unsat(G).$$
\end{lemma} 

\begin{lemma}
There exists a transformation 
$$T_3: \ggc(c \text{ colors},q-\text{uniform})\rightarrow \ggc(c^q \text{ colors},2-\text{uniform})$$ such that:
$$\unsat(G)=0\,\Rightarrow\,\unsat(T_3(G))=0,$$
$$\unsat(T_3(G))>\frac{1}{q}\unsat(G).$$
\end{lemma}
\begin{proof}
Given a $c$-coloring of a $q$-uniform hypergraph $G$, we create a graph $T_3(G)$ with vertices consisting of $V(G)$ and an additional vertex corresponding to each hyperedge of $G$.  We join each hyperedge vertex $e_h$ to the $q$ vertices involved in the hyperedge $h=(v_1,\dots,v_q)$.  So the number of edges in $T_3(G)$ is $q\cdot|E|$, where $|E|$ is number of edges in $G$.  See Fig.~\figref{T3}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%Figure Begin
\begin{figure}[htbp]
\centering
\includegraphics[width=0.75\linewidth]{Scribe.eps}
\caption{}
\figlab{T3}
\end{figure}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%Figure End

And we assign ``vertex set'' vertices one of $c$ colors, and hyperedge vertices $e_h$ a $q$-tuple of colors where each position of the tuple can take on one of $c$ colors.   Let $A':V\rightarrow [c]\times\dots\times[c]=[c]^q$ be a $c^q$ coloring of $T_3(G)$.   

For an edge $(v_{i},e_{h})$ in $T_3(G)$ where $h=(v_{1},\dots,v_{i},\dots,v_{q})$, define the edge constraint for $T_3(G)$ to be:

\begin{enumerate}
\item $A'(v_{j})$ is the color corresponding to the $j^{th}$ position of $A'(e_{h})$.
\item The $q$-tuple of colors assigned to $e_{h}$  satisfy the hyperedge constraint for $G$ (i.e. $c(A'(e_h))=\true$).  
\end{enumerate}
The first constraint ensures that for an edge $(v_j, e_h)$ we have $A'(e_h)$ of the form $(A(v_1),\dots,A(v_j),\dots, A(v_q))$ where color assigned to the $j^{th}$ position of $A'(e_h)$ matches the color assigned to $v_j$ $A(v_j)$.  

Let $$\unsat(G)=\min_{A:V\rightarrow [c]}\frac{\# \text{of unsatisfied hyperedges in $G$}}{|E|}=\e.$$  Suppose for contradiction that $$\unsat(T_3(G))<\frac{\e}{q}.$$  We have
$$\unsat(T_3(G))=\min_{A':V\rightarrow [c]^q}\frac{\# \text{of unsatisfied edges in $T_3(G)$}}{q\cdot|E|}<\frac{\e}{q}.$$
Hence the $\# \text{unsatisfied edges in } T_3(G)<\# \text{unsatisfied edges in } G.$
If we apply the coloring of the ``vertex set'' vertices in $T_3(G)$  to the vertices of $G$, then each unsatisfied edge in $T_3(G)$ will yield at most one unsatisfied edge in $G$ (namely if the second condition of $T_3(G)$ is violated then the hyperedge in $G$ will not be properly colored).  And all other hyperedges of $G$ will be satisfied since satisfied edges $(v_i,e_h)$ in $T_3(G)$ correspond to satisfied hyperedge $e_h$ in $G$.  But this induces a $c$-coloring of $G$ with $$\unsat(G)\leq \frac{\# \text{of unsatisfied edges in $T_3(G)$}}{|E|}<\e,$$
contradictng the fact that $\unsat(G)=\e$
\end{proof}
\end{document}