\documentclass[10pt]{article}
\usepackage{amssymb}

\newtheorem{define}{Definition}
%\newcommand{\Z}{{\mathbb{Z}}}
%\usepackage{psfig}

\oddsidemargin=0.15in
\evensidemargin=0.15in
\topmargin=-.5in
\textheight=9in
\textwidth=6.25in

\newcommand{\plurality}{{\rm plurality}}

\begin{document}
\input{preamble.tex}

\lecture{8}{March 6,2006}{Ronitt Rubinfeld}{Matthew Ince}

\section{DNF formulas}
 
\begin{definition} 
Given $n$ variables $x_1, \dots , x_n$, a \emph{DNF formula} $F = F_1
 \vee F_2 \vee \dots \vee F_m$ on $m$ clauses and $n$ variables is a
 boolean formula where each clause $F_i$ is of the form $F_i = y_{j_1}
 \wedge y_{j_2} \wedge \dots$, and where the $y_j$ are literals $x_k$
or $\overline{x_k}$.
\end{definition}


Our goal is to uniformly randomly generate satisfying assignments of
\emph{DNF formulas}. Every non-trivial DNF formula has a satisfying
assignment, because satisfying the formula reduces to satisfying a
single clause $F_i$.  For instance, to satisfy the formula
$$F = x_1 x_2 \overline{x_3} \vee \overline{x_1} x_2 x_4, $$ we could
satisfy the first clause by choosing $x_1 = x_2 = T$, $x_3 = F$. As an
aside, note that if the $\vee$ are replaced by XORs $\oplus$, then $F$
becomes a polynomial in the variables over $\mathbb{Z}_2$, and finding
satisfying assignments reduces to random polynomial zero-finding.


Not surprisingly, generating satisfying assignments for a DNF formula
is closely related to counting the number of such assignments. However,
exact answers to this problem are difficult to 
obtain: the negation of a DNF formula is a so-called CNF formula, 
\emph{e.g.} $(x \vee y \vee \overline{z} ) \wedge (x \vee \overline{x} \vee y)$.
CNF formulas are the subject of the famous $3CNF-SAT$ problem, which
shows that finding satisfying assignments for CNF formulas with three
variables per clause is NP-complete. Since counting the number
of satisfying assignments of a DNF formula would reveal the existence
of a satisfying assignment of its negation, counting the number
of assignments is a problem of class \#P. 

We first find satisfying assignments when $m=1$. In this case, $F$
only has a single clause, \emph{e.g.}  $F_1 = x_1 x_2 \overline{x_3}$. We may
generate all satisfying assignments of this clause by choosing $x_1 =
T, x_2 = T,x_3 =F$, and arbitrary values for each other $x_i$.  Note
that there are $2^{n-3}$ satisfying assignments in all.

If we have more than one clause, we could simply pick a clause,
then pick a random satisfying assigment for that clause. However,
this procedure is biased toward assignments satisfying several
different clauses. Because we want a uniform distribution of outputs,
we use a slightly more complicated selection routine.
For convenience, let $S_i$ be the set of assignments satisfying $F_i$.

\medskip

\begin{tabbing}
\bf{Algorithm A} \\
To  randomly \= generate $\pi$ satisfying $F$: \\
\> Step i: \= Pick $i \in [m]$ with probability $\frac{|S_i|}{\sum |S_i|}$. \\
\> \> Then pick a random satisfying assignment $\pi$ of $F_i$. \\
\> Step ii: \= Compute $\ell = |\{ j \in \{1,2,\dots,m\}: \pi \in S_j \}|$.\\
\> \> Then \= toss a coin with bias $1/\ell$. \\
\> \> \> If the coin is ``Heads'', OUTPUT $\pi$ and halt.\\
\> \> \> Otherwise, restart at step I.\\
\end{tabbing}

\medskip

Intuitively, step i is the naive selection routine, and step ii 
compensates for assignments $\pi$ in several $S_i$: if $\pi$ is in
$\ell$ different sets $S_i$, then each of these $S_i$ should be $1/\ell$ times
as likely to select $\pi$ to ensure a uniform distribution. We now
prove some claims about algorithm A:
\begin{claim}\label{claim1}
Algorithm A outputs satisfying assignments uniformly at random.
\end{claim}

\begin{proofof} {Claim \ref{claim1}}
It suffices to show that each loop iteration is equally likely to output
all satisfying assignments $\pi$.  For a given $\pi$,
as before let $\ell = |\{ j \in \{1,2,\dots,m\}: \pi \in S_j \}|$.
By conditional probability,
\begin{eqnarray*}
\Pr[\textrm{$\pi$ picked in step 1}] &=& \sum_{j \in [m] ~s.t.~ \pi \in S_j}
\Pr[\textrm{Step i picks clause $j$}]\frac{1}{|S_j|} \\
& = &  \sum_{j \in [m] ~s.t.~ \pi \in S_j} \frac{|S_j|}{\sum |S_j|} \cdot \frac{1}{S_j}\\
& = & \sum_{j \in [m] ~s.t.~ \pi \in S_j} (\sum |S_j|)^{-1} \\
& = & \frac{\ell}{\sum|S_j|} .
\end{eqnarray*}
So the probability that this loop iteration actually outputs $\pi$
is $\frac{1}{\ell}\frac{\ell}{\sum|S_j|} = \frac{1}{\sum |S_j|}$, which is
independent of $\pi$.
\end{proofof}

\begin{claim}\label{claim2}
The number of loops needed to choose $\pi$ satisfies
$$E[ \textrm{\# loops until OUTPUT}] \le m.$$
\end{claim}

\begin{proofof}{Claim \ref{claim2}}
For each $\pi$ examined, we have $\ell \le m$, giving $1/\ell \ge 1/m$.
A coin with bias $p$ has $1/p$ expected runs until it outputs ``Heads'', so 
$$E[\textrm{\# loops}] = 1/bias \le m.$$
\end{proofof}

\section{P-relations}

\begin{definition}
Let $R$ be a binary relation $R \subset \{0,1\}^* \times \{0,1 \}^* $ 
on strings. We say $R$ is a \emph{P-relation} if 
\begin{enumerate}
\item For each $(x,y) \in R$, we have $|y| = O(poly(|x|))$.
\item There exists a polynomial time procedure for deciding if $(x,y) \in R$. 
\end{enumerate}
\end{definition}

For example, consider $R_{SAT} = \{(x,y)\mid \textrm{$x$ a boolean
formula, $y$ a satisfying assignment of $x$} \}$.

\begin{claim}
We have $L \in NP$ if and only if there exists a P-relation $R$ such that
$x \in L$ holds if and only if there exists $y$ with $(x,y) \in R$.
\end{claim}

The (trivial) proof of this fact comes next time. Note that $y$
can be thought of as ``corroborating'' whether $x\in L$.

\end{document}
