% HRS11: journal article in Statistics, Politics, and Policy, volume 2, issue 1.
% The article number (7) is stored in `eid` instead of being appended to
% `number`, so `number` stays a plain issue number; no page range is recorded.
% `year` is used (not `date`) so both classic BibTeX and biblatex see the year.
@article{HRS11,
  author   = {Higgins, Michael J. and Rivest, Ronald L. and Stark, Philip B.},
  title    = {Sharper {$p$}-values for stratified election audits},
  journal  = {Statistics, Politics, and Policy},
  volume   = {2},
  number   = {1},
  eid      = {7},
  year     = {2011},
  doi      = {10.2202/2151-7509.1031},
  url      = {http://www.bepress.com/spp/vol2/iss1/7},
  abstract = {Vote-tabulation audits can be used to collect
              evidence that the set of winners of an election (the
              outcome) according to the machine count is correct---that
              it agrees with the outcome that a full hand
              count of the audit trail would show.  The strength of
              evidence is measured by the $p$-value of the
              hypothesis that the machine outcome is
              wrong.  Smaller $p$-values are stronger evidence that
              the outcome is correct.
              \par
              Most states that have
              election audits of any kind require audit samples
              stratified by county for contests that cross county
              lines.  Previous work on $p$-values for stratified
              samples based on the largest weighted overstatement
              of the margin used upper bounds that can be quite
              weak. Sharper $p$-values can be found by solving a 0-1
              knapsack problem. For example, the 2006 U.S. Senate
              race in Minnesota was audited using a stratified
              sample of 2-8 precincts from each of 87 counties,
              202 precincts in all.  Earlier work (Stark 2008b)
              found that the $p$-value was no larger than 0.042. We
              show that it is no larger than 0.016: much stronger
              evidence that the machine outcome was correct.
              \par
              We also give algorithms for choosing how many batches
              to draw from each stratum to reduce the counting
              burden. In the 2006 Minnesota race, a stratified
              sample about half as large---109 precincts versus
              202---would have given just as small a $p$-value if
              the observed maximum overstatement were the
              same. This would require drawing 11 precincts
              instead of 8 from the largest county, and 1 instead
              of 2 from the smallest counties. We give analogous
              results for the 2008 U.S. House of Representatives
              contests in California.},
}