% HRS11 -- Higgins, Rivest, and Stark (2011): journal article in
% Statistics, Politics, and Policy, volume 2, issue 1, article 7.
% The article is identified by an article number rather than a page range,
% so `number` holds only the issue and the article number is stored in `eid`.
% The math in the title is brace-protected so that styles which recase
% titles leave it untouched.
@Article{HRS11,
  author   = {Higgins, Michael J. and Rivest, Ronald L. and Stark, Philip B.},
  title    = {Sharper {$p$}-values for stratified election audits},
  journal  = {Statistics, Politics, and Policy},
  volume   = {2},
  number   = {1},
  eid      = {7},
  date     = {2011},
  doi      = {10.2202/2151-7509.1031},
  url      = {http://www.bepress.com/spp/vol2/iss1/7},
  abstract = {Vote-tabulation audits can be used to collect evidence that the
    set of winners of an election (the outcome) according to the machine count
    is correct---that it agrees with the outcome that a full hand count of the
    audit trail would show. The strength of evidence is measured by the
    $p$-value of the hypothesis that the machine outcome is wrong. Smaller
    $p$-values are stronger evidence that the outcome is correct. \par
    Most states that have election audits of any kind require audit samples
    stratified by county for contests that cross county lines. Previous work
    on $p$-values for stratified samples based on the largest weighted
    overstatement of the margin used upper bounds that can be quite weak.
    Sharper $p$-values can be found by solving a 0-1 knapsack problem. For
    example, the 2006 U.S. Senate race in Minnesota was audited using a
    stratified sample of 2-8 precincts from each of 87 counties, 202 precincts
    in all. Earlier work (Stark 2008b) found that the $p$-value was no larger
    than 0.042. We show that it is no larger than 0.016: much stronger
    evidence that the machine outcome was correct. \par
    We also give algorithms for choosing how many batches to draw from each
    stratum to reduce the counting burden. In the 2006 Minnesota race, a
    stratified sample about half as large---109 precincts versus 202---would
    have given just as small a $p$-value if the observed maximum overstatement
    were the same. This would require drawing 11 precincts instead of 8 from
    the largest county, and 1 instead of 2 from the smallest counties. We give
    analogous results for the 2008 U.S. House of Representatives contests in
    California.},
}