001 /** 002 * File: UUCoder.java 003 * Author: Bryan Parno 004 * Email: parno@fas.harvard.edu 005 * Created: 5/16/03 006 * Purpose: Converts binary data into ASCII representation 007 * Updated 12/9/03 to convert short byte arrays 008 */ 009 package edu.harvard.deas.hyperenc.util; 010 011 012 /** 013 * Encodes and decodes binary data into ASCII using a similar, but not 014 * identical, format to UUEncoding. 015 * <p> 016 * Bytes are translated into ASCII characters in the same way as UUEncoding: 017 * groups of three 8-bit bytes are realigned into sets of four 6-bit groups. 018 * Each 6-bit group is then converted to an ASCII character by adding 32, with 019 * the exception of 00: a zero is represented by the backtick (`, ASCII 96). 020 * Thus the valid characters, spanning from ASCII 33 to ASCII 96, are:<br> 021 * <blockquote> 022 * 023 * <pre> 024 * !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_` 025 * </pre> 026 * </blockquote> 027 * 028 * If the message to be encoded is not a multiple of 3 bytes, it is padded with 029 * zeros so that it is. 030 * <p> 031 * Messages are not delineated as in standard UUEncoding. The first line of a 032 * message is a decimal number indicating how many bytes are encoded by the 033 * message. This is followed by the encoded data on as many lines as needed. 034 * Unlike standard UUEncoding, the length of each line is not indicated in any 035 * way. The only requirement is that each line contain a multiple of 4 036 * characters, not including the trailing newline character(s). Finally, the 037 * message ends with a line consisting of only the token "<code>TERMEND</code>". 038 */ 039 // XXX it would be better if this did standard UUEncoding... 040 public class UUCoder 041 { 042 /** 043 * Number of characters to place on each line when encoding. Must be a 044 * multiple of 4. 045 */ 046 private final static int CHARS_PER_LINE = 48; 047 048 /** 049 * Token that indicates the end of a message. 050 */ 051 public final static String TERMINATOR = "TERMEND"; 052 053 /** 054 * @return a String containing all the legal characters that may appear in an 055 * encoding (excluding newline characters) 056 */ 057 public static String getLegalChars() { 058 String s = ""; 059 for (int i = 33; i < 97; i++) { 060 s += (char)i; 061 } 062 return s; 063 } 064 065 /** 066 * Encodes the given data. The format of the encoding is described in the 067 * class description. 068 * 069 * @param data 070 * the data to be encoded 071 * @return a string containing the encoding of <code>data</code>. The string 072 * contains only ASCII characters from 33 to 96. 073 */ 074 public static String encode(byte[] data) { 075 int numChars = 0; 076 String result = new String(""); 077 078 // Place the unencoded message length (in bytes) on the first line 079 result = result + data.length + "\n"; 080 081 // Encode in units of 3 bytes = 24 bits = 4 sets of 6 bits 082 for (int i = 0; i < (int) Math.ceil(data.length / 3.0); i++) { 083 byte a=0,b=0,c=0; 084 085 a = data[i * 3]; 086 // Pad with zero bytes so the input length is a multiple of 3 087 if (i*3 + 1 >= data.length) { 088 b = (byte) 0x00; 089 c = (byte) 0x00; 090 } else if (i*3 + 2 >= data.length) { 091 b = data[i * 3 + 1]; 092 c = (byte) 0x00; 093 } else { 094 b = data[i * 3 + 1]; 095 c = data[i * 3 + 2]; 096 } 097 098 char c1, c2, c3, c4; 099 // Take top 6 bits of 'a' 100 c1 = (char) (unsignedRightShift(a, 2) + 32); 101 // Take bottom 2 bits of 'a' and upper 4 bits of 'b' 102 c2 = (char) ((unsignedRightShift((byte)(a << 6), 2) 103 | unsignedRightShift(b, 4)) + 32); 104 // Take lower 4 bits of b and upper 2 bits of 'c' 105 c3 = (char) ((unsignedRightShift((byte)(b << 4), 2) 106 | unsignedRightShift(c, 6)) + 32); 107 // Take lower 6 bits of 'c' 108 c4 = (char) (unsignedRightShift((byte)(c << 2), 2) 109 + 32); 110 111 // Replace spaces (ASCII 32) with backticks (`, ASCII 96). 112 if (c1 == 32) 113 c1 = 96; 114 if (c2 == 32) 115 c2 = 96; 116 if (c3 == 32) 117 c3 = 96; 118 if (c4 == 32) 119 c4 = 96; 120 121 result = result + c1 + c2 + c3 + c4; 122 numChars += 4; 123 124 if (numChars >= CHARS_PER_LINE) { 125 numChars = 0; 126 result = addLineSuffix(result); 127 } 128 } 129 130 result = addTerminator(result); 131 132 return result; 133 } 134 135 /** Encode a very short block of bytes - Expansion factor is 4 characters for every 3 bytes 136 @param data Byte data to be encoded 137 public static String shortEncode(byte[] data) { 138 String result = new String(""); 139 140 // Encode in units of 3 bytes = 24 bits = 4 sets of 6 bits 141 for (int i = 0; i < (int) Math.ceil((double) (data.length / 3.0)); i++) { 142 byte a=0,b=0,c=0; 143 144 a = data[i * 3]; 145 if (i*3 + 1 >= data.length) { 146 b = (byte) 0xff; 147 c = (byte) 0xff; 148 } else if (i*3 + 2 >= data.length) { 149 b = data[i * 3 + 1]; 150 c = (byte) 0xff; 151 } else { 152 b = data[i * 3 + 1]; 153 c = data[i * 3 + 2]; 154 } 155 156 char c1, c2, c3, c4; 157 158 // Take top 6 bits of 'a' 159 c1 = (char) (unsignedRightShift(a, 2) + UUCoder.ASCII_SPACE); 160 161 // Take bottom 2 bits of 'a' and upper 4 bits of 'b' 162 c2 = (char) ((unsignedRightShift((byte)(a << 6), 2) 163 | unsignedRightShift(b, 4)) + UUCoder.ASCII_SPACE); 164 165 // Take lower 4 bits of b and upper 2 bits of 'c' 166 c3 = (char) ((unsignedRightShift((byte)(b << 4), 2) 167 | unsignedRightShift(c, 6)) + UUCoder.ASCII_SPACE); 168 169 // Take lower 6 bits of 'c' 170 c4 = (char) (unsignedRightShift((byte)(c << 2), 2) 171 + UUCoder.ASCII_SPACE); 172 173 result = result + c1 + c2 + c3 + c4; 174 } 175 176 return result; 177 } 178 179 /** Decodes a string that resulted from encoding a short array of bytes 180 @param data The encoded string 181 @param length The expected length of the byte array. Must be the exact 182 same length as the array encoded by shortEncode 183 public byte[] shortDecode(String data, int length) { 184 String line = data; 185 186 int numBytes = length; 187 188 byte[] bytes = new byte[numBytes]; 189 int count = 0; 190 191 if (line.length() % 4 != 0) { 192 System.out.println("Encoding error! Line is: " + line.length() + 193 " chars long. The line is:" + line + "and the offending char is" 194 + line.charAt(line.length()-1) + " and in decimal, that's" 195 + (int)line.charAt(line.length()-1)); 196 System.exit(5); 197 } 198 199 for (int i=0; i < line.length() / 4; i++) { 200 char c1, c2, c3, c4; 201 202 c1 = line.charAt(i*4); 203 c2 = line.charAt(i*4 + 1); 204 c3 = line.charAt(i*4 + 2); 205 c4 = line.charAt(i*4 + 3); 206 207 byte a, b, c; 208 209 // Grab upper 6 bits from c1 210 a =(byte)(((byte) (c1 - UUCoder.ASCII_SPACE)) << 2); 211 // Grab lower 2 bits from c2 212 a = (byte) (a | this.unsignedRightShift( 213 (byte)(c2 - UUCoder.ASCII_SPACE), 4)); 214 215 // Grab upper 4 bits from c2 216 b = (byte) (((byte)(c2 - UUCoder.ASCII_SPACE)) << 4); 217 // Grab lower 4 bits from c3 218 b = (byte) (b | this.unsignedRightShift( 219 (byte)(c3 - UUCoder.ASCII_SPACE), 2)); 220 221 // Grab upper 2 bits from c3 222 c = (byte) (((byte)(c3 - UUCoder.ASCII_SPACE)) << 6); 223 // Grab lower 6 bits from c4 224 c = (byte) (c | ((byte)(c4 - UUCoder.ASCII_SPACE))); 225 226 bytes[count++] = a; 227 if (count >= numBytes) {break;} 228 bytes[count++] = b; 229 if (count >= numBytes) {break;} 230 bytes[count++] = c; 231 if (count >= numBytes) {break;} 232 } 233 234 return bytes; 235 }*/ 236 237 238 239 240 private static String addLineSuffix(String data) { 241 return data + "\n"; 242 } 243 244 private static String addTerminator(String data) { 245 return data + "\n" + UUCoder.TERMINATOR; 246 } 247 248 /** 249 * Decodes the given string. The format of the encoding is described in the 250 * class description. 251 * 252 * @param data 253 * the data to be decoded 254 * @throws IllegalArgumentException 255 * if <code>data</code> is not a valid encoding 256 * @return a byte array containing the data encoded by <code>data</code> 257 */ 258 public static byte[] decode(String data) { 259 // \r\f = carriage-return and form-feed respectively 260 String[] lines = data.split("[\n\r\f]"); 261 int numBytes = Integer.parseInt(lines[0].trim()); 262 263 byte[] bytes = new byte[numBytes]; 264 int byteCount = 0; 265 266 // Start at 1 to skip the first line (processed above) 267 for (int linecount = 1; linecount < lines.length; linecount++) { 268 String line = lines[linecount]; 269 270 if (line.equals(UUCoder.TERMINATOR)) { 271 break; 272 } 273 274 if (line.length() % 4 != 0) { 275 throw new IllegalArgumentException("Line " + linecount 276 + " has illegal length of " + line.length() + " characters."); 277 } 278 279 for (int i=0; i < line.length() / 4; i++) { 280 char c1, c2, c3, c4; 281 282 c1 = line.charAt(i*4); 283 c2 = line.charAt(i*4 + 1); 284 c3 = line.charAt(i*4 + 2); 285 c4 = line.charAt(i*4 + 3); 286 287 if ( c1 < 33 || c2 < 33 || c3 < 33 || c4 < 33 || 288 c1 > 96 || c2 > 96 || c3 > 96 || c4 > 96 ) { 289 throw new IllegalArgumentException("Illegal character on line " 290 + linecount + " in group " + i + ": " + line.substring(i*4, i*4+4)); 291 } 292 293 // Replace backticks (`, ASCII 96) with spaces (ASCII 32) 294 if (c1 == 96) 295 c1 = 32; 296 if (c2 == 96) 297 c2 = 32; 298 if (c3 == 96) 299 c3 = 32; 300 if (c4 == 96) 301 c4 = 32; 302 303 byte a, b, c; 304 305 // Grab upper 6 bits from c1 306 a =(byte)(((byte) (c1 - 32)) << 2); 307 // Grab lower 2 bits from c2 308 a = (byte) (a | unsignedRightShift( 309 (byte)(c2 - 32), 4)); 310 311 // Grab upper 4 bits from c2 312 b = (byte) (((byte)(c2 - 32)) << 4); 313 // Grab lower 4 bits from c3 314 b = (byte) (b | unsignedRightShift( 315 (byte)(c3 - 32), 2)); 316 317 // Grab upper 2 bits from c3 318 c = (byte) (((byte)(c3 - 32)) << 6); 319 // Grab lower 6 bits from c4 320 c = (byte) (c | ((byte)(c4 - 32))); 321 322 // If we're about to overrun the bytes array, throw an error because 323 // there are too many characters here 324 if (byteCount >= numBytes) { 325 throw new IllegalArgumentException( 326 "Encoded message is too long. (Message was declared to be " 327 + numBytes + " bytes long.)"); 328 } 329 330 // Terminate early if we hit the end of the message, but not before 331 // checking that the remainder of the data is in fact zero 332 bytes[byteCount++] = a; 333 if (byteCount >= numBytes) { 334 if (b != 0 || c != 0) { 335 throw new IllegalArgumentException( 336 "Reached end of message, but pad bytes were non-zero (byte 2 = " 337 + b + ", byte 3 = " + c + ")."); 338 } 339 break; 340 } 341 342 bytes[byteCount++] = b; 343 if (byteCount >= numBytes) { 344 if (c != 0) { 345 throw new IllegalArgumentException( 346 "Reached end of message, but pad byte was non-zero (byte 3 = " 347 + c + ")."); 348 } 349 break; 350 } 351 352 bytes[byteCount++] = c; 353 if (byteCount >= numBytes) { 354 break; 355 } 356 } 357 } 358 359 return bytes; 360 } 361 362 /** 363 * Performs an unsigned right shift on bytes. Necessary because Java's 364 * <code>>>></code> only operates on 32-bit values: specifically, 365 * <code>(byte)((byte)0xff >>> 4)</code> yields <code>0xff</code> = -1, 366 * because <code>0xff</code> is sign-extended when it is converted to an 367 * <code>int</code> for use with <code>>>></code>. 368 */ 369 private static byte unsignedRightShift(byte b, int num) { 370 if (num == 0) { 371 return b; 372 } 373 374 if ((b & ((byte) 1 << 7)) != 0) { // High-order bit is 1 375 // First do the right shift 376 b = (byte) (b >>> num); 377 378 byte temp = (byte) 128; 379 // Use sign extension to fill in top bits with 1's 380 temp = (byte)(temp >> (num - 1)); 381 temp = (byte) ~temp; // Invert to get 0's on top 382 383 b = (byte) (b & temp); // Now clear out the top bits 384 385 return b; 386 } else { // High-order bit is 0, so we can do a normal right shift 387 return (byte) (b >>> num); 388 } 389 } 390 }