Base64 エンコード / デコード

エンコード。3バイト分のデータを4文字に変換する。dataはバイト列。forの条件がlength-2なのは、3バイトに満たない端数をループの対象から外すため。まず3バイトずつ処理し、データの末尾で余った1〜2バイトの処理を後ろに入れた。

    /**
     * Encodes a byte array as Base64 text.
     *
     * <p>Every complete group of three input bytes becomes four output
     * characters. A trailing group of one or two bytes is zero-extended to
     * 24 bits, emitted as two or three characters, and filled up to four
     * characters with {@code PAD}.
     *
     * @param data the bytes to encode
     * @return the Base64 representation of {@code data}
     */
    public String encode(byte[] data) {
        final int total = data.length;
        final int tail = total % 3;       // bytes left over after the full groups
        final int whole = total - tail;   // index where the leftover bytes start
        final char[] out = new char[(total + 2) / 3 * 4];
        int w = 0;

        // Full 3-byte groups: pack into 24 bits, then peel off four 6-bit indices.
        for (int r = 0; r < whole; r += 3) {
            final int bits = ((data[r] & 0xFF) << 16)
                    | ((data[r + 1] & 0xFF) << 8)
                    | (data[r + 2] & 0xFF);
            out[w++] = ENCODE64[bits >> 18];
            out[w++] = ENCODE64[(bits >> 12) & 0x3F];
            out[w++] = ENCODE64[(bits >> 6) & 0x3F];
            out[w++] = ENCODE64[bits & 0x3F];
        }

        if (tail == 1) {
            // One byte left: [111111][11 0000] followed by two pad characters.
            final int bits = (data[whole] & 0xFF) << 16;
            out[w++] = ENCODE64[bits >> 18];
            out[w++] = ENCODE64[(bits >> 12) & 0x3F];
            out[w++] = PAD;
            out[w++] = PAD;
        } else if (tail == 2) {
            // Two bytes left: [111111][11 1111][1111 00] followed by one pad character.
            final int bits = ((data[whole] & 0xFF) << 16)
                    | ((data[whole + 1] & 0xFF) << 8);
            out[w++] = ENCODE64[bits >> 18];
            out[w++] = ENCODE64[(bits >> 12) & 0x3F];
            out[w++] = ENCODE64[(bits >> 6) & 0x3F];
            out[w++] = PAD;
        }
        return new String(out);
    }

ENCODE64は6ビットの値(0〜63)→文字の変換テーブル。

    /** Base64 alphabet: the array index is a 6-bit value (0-63), the element is its character. */
    private static final char[] ENCODE64 = {
        'A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P',
        'Q','R','S','T','U','V','W','X','Y','Z','a','b','c','d','e','f',
        'g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v',
        'w','x','y','z','0','1','2','3','4','5','6','7','8','9','+','/'
    };
    /** Padding character that fills the last 4-character group of the encoded text. */
    private static final char PAD = '=';

逆のデコード。

    /**
     * Decodes Base64 text into the bytes it represents.
     *
     * <p>The input is consumed four characters at a time, each group yielding
     * three bytes. Up to two trailing {@code PAD} characters are recognised;
     * the final padded group then yields one or two bytes. An empty string
     * decodes to an empty array.
     *
     * <p>Characters are looked up in {@code DECODE64} without validation, so
     * the input is expected to be well-formed, padded Base64 with no line
     * breaks or other foreign characters.
     *
     * @param data the Base64 text to decode
     * @return the decoded bytes
     */
    public byte[] decode(String data) {
        int length = data.length();
        if (length == 0) {
            // Nothing to decode; without this guard the padding check below
            // would evaluate charAt(-1) and throw.
            return new byte[0];
        }
        int lastLoopIndex = length;
        int byteLength = length / 4 * 3;
        if (data.charAt(length - 1) == PAD) { // input ends with "="
            length--;
            byteLength--;
            if (data.charAt(length - 1) == PAD) { // input ends with "=="
                length--;
                byteLength--;
            }
            // Stop the main loop before the final group so that the '='
            // characters are never fed through the lookup table.
            lastLoopIndex = length - 4;
        }
        byte[] byteBuffer = new byte[byteLength];
        int position = 0;
        int bits = 0;
        // Full groups: four 6-bit values -> 24 bits -> three bytes.
        for (int i = 0; i < lastLoopIndex; i += 4) {
            bits = (DECODE64[data.charAt(i)] << 18)
                + (DECODE64[data.charAt(i + 1)] << 12)
                + (DECODE64[data.charAt(i + 2)] << 6)
                + DECODE64[data.charAt(i + 3)];
            byteBuffer[position++] = (byte) ((bits >> 16) & 0xFF);
            byteBuffer[position++] = (byte) ((bits >> 8) & 0xFF);
            byteBuffer[position++] = (byte) (bits & 0xFF);
        }
        // Final padded group, if any: "length" now excludes the '=' characters.
        switch (byteLength % 3) {
        case 1: // [111111][11 0000][0000 00][000000] -> one byte
            bits = (DECODE64[data.charAt(length - 2)] << 18)
                + (DECODE64[data.charAt(length - 1)] << 12);
            byteBuffer[position++] = (byte) ((bits >> 16) & 0xFF);
            break;
        case 2: // [111111][11 1111][1111 00][000000] -> two bytes
            bits = (DECODE64[data.charAt(length - 3)] << 18)
                + (DECODE64[data.charAt(length - 2)] << 12)
                + (DECODE64[data.charAt(length - 1)] << 6);
            byteBuffer[position++] = (byte) ((bits >> 16) & 0xFF);
            byteBuffer[position++] = (byte) ((bits >> 8) & 0xFF);
            break;
        default: // no partial group
            break;
        }
        return byteBuffer;
    }

DECODE64は文字→6ビットの値(0〜63)の変換テーブル(ENCODE64の逆引き)。Base64で使わない文字の位置には0xFFを入れてある。無駄が多いけど単純に。

    private static final int _xFF = 0xFF;
    private static int[] DECODE64 = {
        _xFF, _xFF, _xFF, _xFF, _xFF, _xFF, _xFF, _xFF, _xFF, _xFF, // 0-9 
        ...
        _xFF, _xFF, _xFF, 0x3E, _xFF, _xFF, _xFF, 0x3F, 0x34, 0x35,
        0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, _xFF, _xFF, // 50-59
        _xFF, _xFF, _xFF, _xFF, _xFF, 0x00, 0x01, 0x02, 0x03, 0x04,
        0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E,
        0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18,
        0x19, _xFF, _xFF, _xFF, _xFF, _xFF, _xFF, 0x1A, 0x1B, 0x1C,
        0x1D, 0x1E, 0x1F, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, // 100-109
        0x27, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, 0x30,
        0x31, 0x32, 0x33 // 120-122
    };


これ(coder)とCommons CodecのBase64(base64)を、下みたいな感じで比較してみる。

        String s = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
        // Build the byte[] input before any timer starts so that the
        // String-to-bytes conversion is not charged to either measurement.
        byte[] b = s.getBytes();

        // One million decodes with this implementation (takes a String).
        long t = System.currentTimeMillis();
        for (int i=0; i<1000000; i++) {
            coder.decode(s);
        }
        System.out.println("corder.decode : " + (System.currentTimeMillis()-t));

        // One million decodes with Commons Codec (takes a byte[]).
        t = System.currentTimeMillis();
        for (int i=0; i<1000000; i++) {
            base64.decode(b);
        }
        System.out.println("base64.decode : " + (System.currentTimeMillis()-t));

配列をごそっととる部分だけ修正しないと。