web-dev-qa-db-ja.com

Java BASE64 utf8文字列デコード

私はorg.Apache.commons.codec.binary.Base64を使用していますが、utf8である文字列をデコードします。時々私はbase64でエンコードされた文字列を取得しますが、デコード後は例えば^@k��@@。 base64が正しいかどうか、またはデコードされたutf8文字列が有効なutf8文字列かどうかを確認するにはどうすればよいですか?

明確にするために。私は使っています

public static String base64Decode(String str) {
    try {
        return new String(base64Decode(str.getBytes(Constants.UTF_8)), Constants.UTF_8);
    } catch (UnsupportedEncodingException e) {
         ...
    }
}

public static byte[] base64Decode(byte[] byteArray) {
    return Base64.decodeBase64(byteArray);
}
14
terry207

Stringbyte[]に、またはその逆に変換するときに、文字セットを指定する必要があります。

byte[] bytes = string.getBytes("UTF-8");
// feed bytes to Base64

そして

// get bytes from Base64
String string = new String(bytes, "UTF-8");

それ以外の場合は、プラットフォームのデフォルトのエンコーディングが使用されますが、UTF-8自体は必ずしも必要ではありません。

27
BalusC

これを試して:

var B64 = {
    alphabet: 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=',
    lookup: null,
    ie: /MSIE /.test(navigator.userAgent),
    ieo: /MSIE [67]/.test(navigator.userAgent),
    encode: function (s) {
        var buffer = B64.toUtf8(s),
            position = -1,
            len = buffer.length,
            nan1, nan2, enc = [, , , ];
        if (B64.ie) {
            var result = [];
            while (++position < len) {
                nan1 = buffer[position + 1], nan2 = buffer[position + 2];
                enc[0] = buffer[position] >> 2;
                enc[1] = ((buffer[position] & 3) << 4) | (buffer[++position] >> 4);
                if (isNaN(nan1)) enc[2] = enc[3] = 64;
                else {
                    enc[2] = ((buffer[position] & 15) << 2) | (buffer[++position] >> 6);
                    enc[3] = (isNaN(nan2)) ? 64 : buffer[position] & 63;
                }
                result.Push(B64.alphabet[enc[0]], B64.alphabet[enc[1]], B64.alphabet[enc[2]], B64.alphabet[enc[3]]);
            }
            return result.join('');
        } else {
            result = '';
            while (++position < len) {
                nan1 = buffer[position + 1], nan2 = buffer[position + 2];
                enc[0] = buffer[position] >> 2;
                enc[1] = ((buffer[position] & 3) << 4) | (buffer[++position] >> 4);
                if (isNaN(nan1)) enc[2] = enc[3] = 64;
                else {
                    enc[2] = ((buffer[position] & 15) << 2) | (buffer[++position] >> 6);
                    enc[3] = (isNaN(nan2)) ? 64 : buffer[position] & 63;
                }
                result += B64.alphabet[enc[0]] + B64.alphabet[enc[1]] + B64.alphabet[enc[2]] + B64.alphabet[enc[3]];
            }
            return result;
        }
    },
    decode: function (s) {
        var buffer = B64.fromUtf8(s),
            position = 0,
            len = buffer.length;
        if (B64.ieo) {
            result = [];
            while (position < len) {
                if (buffer[position] < 128) result.Push(String.fromCharCode(buffer[position++]));
                else if (buffer[position] > 191 && buffer[position] < 224) result.Push(String.fromCharCode(((buffer[position++] & 31) << 6) | (buffer[position++] & 63)));
                else result.Push(String.fromCharCode(((buffer[position++] & 15) << 12) | ((buffer[position++] & 63) << 6) | (buffer[position++] & 63)));
            }
            return result.join('');
        } else {
            result = '';
            while (position < len) {
                if (buffer[position] < 128) result += String.fromCharCode(buffer[position++]);
                else if (buffer[position] > 191 && buffer[position] < 224) result += String.fromCharCode(((buffer[position++] & 31) << 6) | (buffer[position++] & 63));
                else result += String.fromCharCode(((buffer[position++] & 15) << 12) | ((buffer[position++] & 63) << 6) | (buffer[position++] & 63));
            }
            return result;
        }
    },
    toUtf8: function (s) {
        var position = -1,
            len = s.length,
            chr, buffer = [];
        if (/^[\x00-\x7f]*$/.test(s)) while (++position < len)
        buffer.Push(s.charCodeAt(position));
        else while (++position < len) {
            chr = s.charCodeAt(position);
            if (chr < 128) buffer.Push(chr);
            else if (chr < 2048) buffer.Push((chr >> 6) | 192, (chr & 63) | 128);
            else buffer.Push((chr >> 12) | 224, ((chr >> 6) & 63) | 128, (chr & 63) | 128);
        }
        return buffer;
    },
    fromUtf8: function (s) {
        var position = -1,
            len, buffer = [],
            enc = [, , , ];
        if (!B64.lookup) {
            len = B64.alphabet.length;
            B64.lookup = {};
            while (++position < len)
            B64.lookup[B64.alphabet[position]] = position;
            position = -1;
        }
        len = s.length;
        while (position < len) {
            enc[0] = B64.lookup[s.charAt(++position)];
            enc[1] = B64.lookup[s.charAt(++position)];
            buffer.Push((enc[0] << 2) | (enc[1] >> 4));
            enc[2] = B64.lookup[s.charAt(++position)];
            if (enc[2] == 64) break;
            buffer.Push(((enc[1] & 15) << 4) | (enc[2] >> 2));
            enc[3] = B64.lookup[s.charAt(++position)];
            if (enc[3] == 64) break;
            buffer.Push(((enc[2] & 3) << 6) | enc[3]);
        }
        return buffer;
    }
};

表示 ここ

2
atiruz