javascript google-closure-compiler

Google Closure Compiler inlines a repeatedly used private property - a flaw or am I missing something?


I found a JavaScript base64 encoder/decoder some time ago on StackOverflow. It looks something like this:

/**
 * Base64 encoder/decoder for JavaScript strings.
 *
 * Text is converted to UTF-8 before being Base64-encoded, and converted back
 * from UTF-8 after being Base64-decoded. Members prefixed with "_" are
 * internal helpers that `encode` and `decode` reach through `this`, so both
 * public methods must be invoked as methods of this object.
 */
var Base64 = {

    // private property: the 64 alphabet characters, followed by the "=" pad
    // character at index 64 (so an index of 64 means "padding").
    _keyStr : "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=",

    /**
     * private method for UTF-8 encoding
     *
     * @param {string} string Text to encode. Every "\r\n" is rewritten to "\n"
     *     first, so an encode/decode round trip does not preserve CRLF.
     * @returns {string} A "byte string": one character (code 0-255) per UTF-8 byte.
     */
    _utf8_encode : function (string) {
        string = string.replace(/\r\n/g,"\n");
        var utftext = "";

        for (var n = 0; n < string.length; n++) {

            var c = string.charCodeAt(n);

            // A high surrogate followed by a low surrogate is a single code
            // point above U+FFFF; merge the pair so it is emitted as one
            // 4-byte sequence rather than two 3-byte ones.
            if (c >= 0xD800 && c <= 0xDBFF && n + 1 < string.length) {
                var low = string.charCodeAt(n + 1);
                if (low >= 0xDC00 && low <= 0xDFFF) {
                    c = 0x10000 + ((c - 0xD800) << 10) + (low - 0xDC00);
                    n++;
                }
            }

            if (c < 128) {
                utftext += String.fromCharCode(c);
            }
            else if (c < 2048) {
                utftext += String.fromCharCode((c >> 6) | 192);
                utftext += String.fromCharCode((c & 63) | 128);
            }
            else if (c < 65536) {
                utftext += String.fromCharCode((c >> 12) | 224);
                utftext += String.fromCharCode(((c >> 6) & 63) | 128);
                utftext += String.fromCharCode((c & 63) | 128);
            }
            else {
                utftext += String.fromCharCode((c >> 18) | 240);
                utftext += String.fromCharCode(((c >> 12) & 63) | 128);
                utftext += String.fromCharCode(((c >> 6) & 63) | 128);
                utftext += String.fromCharCode((c & 63) | 128);
            }

        }

        return utftext;
    },

    /**
     * private method for UTF-8 decoding
     *
     * The input is not validated: continuation bytes are masked and combined
     * as-is, and bytes missing at the end of a truncated sequence count as 0.
     *
     * @param {string} utftext Byte string (one character per UTF-8 byte).
     * @returns {string} The decoded text.
     */
    _utf8_decode : function (utftext) {
        var string = "";
        var i = 0;
        var c = 0;
        var c2 = 0;
        var c3 = 0;
        var c4 = 0;

        while ( i < utftext.length ) {

            c = utftext.charCodeAt(i);

            if (c < 128) {
                string += String.fromCharCode(c);
                i++;
            }
            else if((c > 191) && (c < 224)) {
                c2 = utftext.charCodeAt(i+1);
                string += String.fromCharCode(((c & 31) << 6) | (c2 & 63));
                i += 2;
            }
            else if((c > 239) && (c < 248)) {
                // 4-byte sequence: a code point above U+FFFF, which a
                // JavaScript string stores as a surrogate pair.
                c2 = utftext.charCodeAt(i+1);
                c3 = utftext.charCodeAt(i+2);
                c4 = utftext.charCodeAt(i+3);
                var cp = ((c & 7) << 18) | ((c2 & 63) << 12) | ((c3 & 63) << 6) | (c4 & 63);
                if (cp >= 0x10000 && cp <= 0x10FFFF) {
                    cp -= 0x10000;
                    string += String.fromCharCode(0xD800 + (cp >> 10), 0xDC00 + (cp & 0x3FF));
                } else {
                    // Overlong or out-of-range value: emit U+FFFD.
                    string += String.fromCharCode(0xFFFD);
                }
                i += 4;
            }
            else {
                // 3-byte sequence. Lone surrogates encoded this way (as older
                // versions of _utf8_encode produced) still decode to the
                // original UTF-16 code units.
                c2 = utftext.charCodeAt(i+1);
                c3 = utftext.charCodeAt(i+2);
                string += String.fromCharCode(((c & 15) << 12) | ((c2 & 63) << 6) | (c3 & 63));
                i += 3;
            }

        }

        return string;
    },

    /**
     * public method for encoding
     *
     * @param {string} input Text to encode.
     * @returns {string} Base64 text, padded with "=" to a multiple of 4 characters.
     */
    encode : function (input){
            var output = "";
            var chr1, chr2, chr3, enc1, enc2, enc3, enc4;
            var i = 0;

            input = this._utf8_encode(input);

            while (i < input.length) {

                // Reading past the end yields NaN, which the bit operators
                // below treat as 0 and which is detected afterwards to
                // select padding.
                chr1 = input.charCodeAt(i++);
                chr2 = input.charCodeAt(i++);
                chr3 = input.charCodeAt(i++);

                enc1 = chr1 >> 2;
                enc2 = ((chr1 & 3) << 4) | (chr2 >> 4);
                enc3 = ((chr2 & 15) << 2) | (chr3 >> 6);
                enc4 = chr3 & 63;

                if (isNaN(chr2)) {
                    enc3 = enc4 = 64;
                } else if (isNaN(chr3)) {
                    enc4 = 64;
                }

                output = output +
                this._keyStr.charAt(enc1) + this._keyStr.charAt(enc2) +
                this._keyStr.charAt(enc3) + this._keyStr.charAt(enc4);

            }

            return output;

    },

    /**
     * public method for decoding
     *
     * Characters outside the Base64 alphabet (whitespace, line breaks, ...)
     * are discarded before decoding, and omitted trailing "=" padding is
     * restored.
     *
     * @param {string} input Base64 text.
     * @returns {string} The decoded text.
     */
    decode : function (input) {
            var output = "";
            var chr1, chr2, chr3;
            var enc1, enc2, enc3, enc4;
            var i = 0;

            input = input.replace(/[^A-Za-z0-9\+\/\=]/g, "");

            // Without this, charAt() past the end returns "" and indexOf("")
            // returns 0, so unpadded input would decode with spurious NUL
            // characters appended.
            while (input.length % 4 !== 0) {
                input += "=";
            }

            while (i < input.length) {

                enc1 = this._keyStr.indexOf(input.charAt(i++));
                enc2 = this._keyStr.indexOf(input.charAt(i++));
                enc3 = this._keyStr.indexOf(input.charAt(i++));
                enc4 = this._keyStr.indexOf(input.charAt(i++));

                chr1 = (enc1 << 2) | (enc2 >> 4);
                chr2 = ((enc2 & 15) << 4) | (enc3 >> 2);
                chr3 = ((enc3 & 3) << 6) | enc4;

                output = output + String.fromCharCode(chr1);

                // Index 64 is the "=" pad: that position carries no data byte.
                if (enc3 !== 64) {
                    output = output + String.fromCharCode(chr2);
                }
                if (enc4 !== 64) {
                    output = output + String.fromCharCode(chr3);
                }

            }

            output = this._utf8_decode(output);

            return output;

    }
};

I modified it to something like this:

// Module-pattern variant: an immediately-invoked function expression whose
// returned object becomes Base64. Bodies are elided with /* ... */.
var Base64 = (function(){
    // Closure-local; not a property of the returned object.
    var _keyStr = /* ... */;
    // Closure-local helper; not a property of the returned object.
    function _utf8_encode(string) {
        /* ... */
    }
    // Closure-local helper; not a property of the returned object.
    function _utf8_decode(utftext) {
        /* ... */
    }
    function encode(input){
        /* ... */
    }
    function decode(input){
        /* ... */
    }

    // Only these two functions are exposed; the keys are quoted string literals.
    return {
        "encode" : encode,
        "decode" : decode
    }

})();

This should behave the same, right? And as a bonus, it no longer exposes the "private" methods and property.

Then I make it go through Google Closure Compiler's "simple optimization" (actually I used this compressor but the output is the same). To my surprise, it compiles to something like this (prettified a little bit to make it more readable):

// Compiler output as reported (prettified, bodies elided with /* ... */):
// the key string literal appears inline at each use rather than being held
// in a variable.
var Base64=function(){
    return{encode:function(b){
        /* ... */
        d=d+"ABCDE...+/=".charAt(a)+/* ... */
        },decode:function(b){
        /* ... */
        a="ABCDE...+/=".indexOf(/* ... */
    }};
}();

This seems very un-compressing, because the content of _keyStr is repeated many times in those two functions, effectively making the code larger. Not to mention the closure now seems pointless.

I tried to change the minified version to this:

// Hand-edited variant of the minified output (bodies elided with /* ... */):
// the key string is stored once in the closure-local variable z, and both
// functions reference z instead of repeating the literal.
var Base64=function(){
    var z="ABCDE...+/=";
    return{encode:function(b){
        /* ... */
        d=d+z.charAt(a)+/* ... */
        },decode:function(b){
        /* ... */
        a=z.indexOf(/* ... */
    }};
}();

I ran a few tests and it seems to work.
But I'm not sure whether I accidentally broke something, because in my experience Closure Compiler does respect "private" functions and will not inline a private function that is referenced multiple times.

So my question is, is it OK to add back the private property here? Is this a minor flaw in Closure Compiler or am I missing something?


Solution

  • This is covered in the Closure Compiler FAQ: https://github.com/google/closure-compiler/wiki/FAQ#closure-compiler-inlined-all-my-strings-which-made-my-code-size-bigger-why-did-it-do-that

    There are cases where inlining a string will make code size larger post-gzip, but I don't expect that will be the case here as it is unlikely to "flood" the gzip compression window.