如何自制 json 解析器

这篇文章缘起于同事兼朋友的一次技术分享,他介绍了 JSON 的 parse 和 stringify 的实现等具体知识,我主要是学习他的代码和编程思路。同时,我也对比了 Douglas Crockford 的实现,他是 JSON 格式的发明者和布道者,他的实现代码也非常酷,非常值得学习。

作为 web 开发者,我们把 JSON 当成了水和空气,却没有认认真真地研究过她,今天我们就来拨开她的面纱,看看她的实现原理吧。

同事的代码

    /**
     * A small hand-written JSON codec: a recursive-descent parser (`parse`) and
     * a serializer (`stringify`).
     *
     * Deliberate extension over strict JSON: string literals may be delimited
     * by single quotes as well as double quotes.
     *
     * The parser keeps its cursor in `this.index` and its input in `this.json`,
     * so one instance must not be used for two parses at the same time.
     */
    class JsonParser {
      constructor() {
        this.index = 0;
        this.json = "";
      }

      /**
       * Parses a JSON text into a JavaScript value.
       *
       * @param {string} json - The text to parse (coerced with String()).
       * @returns {*} The decoded value.
       * @throws {Error} If the text is malformed or has trailing characters.
       */
      parse(json) {
        this.index = 0;
        this.json = String(json).trim();
        const value = this.parseJson();
        // Anything left after the first complete value makes the text invalid.
        this.ignore32NonPrintingCharacters();
        if (this.index < this.json.length) {
          throw new Error("illegal json string, unexpected trailing characters");
        }
        return value;
      }

      /**
       * Serializes a value to JSON text without modifying it.
       *
       * @param {*} obj - The value to serialize.
       * @returns {string|undefined} The JSON text, or undefined when the value
       *   itself is not serializable (undefined, a function or a symbol).
       */
      stringify(obj) {
        return this.stringifyJson(obj);
      }

      /**
       * Deletes, in place, every own enumerable property of `obj` whose value
       * is undefined. Kept for callers that use it directly; `stringify` no
       * longer calls it because it must not mutate its argument.
       *
       * @param {*} obj - The object to clean; non-objects are returned as-is.
       * @returns {*} The same `obj` reference.
       */
      excludeUndefined(obj) {
        const type = typeof obj;
        if (obj === null || (type !== "object" && type !== "function")) {
          return obj;
        }
        for (const key of Object.keys(obj)) {
          if (obj[key] === undefined) {
            delete obj[key];
          }
        }
        return obj;
      }

      /**
       * Wraps `text` in double quotes, escaping quotes, backslashes and the
       * control characters U+0000..U+001F.
       *
       * @param {string} text - The raw string.
       * @returns {string} A JSON string literal.
       */
      quoteString(text) {
        const escaped = text.replace(/["\\\u0000-\u001f]/g, (ch) => {
          const short = JsonParser.stringEscapes.get(ch);
          if (short !== undefined) {
            return short;
          }
          return "\\u" + ("0000" + ch.charCodeAt(0).toString(16)).slice(-4);
        });
        return '"' + escaped + '"';
      }

      /**
       * Recursively serializes one value. Cyclic structures are not detected.
       *
       * @param {*} obj - The value to serialize.
       * @returns {string|undefined} JSON text, or undefined for values that
       *   have no JSON representation (undefined, functions, symbols).
       */
      stringifyJson(obj) {
        let value = obj;
        if (value !== null && typeof value === "object" && typeof value.toJSON === "function") {
          value = value.toJSON();
        }

        const type = typeof value;
        if (type === "undefined" || type === "function" || type === "symbol") {
          return undefined;
        }

        switch (Object.prototype.toString.call(value)) {
          case "[object Null]":
            return "null";
          case "[object String]":
            return this.quoteString(String(value));
          case "[object Boolean]":
            return String(value);
          case "[object Number]": {
            // NaN and the infinities have no JSON spelling; emit null instead.
            const num = Number(value);
            return Number.isFinite(num) ? String(num) : "null";
          }
          case "[object Array]": {
            // Array.from visits holes too, so sparse arrays stay well-formed.
            const items = Array.from(value, (element) => {
              const text = this.stringifyJson(element);
              return text === undefined ? "null" : text;
            });
            return "[" + items.join(",") + "]";
          }
          default: {
            if (type !== "object") {
              return String(value);
            }
            const members = [];
            for (const key of Object.keys(value)) {
              const text = this.stringifyJson(value[key]);
              // Members without a JSON representation are omitted entirely.
              if (text !== undefined) {
                members.push(this.quoteString(key) + ":" + text);
              }
            }
            return "{" + members.join(",") + "}";
          }
        }
      }

      /**
       * Advances the cursor past every character that compares <= " "
       * (the space and the control characters below it).
       */
      ignore32NonPrintingCharacters() {
        while (this.index < this.json.length && this.json[this.index] <= " ") {
          this.index++;
        }
      }

      /**
       * Parses the value that starts at the cursor, dispatching on its first
       * non-blank character.
       *
       * @returns {*} The decoded value.
       * @throws {Error} If no valid value starts at the cursor.
       */
      parseJson() {
        this.ignore32NonPrintingCharacters();
        switch (this.json[this.index]) {
          case "{":
            return this.parseObject();
          case "[":
            return this.parseArray();
          case "n":
            return this.parseNull();
          case "t":
            return this.parseTrue();
          case "f":
            return this.parseFalse();
          case '"':
          case "'":
            return this.parseString();
          default:
            return this.parseNumber();
        }
      }

      /**
       * Consumes the keyword `literal` at the cursor and returns `value`.
       *
       * @param {string} literal - The exact keyword expected.
       * @param {*} value - The value the keyword stands for.
       * @returns {*} `value`.
       * @throws {Error} If the keyword is not present at the cursor.
       */
      parseLiteral(literal, value) {
        if (!this.json.startsWith(literal, this.index)) {
          throw new Error("illegal json string, while parsing " + literal);
        }
        this.index += literal.length;
        return value;
      }

      /** @returns {boolean} true, after consuming the keyword `true`. */
      parseTrue() {
        return this.parseLiteral("true", true);
      }

      /** @returns {boolean} false, after consuming the keyword `false`. */
      parseFalse() {
        return this.parseLiteral("false", false);
      }

      /** @returns {null} null, after consuming the keyword `null`. */
      parseNull() {
        return this.parseLiteral("null", null);
      }

      /**
       * Converts the hex digits captured from a `\uXXXX` escape into the
       * character they denote. Usable as a String.prototype.replace callback.
       *
       * @param {string} match - The whole matched escape (unused).
       * @param {string} group - The four hexadecimal digits.
       * @returns {string} The decoded character.
       */
      replacer(match, group) {
        return String.fromCodePoint(parseInt(group, 16));
      }

      /**
       * Parses a string literal delimited by double or single quotes and
       * decodes its backslash escapes.
       *
       * @returns {string} The decoded string.
       * @throws {Error} If the literal is unterminated, does not start with a
       *   quote, or contains an invalid escape sequence.
       */
      parseString() {
        const quotationMark = this.json[this.index];
        if (quotationMark !== '"' && quotationMark !== "'") {
          throw new Error("illegal json string, while parsing string");
        }
        this.index++;

        let result = "";
        while (this.index < this.json.length) {
          const ch = this.json[this.index++];
          if (ch === quotationMark) {
            return result;
          }
          if (ch !== "\\") {
            result += ch;
            continue;
          }

          const escape = this.json[this.index++];
          if (escape === "u") {
            const hex = this.json.substring(this.index, this.index + 4);
            if (!/^[0-9a-fA-F]{4}$/.test(hex)) {
              throw new Error("illegal json string, while parsing string");
            }
            // fromCharCode keeps surrogate halves intact so that two
            // consecutive escapes combine into one astral character.
            result += String.fromCharCode(parseInt(hex, 16));
            this.index += 4;
            continue;
          }

          const decoded = JsonParser.escapeSequences.get(escape);
          if (decoded === undefined) {
            throw new Error("illegal json string, while parsing string");
          }
          result += decoded;
        }
        throw new Error("illegal json string, while parsing string");
      }

      /**
       * Parses a number at the cursor.
       *
       * @returns {number} The decoded number.
       * @throws {Error} If no characters form a number at the cursor.
       */
      parseNumber() {
        this.ignore32NonPrintingCharacters();
        const begin = this.index;
        while (this.index < this.json.length && this.isNumberChar(this.json[this.index])) {
          this.index++;
        }
        const text = this.json.substring(begin, this.index);
        const value = Number(text);
        // Number("") is 0, so the empty case must be rejected explicitly.
        if (text === "" || Number.isNaN(value)) {
          throw new Error("illegal json string, while parsing number");
        }
        return value;
      }

      /**
       * @param {string} c - A single character.
       * @returns {boolean} Whether `c` may appear inside a number token.
       */
      isNumberChar(c) {
        return JsonParser.numberChars.has(c) || (c <= "9" && c >= "0");
      }

      /**
       * Parses an object whose opening brace is at the cursor.
       *
       * @returns {Object} The decoded object.
       * @throws {Error} If a key, colon, comma or closing brace is missing.
       */
      parseObject() {
        this.index++;
        this.ignore32NonPrintingCharacters();
        const dict = {};
        if (this.json[this.index] === "}") {
          this.index++;
          return dict;
        }

        for (;;) {
          this.ignore32NonPrintingCharacters();
          const key = this.parseString();
          this.ignore32NonPrintingCharacters();
          if (this.json[this.index++] !== ":") {
            throw new Error("illegal json string, while parsing :");
          }
          const value = this.parseJson();
          // defineProperty stores "__proto__" as plain data instead of
          // replacing the prototype of the result.
          Object.defineProperty(dict, key, {
            value,
            writable: true,
            enumerable: true,
            configurable: true,
          });
          this.ignore32NonPrintingCharacters();
          const separator = this.json[this.index++];
          if (separator === "}") {
            return dict;
          }
          if (separator !== ",") {
            throw new Error("illegal json string, while parsing object");
          }
        }
      }

      /**
       * Parses an array whose opening bracket is at the cursor.
       *
       * @returns {Array} The decoded array.
       * @throws {Error} If an element, comma or closing bracket is missing.
       */
      parseArray() {
        this.index++;
        this.ignore32NonPrintingCharacters();
        const arrayList = [];
        if (this.json[this.index] === "]") {
          this.index++;
          return arrayList;
        }

        for (;;) {
          arrayList.push(this.parseJson());
          // Blanks may precede the separator, e.g. "[1 , 2]".
          this.ignore32NonPrintingCharacters();
          const separator = this.json[this.index++];
          if (separator === "]") {
            return arrayList;
          }
          if (separator !== ",") {
            throw new Error("illegal json string, while parsing array");
          }
        }
      }
    }

    // Non-digit characters that may appear inside a number token.
    JsonParser.numberChars = new Set(["-", "+", "e", "E", "."]);

    // Short escapes written by quoteString, keyed by the raw character.
    JsonParser.stringEscapes = new Map([
      ['"', '\\"'],
      ["\\", "\\\\"],
      ["\b", "\\b"],
      ["\f", "\\f"],
      ["\n", "\\n"],
      ["\r", "\\r"],
      ["\t", "\\t"],
    ]);

    // Characters accepted after a backslash by parseString, with their meaning.
    JsonParser.escapeSequences = new Map([
      ['"', '"'],
      ["'", "'"],
      ["\\", "\\"],
      ["/", "/"],
      ["b", "\b"],
      ["f", "\f"],
      ["n", "\n"],
      ["r", "\r"],
      ["t", "\t"],
    ]);

老道(Douglas Crockford)的代码

(function () {
      "use strict";

      // Final validation pattern for the parse routine: after the three
      // substitutions below have been applied, the text may contain only
      // "]", ",", ":", "{", "}" and whitespace.
      var rx_one = /^[\],:{}\s]*$/;
      // JSON backslash escapes; each match is replaced with "@" during validation.
      var rx_two = /\\(?:["\\\/bfnrt]|u[0-9a-fA-F]{4})/g;
      // Simple value tokens (strings, true/false/null, numbers); each match is
      // replaced with "]" during validation.
      var rx_three = /"[^"\\\n\r]*"|true|false|null|-?\d+(?:\.\d*)?(?:[eE][+\-]?\d+)?/g;
      // Open brackets at the start of the text or after ":" or ","; deleted
      // during validation.
      var rx_four = /(?:^|:|,)(?:\s*\[)+/g;
      // Characters that quote() must escape when producing a string literal.
      var rx_escapable = /[\\"\u0000-\u001f\u007f-\u009f\u00ad\u0600-\u0604\u070f\u17b4\u17b5\u200c-\u200f\u2028-\u202f\u2060-\u206f\ufeff\ufff0-\uffff]/g;
      // Characters the parse routine rewrites as \uXXXX escapes before the
      // text is validated and evaluated.
      var rx_dangerous = /[\u0000\u00ad\u0600-\u0604\u070f\u17b4\u17b5\u200c-\u200f\u2028-\u202f\u2060-\u206f\ufeff\ufff0-\uffff]/g;

      function f(n) {
        // Pad values below ten with a leading zero; anything else is handed
        // back untouched (so the result is a string only when padding occurs).
        if (n < 10) {
          return "0" + n;
        }
        return n;
      }

      function this_value() {
        // Used as a toJSON method: yields whatever valueOf() reports for the
        // receiver.
        var primitive = this.valueOf();
        return primitive;
      }

      // Install the toJSON fallbacks only when the environment has no
      // Date.prototype.toJSON of its own.
      if (typeof Date.prototype.toJSON !== "function") {

        Date.prototype.toJSON = function () {

          // A date whose time value is not finite serialises as null.
          if (!isFinite(this.valueOf())) {
            return null;
          }

          // Build "YYYY-MM-DDTHH:MM:SSZ" from the UTC fields, zero-padding
          // every field except the year.
          var datePart = [
            this.getUTCFullYear(),
            f(this.getUTCMonth() + 1),
            f(this.getUTCDate())
          ].join("-");
          var timePart = [
            f(this.getUTCHours()),
            f(this.getUTCMinutes()),
            f(this.getUTCSeconds())
          ].join(":");

          return datePart + "T" + timePart + "Z";
        };

        // Wrapper objects serialise as the primitive they hold.
        Boolean.prototype.toJSON = this_value;
        Number.prototype.toJSON = this_value;
        String.prototype.toJSON = this_value;
      }

      // State shared between JSON.stringify, str and quote.
      var gap;        // Indentation accumulated for the current nesting level.
      var indent;     // Indentation unit appended to gap for each nested level.
      var meta;       // Table of character substitutions consulted by quote.
      var rep;        // The replacer (function or array) passed to JSON.stringify.


      function quote(string) {

        // Strings without any escapable character only need surrounding
        // quotes. The regex is global, so its cursor is rewound before testing.

        rx_escapable.lastIndex = 0;
        if (!rx_escapable.test(string)) {
          return "\"" + string + "\"";
        }

        // Otherwise each offending character is swapped for its short escape
        // from the meta table or, when the table has none, a \uXXXX escape.

        var escaped = string.replace(rx_escapable, function (ch) {
          var substitute = meta[ch];
          if (typeof substitute === "string") {
            return substitute;
          }
          return "\\u" + ("0000" + ch.charCodeAt(0).toString(16)).slice(-4);
        });
        return "\"" + escaped + "\"";
      }


      // Serialise holder[key] to JSON text.
      //
      // key    - the property name (or array index) to read from holder.
      // holder - the object or array that contains the value.
      //
      // Returns the JSON text of the value, or undefined when typeof the
      // (possibly replaced) value matches none of the cases in the switch
      // below. Reads the shared rep, gap and indent variables; gap is
      // extended while a nested object or array is being serialised and is
      // restored before returning.
      function str(key, holder) {

        // Produce a string from holder[key].

        var i;          // The loop counter.
        var k;          // The member key.
        var v;          // The member value.
        var length;     // Number of entries in the array being iterated.
        var mind = gap; // The indentation on entry; gap is reset to it on exit.
        var partial;    // Serialised elements or members of the current value.
        var value = holder[key];

        // If the value has a toJSON method, call it to obtain a replacement value.

        if (
          value
          && typeof value === "object"
          && typeof value.toJSON === "function"
        ) {
          value = value.toJSON(key);
        }

        // If we were called with a replacer function, then call the replacer to
        // obtain a replacement value.

        if (typeof rep === "function") {
          value = rep.call(holder, key, value);
        }

        // What happens next depends on the value's type.

        switch (typeof value) {
          case "string":
            return quote(value);

          case "number":

            // JSON numbers must be finite. Encode non-finite numbers as null.

            return (isFinite(value))
              ? String(value)
              : "null";

          case "boolean":
          case "null":

            // If the value is a boolean or null, convert it to a string. Note:
            // typeof null does not produce "null". The case is included here in
            // the remote chance that this gets fixed someday.

            return String(value);

          // If the type is "object", we might be dealing with an object or an array or
          // null.

          case "object":

            // Due to a specification blunder in ECMAScript, typeof null is "object",
            // so watch out for that case.

            if (!value) {
              return "null";
            }

            // Make an array to hold the partial results of stringifying this object value.

            gap += indent;
            partial = [];

            // Is the value an array?

            if (Object.prototype.toString.apply(value) === "[object Array]") {

              // The value is an array. Stringify every element. Use null as a placeholder
              // for non-JSON values.

              length = value.length;
              for (i = 0; i < length; i += 1) {
                partial[i] = str(i, value) || "null";
              }

              // Join all of the elements together, separated with commas, and wrap them in
              // brackets.

              v = partial.length === 0
                ? "[]"
                : gap
                  ? (
                    "[\n"
                    + gap
                    + partial.join(",\n" + gap)
                    + "\n"
                    + mind
                    + "]"
                  )
                  : "[" + partial.join(",") + "]";
              gap = mind;
              return v;
            }

            // If the replacer is an array, use it to select the members to be stringified.

            if (rep && typeof rep === "object") {
              length = rep.length;
              for (i = 0; i < length; i += 1) {
                if (typeof rep[i] === "string") {
                  k = rep[i];
                  v = str(k, value);

                  // Members whose text is empty or undefined are left out.

                  if (v) {
                    partial.push(quote(k) + (
                      (gap)
                        ? ": "
                        : ":"
                    ) + v);
                  }
                }
              }
            } else {

              // Otherwise, iterate through all of the keys in the object.

              for (k in value) {
                if (Object.prototype.hasOwnProperty.call(value, k)) {
                  v = str(k, value);
                  if (v) {
                    partial.push(quote(k) + (
                      (gap)
                        ? ": "
                        : ":"
                    ) + v);
                  }
                }
              }
            }

            // Join all of the member texts together, separated with commas,
            // and wrap them in braces.

            v = partial.length === 0
              ? "{}"
              : gap
                ? "{\n" + gap + partial.join(",\n" + gap) + "\n" + mind + "}"
                : "{" + partial.join(",") + "}";
            gap = mind;
            return v;
        }
      }

      // If the JSON object does not yet have a stringify method, give it one.

      if (typeof JSON.stringify !== "function") {

        // Short escape sequences for the characters that have one.

        meta = {
          "\b": "\\b",
          "\t": "\\t",
          "\n": "\\n",
          "\f": "\\f",
          "\r": "\\r",
          "\"": "\\\"",
          "\\": "\\\\"
        };

        JSON.stringify = function (value, replacer, space) {

          // Turns a value into JSON text. The optional replacer is either a
          // function that substitutes values or an array of strings naming the
          // keys to keep. The optional space argument controls pretty-printing.

          var count;
          var isFunction;
          var isArrayLike;

          gap = "";
          indent = "";

          // A string is used verbatim as the indentation unit; a number asks
          // for that many spaces.

          if (typeof space === "string") {
            indent = space;
          } else if (typeof space === "number") {
            count = 0;
            while (count < space) {
              indent += " ";
              count += 1;
            }
          }

          // A replacer, when given, has to be a function or an object with a
          // numeric length; anything else is rejected.

          rep = replacer;
          if (replacer) {
            isFunction = typeof replacer === "function";
            isArrayLike = (
              typeof replacer === "object"
              && typeof replacer.length === "number"
            );
            if (!isFunction && !isArrayLike) {
              throw new Error("JSON.stringify");
            }
          }

          // Wrap the value in a holder object under the empty key and
          // serialise that member.

          var root = { "": value };
          return str("", root);
        };
      }


      // If the JSON object does not yet have a parse method, give it one.

      if (typeof JSON.parse2 !== "function") {
        JSON.parse2 = function (text, reviver) {

          // Converts a JSON text into a JavaScript value, optionally passing
          // every name/value pair through a reviver function.

          var result;
          var skeleton;

          function walk(holder, key) {

            // Visit the members of holder[key] depth-first, replacing each one
            // with what the reviver returns for it (or deleting it when that
            // is undefined), then hand the value itself to the reviver.

            var member;
            var revived;
            var value = holder[key];
            if (value && typeof value === "object") {
              for (member in value) {
                if (!Object.prototype.hasOwnProperty.call(value, member)) {
                  continue;
                }
                revived = walk(value, member);
                if (revived === undefined) {
                  delete value[member];
                } else {
                  value[member] = revived;
                }
              }
            }
            return reviver.call(holder, key, value);
          }

          // Stage one: rewrite the characters matched by rx_dangerous as
          // \uXXXX escapes. The regex is global, so rewind it before testing.

          text = String(text);
          rx_dangerous.lastIndex = 0;
          if (rx_dangerous.test(text)) {
            text = text.replace(rx_dangerous, function (ch) {
              var hex = ("0000" + ch.charCodeAt(0).toString(16)).slice(-4);
              return "\\u" + hex;
            });
          }

          // Stage two: reduce the text to a skeleton. Backslash pairs become
          // "@", simple value tokens become "]", and open brackets that follow
          // a colon, a comma or the start of the text are removed. Only
          // whitespace, "]", ",", ":", "{" and "}" may remain; anything else
          // means the text is not JSON.

          skeleton = text
            .replace(rx_two, "@")
            .replace(rx_three, "]")
            .replace(rx_four, "");
          if (!rx_one.test(skeleton)) {
            throw new SyntaxError("JSON.parse");
          }

          // Stage three: compile the text with eval. The surrounding parens
          // stop a leading "{" from being read as a block statement.
          // NOTE(review): eval is reached only after the check above; the
          // safety of this function rests entirely on those patterns.

          result = eval("(" + text + ")");

          // Stage four (optional): let the reviver transform the result.

          if (typeof reviver === "function") {
            return walk({ "": result }, "");
          }
          return result;
        };
      }
    }());

补充

文章还没有写完,我赶着去吃饭了。后续文章会更新在这里。

参考链接

作者: 曾小乱

喜欢写点有意思的东西

发表回复

您的电子邮箱地址不会被公开。 必填项已用*标注

此站点使用Akismet来减少垃圾评论。了解我们如何处理您的评论数据