这篇文章缘起于同事兼朋友的一次技术分享:他介绍了 JSON 的 parse 和 stringify 的实现等具体知识,我主要是学习他的代码和编程思路。同时,我也对比了 Douglas Crockford 的实现——他是 JSON 格式的发明者和布道者,他的实现代码也非常酷,非常值得学习。
作为 Web 开发者,我们把 JSON 当成了水和空气,却从没认认真真地研究过她。今天我们就来揭开她的面纱,看看她的实现原理吧。
同事的代码
/**
 * A small, deliberately lenient JSON parser / serializer.
 *
 * Leniencies kept from the original implementation: strings (and object
 * keys) may be delimited by single quotes, a trailing comma before "]" or
 * "}" is tolerated, and numbers may carry a leading "+".
 *
 * Everything else that is not JSON is rejected with an Error whose message
 * starts with "illegal json string".
 */
class JsonParser {
  constructor() {
    // Cursor into `this.json` while a parse is in progress.
    this.index = 0;
    // The text currently being parsed.
    this.json = "";
  }

  /**
   * Parse a JSON text into a JavaScript value.
   *
   * @param {*} json - the text to parse; coerced with String().
   * @returns {*} the parsed value.
   * @throws {Error} when the text is malformed or has trailing characters.
   */
  parse(json) {
    this.index = 0;
    this.json = String(json).trim();
    const value = this.parseJson();
    // Anything left after the top-level value means the text was not a
    // single JSON document.
    this.ignore32NonPrintingCharacters();
    if (this.index < this.json.length) {
      throw new Error("illegal json string, unexpected trailing characters");
    }
    return value;
  }

  /**
   * Serialize a value to JSON text.
   *
   * The input is never modified. Values without a JSON representation
   * (undefined, functions, symbols) are dropped from objects and written as
   * null inside arrays.
   *
   * @param {*} obj - the value to serialize.
   * @returns {string|undefined} JSON text, or undefined when `obj` itself
   *   has no JSON representation.
   */
  stringify(obj) {
    if (obj === undefined) {
      return undefined;
    }
    return this.stringifyJson(obj);
  }

  /**
   * Delete every own enumerable property of `obj` whose value is undefined.
   * Mutates and returns `obj`; null and undefined are returned unchanged.
   * No longer used by stringify(), which skips undefined members itself.
   *
   * @param {*} obj
   * @returns {*} the same `obj`.
   */
  excludeUndefined(obj) {
    if (obj === null || obj === undefined) {
      return obj;
    }
    for (const key of Object.keys(obj)) {
      if (obj[key] === undefined) {
        delete obj[key];
      }
    }
    return obj;
  }

  /**
   * Recursive worker behind stringify().
   *
   * @param {*} obj - the value to serialize.
   * @returns {string|undefined} JSON text, or undefined for undefined,
   *   functions and symbols.
   */
  stringifyJson(obj) {
    let value = obj;
    // Honour toJSON() (e.g. Date) the way JSON.stringify does.
    if (
      value !== null &&
      typeof value === "object" &&
      typeof value.toJSON === "function"
    ) {
      value = value.toJSON();
    }

    const type = typeof value;
    if (type === "undefined" || type === "function" || type === "symbol") {
      return undefined;
    }

    switch (Object.prototype.toString.call(value)) {
      case "[object Null]":
      case "[object Boolean]":
        return String(value);
      case "[object Number]":
        // JSON has no NaN / Infinity; encode them as null.
        return Number.isFinite(Number(value)) ? String(value) : "null";
      case "[object String]":
        return this.quoteString(String(value));
      case "[object Array]": {
        const items = [];
        for (const element of value) {
          const text = this.stringifyJson(element);
          items.push(text === undefined ? "null" : text);
        }
        return "[" + items.join(",") + "]";
      }
      default:
        // Any other object is written through its own enumerable keys;
        // remaining primitives (bigint) fall back to their string form.
        return type === "object"
          ? this.stringifyMembers(value)
          : String(value);
    }
  }

  /**
   * Serialize the own enumerable properties of `obj` as a JSON object.
   *
   * @param {object} obj
   * @returns {string}
   */
  stringifyMembers(obj) {
    const members = [];
    for (const key of Object.keys(obj)) {
      const text = this.stringifyJson(obj[key]);
      if (text !== undefined) {
        members.push(this.quoteString(key) + ":" + text);
      }
    }
    return "{" + members.join(",") + "}";
  }

  /**
   * Wrap `text` in double quotes, escaping quotes, backslashes and control
   * characters so that the result is a valid JSON string literal.
   *
   * @param {string} text
   * @returns {string}
   */
  quoteString(text) {
    const escaped = text.replace(JsonParser.escapable, (ch) => {
      const shorthand = JsonParser.escapes.get(ch);
      if (shorthand !== undefined) {
        return shorthand;
      }
      return "\\u" + ("0000" + ch.charCodeAt(0).toString(16)).slice(-4);
    });
    return '"' + escaped + '"';
  }

  /** Advance the cursor past every character <= " " (space and controls). */
  ignore32NonPrintingCharacters() {
    while (
      this.index < this.json.length &&
      this.json[this.index] <= " "
    ) {
      this.index++;
    }
  }

  /**
   * Parse whichever value starts at the cursor, dispatching on its first
   * character. Anything unrecognised is handed to parseNumber().
   */
  parseJson() {
    this.ignore32NonPrintingCharacters();
    switch (this.json[this.index]) {
      case "{":
        return this.parseObject();
      case "[":
        return this.parseArray();
      case "n":
        return this.parseNull();
      case "t":
        return this.parseTrue();
      case "f":
        return this.parseFalse();
      case '"':
      case "'":
        return this.parseString();
      default:
        return this.parseNumber();
    }
  }

  /**
   * Consume the literal `word` at the cursor and return `value`.
   *
   * @throws {Error} when the text at the cursor is not `word`.
   */
  parseLiteral(word, value) {
    if (!this.json.startsWith(word, this.index)) {
      throw new Error("illegal json string, while parsing " + word);
    }
    this.index += word.length;
    return value;
  }

  parseTrue() {
    return this.parseLiteral("true", true);
  }

  parseFalse() {
    return this.parseLiteral("false", false);
  }

  parseNull() {
    return this.parseLiteral("null", null);
  }

  /**
   * Decode one "\uXXXX" escape.
   *
   * @param {string} match - the whole escape (unused, kept so the method
   *   can still serve as a String.prototype.replace callback).
   * @param {string} group - the four hex digits.
   * @returns {string} the decoded character.
   * @throws {Error} when `group` is not exactly four hex digits.
   */
  replacer(match, group) {
    if (!/^[0-9a-fA-F]{4}$/.test(group)) {
      throw new Error("illegal json string, while parsing unicode escape");
    }
    return String.fromCodePoint(Number.parseInt(group, 16));
  }

  /**
   * Parse a string delimited by single or double quotes, decoding escape
   * sequences in a single left-to-right pass so that an escaped backslash
   * followed by "u" is not mistaken for a unicode escape.
   *
   * @returns {string}
   * @throws {Error} on a missing opening/closing quote or a bad escape.
   */
  parseString() {
    const quotationMark = this.json[this.index];
    if (quotationMark !== '"' && quotationMark !== "'") {
      throw new Error("illegal json string, while parsing string");
    }
    this.index++;

    let result = "";
    let chunkStart = this.index;
    while (
      this.index < this.json.length &&
      this.json[this.index] !== quotationMark
    ) {
      if (this.json[this.index] !== "\\") {
        this.index++;
        continue;
      }
      // Flush the literal run that precedes the backslash.
      result += this.json.substring(chunkStart, this.index);
      const escapeChar = this.json[this.index + 1];
      if (escapeChar === "u") {
        const hex = this.json.substring(this.index + 2, this.index + 6);
        result += this.replacer("\\u" + hex, hex);
        this.index += 6;
      } else {
        const decoded = JsonParser.unescapes.get(escapeChar);
        if (decoded === undefined) {
          throw new Error("illegal json string, while parsing string");
        }
        result += decoded;
        this.index += 2;
      }
      chunkStart = this.index;
    }

    if (this.json[this.index] !== quotationMark) {
      throw new Error("illegal json string, while parsing string");
    }
    result += this.json.substring(chunkStart, this.index);
    this.index++; // consume the closing quote
    return result;
  }

  /**
   * Parse a number starting at the cursor.
   *
   * @returns {number}
   * @throws {Error} when there is no number at the cursor or the token is
   *   not a valid numeric literal (e.g. "1-2", "e", "").
   */
  parseNumber() {
    this.ignore32NonPrintingCharacters();
    const begin = this.index;
    while (
      this.index < this.json.length &&
      this.isNumberChar(this.json[this.index])
    ) {
      this.index++;
    }
    const token = this.json.substring(begin, this.index);
    const value = Number(token);
    // Number("") is 0, so the empty token must be rejected explicitly;
    // otherwise an unexpected character would parse as 0 without the cursor
    // ever advancing.
    if (token === "" || Number.isNaN(value)) {
      throw new Error("illegal json string, while parsing number");
    }
    return value;
  }

  /** True when `c` may appear inside a numeric literal. */
  isNumberChar(c) {
    return JsonParser.numberChars.has(c) || (c <= "9" && c >= "0");
  }

  /**
   * Parse an object; the cursor must be on "{".
   *
   * @returns {object}
   * @throws {Error} on a malformed member, missing comma or missing "}".
   */
  parseObject() {
    this.index++; // consume "{"
    this.ignore32NonPrintingCharacters();
    const dict = {};
    while (this.index < this.json.length && this.json[this.index] !== "}") {
      const key = this.parseString();
      this.ignore32NonPrintingCharacters();
      if (this.json[this.index++] !== ":") {
        throw new Error("illegal json string, while parsing :");
      }
      const value = this.parseJson();
      // defineProperty rather than assignment so that a "__proto__" key
      // becomes an ordinary own property instead of replacing the prototype.
      Object.defineProperty(dict, key, {
        value,
        writable: true,
        enumerable: true,
        configurable: true,
      });
      this.ignore32NonPrintingCharacters();
      if (this.json[this.index] === ",") {
        this.index++;
        this.ignore32NonPrintingCharacters();
      } else if (this.json[this.index] !== "}") {
        throw new Error("illegal json string, while parsing object");
      }
    }
    if (this.json[this.index++] !== "}") {
      throw new Error("illegal json string, while parsing object");
    }
    return dict;
  }

  /**
   * Parse an array; the cursor must be on "[".
   *
   * @returns {Array}
   * @throws {Error} on a malformed element, missing comma or missing "]".
   */
  parseArray() {
    this.index++; // consume "["
    this.ignore32NonPrintingCharacters();
    const arrayList = [];
    while (this.index < this.json.length && this.json[this.index] !== "]") {
      arrayList.push(this.parseJson());
      // Whitespace may sit between a value and the following comma.
      this.ignore32NonPrintingCharacters();
      if (this.json[this.index] === ",") {
        this.index++;
        this.ignore32NonPrintingCharacters();
      } else if (this.json[this.index] !== "]") {
        throw new Error("illegal json string, while parsing array");
      }
    }
    if (this.json[this.index++] !== "]") {
      throw new Error("illegal json string, while parsing array");
    }
    return arrayList;
  }
}
// Characters, besides the digits, that may appear in a numeric literal.
JsonParser.numberChars = new Set(["-", "+", "e", "E", "."]);
// Characters that must be escaped inside a JSON string literal.
JsonParser.escapable = /["\\\u0000-\u001f]/g;
// Short escape sequences used when writing strings.
JsonParser.escapes = new Map([
  ["\b", "\\b"],
  ["\t", "\\t"],
  ["\n", "\\n"],
  ["\f", "\\f"],
  ["\r", "\\r"],
  ['"', '\\"'],
  ["\\", "\\\\"],
]);
// Character following a backslash -> decoded character, used when reading.
JsonParser.unescapes = new Map([
  ['"', '"'],
  ["'", "'"],
  ["\\", "\\"],
  ["/", "/"],
  ["b", "\b"],
  ["f", "\f"],
  ["n", "\n"],
  ["r", "\r"],
  ["t", "\t"],
]);
老道(Douglas Crockford)的代码
(function () {
"use strict";
// Final validation check in JSON.parse2: after the three replacements below,
// the text may contain only "]", ",", ":", "{", "}" and whitespace.
var rx_one = /^[\],:{}\s]*$/;
// Matches a JSON backslash escape pair; JSON.parse2 replaces each with "@".
var rx_two = /\\(?:["\\\/bfnrt]|u[0-9a-fA-F]{4})/g;
// Matches a simple value token (string without escapes, true, false, null,
// number); JSON.parse2 replaces each with "]".
var rx_three = /"[^"\\\n\r]*"|true|false|null|-?\d+(?:\.\d*)?(?:[eE][+\-]?\d+)?/g;
// Matches open brackets that begin the text or follow a colon or comma;
// JSON.parse2 deletes them.
var rx_four = /(?:^|:|,)(?:\s*\[)+/g;
// Characters that quote() replaces with an escape sequence.
var rx_escapable = /[\\"\u0000-\u001f\u007f-\u009f\u00ad\u0600-\u0604\u070f\u17b4\u17b5\u200c-\u200f\u2028-\u202f\u2060-\u206f\ufeff\ufff0-\uffff]/g;
// Characters that JSON.parse2 rewrites as \uXXXX escapes before evaluating
// the text.
var rx_dangerous = /[\u0000\u00ad\u0600-\u0604\u070f\u17b4\u17b5\u200c-\u200f\u2028-\u202f\u2060-\u206f\ufeff\ufff0-\uffff]/g;
function f(n) {
    // Left-pad values below 10 with "0" so they occupy two digits; anything
    // else is handed back untouched (still a number, not a string).
    if (n < 10) {
        return "0" + n;
    }
    return n;
}
function this_value() {
    // Shared toJSON implementation: hand back whatever valueOf() yields for
    // the receiver.
    var primitive = this.valueOf();
    return primitive;
}
// Install toJSON on Date, Boolean, Number and String, but only when Date does
// not already provide one.
if (typeof Date.prototype.toJSON !== "function") {
    Date.prototype.toJSON = function () {
        // An invalid date has no timestamp to print.
        if (!isFinite(this.valueOf())) {
            return null;
        }
        // UTC timestamp in the form YYYY-MM-DDTHH:MM:SSZ (no milliseconds).
        var datePart = [
            this.getUTCFullYear(),
            f(this.getUTCMonth() + 1),
            f(this.getUTCDate())
        ].join("-");
        var timePart = [
            f(this.getUTCHours()),
            f(this.getUTCMinutes()),
            f(this.getUTCSeconds())
        ].join(":");
        return datePart + "T" + timePart + "Z";
    };
    // The wrapper types serialize as their primitive value.
    Boolean.prototype.toJSON = this_value;
    Number.prototype.toJSON = this_value;
    String.prototype.toJSON = this_value;
}
// State shared between JSON.stringify, str() and quote().
// Indentation prefix for the current nesting level; str() appends `indent`
// when it enters an object or array and restores the previous value on exit.
var gap;
// Indentation added per nesting level, derived by JSON.stringify from its
// `space` argument.
var indent;
// Table of character substitutions consulted by quote().
var meta;
// The `replacer` argument given to JSON.stringify (a function or an array).
var rep;
function quote(string) {
    // Produce a double-quoted string literal. Strings free of control
    // characters, quote characters and backslashes are wrapped as they are;
    // otherwise each offending character is swapped for an escape sequence.
    rx_escapable.lastIndex = 0;
    if (!rx_escapable.test(string)) {
        return "\"" + string + "\"";
    }
    var escaped = string.replace(rx_escapable, function (ch) {
        // Prefer the short form from the substitution table, falling back to
        // a four-digit \u escape.
        var shorthand = meta[ch];
        if (typeof shorthand === "string") {
            return shorthand;
        }
        var hex = ch.charCodeAt(0).toString(16);
        return "\\u" + ("0000" + hex).slice(-4);
    });
    return "\"" + escaped + "\"";
}
function str(key, holder) {
    // Serialize holder[key] to JSON text. Values whose typeof is none of
    // string/number/boolean/object produce undefined.
    var idx;                // Loop counter.
    var memberKey;          // Key currently being visited.
    var text;               // Finished text for an array or object.
    var count;
    var outerGap = gap;     // Indentation to restore once this level is done.
    var pieces;
    var value = holder[key];

    // Serialize one member of `value` and keep it only when it produced text.
    function member(name) {
        var rendered = str(name, value);
        if (rendered) {
            pieces.push(quote(name) + (gap ? ": " : ":") + rendered);
        }
    }

    // Give the value a chance to substitute itself through toJSON.
    if (
        value
        && typeof value === "object"
        && typeof value.toJSON === "function"
    ) {
        value = value.toJSON(key);
    }

    // A replacer function gets to substitute the value as well.
    if (typeof rep === "function") {
        value = rep.call(holder, key, value);
    }

    var kind = typeof value;
    if (kind === "string") {
        return quote(value);
    }
    if (kind === "number") {
        // Non-finite numbers have no JSON form and are written as null.
        return isFinite(value)
            ? String(value)
            : "null";
    }
    // typeof never yields "null" today; the comparison is kept in case that
    // ever changes.
    if (kind === "boolean" || kind === "null") {
        return String(value);
    }
    if (kind !== "object") {
        return undefined;
    }

    // typeof null is "object", so null has to be singled out here.
    if (!value) {
        return "null";
    }

    // Descend one indentation level and collect the pieces of this value.
    gap += indent;
    pieces = [];

    if (Object.prototype.toString.apply(value) === "[object Array]") {
        // Arrays: every slot is written; slots without a JSON form become null.
        count = value.length;
        for (idx = 0; idx < count; idx += 1) {
            pieces[idx] = str(idx, value) || "null";
        }
        if (pieces.length === 0) {
            text = "[]";
        } else if (gap) {
            text = "[\n" + gap + pieces.join(",\n" + gap) + "\n" + outerGap + "]";
        } else {
            text = "[" + pieces.join(",") + "]";
        }
        gap = outerGap;
        return text;
    }

    if (rep && typeof rep === "object") {
        // An array replacer selects which members are written.
        count = rep.length;
        for (idx = 0; idx < count; idx += 1) {
            if (typeof rep[idx] === "string") {
                member(rep[idx]);
            }
        }
    } else {
        // Otherwise every own key of the object is written.
        for (memberKey in value) {
            if (Object.prototype.hasOwnProperty.call(value, memberKey)) {
                member(memberKey);
            }
        }
    }

    // Join the members with commas and wrap them in braces.
    if (pieces.length === 0) {
        text = "{}";
    } else if (gap) {
        text = "{\n" + gap + pieces.join(",\n" + gap) + "\n" + outerGap + "}";
    } else {
        text = "{" + pieces.join(",") + "}";
    }
    gap = outerGap;
    return text;
}
// If the JSON object does not yet have a stringify method, give it one.
if (typeof JSON.stringify !== "function") {
    // Short escape sequences for the characters that have one.
    meta = {
        "\b": "\\b",
        "\t": "\\t",
        "\n": "\\n",
        "\f": "\\f",
        "\r": "\\r",
        "\"": "\\\"",
        "\\": "\\\\"
    };
    JSON.stringify = function (value, replacer, space) {
        // Turn `value` into JSON text. `replacer` may be a function that
        // substitutes values or an array of strings that selects keys;
        // `space` (a number of spaces or a literal string) switches on
        // indented output.
        var added;
        var replacerIsUsable;
        gap = "";
        indent = "";

        if (typeof space === "number") {
            // A numeric space means that many space characters per level.
            added = 0;
            while (added < space) {
                indent += " ";
                added += 1;
            }
        } else if (typeof space === "string") {
            // A string space is used verbatim as the per-level indent.
            indent = space;
        }

        // A replacer, when given, has to be a function or array-like object.
        rep = replacer;
        replacerIsUsable = (
            !replacer
            || typeof replacer === "function"
            || (
                typeof replacer === "object"
                && typeof replacer.length === "number"
            )
        );
        if (!replacerIsUsable) {
            throw new Error("JSON.stringify");
        }

        // Serialize through a throwaway root object that holds the value
        // under the empty key.
        return str("", { "": value });
    };
}
// If the JSON object does not yet have a parse method, give it one.
if (typeof JSON.parse2 !== "function") {
    JSON.parse2 = function (text, reviver) {
        // Turn `text` into a JavaScript value when it is valid JSON, then
        // optionally pass every name/value pair through `reviver`.
        // NOTE(review): the text is ultimately compiled with eval(); its
        // safety rests entirely on the regular-expression screening below.
        var result;
        var skeleton;

        function walk(holder, key) {
            // Visit the structure depth-first so the reviver sees children
            // before their parent; an undefined result removes the member.
            var name;
            var replacement;
            var value = holder[key];
            if (value && typeof value === "object") {
                for (name in value) {
                    if (Object.prototype.hasOwnProperty.call(value, name)) {
                        replacement = walk(value, name);
                        if (replacement === undefined) {
                            delete value[name];
                        } else {
                            value[name] = replacement;
                        }
                    }
                }
            }
            return reviver.call(holder, key, value);
        }

        // Stage 1: rewrite the characters matched by rx_dangerous as \uXXXX
        // escapes, since JavaScript handles many of them incorrectly.
        text = String(text);
        rx_dangerous.lastIndex = 0;
        if (rx_dangerous.test(text)) {
            text = text.replace(rx_dangerous, function (ch) {
                var hex = ch.charCodeAt(0).toString(16);
                return "\\u" + ("0000" + hex).slice(-4);
            });
        }

        // Stage 2: reduce the text to a skeleton. Backslash pairs become "@",
        // simple value tokens become "]", and open brackets that begin the
        // text or follow a colon or comma are dropped. Valid JSON leaves only
        // whitespace and the characters "]" "," ":" "{" "}".
        skeleton = text
            .replace(rx_two, "@")
            .replace(rx_three, "]")
            .replace(rx_four, "");
        if (!rx_one.test(skeleton)) {
            // Anything else is not JSON.
            throw new SyntaxError("JSON.parse");
        }

        // Stage 3: compile the text. The parentheses make a leading "{" an
        // object literal rather than a block.
        result = eval("(" + text + ")");

        // Stage 4 (optional): let the reviver transform the structure.
        if (typeof reviver === "function") {
            return walk({ "": result }, "");
        }
        return result;
    };
}
}());
补充
文章还没有写完,我赶着去吃饭了,后续内容会更新在这里。