JSON parser
This commit is contained in:
parent
57f666118a
commit
1b4b07c1fa
6 changed files with 1059 additions and 3 deletions
79
src/languages/json/experiments.ts
Normal file
79
src/languages/json/experiments.ts
Normal file
|
|
@ -0,0 +1,79 @@
|
|||
import { sourceText } from 'source-region';
|
||||
import { parseDocument, programOf } from './parser';
|
||||
import { Program } from './syntax';
|
||||
|
||||
// === Experiments ===
|
||||
|
||||
/** Experiment 00: feeds the empty string to the parser. */
function experiment00_emptyDocument(): void {
  const input = "";
  logParse("empty document", input);
}
|
||||
|
||||
/** Experiment 01: several whitespace-separated scalar values at document level. */
function experiment01_topLevelValues(): void {
  const input = 'true false null "hello" 123';
  logParse("top-level values", input);
}
|
||||
|
||||
/** Experiment 02: a well-formed object containing a string, an array, and a keyword. */
function experiment02_objectsAndArrays(): void {
  const input = '{"name": "Ada", "scores": [1, 2, 3], "ok": true}';
  logParse("objects and arrays", input);
}
|
||||
|
||||
/** Experiment 03: an array whose first two items have no comma between them. */
function experiment03_missingArraySeparator(): void {
  const input = '[1 2, 3]';
  logParse("missing array separator", input);
}
|
||||
|
||||
/** Experiment 04: one array with a trailing comma and one with a doubled comma. */
function experiment04_arrayTrailingAndRepeatedComma(): void {
  const input = '[1, 2,] [1,, 2]';
  logParse("array comma errors", input);
}
|
||||
|
||||
/** Experiment 05: an object whose first member has no colon between key and value. */
function experiment05_objectMissingColon(): void {
  const input = '{"x" 1, "y": 2}';
  logParse("object missing colon", input);
}
|
||||
|
||||
/** Experiment 06: an object whose two members have no comma between them. */
function experiment06_objectMissingSeparator(): void {
  const input = '{"x": 1 "y": 2}';
  logParse("object missing separator", input);
}
|
||||
|
||||
/** Experiment 07: an object with an unquoted key, a valid key, and a missing key. */
function experiment07_objectKeyErrors(): void {
  const input = '{x: 1, "ok": 2, : 3}';
  logParse("object key errors", input);
}
|
||||
|
||||
/**
 * Experiment 08: three malformed strings — one interrupted by a newline,
 * one with an unknown escape (`\x`), and one with a short `\u` escape.
 */
function experiment08_stringErrors(): void {
  const input = '"unterminated\n"bad escape: \\x" "bad unicode: \\u12"';
  logParse("string errors", input);
}
|
||||
|
||||
/**
 * Experiment 09: malformed numbers — a leading zero, a bare minus sign, a
 * dangling decimal point, a dangling exponent, and digits followed by letters.
 */
function experiment09_numberErrors(): void {
  const input = '01 - 1. 1e+ 123abc';
  logParse("number errors", input);
}
|
||||
|
||||
/** Experiment 10: an array closed by `}` and an object closed by `]`. */
function experiment10_mismatchedDelimiters(): void {
  const input = '{"x": [1, 2} {"y": 3]';
  logParse("mismatched delimiters", input);
}
|
||||
|
||||
/** Experiment 11: unknown tokens (`@@@`, `nil`) interleaved with valid top-level values. */
function experiment11_recoverAtDocumentLevel(): void {
  const input = '@@@ {"ok": true} nil [1, 2]';
  logParse("document recovery", input);
}
|
||||
|
||||
/**
 * Parses `input` as a JSON document and prints a report to the console:
 * a banner containing `name`, the raw input, the result tag followed by the
 * rendered program, and finally the full list of parse errors.
 *
 * @param name  Label shown in the banner line.
 * @param input Source text handed to the parser.
 */
function logParse(name: string, input: string): void {
  const { syntax, errors } = parseDocument(sourceText(input).fullRegion());

  console.log(`==== json:${name} ====`);
  console.log(input);
  console.log(syntax.tag, Program.show(programOf(syntax)));
  // depth: null prints nested error objects in full instead of truncating them.
  console.dir(errors, { depth: null });
}
|
||||
|
||||
/** Runs every JSON parser experiment, in numeric order. */
export function runExperiments(): void {
  const experiments: ReadonlyArray<() => void> = [
    experiment00_emptyDocument,
    experiment01_topLevelValues,
    experiment02_objectsAndArrays,
    experiment03_missingArraySeparator,
    experiment04_arrayTrailingAndRepeatedComma,
    experiment05_objectMissingColon,
    experiment06_objectMissingSeparator,
    experiment07_objectKeyErrors,
    experiment08_stringErrors,
    experiment09_numberErrors,
    experiment10_mismatchedDelimiters,
    experiment11_recoverAtDocumentLevel,
  ];

  for (const run of experiments) {
    run();
  }
}
|
||||
|
|
@ -0,0 +1,3 @@
|
|||
export * from './parse_errors';
|
||||
export * from './parser';
|
||||
export * from './syntax';
|
||||
|
|
@ -1,4 +1,60 @@
|
|||
import type { CodePoint, CodePointSpan } from 'source-region';
|
||||
|
||||
/**
 * Every diagnostic the JSON parser can report, discriminated by `tag`.
 *
 * All variants carry a `span` (the focus of the diagnostic), which is what
 * lets callers read `error.span` without narrowing first. The former `{}`
 * placeholder member has been dropped: it had no `span`, and it made every
 * object assignable to `ParseError`.
 */
export type ParseError =
  /** A value was required but something else (or end of input) was found. */
  | {
      tag: "expected-value";
      span: CodePointSpan;
      found: FoundSyntax;
    }
  /** An object member key (a string) was required. */
  | {
      tag: "expected-member-key";
      span: CodePointSpan;
      found: FoundSyntax;
    }
  /** The `:` between a member key and its value is missing. */
  | {
      tag: "expected-colon";
      span: CodePointSpan;
      found: FoundSyntax;
    }
  /** The `,` between two array items is missing. */
  | {
      tag: "expected-array-separator";
      span: CodePointSpan;
      found: FoundSyntax;
    }
  /** The `,` between two object members is missing. */
  | {
      tag: "expected-object-separator";
      span: CodePointSpan;
      found: FoundSyntax;
    }
  /**
   * An object or array was not closed by its matching delimiter.
   * `open` is the span of the opening delimiter; `expected` names the
   * closing delimiter that should have appeared.
   */
  | {
      tag: "expected-close-delimiter";
      span: CodePointSpan;
      open: CodePointSpan;
      expected: "brace" | "bracket";
      found: FoundSyntax;
    }
  /** A `}` or `]` appeared where a value was expected. */
  | {
      tag: "unexpected-close-delimiter";
      span: CodePointSpan;
      delimiter: "brace" | "bracket";
    }
  /** A string literal is malformed; `reason` says how. */
  | {
      tag: "invalid-string";
      span: CodePointSpan;
      reason: "unterminated" | "invalid-escape" | "invalid-unicode-escape" | "control-character";
    }
  /** A number literal is malformed; `text` is the offending source text. */
  | {
      tag: "invalid-number";
      span: CodePointSpan;
      text: string;
      reason:
        | "missing-integer-digits"
        | "leading-zero"
        | "missing-fraction-digits"
        | "missing-exponent-digits"
        | "trailing-junk";
    };
|
||||
|
||||
/**
 * What the parser was looking at when it raised a diagnostic: either the end
 * of the input, or one specific code point together with its location.
 */
export type FoundSyntax =
  | {
      tag: "eof";
      span: CodePointSpan;
    }
  | {
      tag: "code-point";
      value: CodePoint;
      span: CodePointSpan;
    };
|
||||
|
|
|
|||
756
src/languages/json/parser.ts
Normal file
756
src/languages/json/parser.ts
Normal file
|
|
@ -0,0 +1,756 @@
|
|||
import {
|
||||
CodePointString,
|
||||
SourceCursor,
|
||||
char,
|
||||
isAsciiAlphanumeric,
|
||||
isAsciiWhitespace,
|
||||
isBetween,
|
||||
isDigit,
|
||||
} from 'source-region';
|
||||
import type {
|
||||
CodePoint,
|
||||
CodePointSpan,
|
||||
SourceRegion,
|
||||
} from 'source-region';
|
||||
import { matchCodePointString, skipWhile } from '../../recognizers';
|
||||
import type { FoundSyntax, ParseError } from './parse_errors';
|
||||
import {
|
||||
ArrayItem,
|
||||
ColonToken,
|
||||
ConcreteError,
|
||||
DelimiterToken,
|
||||
JsonValue,
|
||||
MemberItem,
|
||||
Program,
|
||||
} from './syntax';
|
||||
import type {
|
||||
ConcreteInfo,
|
||||
JsonValue as JsonValueType,
|
||||
MemberItem as MemberItemType,
|
||||
ArrayItem as ArrayItemType,
|
||||
Program as ProgramType,
|
||||
StringLiteral,
|
||||
} from './syntax';
|
||||
|
||||
/** The `reason` codes carried by an "invalid-number" parse error. */
type InvalidNumberReason = Extract<
  ParseError,
  { tag: "invalid-number" }
>["reason"];
|
||||
|
||||
// Whitespace convention:
|
||||
// - parseDocument consumes leading whitespace before each top-level value.
|
||||
// - parseJsonValue assumes leading whitespace has already been consumed.
|
||||
// - Successful value parsers stop immediately after the value.
|
||||
// - object/array parsers own whitespace around separators, colons, members, and items.
|
||||
//
|
||||
// Recovery policy:
|
||||
// - Unknown value starts consume at least one code point, then panic until a
|
||||
// separator, delimiter, whitespace, or plausible value start.
|
||||
// - Arrays and objects record separator errors inline, then continue from the
|
||||
// next plausible item/member.
|
||||
// - Missing object values are represented by an error-expression value.
|
||||
//
|
||||
// Span convention:
|
||||
// - Parser internals and diagnostics use CodePointSpan.
|
||||
// - Error nodes span the malformed/recovered syntax when possible; individual
|
||||
// diagnostic nodes still keep a narrower focus span.
|
||||
|
||||
// --- Structural punctuation ---
const OPEN_BRACE = char('{');
const CLOSE_BRACE = char('}');
const OPEN_BRACKET = char('[');
const CLOSE_BRACKET = char(']');
const COMMA = char(',');
const COLON = char(':');

// --- String syntax ---
const QUOTE = char('"');
const BACKSLASH = char('\\');
const SLASH = char('/');

// --- Number syntax ---
const DASH = char('-');
const PLUS = char('+');
const DOT = char('.');
const LOWER_E = char('e');
const UPPER_E = char('E');

// --- Letters used as escape selectors and as keyword first letters ---
const LOWER_U = char('u');
const LOWER_B = char('b');
const LOWER_F = char('f');
const LOWER_N = char('n');
const LOWER_R = char('r');
const LOWER_T = char('t');

// --- Range endpoints for digit and hex-digit checks ---
const DIGIT_0 = char('0');
const DIGIT_9 = char('9');
const LOWERCASE_A = char('a');
const LOWERCASE_F = char('f');
const UPPERCASE_A = char('A');
const UPPERCASE_F = char('F');

// --- Keyword spellings ---
const TRUE = CodePointString.makeFromString("true");
const FALSE = CodePointString.makeFromString("false");
const NULL = CodePointString.makeFromString("null");
|
||||
|
||||
/**
 * A parsed program tagged with whether it is error-free ("valid") or may
 * contain error nodes ("invalid").
 */
export type ConcreteSyntaxResult =
  | { tag: "valid"; value: ValidConcreteSyntax }
  | { tag: "invalid"; value: PartialConcreteSyntax };
|
||||
|
||||
/** Everything `parseDocument` returns: the syntax tree plus all recorded diagnostics. */
export type ParseDocumentResult = {
  /** The parsed program, tagged valid or invalid. */
  syntax: ConcreteSyntaxResult;
  /** The diagnostics recorded while parsing, in the order they were raised. */
  errors: ParseError[];
};
|
||||
|
||||
/** A program whose error slots are `never`, i.e. one that cannot hold error nodes. */
export type ValidConcreteSyntax = ProgramType<ConcreteInfo, never>;

/** A program that may contain `ConcreteError` nodes. */
export type PartialConcreteSyntax = ProgramType<ConcreteInfo, ConcreteError>;

/** A JSON value that may contain `ConcreteError` nodes. */
export type PartialJsonValue = JsonValueType<ConcreteInfo, ConcreteError>;

/** An object member item that may contain `ConcreteError` nodes. */
export type PartialMemberItem = MemberItemType<ConcreteInfo, ConcreteError>;

/** An array item that may contain `ConcreteError` nodes. */
export type PartialArrayItem = ArrayItemType<ConcreteInfo, ConcreteError>;
|
||||
|
||||
/** Constructors for the two `ConcreteSyntaxResult` variants. */
export namespace ConcreteSyntaxResult {
  /** Wraps an error-free program in the "valid" variant. */
  export function valid(value: ValidConcreteSyntax): ConcreteSyntaxResult {
    const result: ConcreteSyntaxResult = { tag: "valid", value };
    return result;
  }

  /** Wraps a program that may contain error nodes in the "invalid" variant. */
  export function invalid(value: PartialConcreteSyntax): ConcreteSyntaxResult {
    const result: ConcreteSyntaxResult = { tag: "invalid", value };
    return result;
  }
}
|
||||
|
||||
/**
 * Extracts the program from a parse result regardless of its tag.
 *
 * @returns The wrapped program, typed as possibly containing error nodes.
 */
export function programOf(result: ConcreteSyntaxResult): PartialConcreteSyntax {
  const { value } = result;
  return value;
}
|
||||
|
||||
/**
 * Parses the whole of `region` as a JSON document.
 *
 * @returns The syntax tree together with every diagnostic recorded on the way.
 */
export function parseDocument(region: SourceRegion): ParseDocumentResult {
  const parser = new Parser(region);
  return parser.parseDocument();
}
|
||||
|
||||
/**
 * Error-tolerant recursive-descent parser for a JSON document.
 *
 * A document is a sequence of zero or more values separated by whitespace.
 * Malformed input never aborts the parse: each problem is recorded both in
 * the flat `errors` list and on the syntax node it belongs to, and parsing
 * resumes at the next plausible position.
 */
class Parser {
  private readonly cursor: SourceCursor;
  private readonly errors: ParseError[] = [];

  constructor(private readonly region: SourceRegion) {
    this.cursor = region.makeCursor();
  }

  /**
   * Parses values until the end of the region.
   *
   * @returns The program plus all diagnostics; the program is tagged "valid"
   *          only when no diagnostic was recorded.
   */
  parseDocument(): ParseDocumentResult {
    const expressions: PartialJsonValue[] = [];

    while (true) {
      this.skipWhitespace();
      if (this.cursor.isAtEnd()) break;

      expressions.push(this.parseJsonValue());
    }

    const program = Program.make(expressions, { span: this.region.codePointSpan });
    return {
      syntax: this.errors.length === 0
        ? ConcreteSyntaxResult.valid(program as ValidConcreteSyntax)
        : ConcreteSyntaxResult.invalid(program as PartialConcreteSyntax),
      errors: this.errors,
    };
  }

  /**
   * Parses one value, dispatching on the current code point.
   * Leading whitespace must already have been skipped by the caller.
   */
  private parseJsonValue(): PartialJsonValue {
    const cp = this.cursor.peek();

    if (cp === undefined) {
      return this.errorExpression(this.makeError({
        tag: "expected-value",
        span: this.cursor.eofSpan(),
        found: this.found(),
      }));
    }

    // A stray closer is consumed so the caller always makes progress.
    if (cp === CLOSE_BRACE || cp === CLOSE_BRACKET) {
      const delimiter = cp === CLOSE_BRACE ? "brace" : "bracket";
      const span = this.cursor.currentSpan();
      this.cursor.advance();
      return this.errorExpression(this.makeError({
        tag: "unexpected-close-delimiter",
        span,
        delimiter,
      }));
    }

    if (cp === OPEN_BRACE) return this.parseObject();
    if (cp === OPEN_BRACKET) return this.parseArray();
    if (cp === QUOTE) return this.parseString();
    if (cp === DASH || isDigit(cp)) return this.parseNumber();
    if (cp === LOWER_T) return this.parseKeyword(TRUE, JsonValue.trueValue);
    if (cp === LOWER_F) return this.parseKeyword(FALSE, JsonValue.falseValue);
    if (cp === LOWER_N) return this.parseKeyword(NULL, JsonValue.nullValue);

    return this.parseUnknownValue();
  }

  /**
   * Parses an object; the cursor must be on `{`.
   *
   * Separator problems are recorded inline as error items in `members`.
   * A `]` is accepted as a (diagnosed) closer, and end of input yields an
   * object without a close token.
   */
  private parseObject(): PartialJsonValue {
    const start = this.cursor.checkpoint();
    const open = DelimiterToken.openBrace(this.cursor.currentSpan());
    this.cursor.advance();

    const members: PartialMemberItem[] = [];
    // True at the start and after a comma (or a diagnosed missing comma).
    let expectingMember = true;
    // True when the most recently consumed token was a comma.
    let lastWasSeparator = false;

    while (true) {
      this.skipWhitespace();

      const cp = this.cursor.peek();
      if (cp === CLOSE_BRACE) {
        const close = DelimiterToken.closeBrace(this.cursor.currentSpan());
        if (lastWasSeparator) {
          // Trailing comma: a member was promised but never arrived.
          members.push(this.missingMemberKey());
        }
        this.cursor.advance();
        return JsonValue.object(open, members, this.cursor.spanFrom(start), close);
      }

      if (cp === CLOSE_BRACKET) {
        // Wrong kind of closer: diagnose it, but still treat it as the end.
        const close = DelimiterToken.closeBracket(this.cursor.currentSpan());
        const error = this.makeError({
          tag: "expected-close-delimiter",
          span: this.cursor.currentSpan(),
          open: open.span,
          expected: "brace",
          found: this.found(),
        });
        this.cursor.advance();
        return JsonValue.object(open, members, this.cursor.spanFrom(start), close, error);
      }

      if (cp === undefined) {
        const error = this.makeError({
          tag: "expected-close-delimiter",
          span: this.cursor.eofSpan(),
          open: open.span,
          expected: "brace",
          found: this.found(),
        });
        return JsonValue.object(open, members, this.cursor.spanFrom(start), undefined, error);
      }

      if (expectingMember) {
        if (cp === COMMA) {
          // A comma where a member should start (leading or repeated comma).
          const commaSpan = this.cursor.currentSpan();
          this.cursor.advance();
          const error = this.makeError({
            tag: "expected-member-key",
            span: commaSpan,
            found: { tag: "code-point", value: COMMA, span: commaSpan },
          });
          members.push(MemberItem.errorSeparator(error, commaSpan));
          lastWasSeparator = true;
          continue;
        }

        members.push(this.parseMember());
        expectingMember = false;
        lastWasSeparator = false;
        continue;
      }

      if (cp === COMMA) {
        this.cursor.advance();
        expectingMember = true;
        lastWasSeparator = true;
        continue;
      }

      // A member has just been parsed and the next token is not a comma:
      // record the missing separator without consuming anything, then parse
      // the next member from the same position on the next iteration.
      const error = this.makeError({
        tag: "expected-object-separator",
        span: this.cursor.currentSpan(),
        found: this.found(),
      });
      members.push(MemberItem.errorSeparator(error, this.cursor.currentSpan()));
      expectingMember = true;
      lastWasSeparator = false;
    }
  }

  /**
   * Parses one `key : value` member. A missing colon is recorded on the
   * member itself; a missing value becomes an error-expression value.
   */
  private parseMember(): PartialMemberItem {
    const start = this.cursor.checkpoint();
    const key = this.parseMemberKey();

    this.skipWhitespace();
    let colon: ColonToken | undefined;
    let memberError: ConcreteError | undefined;

    if (this.cursor.peek() === COLON) {
      colon = ColonToken.make(this.cursor.currentSpan());
      this.cursor.advance();
    } else {
      // The cursor may be at end of input here, so take the span from
      // found(), which uses eofSpan() in that case like the other EOF sites.
      const found = this.found();
      memberError = this.makeError({
        tag: "expected-colon",
        span: found.span,
        found,
      });
    }

    this.skipWhitespace();
    let value: PartialJsonValue;
    if (isValueBoundary(this.cursor.peek())) {
      // Nothing that could be a value precedes the next `,` / closer / EOF.
      const found = this.found();
      value = this.errorExpression(this.makeError({
        tag: "expected-value",
        span: found.span,
        found,
      }));
    } else {
      value = this.parseJsonValue();
    }

    const span = this.cursor.spanFrom(start);
    return MemberItem.member(key, value, span, colon, memberError);
  }

  /**
   * Parses a member key. Anything other than a string literal is skipped up
   * to the next `:`, `,`, closer, whitespace, or end of input and reported
   * as "expected-member-key".
   */
  private parseMemberKey(): StringLiteral<ConcreteInfo, ConcreteError> {
    if (this.cursor.peek() === QUOTE) {
      return this.parseStringLiteral();
    }

    const focus = this.cursor.currentSpan();
    const start = this.cursor.checkpoint();
    const found = this.found();

    while (true) {
      const cp = this.cursor.peek();
      if (
        cp === undefined
        || cp === COLON
        || cp === COMMA
        || cp === CLOSE_BRACE
        || cp === CLOSE_BRACKET
        || isAsciiWhitespace(cp)
      ) {
        break;
      }
      this.cursor.advance();
    }

    const span = this.cursor.spanFrom(start);
    // An empty span means the key is simply absent (e.g. `{: 3}`).
    const panickedOver = span.start === span.end ? undefined : span;
    const error = this.makeError({
      tag: "expected-member-key",
      span: focus,
      found,
    }, panickedOver);

    return JsonValue.errorString(error, panickedOver ?? focus);
  }

  /**
   * Parses an array; the cursor must be on `[`.
   *
   * Separator problems are recorded inline as error items in `items`.
   * A `}` is accepted as a (diagnosed) closer, and end of input yields an
   * array without a close token.
   */
  private parseArray(): PartialJsonValue {
    const start = this.cursor.checkpoint();
    const open = DelimiterToken.openBracket(this.cursor.currentSpan());
    this.cursor.advance();

    const items: PartialArrayItem[] = [];
    // True at the start and after a comma (or a diagnosed missing comma).
    let expectingValue = true;
    // True when the most recently consumed token was a comma.
    let lastWasSeparator = false;

    while (true) {
      this.skipWhitespace();

      const cp = this.cursor.peek();
      if (cp === CLOSE_BRACKET) {
        const close = DelimiterToken.closeBracket(this.cursor.currentSpan());
        if (lastWasSeparator) {
          // Trailing comma: an item was promised but never arrived.
          items.push(this.missingArrayValue());
        }
        this.cursor.advance();
        return JsonValue.array(open, items, this.cursor.spanFrom(start), close);
      }

      if (cp === CLOSE_BRACE) {
        // Wrong kind of closer: diagnose it, but still treat it as the end.
        const close = DelimiterToken.closeBrace(this.cursor.currentSpan());
        const error = this.makeError({
          tag: "expected-close-delimiter",
          span: this.cursor.currentSpan(),
          open: open.span,
          expected: "bracket",
          found: this.found(),
        });
        this.cursor.advance();
        return JsonValue.array(open, items, this.cursor.spanFrom(start), close, error);
      }

      if (cp === undefined) {
        const error = this.makeError({
          tag: "expected-close-delimiter",
          span: this.cursor.eofSpan(),
          open: open.span,
          expected: "bracket",
          found: this.found(),
        });
        return JsonValue.array(open, items, this.cursor.spanFrom(start), undefined, error);
      }

      if (expectingValue) {
        if (cp === COMMA) {
          // A comma where an item should start (leading or repeated comma).
          const commaSpan = this.cursor.currentSpan();
          this.cursor.advance();
          const error = this.makeError({
            tag: "expected-value",
            span: commaSpan,
            found: { tag: "code-point", value: COMMA, span: commaSpan },
          });
          items.push(this.errorExpression(error, commaSpan));
          lastWasSeparator = true;
          continue;
        }

        items.push(this.parseJsonValue());
        expectingValue = false;
        lastWasSeparator = false;
        continue;
      }

      if (cp === COMMA) {
        this.cursor.advance();
        expectingValue = true;
        lastWasSeparator = true;
        continue;
      }

      // An item has just been parsed and the next token is not a comma:
      // record the missing separator without consuming anything, then parse
      // the next item from the same position on the next iteration.
      const error = this.makeError({
        tag: "expected-array-separator",
        span: this.cursor.currentSpan(),
        found: this.found(),
      });
      items.push(ArrayItem.errorSeparator(error, this.cursor.currentSpan()));
      expectingValue = true;
      lastWasSeparator = false;
    }
  }

  /** Parses a string in value position; the cursor must be on `"`. */
  private parseString(): PartialJsonValue {
    return this.parseStringLiteral();
  }

  /**
   * Parses a string literal (used for both values and member keys); the
   * cursor must be on the opening `"`.
   *
   * Returns an error-string node on end of input, on a raw control character
   * (which is consumed), or on a bad escape sequence.
   */
  private parseStringLiteral(): StringLiteral<ConcreteInfo, ConcreteError> {
    const start = this.cursor.checkpoint();
    this.cursor.advance();

    let value = "";

    while (true) {
      const cp = this.cursor.peek();

      if (cp === undefined) {
        const span = this.cursor.spanFrom(start);
        return JsonValue.errorString(this.makeError({
          tag: "invalid-string",
          span,
          reason: "unterminated",
        }, span), span);
      }

      if (cp === QUOTE) {
        this.cursor.advance();
        return { tag: "string", value, span: this.cursor.spanFrom(start) };
      }

      if (isControlCharacter(cp)) {
        // The diagnostic focuses on the control character; the error node
        // covers everything from the opening quote through that character.
        const focus = this.cursor.currentSpan();
        this.cursor.advance();
        const span = this.cursor.spanFrom(start);
        return JsonValue.errorString(this.makeError({
          tag: "invalid-string",
          span: focus,
          reason: "control-character",
        }, span), span);
      }

      if (cp === BACKSLASH) {
        const escaped = this.consumeEscape();
        if (escaped.tag === "error") {
          const focus = escaped.span;
          this.consumeUntilStringRecovery();
          const span = this.cursor.spanFrom(start);
          return JsonValue.errorString(this.makeError({
            tag: "invalid-string",
            span: focus,
            reason: escaped.reason,
          }, span), span);
        }
        value += escaped.value;
        continue;
      }

      value += String.fromCodePoint(cp);
      this.cursor.advance();
    }
  }

  /**
   * Consumes one escape sequence; the cursor must be on the backslash.
   *
   * @returns The decoded text on success, or the span consumed so far and a
   *          reason on failure. After a failed `\u` escape the offending
   *          code point is left unconsumed.
   */
  private consumeEscape():
    | { tag: "ok"; value: string }
    | { tag: "error"; span: CodePointSpan; reason: "invalid-escape" | "invalid-unicode-escape" } {
    const start = this.cursor.checkpoint();
    this.cursor.advance();
    const cp = this.cursor.peek();

    if (cp === undefined) {
      return { tag: "error", span: this.cursor.spanFrom(start), reason: "invalid-escape" };
    }

    // `\"`, `\\` and `\/` stand for the escaped character itself.
    if (cp === QUOTE || cp === BACKSLASH || cp === SLASH) {
      this.cursor.advance();
      return { tag: "ok", value: String.fromCodePoint(cp) };
    }

    if (cp === LOWER_B) {
      this.cursor.advance();
      return { tag: "ok", value: "\b" };
    }
    if (cp === LOWER_F) {
      this.cursor.advance();
      return { tag: "ok", value: "\f" };
    }
    if (cp === LOWER_N) {
      this.cursor.advance();
      return { tag: "ok", value: "\n" };
    }
    if (cp === LOWER_R) {
      this.cursor.advance();
      return { tag: "ok", value: "\r" };
    }
    if (cp === LOWER_T) {
      this.cursor.advance();
      return { tag: "ok", value: "\t" };
    }

    if (cp === LOWER_U) {
      // `\u` must be followed by exactly four hex digits.
      this.cursor.advance();
      let hex = "";
      for (let i = 0; i < 4; i += 1) {
        const hexCp = this.cursor.peek();
        if (hexCp === undefined || !isHexDigit(hexCp)) {
          return { tag: "error", span: this.cursor.spanFrom(start), reason: "invalid-unicode-escape" };
        }
        hex += String.fromCodePoint(hexCp);
        this.cursor.advance();
      }
      return { tag: "ok", value: String.fromCodePoint(Number.parseInt(hex, 16)) };
    }

    // Unknown escape selector: consume it so the error span covers it.
    this.cursor.advance();
    return { tag: "error", span: this.cursor.spanFrom(start), reason: "invalid-escape" };
  }

  /**
   * Parses a number; the cursor must be on `-` or a digit.
   *
   * The whole number-like token is always consumed. When it is malformed an
   * error-number node is returned, and later problems do not replace an
   * earlier reason — except trailing junk, which always wins.
   */
  private parseNumber(): PartialJsonValue {
    const start = this.cursor.checkpoint();
    let reason: InvalidNumberReason | undefined = undefined;

    if (this.cursor.peek() === DASH) {
      this.cursor.advance();
    }

    const firstDigit = this.cursor.peek();
    if (firstDigit === DIGIT_0) {
      this.cursor.advance();
      // A leading `0` may not be followed by more integer digits.
      if (isDigit(this.cursor.peek() ?? -1)) {
        reason = "leading-zero";
        this.consumeDigits();
      }
    } else if (firstDigit !== undefined && isBetween(char('1'), firstDigit, DIGIT_9)) {
      this.consumeDigits();
    } else {
      reason = "missing-integer-digits";
    }

    if (this.cursor.peek() === DOT) {
      this.cursor.advance();
      const fractionStart = this.cursor.checkpoint();
      this.consumeDigits();
      if (this.cursor.checkpoint() === fractionStart && reason === undefined) {
        reason = "missing-fraction-digits";
      }
    }

    if (this.cursor.peek() === LOWER_E || this.cursor.peek() === UPPER_E) {
      this.cursor.advance();
      if (this.cursor.peek() === PLUS || this.cursor.peek() === DASH) {
        this.cursor.advance();
      }

      const exponentStart = this.cursor.checkpoint();
      this.consumeDigits();
      if (this.cursor.checkpoint() === exponentStart && reason === undefined) {
        reason = "missing-exponent-digits";
      }
    }

    if (isNumberJunk(this.cursor.peek())) {
      reason = "trailing-junk";
      while (isNumberJunk(this.cursor.peek())) {
        this.cursor.advance();
      }
    }

    const span = this.cursor.spanFrom(start);
    const text = this.cursor.slice(span);

    // `reason` is always set when no integer digit was consumed, so it is
    // the only thing that needs checking here.
    if (reason !== undefined) {
      return JsonValue.errorNumber(this.makeError({
        tag: "invalid-number",
        span,
        text,
        reason,
      }, span), span);
    }

    return JsonValue.number(Number(text), span);
  }

  /**
   * Parses `true`, `false` or `null`. If the keyword does not match, or is
   * immediately followed by another keyword character, the cursor is rewound
   * and the text is handled as an unknown value.
   */
  private parseKeyword(
    keyword: CodePointString,
    makeValue: (span: CodePointSpan) => PartialJsonValue,
  ): PartialJsonValue {
    const start = this.cursor.checkpoint();
    const match = matchCodePointString(this.cursor, keyword);

    if (match.tag === "match" && !isKeywordPart(this.cursor.peek())) {
      return makeValue(match.span);
    }

    this.cursor.restore(start);
    return this.parseUnknownValue();
  }

  /**
   * Recovers from text that cannot start a value: consumes one code point
   * unconditionally, then keeps consuming until whitespace, a value
   * boundary, a plausible value start, or end of input.
   */
  private parseUnknownValue(): PartialJsonValue {
    const start = this.cursor.checkpoint();
    const focus = this.cursor.currentSpan();
    const found = this.found();

    this.cursor.advance();
    while (true) {
      const cp = this.cursor.peek();
      if (
        cp === undefined
        || isAsciiWhitespace(cp)
        || isValueBoundary(cp)
        || isValueStart(cp)
      ) {
        break;
      }
      this.cursor.advance();
    }

    const panickedOver = this.cursor.spanFrom(start);
    const error = this.makeError({
      tag: "expected-value",
      span: focus,
      found,
    }, panickedOver);

    return this.errorExpression(error, panickedOver);
  }

  /** Builds the error item for an array slot with no value (cursor on the closer). */
  private missingArrayValue(): PartialJsonValue {
    return this.errorExpression(this.makeError({
      tag: "expected-value",
      span: this.cursor.currentSpan(),
      found: this.found(),
    }));
  }

  /** Builds the error item for an object slot with no member (cursor on the closer). */
  private missingMemberKey(): PartialMemberItem {
    const error = this.makeError({
      tag: "expected-member-key",
      span: this.cursor.currentSpan(),
      found: this.found(),
    });
    return MemberItem.errorSeparator(error, this.cursor.currentSpan());
  }

  /** Consumes a (possibly empty) run of digits. */
  private consumeDigits(): void {
    // -1 stands in for end of input so isDigit always receives a number.
    while (isDigit(this.cursor.peek() ?? -1)) {
      this.cursor.advance();
    }
  }

  /**
   * After a bad escape, skips the rest of the broken string: stops just
   * after the next `"`, or just before `,`, `}`, `]` or end of input.
   */
  private consumeUntilStringRecovery(): void {
    while (true) {
      const cp = this.cursor.peek();
      if (cp === QUOTE) {
        this.cursor.advance();
        return;
      }
      if (cp === undefined || cp === COMMA || cp === CLOSE_BRACE || cp === CLOSE_BRACKET) {
        return;
      }
      this.cursor.advance();
    }
  }

  /** Skips ASCII whitespace at the cursor. */
  private skipWhitespace(): void {
    skipWhile(this.cursor, isAsciiWhitespace);
  }

  /** Describes what is at the cursor: end of input, or the current code point. */
  private found(): FoundSyntax {
    const cp = this.cursor.peek();
    if (cp === undefined) return { tag: "eof", span: this.cursor.eofSpan() };
    return { tag: "code-point", value: cp, span: this.cursor.currentSpan() };
  }

  /**
   * Records `error` in the flat error list and wraps it as a ConcreteError
   * for attachment to a syntax node.
   *
   * @param panickedOver Span of any input skipped while recovering, if any.
   */
  private makeError(error: ParseError, panickedOver?: CodePointSpan): ConcreteError {
    this.errors.push(error);
    return ConcreteError.single({
      span: error.span,
      error,
      panickedOver,
    });
  }

  /** Builds an error-expression node; its span defaults to the first error's span. */
  private errorExpression(error: ConcreteError, span?: CodePointSpan): PartialJsonValue {
    return JsonValue.errorExpression(error, span ?? error[0].span);
  }
}
|
||||
|
||||
/**
 * Reports whether `cp` could begin a JSON value: `{`, `[`, `"`, `-`, the
 * first letter of `true` / `false` / `null`, or a digit.
 */
function isValueStart(cp: CodePoint | undefined): boolean {
  if (cp === undefined) return false;

  switch (cp) {
    case OPEN_BRACE:
    case OPEN_BRACKET:
    case QUOTE:
    case DASH:
    case LOWER_T:
    case LOWER_F:
    case LOWER_N:
      return true;
    default:
      return isDigit(cp);
  }
}
|
||||
|
||||
/** Reports whether `cp` is a double quote, i.e. the start of a string member key. */
function isMemberStart(cp: CodePoint | undefined): boolean {
  const startsQuotedKey = cp === QUOTE;
  return startsQuotedKey;
}
|
||||
|
||||
/**
 * Reports whether `cp` ends the current value slot: end of input, a comma,
 * or either closing delimiter.
 */
function isValueBoundary(cp: CodePoint | undefined): boolean {
  if (cp === undefined) return true;
  if (cp === COMMA) return true;
  return cp === CLOSE_BRACE || cp === CLOSE_BRACKET;
}
|
||||
|
||||
/**
 * Reports whether `cp` would extend a bare word: an ASCII letter or digit,
 * an underscore, or a dash. End of input never does.
 */
function isKeywordPart(cp: CodePoint | undefined): boolean {
  if (cp === undefined) return false;
  if (isAsciiAlphanumeric(cp)) return true;
  return cp === char('_') || cp === DASH;
}
|
||||
|
||||
/**
 * Reports whether `cp` is junk glued onto a number: anything other than end
 * of input, ASCII whitespace, `,`, `}`, `]` or `:`.
 */
function isNumberJunk(cp: CodePoint | undefined): boolean {
  if (cp === undefined || isAsciiWhitespace(cp)) return false;

  const endsNumber =
    cp === COMMA
    || cp === CLOSE_BRACE
    || cp === CLOSE_BRACKET
    || cp === COLON;
  return !endsNumber;
}
|
||||
|
||||
/** Reports whether `cp` lies below U+0020, the range that may not appear raw in a string. */
function isControlCharacter(cp: CodePoint): boolean {
  const FIRST_NON_CONTROL = 0x20;
  return cp < FIRST_NON_CONTROL;
}
|
||||
|
||||
/** Reports whether `cp` is a hexadecimal digit: a digit, `a`–`f`, or `A`–`F`. */
function isHexDigit(cp: CodePoint): boolean {
  if (isDigit(cp)) return true;
  if (isBetween(LOWERCASE_A, cp, LOWERCASE_F)) return true;
  return isBetween(UPPERCASE_A, cp, UPPERCASE_F);
}
|
||||
|
|
@ -1,5 +1,5 @@
|
|||
import type { CodePointSpan } from 'source-region';
|
||||
import type { ParseError } from './parse_errors.ts';
|
||||
import type { ParseError } from './parse_errors';
|
||||
|
||||
export type ConcreteInfo = { span: CodePointSpan };
|
||||
|
||||
|
|
@ -22,6 +22,32 @@ export type DelimiterToken =
|
|||
| { tag: "open-bracket", span: CodePointSpan }
|
||||
| { tag: "close-bracket", span: CodePointSpan }
|
||||
|
||||
/** Constructors for the four delimiter tokens. */
export namespace DelimiterToken {
  /** Token for `{` located at `span`. */
  export function openBrace(span: CodePointSpan): DelimiterToken {
    const token: DelimiterToken = { tag: "open-brace", span };
    return token;
  }

  /** Token for `}` located at `span`. */
  export function closeBrace(span: CodePointSpan): DelimiterToken {
    const token: DelimiterToken = { tag: "close-brace", span };
    return token;
  }

  /** Token for `[` located at `span`. */
  export function openBracket(span: CodePointSpan): DelimiterToken {
    const token: DelimiterToken = { tag: "open-bracket", span };
    return token;
  }

  /** Token for `]` located at `span`. */
  export function closeBracket(span: CodePointSpan): DelimiterToken {
    const token: DelimiterToken = { tag: "close-bracket", span };
    return token;
  }
}
|
||||
|
||||
/** The `:` between a member key and its value, with its location. */
export type ColonToken = {
  tag: "colon";
  span: CodePointSpan;
};

/** Constructor for `ColonToken`. */
export namespace ColonToken {
  /** Token for `:` located at `span`. */
  export function make(span: CodePointSpan): ColonToken {
    const token: ColonToken = { tag: "colon", span };
    return token;
  }
}
|
||||
|
||||
export type Program<Info, Error> = {
|
||||
tag: "program",
|
||||
expressions: JsonValue<Info, Error>[],
|
||||
|
|
@ -48,7 +74,7 @@ export type MemberItem<Info, Error> =
|
|||
|
||||
/**
 * One `key: value` member of an object.
 *
 * `colon` is absent when the source had no colon; `error` carries a
 * diagnostic attached to the member itself. The colon is typed with the
 * shared `ColonToken` alias; the older inline `{ tag: "colon", ... }`
 * declaration of the same property was a duplicate and has been removed.
 */
export type Member<Info, Error> = {
  key: StringLiteral<Info, Error>,
  colon?: ColonToken,
  value: JsonValue<Info, Error>,
  error?: Error
} & Info
|
||||
|
|
@ -93,3 +119,137 @@ export type JsonScalar<Info, Error> =
|
|||
| { tag: "true", error?: Error } & Info
|
||||
| { tag: "false", error?: Error } & Info
|
||||
|
||||
/** Constructor and renderer for `Program` nodes. */
export namespace Program {
  /**
   * Builds a program node from its top-level expressions.
   *
   * The `error` key is only present on the result when an error is supplied.
   * The fields of `info` are spread in last.
   */
  export function make<Info, Error>(
    expressions: JsonValue<Info, Error>[],
    info: Info,
    error?: Error,
  ): Program<Info, Error> {
    if (error === undefined) {
      return { tag: "program", expressions, ...info };
    }
    return { tag: "program", expressions, error, ...info };
  }

  /** Renders every top-level expression, separated by single spaces. */
  export function show<Info, Error>(program: Program<Info, Error>): string {
    const rendered = program.expressions.map(JsonValue.show);
    return rendered.join(" ");
  }
}
|
||||
|
||||
/** Constructors and a renderer for concrete JSON value nodes. */
export namespace JsonValue {
  /** Builds an object node; `close` and `error` are stored as given (possibly undefined). */
  export function object(
    open: DelimiterToken,
    members: MemberItem<ConcreteInfo, ConcreteError>[],
    span: CodePointSpan,
    close?: DelimiterToken,
    error?: ConcreteError,
  ): JsonValue<ConcreteInfo, ConcreteError> {
    const node: JsonValue<ConcreteInfo, ConcreteError> = {
      tag: "object",
      open,
      members,
      close,
      error,
      span,
    };
    return node;
  }

  /** Builds an array node; `close` and `error` are stored as given (possibly undefined). */
  export function array(
    open: DelimiterToken,
    items: ArrayItem<ConcreteInfo, ConcreteError>[],
    span: CodePointSpan,
    close?: DelimiterToken,
    error?: ConcreteError,
  ): JsonValue<ConcreteInfo, ConcreteError> {
    const node: JsonValue<ConcreteInfo, ConcreteError> = {
      tag: "array",
      open,
      items,
      close,
      error,
      span,
    };
    return node;
  }

  /** Builds a string node holding the decoded `value`. */
  export function string(
    value: string,
    span: CodePointSpan,
    error?: ConcreteError,
  ): JsonValue<ConcreteInfo, ConcreteError> {
    const node: JsonValue<ConcreteInfo, ConcreteError> = { tag: "string", value, error, span };
    return node;
  }

  /** Builds the placeholder node for a malformed string. */
  export function errorString(error: ConcreteError, span: CodePointSpan): StringLiteral<ConcreteInfo, ConcreteError> {
    const node: StringLiteral<ConcreteInfo, ConcreteError> = { tag: "error-string", error, span };
    return node;
  }

  /** Builds a number node holding the numeric `value`. */
  export function number(
    value: number,
    span: CodePointSpan,
    error?: ConcreteError,
  ): JsonValue<ConcreteInfo, ConcreteError> {
    const node: JsonValue<ConcreteInfo, ConcreteError> = { tag: "number", value, error, span };
    return node;
  }

  /** Builds the placeholder node for a malformed number. */
  export function errorNumber(error: ConcreteError, span: CodePointSpan): NumberLiteral<ConcreteInfo, ConcreteError> {
    const node: NumberLiteral<ConcreteInfo, ConcreteError> = { tag: "error-number", error, span };
    return node;
  }

  /** Builds a `null` node. */
  export function nullValue(span: CodePointSpan): JsonValue<ConcreteInfo, ConcreteError> {
    const node: JsonValue<ConcreteInfo, ConcreteError> = { tag: "null", span };
    return node;
  }

  /** Builds a `true` node. */
  export function trueValue(span: CodePointSpan): JsonValue<ConcreteInfo, ConcreteError> {
    const node: JsonValue<ConcreteInfo, ConcreteError> = { tag: "true", span };
    return node;
  }

  /** Builds a `false` node. */
  export function falseValue(span: CodePointSpan): JsonValue<ConcreteInfo, ConcreteError> {
    const node: JsonValue<ConcreteInfo, ConcreteError> = { tag: "false", span };
    return node;
  }

  /** Builds the placeholder node for a missing or unrecognisable value. */
  export function errorExpression(error: ConcreteError, span: CodePointSpan): JsonValue<ConcreteInfo, ConcreteError> {
    const node: JsonValue<ConcreteInfo, ConcreteError> = { tag: "error-expression", error, span };
    return node;
  }

  /**
   * Renders a value as compact text. Containers render their items joined
   * by ", "; error nodes render as `<tag>` placeholders.
   */
  export function show<Info, Error>(value: JsonValue<Info, Error>): string {
    switch (value.tag) {
      case "object": {
        const members = value.members.map(MemberItem.show);
        return "{" + members.join(", ") + "}";
      }
      case "array": {
        const items = value.items.map(ArrayItem.show);
        return "[" + items.join(", ") + "]";
      }
      case "string":
        // JSON.stringify re-quotes and re-escapes the decoded text.
        return JSON.stringify(value.value);
      case "number":
        return String(value.value);
      case "null":
        return "null";
      case "true":
        return "true";
      case "false":
        return "false";
      case "error-expression":
        return "<error-expression>";
      case "error-string":
        return "<error-string>";
      case "error-number":
        return "<error-number>";
    }
  }
}
|
||||
|
||||
/** Constructors and a renderer for the items stored in an object's member list. */
export namespace MemberItem {
  /** Builds a member node; `colon` and `error` are stored as given (possibly undefined). */
  export function member(
    key: StringLiteral<ConcreteInfo, ConcreteError>,
    value: JsonValue<ConcreteInfo, ConcreteError>,
    span: CodePointSpan,
    colon?: ColonToken,
    error?: ConcreteError,
  ): MemberItem<ConcreteInfo, ConcreteError> {
    const item: MemberItem<ConcreteInfo, ConcreteError> = {
      tag: "member",
      key,
      colon,
      value,
      error,
      span,
    };
    return item;
  }

  /** Builds the item that marks a separator problem inside an object. */
  export function errorSeparator(error: ConcreteError, span: CodePointSpan): MemberItem<ConcreteInfo, ConcreteError> {
    const item: MemberItem<ConcreteInfo, ConcreteError> = { tag: "error-object-separator", error, span };
    return item;
  }

  /** Renders a member as `key: value`, or a placeholder for a separator error. */
  export function show<Info, Error>(item: MemberItem<Info, Error>): string {
    if (item.tag === "error-object-separator") {
      return "<error-object-separator>";
    }
    const key = JsonValue.show(item.key);
    const value = JsonValue.show(item.value);
    return `${key}: ${value}`;
  }
}
|
||||
|
||||
/** Constructor and renderer for the items stored in an array's item list. */
export namespace ArrayItem {
  /** Builds the item that marks a separator problem inside an array. */
  export function errorSeparator(error: ConcreteError, span: CodePointSpan): ArrayItem<ConcreteInfo, ConcreteError> {
    const item: ArrayItem<ConcreteInfo, ConcreteError> = { tag: "error-array-separator", error, span };
    return item;
  }

  /** Renders an item: a placeholder for a separator error, otherwise the value itself. */
  export function show<Info, Error>(item: ArrayItem<Info, Error>): string {
    if (item.tag !== "error-array-separator") {
      return JsonValue.show(item);
    }
    return "<error-array-separator>";
  }
}
|
||||
|
|
|
|||
|
|
@ -1,3 +1,5 @@
|
|||
import { runExperiments as runLispExperiments } from './languages/lisp/experiments';
|
||||
import { runExperiments as runJsonExperiments } from './languages/json/experiments';
|
||||
|
||||
// Run each language's experiment suite in order: Lisp first, then JSON.
for (const runSuite of [runLispExperiments, runJsonExperiments]) {
  runSuite();
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue