JSON parser

This commit is contained in:
Yura Dupyn 2026-04-25 18:03:27 +02:00
parent 57f666118a
commit 1b4b07c1fa
6 changed files with 1059 additions and 3 deletions

View file

@ -0,0 +1,79 @@
import { sourceText } from 'source-region';
import { parseDocument, programOf } from './parser';
import { Program } from './syntax';
// === Experiments ===
/** Feeds the empty string to the parser. */
function experiment00_emptyDocument(): void {
  const input = "";
  logParse("empty document", input);
}

/** Several scalar values placed side by side at document level. */
function experiment01_topLevelValues(): void {
  const input = 'true false null "hello" 123';
  logParse("top-level values", input);
}

/** A well-formed object that nests a string, an array and a boolean. */
function experiment02_objectsAndArrays(): void {
  const input = '{"name": "Ada", "scores": [1, 2, 3], "ok": true}';
  logParse("objects and arrays", input);
}

/** An array whose first two items are not separated by a comma. */
function experiment03_missingArraySeparator(): void {
  const input = '[1 2, 3]';
  logParse("missing array separator", input);
}

/** One array with a trailing comma, one with a doubled comma. */
function experiment04_arrayTrailingAndRepeatedComma(): void {
  const input = '[1, 2,] [1,, 2]';
  logParse("array comma errors", input);
}

/** An object member whose key is not followed by a colon. */
function experiment05_objectMissingColon(): void {
  const input = '{"x" 1, "y": 2}';
  logParse("object missing colon", input);
}

/** Two object members with no comma between them. */
function experiment06_objectMissingSeparator(): void {
  const input = '{"x": 1 "y": 2}';
  logParse("object missing separator", input);
}

/** An unquoted key, a valid member, and a member with no key at all. */
function experiment07_objectKeyErrors(): void {
  const input = '{x: 1, "ok": 2, : 3}';
  logParse("object key errors", input);
}

/** A raw newline inside a string, a bad escape, and a short unicode escape. */
function experiment08_stringErrors(): void {
  const input = '"unterminated\n"bad escape: \\x" "bad unicode: \\u12"';
  logParse("string errors", input);
}

/** A selection of malformed number spellings. */
function experiment09_numberErrors(): void {
  const input = '01 - 1. 1e+ 123abc';
  logParse("number errors", input);
}

/** Containers closed with the wrong kind of delimiter. */
function experiment10_mismatchedDelimiters(): void {
  const input = '{"x": [1, 2} {"y": 3]';
  logParse("mismatched delimiters", input);
}

/** Junk at document level mixed with valid values. */
function experiment11_recoverAtDocumentLevel(): void {
  const input = '@@@ {"ok": true} nil [1, 2]';
  logParse("document recovery", input);
}
/**
 * Parses `input` as a JSON document and prints a banner, the raw input, the
 * result tag with the rendered program, and the full list of diagnostics.
 */
function logParse(name: string, input: string): void {
  const { syntax, errors } = parseDocument(sourceText(input).fullRegion());
  console.log(`==== json:${name} ====`);
  console.log(input);
  const rendered = Program.show(programOf(syntax));
  console.log(syntax.tag, rendered);
  console.dir(errors, { depth: null });
}
/** Runs every JSON parser experiment, in declaration order. */
export function runExperiments(): void {
  const experiments: Array<() => void> = [
    experiment00_emptyDocument,
    experiment01_topLevelValues,
    experiment02_objectsAndArrays,
    experiment03_missingArraySeparator,
    experiment04_arrayTrailingAndRepeatedComma,
    experiment05_objectMissingColon,
    experiment06_objectMissingSeparator,
    experiment07_objectKeyErrors,
    experiment08_stringErrors,
    experiment09_numberErrors,
    experiment10_mismatchedDelimiters,
    experiment11_recoverAtDocumentLevel,
  ];
  for (const run of experiments) {
    run();
  }
}

View file

@ -0,0 +1,3 @@
export * from './parse_errors';
export * from './parser';
export * from './syntax';

View file

@ -1,4 +1,60 @@
import type { CodePoint, CodePointSpan } from 'source-region'; import type { CodePoint, CodePointSpan } from 'source-region';
export type ParseError = export type ParseError =
| {} // TODO | {
tag: "expected-value";
span: CodePointSpan;
found: FoundSyntax;
}
| {
tag: "expected-member-key";
span: CodePointSpan;
found: FoundSyntax;
}
| {
tag: "expected-colon";
span: CodePointSpan;
found: FoundSyntax;
}
| {
tag: "expected-array-separator";
span: CodePointSpan;
found: FoundSyntax;
}
| {
tag: "expected-object-separator";
span: CodePointSpan;
found: FoundSyntax;
}
| {
tag: "expected-close-delimiter";
span: CodePointSpan;
open: CodePointSpan;
expected: "brace" | "bracket";
found: FoundSyntax;
}
| {
tag: "unexpected-close-delimiter";
span: CodePointSpan;
delimiter: "brace" | "bracket";
}
| {
tag: "invalid-string";
span: CodePointSpan;
reason: "unterminated" | "invalid-escape" | "invalid-unicode-escape" | "control-character";
}
| {
tag: "invalid-number";
span: CodePointSpan;
text: string;
reason:
| "missing-integer-digits"
| "leading-zero"
| "missing-fraction-digits"
| "missing-exponent-digits"
| "trailing-junk";
}
// What a diagnostic reports as sitting at the error position: either the end
// of input, or a single code point together with its span.
export type FoundSyntax =
| { tag: "eof"; span: CodePointSpan }
| { tag: "code-point"; value: CodePoint; span: CodePointSpan };

View file

@ -0,0 +1,756 @@
import {
CodePointString,
SourceCursor,
char,
isAsciiAlphanumeric,
isAsciiWhitespace,
isBetween,
isDigit,
} from 'source-region';
import type {
CodePoint,
CodePointSpan,
SourceRegion,
} from 'source-region';
import { matchCodePointString, skipWhile } from '../../recognizers';
import type { FoundSyntax, ParseError } from './parse_errors';
import {
ArrayItem,
ColonToken,
ConcreteError,
DelimiterToken,
JsonValue,
MemberItem,
Program,
} from './syntax';
import type {
ConcreteInfo,
JsonValue as JsonValueType,
MemberItem as MemberItemType,
ArrayItem as ArrayItemType,
Program as ProgramType,
StringLiteral,
} from './syntax';
type InvalidNumberReason = Extract<ParseError, { tag: "invalid-number" }>["reason"];
// Whitespace convention:
// - parseDocument consumes leading whitespace before each top-level value.
// - parseJsonValue assumes leading whitespace has already been consumed.
// - Successful value parsers stop immediately after the value.
// - object/array parsers own whitespace around separators, colons, members, and items.
//
// Recovery policy:
// - Unknown value starts consume at least one code point, then panic until a
// separator, delimiter, whitespace, or plausible value start.
// - Arrays and objects record separator errors inline, then continue from the
// next plausible item/member.
// - Missing object values are represented by an error-expression value.
//
// Span convention:
// - Parser internals and diagnostics use CodePointSpan.
// - Error nodes span the malformed/recovered syntax when possible; individual
// diagnostic nodes still keep a narrower focus span.
// Structural punctuation.
const OPEN_BRACE = char('{');
const CLOSE_BRACE = char('}');
const OPEN_BRACKET = char('[');
const CLOSE_BRACKET = char(']');
const COMMA = char(',');
const COLON = char(':');
// String delimiters and escape introducer.
const QUOTE = char('"');
const BACKSLASH = char('\\');
const SLASH = char('/');
// Number syntax.
const DASH = char('-');
const PLUS = char('+');
const DOT = char('.');
const LOWER_E = char('e');
const UPPER_E = char('E');
// Escape selectors; LOWER_T / LOWER_F / LOWER_N also serve as the first
// letters of the true / false / null keywords in parseJsonValue.
const LOWER_U = char('u');
const LOWER_B = char('b');
const LOWER_F = char('f');
const LOWER_N = char('n');
const LOWER_R = char('r');
const LOWER_T = char('t');
const DIGIT_0 = char('0');
const DIGIT_9 = char('9');
// Bounds of the hex-digit letter ranges used by isHexDigit.
// LOWERCASE_F is the same code point as LOWER_F above.
const LOWERCASE_A = char('a');
const LOWERCASE_F = char('f');
const UPPERCASE_A = char('A');
const UPPERCASE_F = char('F');
// Keyword spellings matched by parseKeyword.
const TRUE = CodePointString.makeFromString("true");
const FALSE = CodePointString.makeFromString("false");
const NULL = CodePointString.makeFromString("null");
// Parse outcome tagged by validity. Parser.parseDocument produces "valid"
// only when no diagnostics were recorded, otherwise "invalid".
export type ConcreteSyntaxResult =
| { tag: "valid", value: ValidConcreteSyntax }
| { tag: "invalid", value: PartialConcreteSyntax }
// The syntax tree together with every diagnostic recorded while parsing.
export type ParseDocumentResult = {
syntax: ConcreteSyntaxResult;
errors: ParseError[];
};
// A program whose error slot is `never`, i.e. it cannot carry error nodes.
export type ValidConcreteSyntax = ProgramType<ConcreteInfo, never>
// Program, value, member and array-item shapes that may carry ConcreteError.
export type PartialConcreteSyntax = ProgramType<ConcreteInfo, ConcreteError>
export type PartialJsonValue = JsonValueType<ConcreteInfo, ConcreteError>;
export type PartialMemberItem = MemberItemType<ConcreteInfo, ConcreteError>;
export type PartialArrayItem = ArrayItemType<ConcreteInfo, ConcreteError>;
/** Constructors for the two ConcreteSyntaxResult variants. */
export namespace ConcreteSyntaxResult {
  /** Wraps an error-free program as the "valid" variant. */
  export function valid(value: ValidConcreteSyntax): ConcreteSyntaxResult {
    const result: ConcreteSyntaxResult = { tag: "valid", value };
    return result;
  }

  /** Wraps a program that may contain error nodes as the "invalid" variant. */
  export function invalid(value: PartialConcreteSyntax): ConcreteSyntaxResult {
    const result: ConcreteSyntaxResult = { tag: "invalid", value };
    return result;
  }
}
/** Returns the program carried by either variant of a parse result. */
export function programOf(result: ConcreteSyntaxResult): PartialConcreteSyntax {
  const { value } = result;
  return value;
}
/** Parses `region` as a JSON document using a fresh Parser instance. */
export function parseDocument(region: SourceRegion): ParseDocumentResult {
  const parser = new Parser(region);
  return parser.parseDocument();
}
class Parser {
// Cursor created from `region`; all parse methods read and advance it.
private readonly cursor: SourceCursor;
// Every diagnostic created through makeError, in creation order.
private readonly errors: ParseError[] = [];
// Stores `region` (its codePointSpan becomes the program span in
// parseDocument) and creates the cursor from it.
constructor(private readonly region: SourceRegion) {
this.cursor = region.makeCursor();
}
/**
 * Parses whitespace-separated top-level values until the cursor reaches the
 * end, then tags the program "valid" when no diagnostics were recorded and
 * "invalid" otherwise.
 */
parseDocument(): ParseDocumentResult {
  const expressions: PartialJsonValue[] = [];
  for (this.skipWhitespace(); !this.cursor.isAtEnd(); this.skipWhitespace()) {
    expressions.push(this.parseJsonValue());
  }
  const program = Program.make(expressions, { span: this.region.codePointSpan });
  const syntax = this.errors.length > 0
    ? ConcreteSyntaxResult.invalid(program as PartialConcreteSyntax)
    : ConcreteSyntaxResult.valid(program as ValidConcreteSyntax);
  return { syntax, errors: this.errors };
}
/**
 * Dispatches on the next code point to the matching value parser.
 * End of input and stray closing delimiters become error expressions;
 * anything unrecognised goes to parseUnknownValue.
 */
private parseJsonValue(): PartialJsonValue {
  const next = this.cursor.peek();
  if (next === undefined) {
    const error = this.makeError({
      tag: "expected-value",
      span: this.cursor.eofSpan(),
      found: this.found(),
    });
    return this.errorExpression(error);
  }
  switch (next) {
    case CLOSE_BRACE:
    case CLOSE_BRACKET: {
      // A closer with no matching opener: consume it so parsing makes progress.
      const span = this.cursor.currentSpan();
      this.cursor.advance();
      const error = this.makeError({
        tag: "unexpected-close-delimiter",
        span,
        delimiter: next === CLOSE_BRACE ? "brace" : "bracket",
      });
      return this.errorExpression(error);
    }
    case OPEN_BRACE:
      return this.parseObject();
    case OPEN_BRACKET:
      return this.parseArray();
    case QUOTE:
      return this.parseString();
    case DASH:
      return this.parseNumber();
    case LOWER_T:
      return this.parseKeyword(TRUE, JsonValue.trueValue);
    case LOWER_F:
      return this.parseKeyword(FALSE, JsonValue.falseValue);
    case LOWER_N:
      return this.parseKeyword(NULL, JsonValue.nullValue);
    default:
      return isDigit(next) ? this.parseNumber() : this.parseUnknownValue();
  }
}
/**
 * Parses an object; the cursor must be on the opening `{`.
 *
 * The loop alternates between "expecting a member" and "expecting a
 * separator". Separator problems are recorded inline as error items in
 * `members`, so parsing always continues up to a closing delimiter or the
 * end of input:
 * - `}` closes the object (a preceding comma adds a missing-key item);
 * - `]` closes it too, but with an "expected-close-delimiter" error;
 * - end of input returns the object without a close token, with that error.
 */
private parseObject(): PartialJsonValue {
  const start = this.cursor.checkpoint();
  const open = DelimiterToken.openBrace(this.cursor.currentSpan());
  this.cursor.advance();
  const members: PartialMemberItem[] = [];
  let expectingMember = true;
  // True while the most recently consumed token was a comma.
  let lastWasSeparator = false;
  while (true) {
    this.skipWhitespace();
    const cp = this.cursor.peek();
    if (cp === CLOSE_BRACE) {
      const close = DelimiterToken.closeBrace(this.cursor.currentSpan());
      if (lastWasSeparator) {
        // Trailing comma: report the member that should have followed it.
        members.push(this.missingMemberKey());
      }
      this.cursor.advance();
      return JsonValue.object(open, members, this.cursor.spanFrom(start), close);
    }
    if (cp === CLOSE_BRACKET) {
      // Wrong kind of closer: accept it as the end of the object, with an error.
      const close = DelimiterToken.closeBracket(this.cursor.currentSpan());
      const error = this.makeError({
        tag: "expected-close-delimiter",
        span: this.cursor.currentSpan(),
        open: open.span,
        expected: "brace",
        found: this.found(),
      });
      this.cursor.advance();
      return JsonValue.object(open, members, this.cursor.spanFrom(start), close, error);
    }
    if (cp === undefined) {
      const error = this.makeError({
        tag: "expected-close-delimiter",
        span: this.cursor.eofSpan(),
        open: open.span,
        expected: "brace",
        found: this.found(),
      });
      return JsonValue.object(open, members, this.cursor.spanFrom(start), undefined, error);
    }
    if (expectingMember) {
      if (cp === COMMA) {
        // A comma where a member should start (leading or repeated comma).
        const commaSpan = this.cursor.currentSpan();
        this.cursor.advance();
        const error = this.makeError({
          tag: "expected-member-key",
          span: commaSpan,
          found: { tag: "code-point", value: COMMA, span: commaSpan },
        });
        members.push(MemberItem.errorSeparator(error, commaSpan));
        lastWasSeparator = true;
        continue;
      }
      members.push(this.parseMember());
      expectingMember = false;
      lastWasSeparator = false;
      continue;
    }
    if (cp === COMMA) {
      this.cursor.advance();
      expectingMember = true;
      lastWasSeparator = true;
      continue;
    }
    // A member was just parsed and something other than `,` or a closer
    // follows: record the missing separator without consuming anything and
    // let the next iteration parse a member from this position.
    const error = this.makeError({
      tag: "expected-object-separator",
      span: this.cursor.currentSpan(),
      found: this.found(),
    });
    members.push(MemberItem.errorSeparator(error, this.cursor.currentSpan()));
    expectingMember = true;
    lastWasSeparator = false;
  }
}
/**
 * Parses one `key : value` member.
 *
 * A missing colon is recorded on the member itself and parsing continues
 * with the value. When the value position holds a boundary (`,`, `}`, `]`
 * or end of input) the value becomes an error expression and nothing is
 * consumed.
 *
 * Diagnostic spans are taken from `found()`, so they use the end-of-input
 * span at EOF and the current code point's span otherwise.
 */
private parseMember(): PartialMemberItem {
  const start = this.cursor.checkpoint();
  const key = this.parseMemberKey();
  this.skipWhitespace();
  let colon: ColonToken | undefined;
  let memberError: ConcreteError | undefined;
  if (this.cursor.peek() === COLON) {
    colon = ColonToken.make(this.cursor.currentSpan());
    this.cursor.advance();
  } else {
    const found = this.found();
    memberError = this.makeError({
      tag: "expected-colon",
      span: found.span,
      found,
    });
  }
  this.skipWhitespace();
  let value: PartialJsonValue;
  if (isValueBoundary(this.cursor.peek())) {
    const found = this.found();
    value = this.errorExpression(this.makeError({
      tag: "expected-value",
      span: found.span,
      found,
    }));
  } else {
    value = this.parseJsonValue();
  }
  const span = this.cursor.spanFrom(start);
  return MemberItem.member(key, value, span, colon, memberError);
}
/**
 * Parses a member key. A quoted key is parsed as a string literal; anything
 * else is skipped up to the next `:`, `,`, `}`, `]`, whitespace or end of
 * input and reported as an "expected-member-key" error string.
 */
private parseMemberKey(): StringLiteral<ConcreteInfo, ConcreteError> {
  if (this.cursor.peek() === QUOTE) return this.parseStringLiteral();

  const focus = this.cursor.currentSpan();
  const start = this.cursor.checkpoint();
  const found = this.found();
  const endsKey = (cp: CodePoint | undefined): boolean =>
    cp === undefined
    || cp === COLON
    || cp === COMMA
    || cp === CLOSE_BRACE
    || cp === CLOSE_BRACKET
    || isAsciiWhitespace(cp);
  while (!endsKey(this.cursor.peek())) {
    this.cursor.advance();
  }
  const skipped = this.cursor.spanFrom(start);
  // Nothing skipped (e.g. the key position holds `:`): no panic span to report.
  const panickedOver = skipped.start !== skipped.end ? skipped : undefined;
  const error = this.makeError({ tag: "expected-member-key", span: focus, found }, panickedOver);
  return JsonValue.errorString(error, panickedOver ?? focus);
}
/**
 * Parses an array; the cursor must be on the opening `[`.
 *
 * The loop alternates between "expecting a value" and "expecting a
 * separator". Separator problems are recorded inline as error items in
 * `items`, so parsing always continues up to a closing delimiter or the end
 * of input:
 * - `]` closes the array (a preceding comma adds a missing-value item);
 * - `}` closes it too, but with an "expected-close-delimiter" error;
 * - end of input returns the array without a close token, with that error.
 */
private parseArray(): PartialJsonValue {
  const start = this.cursor.checkpoint();
  const open = DelimiterToken.openBracket(this.cursor.currentSpan());
  this.cursor.advance();
  const items: PartialArrayItem[] = [];
  let expectingValue = true;
  // True while the most recently consumed token was a comma.
  let lastWasSeparator = false;
  while (true) {
    this.skipWhitespace();
    const cp = this.cursor.peek();
    if (cp === CLOSE_BRACKET) {
      const close = DelimiterToken.closeBracket(this.cursor.currentSpan());
      if (lastWasSeparator) {
        // Trailing comma: report the value that should have followed it.
        items.push(this.missingArrayValue());
      }
      this.cursor.advance();
      return JsonValue.array(open, items, this.cursor.spanFrom(start), close);
    }
    if (cp === CLOSE_BRACE) {
      // Wrong kind of closer: accept it as the end of the array, with an error.
      const close = DelimiterToken.closeBrace(this.cursor.currentSpan());
      const error = this.makeError({
        tag: "expected-close-delimiter",
        span: this.cursor.currentSpan(),
        open: open.span,
        expected: "bracket",
        found: this.found(),
      });
      this.cursor.advance();
      return JsonValue.array(open, items, this.cursor.spanFrom(start), close, error);
    }
    if (cp === undefined) {
      const error = this.makeError({
        tag: "expected-close-delimiter",
        span: this.cursor.eofSpan(),
        open: open.span,
        expected: "bracket",
        found: this.found(),
      });
      return JsonValue.array(open, items, this.cursor.spanFrom(start), undefined, error);
    }
    if (expectingValue) {
      if (cp === COMMA) {
        // A comma where a value should start (leading or repeated comma).
        const commaSpan = this.cursor.currentSpan();
        this.cursor.advance();
        const error = this.makeError({
          tag: "expected-value",
          span: commaSpan,
          found: { tag: "code-point", value: COMMA, span: commaSpan },
        });
        items.push(this.errorExpression(error, commaSpan));
        lastWasSeparator = true;
        continue;
      }
      items.push(this.parseJsonValue());
      expectingValue = false;
      lastWasSeparator = false;
      continue;
    }
    if (cp === COMMA) {
      this.cursor.advance();
      expectingValue = true;
      lastWasSeparator = true;
      continue;
    }
    // A value was just parsed and something other than `,` or a closer
    // follows: record the missing separator without consuming anything and
    // let the next iteration parse a value from this position.
    const error = this.makeError({
      tag: "expected-array-separator",
      span: this.cursor.currentSpan(),
      found: this.found(),
    });
    items.push(ArrayItem.errorSeparator(error, this.cursor.currentSpan()));
    expectingValue = true;
    lastWasSeparator = false;
  }
}
// Parses a string in value position; delegates to parseStringLiteral, which
// is also used for member keys.
private parseString(): PartialJsonValue {
return this.parseStringLiteral();
}
/**
 * Parses a string literal starting at its opening quote and decodes escapes
 * into the resulting value.
 *
 * Returns an error string when the input ends before the closing quote, when
 * a control character (below U+0020) appears, or when an escape is invalid.
 * After an invalid escape the cursor is moved to a recovery point by
 * consumeUntilStringRecovery.
 */
private parseStringLiteral(): StringLiteral<ConcreteInfo, ConcreteError> {
  type Reason = Extract<ParseError, { tag: "invalid-string" }>["reason"];
  const start = this.cursor.checkpoint();
  this.cursor.advance(); // step over the opening quote

  // Builds the error node over everything consumed so far; the diagnostic
  // itself points at `focus`, or at the whole consumed span when omitted.
  const fail = (reason: Reason, focus?: CodePointSpan): StringLiteral<ConcreteInfo, ConcreteError> => {
    const consumed = this.cursor.spanFrom(start);
    const error = this.makeError({
      tag: "invalid-string",
      span: focus ?? consumed,
      reason,
    }, consumed);
    return JsonValue.errorString(error, consumed);
  };

  const pieces: string[] = [];
  for (;;) {
    const cp = this.cursor.peek();
    if (cp === undefined) {
      return fail("unterminated");
    }
    if (cp === QUOTE) {
      this.cursor.advance();
      return { tag: "string", value: pieces.join(""), span: this.cursor.spanFrom(start) };
    }
    if (isControlCharacter(cp)) {
      const offending = this.cursor.currentSpan();
      this.cursor.advance();
      return fail("control-character", offending);
    }
    if (cp === BACKSLASH) {
      const escaped = this.consumeEscape();
      if (escaped.tag === "error") {
        this.consumeUntilStringRecovery();
        return fail(escaped.reason, escaped.span);
      }
      pieces.push(escaped.value);
    } else {
      pieces.push(String.fromCodePoint(cp));
      this.cursor.advance();
    }
  }
}
/**
 * Consumes one escape sequence; the cursor must be on the backslash.
 *
 * Returns the decoded text on success. On failure returns the span from the
 * backslash to the current position: an unknown escape selector is consumed,
 * whereas a malformed `\u` escape stops in front of the first non-hex code
 * point.
 */
private consumeEscape():
  | { tag: "ok"; value: string }
  | { tag: "error"; span: CodePointSpan; reason: "invalid-escape" | "invalid-unicode-escape" } {
  const backslash = this.cursor.checkpoint();
  this.cursor.advance();
  const selector = this.cursor.peek();
  if (selector === undefined) {
    return { tag: "error", span: this.cursor.spanFrom(backslash), reason: "invalid-escape" };
  }

  // Consumes the selector and yields the text it stands for.
  const accept = (value: string): { tag: "ok"; value: string } => {
    this.cursor.advance();
    return { tag: "ok", value };
  };

  switch (selector) {
    case QUOTE:
    case BACKSLASH:
    case SLASH:
      return accept(String.fromCodePoint(selector));
    case LOWER_B:
      return accept("\b");
    case LOWER_F:
      return accept("\f");
    case LOWER_N:
      return accept("\n");
    case LOWER_R:
      return accept("\r");
    case LOWER_T:
      return accept("\t");
    case LOWER_U: {
      this.cursor.advance();
      const digits: string[] = [];
      while (digits.length < 4) {
        const hexCp = this.cursor.peek();
        if (hexCp === undefined || !isHexDigit(hexCp)) {
          return { tag: "error", span: this.cursor.spanFrom(backslash), reason: "invalid-unicode-escape" };
        }
        digits.push(String.fromCodePoint(hexCp));
        this.cursor.advance();
      }
      const decoded = String.fromCodePoint(Number.parseInt(digits.join(""), 16));
      return { tag: "ok", value: decoded };
    }
    default:
      this.cursor.advance();
      return { tag: "error", span: this.cursor.spanFrom(backslash), reason: "invalid-escape" };
  }
}
/**
 * Parses a number: optional `-`, integer part, optional fraction, optional
 * exponent. Any code points glued to the end of the number (see
 * isNumberJunk) are consumed as part of the same token.
 *
 * On any problem the whole consumed span becomes an error number carrying a
 * single reason. The first problem encountered wins: later checks, including
 * the trailing-junk check, never overwrite an earlier and more specific
 * reason. For example `-abc` reports "missing-integer-digits" and `01x`
 * reports "leading-zero"; `123abc` still reports "trailing-junk".
 */
private parseNumber(): PartialJsonValue {
  const start = this.cursor.checkpoint();
  let reason: InvalidNumberReason | undefined = undefined;
  if (this.cursor.peek() === DASH) {
    this.cursor.advance();
  }
  // NOTE(review): the `===` checks against checkpoints below assume that
  // checkpoints compare by value — confirm against SourceCursor.
  const integerStart = this.cursor.checkpoint();
  const firstDigit = this.cursor.peek();
  if (firstDigit === DIGIT_0) {
    this.cursor.advance();
    if (isDigit(this.cursor.peek() ?? -1)) {
      // A zero followed by more digits: swallow them into the same token.
      reason = "leading-zero";
      this.consumeDigits();
    }
  } else if (firstDigit !== undefined && isBetween(char('1'), firstDigit, DIGIT_9)) {
    this.consumeDigits();
  } else {
    reason = "missing-integer-digits";
  }
  if (this.cursor.peek() === DOT) {
    this.cursor.advance();
    const fractionStart = this.cursor.checkpoint();
    this.consumeDigits();
    if (this.cursor.checkpoint() === fractionStart && reason === undefined) {
      reason = "missing-fraction-digits";
    }
  }
  if (this.cursor.peek() === LOWER_E || this.cursor.peek() === UPPER_E) {
    this.cursor.advance();
    if (this.cursor.peek() === PLUS || this.cursor.peek() === DASH) {
      this.cursor.advance();
    }
    const exponentStart = this.cursor.checkpoint();
    this.consumeDigits();
    if (this.cursor.checkpoint() === exponentStart && reason === undefined) {
      reason = "missing-exponent-digits";
    }
  }
  if (isNumberJunk(this.cursor.peek())) {
    // Keep an earlier reason if there is one; the junk is consumed either way.
    if (reason === undefined) {
      reason = "trailing-junk";
    }
    while (isNumberJunk(this.cursor.peek())) {
      this.cursor.advance();
    }
  }
  const span = this.cursor.spanFrom(start);
  const text = this.cursor.slice(span);
  if (reason !== undefined || this.cursor.checkpoint() === integerStart) {
    return JsonValue.errorNumber(this.makeError({
      tag: "invalid-number",
      span,
      text,
      reason: reason ?? "missing-integer-digits",
    }, span), span);
  }
  return JsonValue.number(Number(text), span);
}
/**
 * Tries to match `keyword` at the cursor. The match only counts when the
 * keyword is not immediately followed by another keyword character;
 * otherwise the cursor is rewound and the text is handled by
 * parseUnknownValue.
 */
private parseKeyword(
  keyword: CodePointString,
  makeValue: (span: CodePointSpan) => PartialJsonValue,
): PartialJsonValue {
  const rewindTo = this.cursor.checkpoint();
  const match = matchCodePointString(this.cursor, keyword);
  if (match.tag !== "match" || isKeywordPart(this.cursor.peek())) {
    this.cursor.restore(rewindTo);
    return this.parseUnknownValue();
  }
  return makeValue(match.span);
}
/**
 * Recovers from something that cannot start a value: always consumes one
 * code point, then keeps consuming until end of input, whitespace, a value
 * boundary, or a plausible value start. The diagnostic points at the first
 * code point; the error node covers everything skipped.
 */
private parseUnknownValue(): PartialJsonValue {
  const start = this.cursor.checkpoint();
  const focus = this.cursor.currentSpan();
  const found = this.found();
  const atRecoveryPoint = (cp: CodePoint | undefined): boolean =>
    cp === undefined
    || isAsciiWhitespace(cp)
    || isValueBoundary(cp)
    || isValueStart(cp);
  do {
    this.cursor.advance();
  } while (!atRecoveryPoint(this.cursor.peek()));
  const panickedOver = this.cursor.spanFrom(start);
  const error = this.makeError({ tag: "expected-value", span: focus, found }, panickedOver);
  return this.errorExpression(error, panickedOver);
}
/** Records "expected-value" at the current position and returns it as an error expression. */
private missingArrayValue(): PartialJsonValue {
  const span = this.cursor.currentSpan();
  const found = this.found();
  const error = this.makeError({ tag: "expected-value", span, found });
  return this.errorExpression(error);
}
/** Records "expected-member-key" at the current position and returns it as an error member item. */
private missingMemberKey(): PartialMemberItem {
  const span = this.cursor.currentSpan();
  const found = this.found();
  const error = this.makeError({ tag: "expected-member-key", span, found });
  return MemberItem.errorSeparator(error, this.cursor.currentSpan());
}
/** Advances the cursor past a (possibly empty) run of digits. */
private consumeDigits(): void {
  for (;;) {
    // -1 stands in for end of input, as in the other digit checks.
    const next = this.cursor.peek() ?? -1;
    if (!isDigit(next)) return;
    this.cursor.advance();
  }
}
/**
 * Skips the rest of a broken string. A quote is consumed and ends the skip;
 * end of input, `,`, `}` and `]` end it without being consumed.
 */
private consumeUntilStringRecovery(): void {
  let cp = this.cursor.peek();
  while (
    cp !== QUOTE
    && cp !== undefined
    && cp !== COMMA
    && cp !== CLOSE_BRACE
    && cp !== CLOSE_BRACKET
  ) {
    this.cursor.advance();
    cp = this.cursor.peek();
  }
  if (cp === QUOTE) {
    this.cursor.advance();
  }
}
// Advances the cursor with skipWhile while isAsciiWhitespace accepts the next code point.
private skipWhitespace(): void {
skipWhile(this.cursor, isAsciiWhitespace);
}
/** Describes what sits at the cursor: end of input, or the next code point with its span. */
private found(): FoundSyntax {
  const next = this.cursor.peek();
  return next === undefined
    ? { tag: "eof", span: this.cursor.eofSpan() }
    : { tag: "code-point", value: next, span: this.cursor.currentSpan() };
}
/**
 * Records `error` in the parser's diagnostics list and wraps it, together
 * with the optional skipped span, as a ConcreteError for attaching to a node.
 */
private makeError(error: ParseError, panickedOver?: CodePointSpan): ConcreteError {
  this.errors.push(error);
  const { span } = error;
  return ConcreteError.single({ span, error, panickedOver });
}
/** Builds an error-expression node; without an explicit span it uses the span of the first error entry. */
private errorExpression(error: ConcreteError, span?: CodePointSpan): PartialJsonValue {
  const nodeSpan = span ?? error[0].span;
  return JsonValue.errorExpression(error, nodeSpan);
}
}
/**
 * True when `cp` could begin a value: `{`, `[`, `"`, `-`, the first letter of
 * true / false / null, or a digit.
 */
function isValueStart(cp: CodePoint | undefined): boolean {
  if (cp === undefined) return false;
  switch (cp) {
    case OPEN_BRACE:
    case OPEN_BRACKET:
    case QUOTE:
    case DASH:
    case LOWER_T:
    case LOWER_F:
    case LOWER_N:
      return true;
    default:
      return isDigit(cp);
  }
}
// True only for a double quote, i.e. the start of a quoted member key.
function isMemberStart(cp: CodePoint | undefined): boolean {
return cp === QUOTE;
}
/** True at end of input and for `,`, `}` and `]`. */
function isValueBoundary(cp: CodePoint | undefined): boolean {
  switch (cp) {
    case undefined:
    case COMMA:
    case CLOSE_BRACE:
    case CLOSE_BRACKET:
      return true;
    default:
      return false;
  }
}
/** True for ASCII letters and digits, `_` and `-`; false at end of input. */
function isKeywordPart(cp: CodePoint | undefined): boolean {
  if (cp === undefined) return false;
  if (isAsciiAlphanumeric(cp)) return true;
  return cp === char('_') || cp === DASH;
}
/**
 * True for any code point that would be glued onto a number token: anything
 * except end of input, ASCII whitespace, `,`, `}`, `]` and `:`.
 */
function isNumberJunk(cp: CodePoint | undefined): boolean {
  if (cp === undefined) return false;
  if (isAsciiWhitespace(cp)) return false;
  const endsNumber = cp === COMMA
    || cp === CLOSE_BRACE
    || cp === CLOSE_BRACKET
    || cp === COLON;
  return !endsNumber;
}
/** True for code points below U+0020, which the string parser rejects inside a literal. */
function isControlCharacter(cp: CodePoint): boolean {
  const CONTROL_RANGE_END = 0x20;
  return cp < CONTROL_RANGE_END;
}
/** True for 0-9, a-f and A-F. */
function isHexDigit(cp: CodePoint): boolean {
  if (isDigit(cp)) return true;
  if (isBetween(LOWERCASE_A, cp, LOWERCASE_F)) return true;
  return isBetween(UPPERCASE_A, cp, UPPERCASE_F);
}

View file

@ -1,5 +1,5 @@
import type { CodePointSpan } from 'source-region'; import type { CodePointSpan } from 'source-region';
import type { ParseError } from './parse_errors.ts'; import type { ParseError } from './parse_errors';
export type ConcreteInfo = { span: CodePointSpan }; export type ConcreteInfo = { span: CodePointSpan };
@ -22,6 +22,32 @@ export type DelimiterToken =
| { tag: "open-bracket", span: CodePointSpan } | { tag: "open-bracket", span: CodePointSpan }
| { tag: "close-bracket", span: CodePointSpan } | { tag: "close-bracket", span: CodePointSpan }
/** Constructors for the four delimiter tokens. */
export namespace DelimiterToken {
  /** Token for `{` at `span`. */
  export function openBrace(span: CodePointSpan): DelimiterToken {
    const token: DelimiterToken = { tag: "open-brace", span };
    return token;
  }

  /** Token for `}` at `span`. */
  export function closeBrace(span: CodePointSpan): DelimiterToken {
    const token: DelimiterToken = { tag: "close-brace", span };
    return token;
  }

  /** Token for `[` at `span`. */
  export function openBracket(span: CodePointSpan): DelimiterToken {
    const token: DelimiterToken = { tag: "open-bracket", span };
    return token;
  }

  /** Token for `]` at `span`. */
  export function closeBracket(span: CodePointSpan): DelimiterToken {
    const token: DelimiterToken = { tag: "close-bracket", span };
    return token;
  }
}
/** The `:` between a member key and its value. */
export type ColonToken = { tag: "colon", span: CodePointSpan };

export namespace ColonToken {
  /** Token for `:` at `span`. */
  export function make(span: CodePointSpan): ColonToken {
    const token: ColonToken = { tag: "colon", span };
    return token;
  }
}
export type Program<Info, Error> = { export type Program<Info, Error> = {
tag: "program", tag: "program",
expressions: JsonValue<Info, Error>[], expressions: JsonValue<Info, Error>[],
@ -48,7 +74,7 @@ export type MemberItem<Info, Error> =
export type Member<Info, Error> = { export type Member<Info, Error> = {
key: StringLiteral<Info, Error>, key: StringLiteral<Info, Error>,
colon?: { tag: "colon", span: CodePointSpan }, colon?: ColonToken,
value: JsonValue<Info, Error>, value: JsonValue<Info, Error>,
error?: Error error?: Error
} & Info } & Info
@ -93,3 +119,137 @@ export type JsonScalar<Info, Error> =
| { tag: "true", error?: Error } & Info | { tag: "true", error?: Error } & Info
| { tag: "false", error?: Error } & Info | { tag: "false", error?: Error } & Info
export namespace Program {
  /**
   * Builds a program node from its expressions plus the fields of `info`.
   * The `error` key is only present when an error is supplied.
   */
  export function make<Info, Error>(
    expressions: JsonValue<Info, Error>[],
    info: Info,
    error?: Error,
  ): Program<Info, Error> {
    if (error !== undefined) {
      return { tag: "program", expressions, error, ...info };
    }
    return { tag: "program", expressions, ...info };
  }

  /** Renders every top-level expression and joins the results with single spaces. */
  export function show<Info, Error>(program: Program<Info, Error>): string {
    const rendered = program.expressions.map(JsonValue.show);
    return rendered.join(" ");
  }
}
/** Constructors and a debug renderer for concrete JSON value nodes. */
export namespace JsonValue {
  type Concrete = JsonValue<ConcreteInfo, ConcreteError>;

  /** Object node; `close` and `error` stay undefined when not supplied. */
  export function object(
    open: DelimiterToken,
    members: MemberItem<ConcreteInfo, ConcreteError>[],
    span: CodePointSpan,
    close?: DelimiterToken,
    error?: ConcreteError,
  ): JsonValue<ConcreteInfo, ConcreteError> {
    const node: Concrete = { tag: "object", open, members, close, error, span };
    return node;
  }

  /** Array node; `close` and `error` stay undefined when not supplied. */
  export function array(
    open: DelimiterToken,
    items: ArrayItem<ConcreteInfo, ConcreteError>[],
    span: CodePointSpan,
    close?: DelimiterToken,
    error?: ConcreteError,
  ): JsonValue<ConcreteInfo, ConcreteError> {
    const node: Concrete = { tag: "array", open, items, close, error, span };
    return node;
  }

  /** String node carrying its decoded value. */
  export function string(
    value: string,
    span: CodePointSpan,
    error?: ConcreteError,
  ): JsonValue<ConcreteInfo, ConcreteError> {
    const node: Concrete = { tag: "string", value, error, span };
    return node;
  }

  /** Placeholder for a string that could not be parsed. */
  export function errorString(error: ConcreteError, span: CodePointSpan): StringLiteral<ConcreteInfo, ConcreteError> {
    const node: StringLiteral<ConcreteInfo, ConcreteError> = { tag: "error-string", error, span };
    return node;
  }

  /** Number node carrying its numeric value. */
  export function number(
    value: number,
    span: CodePointSpan,
    error?: ConcreteError,
  ): JsonValue<ConcreteInfo, ConcreteError> {
    const node: Concrete = { tag: "number", value, error, span };
    return node;
  }

  /** Placeholder for a number that could not be parsed. */
  export function errorNumber(error: ConcreteError, span: CodePointSpan): NumberLiteral<ConcreteInfo, ConcreteError> {
    const node: NumberLiteral<ConcreteInfo, ConcreteError> = { tag: "error-number", error, span };
    return node;
  }

  /** `null` literal node. */
  export function nullValue(span: CodePointSpan): JsonValue<ConcreteInfo, ConcreteError> {
    const node: Concrete = { tag: "null", span };
    return node;
  }

  /** `true` literal node. */
  export function trueValue(span: CodePointSpan): JsonValue<ConcreteInfo, ConcreteError> {
    const node: Concrete = { tag: "true", span };
    return node;
  }

  /** `false` literal node. */
  export function falseValue(span: CodePointSpan): JsonValue<ConcreteInfo, ConcreteError> {
    const node: Concrete = { tag: "false", span };
    return node;
  }

  /** Placeholder for a value that could not be parsed at all. */
  export function errorExpression(error: ConcreteError, span: CodePointSpan): JsonValue<ConcreteInfo, ConcreteError> {
    const node: Concrete = { tag: "error-expression", error, span };
    return node;
  }

  /** Renders a value as compact text; error nodes render as `<tag>` markers. */
  export function show<Info, Error>(value: JsonValue<Info, Error>): string {
    switch (value.tag) {
      case "object": {
        const members = value.members.map(MemberItem.show);
        return `{${members.join(", ")}}`;
      }
      case "array": {
        const items = value.items.map(ArrayItem.show);
        return `[${items.join(", ")}]`;
      }
      case "string":
        return JSON.stringify(value.value);
      case "number":
        return `${value.value}`;
      case "null":
      case "true":
      case "false":
        // The keyword spelling is identical to the tag.
        return value.tag;
      case "error-expression":
      case "error-string":
      case "error-number":
        return `<${value.tag}>`;
    }
  }
}
/** Constructors and a debug renderer for the items of an object body. */
export namespace MemberItem {
  /** A `key: value` member; `colon` and `error` stay undefined when not supplied. */
  export function member(
    key: StringLiteral<ConcreteInfo, ConcreteError>,
    value: JsonValue<ConcreteInfo, ConcreteError>,
    span: CodePointSpan,
    colon?: ColonToken,
    error?: ConcreteError,
  ): MemberItem<ConcreteInfo, ConcreteError> {
    const item: MemberItem<ConcreteInfo, ConcreteError> = { tag: "member", key, colon, value, error, span };
    return item;
  }

  /** An error item recorded inline among the members of an object. */
  export function errorSeparator(error: ConcreteError, span: CodePointSpan): MemberItem<ConcreteInfo, ConcreteError> {
    const item: MemberItem<ConcreteInfo, ConcreteError> = { tag: "error-object-separator", error, span };
    return item;
  }

  /** Renders a member as `key: value`, or a marker for an error item. */
  export function show<Info, Error>(item: MemberItem<Info, Error>): string {
    if (item.tag !== "error-object-separator") {
      const key = JsonValue.show(item.key);
      const value = JsonValue.show(item.value);
      return `${key}: ${value}`;
    }
    return "<error-object-separator>";
  }
}
/** Constructor and debug renderer for the items of an array body. */
export namespace ArrayItem {
  /** An error item recorded inline among the items of an array. */
  export function errorSeparator(error: ConcreteError, span: CodePointSpan): ArrayItem<ConcreteInfo, ConcreteError> {
    const item: ArrayItem<ConcreteInfo, ConcreteError> = { tag: "error-array-separator", error, span };
    return item;
  }

  /** Renders an item as its value, or a marker for an error item. */
  export function show<Info, Error>(item: ArrayItem<Info, Error>): string {
    return item.tag === "error-array-separator"
      ? "<error-array-separator>"
      : JsonValue.show(item);
  }
}

View file

@ -1,3 +1,5 @@
import { runExperiments as runLispExperiments } from './languages/lisp/experiments'; import { runExperiments as runLispExperiments } from './languages/lisp/experiments';
import { runExperiments as runJsonExperiments } from './languages/json/experiments';
runLispExperiments(); runLispExperiments();
runJsonExperiments();