// === Experiments ===
//
// Each experiment feeds one small input to the JSON parser and prints the
// recovered syntax plus the diagnostics, so recovery behaviour can be
// inspected by eye.

/** Baseline: an empty input. */
function experiment00_emptyDocument(): void {
  logParse("empty document", "");
}

/** Several scalar values placed side by side at the top level. */
function experiment01_topLevelValues(): void {
  logParse("top-level values", 'true false null "hello" 123');
}

/** A well-formed object containing a nested array. */
function experiment02_objectsAndArrays(): void {
  logParse("objects and arrays", '{"name": "Ada", "scores": [1, 2, 3], "ok": true}');
}

/** Two array items with no comma between them. */
function experiment03_missingArraySeparator(): void {
  logParse("missing array separator", '[1 2, 3]');
}

/** A trailing comma and a doubled comma inside arrays. */
function experiment04_arrayTrailingAndRepeatedComma(): void {
  logParse("array comma errors", '[1, 2,] [1,, 2]');
}

/** A member whose key is not followed by a colon. */
function experiment05_objectMissingColon(): void {
  logParse("object missing colon", '{"x" 1, "y": 2}');
}

/** Two members with no comma between them. */
function experiment06_objectMissingSeparator(): void {
  logParse("object missing separator", '{"x": 1 "y": 2}');
}

/** An unquoted key and a member with no key at all. */
function experiment07_objectKeyErrors(): void {
  logParse("object key errors", '{x: 1, "ok": 2, : 3}');
}

/** A string cut by a line break, a bad escape, and a short unicode escape. */
function experiment08_stringErrors(): void {
  logParse("string errors", '"unterminated\n"bad escape: \\x" "bad unicode: \\u12"');
}

/** One malformed number per invalid-number reason. */
function experiment09_numberErrors(): void {
  logParse("number errors", '01 - 1. 1e+ 123abc');
}

/** Containers closed with the wrong kind of delimiter. */
function experiment10_mismatchedDelimiters(): void {
  logParse("mismatched delimiters", '{"x": [1, 2} {"y": 3]');
}

/** Garbage between valid top-level values. */
function experiment11_recoverAtDocumentLevel(): void {
  logParse("document recovery", '@@@ {"ok": true} nil [1, 2]');
}

/**
 * Parses `input` and prints a banner, the raw input, the result tag with the
 * rendered program, and the full list of diagnostics.
 *
 * @param name  Label shown in the banner line.
 * @param input JSON-ish text handed to the parser.
 */
function logParse(name: string, input: string): void {
  const { syntax, errors } = parseDocument(sourceText(input).fullRegion());

  console.log(`==== json:${name} ====`);
  console.log(input);
  console.log(syntax.tag, Program.show(programOf(syntax)));
  console.dir(errors, { depth: null });
}

/** Runs every JSON experiment in numeric order. */
export function runExperiments(): void {
  const experiments: Array<() => void> = [
    experiment00_emptyDocument,
    experiment01_topLevelValues,
    experiment02_objectsAndArrays,
    experiment03_missingArraySeparator,
    experiment04_arrayTrailingAndRepeatedComma,
    experiment05_objectMissingColon,
    experiment06_objectMissingSeparator,
    experiment07_objectKeyErrors,
    experiment08_stringErrors,
    experiment09_numberErrors,
    experiment10_mismatchedDelimiters,
    experiment11_recoverAtDocumentLevel,
  ];

  for (const experiment of experiments) {
    experiment();
  }
}
/**
 * The `reason` codes carried by the `"invalid-number"` variant of
 * `ParseError`.
 *
 * Derived from the diagnostic union rather than restated, so the parser's
 * local type cannot drift from the definition in `parse_errors`. `Extract`
 * needs both type arguments: the union to search and the shape selecting the
 * variant.
 */
type InvalidNumberReason = Extract<ParseError, { tag: "invalid-number" }>["reason"];
// Code points the parser compares against, built once at module load.
// NOTE(review): `char` comes from 'source-region'; presumably it maps a
// one-character string to its code point — confirm against that package.

// Structural punctuation: container delimiters, separators, string quote.
const OPEN_BRACE = char('{');
const CLOSE_BRACE = char('}');
const OPEN_BRACKET = char('[');
const CLOSE_BRACKET = char(']');
const COMMA = char(',');
const COLON = char(':');
const QUOTE = char('"');
// Characters accepted after a backslash as self-escapes (with QUOTE).
const BACKSLASH = char('\\');
const SLASH = char('/');
// Number syntax: sign, fraction point, exponent markers.
const DASH = char('-');
const PLUS = char('+');
const DOT = char('.');
const LOWER_E = char('e');
const UPPER_E = char('E');
// Escape letters; LOWER_T / LOWER_F / LOWER_N also serve as the first
// letters of the `true` / `false` / `null` keywords.
const LOWER_U = char('u');
const LOWER_B = char('b');
const LOWER_F = char('f');
const LOWER_N = char('n');
const LOWER_R = char('r');
const LOWER_T = char('t');
// Digit range bounds.
const DIGIT_0 = char('0');
const DIGIT_9 = char('9');
// Hex-digit range bounds. LOWERCASE_F is the same code point as LOWER_F.
const LOWERCASE_A = char('a');
const LOWERCASE_F = char('f');
const UPPERCASE_A = char('A');
const UPPERCASE_F = char('F');

// Keyword spellings matched by parseKeyword.
const TRUE = CodePointString.makeFromString("true");
const FALSE = CodePointString.makeFromString("false");
const NULL = CodePointString.makeFromString("null");
PartialJsonValue = JsonValueType; +export type PartialMemberItem = MemberItemType; +export type PartialArrayItem = ArrayItemType; + +export namespace ConcreteSyntaxResult { + export function valid(value: ValidConcreteSyntax): ConcreteSyntaxResult { + return { tag: "valid", value }; + } + + export function invalid(value: PartialConcreteSyntax): ConcreteSyntaxResult { + return { tag: "invalid", value }; + } +} + +export function programOf(result: ConcreteSyntaxResult): PartialConcreteSyntax { + return result.value; +} + +export function parseDocument(region: SourceRegion): ParseDocumentResult { + return new Parser(region).parseDocument(); +} + +class Parser { + private readonly cursor: SourceCursor; + private readonly errors: ParseError[] = []; + + constructor(private readonly region: SourceRegion) { + this.cursor = region.makeCursor(); + } + + parseDocument(): ParseDocumentResult { + const expressions: PartialJsonValue[] = []; + + while (true) { + this.skipWhitespace(); + if (this.cursor.isAtEnd()) break; + + expressions.push(this.parseJsonValue()); + } + + const program = Program.make(expressions, { span: this.region.codePointSpan }); + return { + syntax: this.errors.length === 0 + ? ConcreteSyntaxResult.valid(program as ValidConcreteSyntax) + : ConcreteSyntaxResult.invalid(program as PartialConcreteSyntax), + errors: this.errors, + }; + } + + private parseJsonValue(): PartialJsonValue { + const cp = this.cursor.peek(); + + if (cp === undefined) { + return this.errorExpression(this.makeError({ + tag: "expected-value", + span: this.cursor.eofSpan(), + found: this.found(), + })); + } + + if (cp === CLOSE_BRACE || cp === CLOSE_BRACKET) { + const delimiter = cp === CLOSE_BRACE ? 
"brace" : "bracket"; + const span = this.cursor.currentSpan(); + this.cursor.advance(); + return this.errorExpression(this.makeError({ + tag: "unexpected-close-delimiter", + span, + delimiter, + })); + } + + if (cp === OPEN_BRACE) return this.parseObject(); + if (cp === OPEN_BRACKET) return this.parseArray(); + if (cp === QUOTE) return this.parseString(); + if (cp === DASH || isDigit(cp)) return this.parseNumber(); + if (cp === LOWER_T) return this.parseKeyword(TRUE, JsonValue.trueValue); + if (cp === LOWER_F) return this.parseKeyword(FALSE, JsonValue.falseValue); + if (cp === LOWER_N) return this.parseKeyword(NULL, JsonValue.nullValue); + + return this.parseUnknownValue(); + } + + private parseObject(): PartialJsonValue { + const start = this.cursor.checkpoint(); + const open = DelimiterToken.openBrace(this.cursor.currentSpan()); + this.cursor.advance(); + + const members: PartialMemberItem[] = []; + let expectingMember = true; + let sawMember = false; + let lastWasSeparator = false; + + while (true) { + this.skipWhitespace(); + + const cp = this.cursor.peek(); + if (cp === CLOSE_BRACE) { + const close = DelimiterToken.closeBrace(this.cursor.currentSpan()); + if (lastWasSeparator) { + members.push(this.missingMemberKey()); + } + this.cursor.advance(); + return JsonValue.object(open, members, this.cursor.spanFrom(start), close); + } + + if (cp === CLOSE_BRACKET) { + const close = DelimiterToken.closeBracket(this.cursor.currentSpan()); + const error = this.makeError({ + tag: "expected-close-delimiter", + span: this.cursor.currentSpan(), + open: open.span, + expected: "brace", + found: this.found(), + }); + this.cursor.advance(); + return JsonValue.object(open, members, this.cursor.spanFrom(start), close, error); + } + + if (cp === undefined) { + const error = this.makeError({ + tag: "expected-close-delimiter", + span: this.cursor.eofSpan(), + open: open.span, + expected: "brace", + found: this.found(), + }); + return JsonValue.object(open, members, 
this.cursor.spanFrom(start), undefined, error); + } + + if (expectingMember) { + if (cp === COMMA) { + const commaSpan = this.cursor.currentSpan(); + this.cursor.advance(); + const error = this.makeError({ + tag: "expected-member-key", + span: commaSpan, + found: { tag: "code-point", value: COMMA, span: commaSpan }, + }); + members.push(MemberItem.errorSeparator(error, commaSpan)); + lastWasSeparator = true; + continue; + } + + members.push(this.parseMember()); + expectingMember = false; + sawMember = true; + lastWasSeparator = false; + continue; + } + + if (cp === COMMA) { + this.cursor.advance(); + expectingMember = true; + lastWasSeparator = true; + continue; + } + + const error = this.makeError({ + tag: "expected-object-separator", + span: this.cursor.currentSpan(), + found: this.found(), + }); + members.push(MemberItem.errorSeparator(error, this.cursor.currentSpan())); + expectingMember = true; + lastWasSeparator = false; + + if (!sawMember && !isMemberStart(cp)) { + members.push(this.missingMemberKey()); + } + } + } + + private parseMember(): PartialMemberItem { + const start = this.cursor.checkpoint(); + const key = this.parseMemberKey(); + + this.skipWhitespace(); + let colon = undefined; + let memberError = undefined; + + if (this.cursor.peek() === COLON) { + colon = ColonToken.make(this.cursor.currentSpan()); + this.cursor.advance(); + } else { + memberError = this.makeError({ + tag: "expected-colon", + span: this.cursor.currentSpan(), + found: this.found(), + }); + } + + this.skipWhitespace(); + const value = isValueBoundary(this.cursor.peek()) + ? 
this.errorExpression(this.makeError({ + tag: "expected-value", + span: this.cursor.currentSpan(), + found: this.found(), + })) + : this.parseJsonValue(); + + const span = this.cursor.spanFrom(start); + return MemberItem.member(key, value, span, colon, memberError); + } + + private parseMemberKey(): StringLiteral { + if (this.cursor.peek() === QUOTE) { + return this.parseStringLiteral(); + } + + const focus = this.cursor.currentSpan(); + const start = this.cursor.checkpoint(); + const found = this.found(); + + while (true) { + const cp = this.cursor.peek(); + if ( + cp === undefined + || cp === COLON + || cp === COMMA + || cp === CLOSE_BRACE + || cp === CLOSE_BRACKET + || isAsciiWhitespace(cp) + ) { + break; + } + this.cursor.advance(); + } + + const span = this.cursor.spanFrom(start); + const panickedOver = span.start === span.end ? undefined : span; + const error = this.makeError({ + tag: "expected-member-key", + span: focus, + found, + }, panickedOver); + + return JsonValue.errorString(error, panickedOver ?? 
focus); + } + + private parseArray(): PartialJsonValue { + const start = this.cursor.checkpoint(); + const open = DelimiterToken.openBracket(this.cursor.currentSpan()); + this.cursor.advance(); + + const items: PartialArrayItem[] = []; + let expectingValue = true; + let sawItem = false; + let lastWasSeparator = false; + + while (true) { + this.skipWhitespace(); + + const cp = this.cursor.peek(); + if (cp === CLOSE_BRACKET) { + const close = DelimiterToken.closeBracket(this.cursor.currentSpan()); + if (lastWasSeparator) { + items.push(this.missingArrayValue()); + } + this.cursor.advance(); + return JsonValue.array(open, items, this.cursor.spanFrom(start), close); + } + + if (cp === CLOSE_BRACE) { + const close = DelimiterToken.closeBrace(this.cursor.currentSpan()); + const error = this.makeError({ + tag: "expected-close-delimiter", + span: this.cursor.currentSpan(), + open: open.span, + expected: "bracket", + found: this.found(), + }); + this.cursor.advance(); + return JsonValue.array(open, items, this.cursor.spanFrom(start), close, error); + } + + if (cp === undefined) { + const error = this.makeError({ + tag: "expected-close-delimiter", + span: this.cursor.eofSpan(), + open: open.span, + expected: "bracket", + found: this.found(), + }); + return JsonValue.array(open, items, this.cursor.spanFrom(start), undefined, error); + } + + if (expectingValue) { + if (cp === COMMA) { + const commaSpan = this.cursor.currentSpan(); + this.cursor.advance(); + const error = this.makeError({ + tag: "expected-value", + span: commaSpan, + found: { tag: "code-point", value: COMMA, span: commaSpan }, + }); + items.push(this.errorExpression(error, commaSpan)); + lastWasSeparator = true; + continue; + } + + items.push(this.parseJsonValue()); + expectingValue = false; + sawItem = true; + lastWasSeparator = false; + continue; + } + + if (cp === COMMA) { + this.cursor.advance(); + expectingValue = true; + lastWasSeparator = true; + continue; + } + + const error = this.makeError({ + tag: 
"expected-array-separator", + span: this.cursor.currentSpan(), + found: this.found(), + }); + items.push(ArrayItem.errorSeparator(error, this.cursor.currentSpan())); + expectingValue = true; + lastWasSeparator = false; + + if (!sawItem && !isValueStart(cp)) { + items.push(this.missingArrayValue()); + } + } + } + + private parseString(): PartialJsonValue { + return this.parseStringLiteral(); + } + + private parseStringLiteral(): StringLiteral { + const start = this.cursor.checkpoint(); + this.cursor.advance(); + + let value = ""; + + while (true) { + const cp = this.cursor.peek(); + + if (cp === undefined) { + const span = this.cursor.spanFrom(start); + return JsonValue.errorString(this.makeError({ + tag: "invalid-string", + span, + reason: "unterminated", + }, span), span); + } + + if (cp === QUOTE) { + this.cursor.advance(); + return { tag: "string", value, span: this.cursor.spanFrom(start) }; + } + + if (isControlCharacter(cp)) { + const focus = this.cursor.currentSpan(); + this.cursor.advance(); + const span = this.cursor.spanFrom(start); + return JsonValue.errorString(this.makeError({ + tag: "invalid-string", + span: focus, + reason: "control-character", + }, span), span); + } + + if (cp === BACKSLASH) { + const escaped = this.consumeEscape(); + if (escaped.tag === "error") { + const focus = escaped.span; + this.consumeUntilStringRecovery(); + const span = this.cursor.spanFrom(start); + return JsonValue.errorString(this.makeError({ + tag: "invalid-string", + span: focus, + reason: escaped.reason, + }, span), span); + } + value += escaped.value; + continue; + } + + value += String.fromCodePoint(cp); + this.cursor.advance(); + } + } + + private consumeEscape(): + | { tag: "ok"; value: string } + | { tag: "error"; span: CodePointSpan; reason: "invalid-escape" | "invalid-unicode-escape" } { + const start = this.cursor.checkpoint(); + this.cursor.advance(); + const cp = this.cursor.peek(); + + if (cp === undefined) { + return { tag: "error", span: 
this.cursor.spanFrom(start), reason: "invalid-escape" }; + } + + if (cp === QUOTE || cp === BACKSLASH || cp === SLASH) { + this.cursor.advance(); + return { tag: "ok", value: String.fromCodePoint(cp) }; + } + + if (cp === LOWER_B) { + this.cursor.advance(); + return { tag: "ok", value: "\b" }; + } + if (cp === LOWER_F) { + this.cursor.advance(); + return { tag: "ok", value: "\f" }; + } + if (cp === LOWER_N) { + this.cursor.advance(); + return { tag: "ok", value: "\n" }; + } + if (cp === LOWER_R) { + this.cursor.advance(); + return { tag: "ok", value: "\r" }; + } + if (cp === LOWER_T) { + this.cursor.advance(); + return { tag: "ok", value: "\t" }; + } + + if (cp === LOWER_U) { + this.cursor.advance(); + let hex = ""; + for (let i = 0; i < 4; i += 1) { + const hexCp = this.cursor.peek(); + if (hexCp === undefined || !isHexDigit(hexCp)) { + return { tag: "error", span: this.cursor.spanFrom(start), reason: "invalid-unicode-escape" }; + } + hex += String.fromCodePoint(hexCp); + this.cursor.advance(); + } + return { tag: "ok", value: String.fromCodePoint(Number.parseInt(hex, 16)) }; + } + + this.cursor.advance(); + return { tag: "error", span: this.cursor.spanFrom(start), reason: "invalid-escape" }; + } + + private parseNumber(): PartialJsonValue { + const start = this.cursor.checkpoint(); + let reason: InvalidNumberReason | undefined = undefined; + + if (this.cursor.peek() === DASH) { + this.cursor.advance(); + } + + const integerStart = this.cursor.checkpoint(); + const firstDigit = this.cursor.peek(); + if (firstDigit === DIGIT_0) { + this.cursor.advance(); + if (isDigit(this.cursor.peek() ?? 
-1)) { + reason = "leading-zero"; + this.consumeDigits(); + } + } else if (firstDigit !== undefined && isBetween(char('1'), firstDigit, DIGIT_9)) { + this.consumeDigits(); + } else { + reason = "missing-integer-digits"; + } + + if (this.cursor.peek() === DOT) { + this.cursor.advance(); + const fractionStart = this.cursor.checkpoint(); + this.consumeDigits(); + if (this.cursor.checkpoint() === fractionStart && reason === undefined) { + reason = "missing-fraction-digits"; + } + } + + if (this.cursor.peek() === LOWER_E || this.cursor.peek() === UPPER_E) { + this.cursor.advance(); + if (this.cursor.peek() === PLUS || this.cursor.peek() === DASH) { + this.cursor.advance(); + } + + const exponentStart = this.cursor.checkpoint(); + this.consumeDigits(); + if (this.cursor.checkpoint() === exponentStart && reason === undefined) { + reason = "missing-exponent-digits"; + } + } + + if (isNumberJunk(this.cursor.peek())) { + reason = "trailing-junk"; + while (isNumberJunk(this.cursor.peek())) { + this.cursor.advance(); + } + } + + const span = this.cursor.spanFrom(start); + const text = this.cursor.slice(span); + + if (reason !== undefined || this.cursor.checkpoint() === integerStart) { + return JsonValue.errorNumber(this.makeError({ + tag: "invalid-number", + span, + text, + reason: reason ?? 
"missing-integer-digits", + }, span), span); + } + + return JsonValue.number(Number(text), span); + } + + private parseKeyword( + keyword: CodePointString, + makeValue: (span: CodePointSpan) => PartialJsonValue, + ): PartialJsonValue { + const start = this.cursor.checkpoint(); + const match = matchCodePointString(this.cursor, keyword); + + if (match.tag === "match" && !isKeywordPart(this.cursor.peek())) { + return makeValue(match.span); + } + + this.cursor.restore(start); + return this.parseUnknownValue(); + } + + private parseUnknownValue(): PartialJsonValue { + const start = this.cursor.checkpoint(); + const focus = this.cursor.currentSpan(); + const found = this.found(); + + this.cursor.advance(); + while (true) { + const cp = this.cursor.peek(); + if ( + cp === undefined + || isAsciiWhitespace(cp) + || isValueBoundary(cp) + || isValueStart(cp) + ) { + break; + } + this.cursor.advance(); + } + + const panickedOver = this.cursor.spanFrom(start); + const error = this.makeError({ + tag: "expected-value", + span: focus, + found, + }, panickedOver); + + return this.errorExpression(error, panickedOver); + } + + private missingArrayValue(): PartialJsonValue { + return this.errorExpression(this.makeError({ + tag: "expected-value", + span: this.cursor.currentSpan(), + found: this.found(), + })); + } + + private missingMemberKey(): PartialMemberItem { + const error = this.makeError({ + tag: "expected-member-key", + span: this.cursor.currentSpan(), + found: this.found(), + }); + return MemberItem.errorSeparator(error, this.cursor.currentSpan()); + } + + private consumeDigits(): void { + while (isDigit(this.cursor.peek() ?? 
/**
 * Reports whether `cp` could begin a JSON value: `{`, `[`, `"`, `-`, the
 * first letter of `true` / `false` / `null`, or a digit.
 *
 * @param cp Code point to classify; `undefined` means end of input.
 * @returns `true` when a value may start here, `false` otherwise.
 */
function isValueStart(cp: CodePoint | undefined): boolean {
  if (cp === undefined) {
    return false;
  }

  switch (cp) {
    case OPEN_BRACE:
    case OPEN_BRACKET:
    case QUOTE:
    case DASH:
    case LOWER_T:
    case LOWER_F:
    case LOWER_N:
      return true;
    default:
      return isDigit(cp);
  }
}
/** Constructors for the four delimiter tokens. */
export namespace DelimiterToken {
  /** Pairs a delimiter tag with the span it occupies. */
  function tokenOf(tag: DelimiterToken["tag"], span: CodePointSpan): DelimiterToken {
    return { tag, span };
  }

  /** Token for `{` at `span`. */
  export function openBrace(span: CodePointSpan): DelimiterToken {
    return tokenOf("open-brace", span);
  }

  /** Token for `}` at `span`. */
  export function closeBrace(span: CodePointSpan): DelimiterToken {
    return tokenOf("close-brace", span);
  }

  /** Token for `[` at `span`. */
  export function openBracket(span: CodePointSpan): DelimiterToken {
    return tokenOf("open-bracket", span);
  }

  /** Token for `]` at `span`. */
  export function closeBracket(span: CodePointSpan): DelimiterToken {
    return tokenOf("close-bracket", span);
  }
}
/**
 * Constructors for every JSON value node, plus a compact renderer.
 *
 * Constructors with optional `close` / `error` arguments always write those
 * properties onto the node, holding `undefined` when the argument is omitted.
 */
export namespace JsonValue {
  /** Object node; `close` is absent when the closing delimiter was never found. */
  export function object(
    open: DelimiterToken,
    members: MemberItem[],
    span: CodePointSpan,
    close?: DelimiterToken,
    error?: ConcreteError,
  ): JsonValue {
    const node: JsonValue = { tag: "object", open, members, close, error, span };
    return node;
  }

  /** Array node; `close` is absent when the closing delimiter was never found. */
  export function array(
    open: DelimiterToken,
    items: ArrayItem[],
    span: CodePointSpan,
    close?: DelimiterToken,
    error?: ConcreteError,
  ): JsonValue {
    const node: JsonValue = { tag: "array", open, items, close, error, span };
    return node;
  }

  /** String node holding the decoded text. */
  export function string(
    value: string,
    span: CodePointSpan,
    error?: ConcreteError,
  ): JsonValue {
    const node: JsonValue = { tag: "string", value, error, span };
    return node;
  }

  /** Placeholder for a string that could not be parsed. */
  export function errorString(error: ConcreteError, span: CodePointSpan): StringLiteral {
    const node: StringLiteral = { tag: "error-string", error, span };
    return node;
  }

  /** Number node holding the numeric value. */
  export function number(
    value: number,
    span: CodePointSpan,
    error?: ConcreteError,
  ): JsonValue {
    const node: JsonValue = { tag: "number", value, error, span };
    return node;
  }

  /** Placeholder for a number that could not be parsed. */
  export function errorNumber(error: ConcreteError, span: CodePointSpan): NumberLiteral {
    const node: NumberLiteral = { tag: "error-number", error, span };
    return node;
  }

  /** `null` literal node. */
  export function nullValue(span: CodePointSpan): JsonValue {
    const node: JsonValue = { tag: "null", span };
    return node;
  }

  /** `true` literal node. */
  export function trueValue(span: CodePointSpan): JsonValue {
    const node: JsonValue = { tag: "true", span };
    return node;
  }

  /** `false` literal node. */
  export function falseValue(span: CodePointSpan): JsonValue {
    const node: JsonValue = { tag: "false", span };
    return node;
  }

  /** Placeholder for a position where no value could be parsed. */
  export function errorExpression(error: ConcreteError, span: CodePointSpan): JsonValue {
    const node: JsonValue = { tag: "error-expression", error, span };
    return node;
  }

  /**
   * Renders a value as compact JSON-like text for the experiment logs.
   *
   * Error nodes render as the empty string.
   * NOTE(review): the empty renderings may once have been placeholder text
   * that was lost — confirm against the repository before relying on them.
   */
  export function show(value: JsonValue): string {
    if (value.tag === "object") {
      const members = value.members.map(MemberItem.show);
      return `{${members.join(", ")}}`;
    }
    if (value.tag === "array") {
      const items = value.items.map(ArrayItem.show);
      return `[${items.join(", ")}]`;
    }
    if (value.tag === "string") {
      return JSON.stringify(value.value);
    }
    if (value.tag === "number") {
      return `${value.value}`;
    }

    switch (value.tag) {
      // The keyword literals print as their own tag.
      case "null":
      case "true":
      case "false":
        return value.tag;
      case "error-expression":
      case "error-string":
      case "error-number":
        return "";
    }
  }
}