From 57f666118acea33e9617965990f65c4f8ff46072 Mon Sep 17 00:00:00 2001
From: Yura Dupyn <2153100+omedusyo@users.noreply.github.com>
Date: Sat, 25 Apr 2026 17:44:05 +0200
Subject: [PATCH] define basic Partial CST for JSON

---
 QESTIONS.md                        | 18 ++++++
 src/languages/json/parse_errors.ts |  4 ++
 src/languages/json/syntax.ts       | 95 ++++++++++++++++++++++++++++++
 src/languages/lisp/syntax.ts       |  8 +--
 4 files changed, 121 insertions(+), 4 deletions(-)
 create mode 100644 QESTIONS.md
 create mode 100644 src/languages/json/parse_errors.ts

diff --git a/QESTIONS.md b/QESTIONS.md
new file mode 100644
index 0000000..83b16e0
--- /dev/null
+++ b/QESTIONS.md
@@ -0,0 +1,18 @@
+
+# tokens in Partial Concrete Syntax
+What sort of tokens should I track in the syntax?
+- delimiters like e.g. in `[a, b, c]`?
+- more significant separators like the `:` in `{ "foo" : a }`?
+- What about grouping symbols like `{ ... }`?
+- What about keywords? e.g. `fn` or `for` or `while`?
+
+Can these questions be answered universally, or is this application dependent?
+- For example, maybe when building a compiler, we don't need to track so much stuff.
+- But for a formatter, we probably need to track a bit more.
+- But what about something like a library in an IDE that handles various transformations of the code?
+
+# delimiter confusion
+I just realized that I've been misunderstanding the word `delimiter`.
+I thought that a delimiter was like the `,` in `[ a, b, c]`. But that's properly called a separator! Or item-separator.
+I thought separator and delimiter were synonyms. But it seems like a `delimiter` is actually the grouping symbols like `[` or `]`.
+
diff --git a/src/languages/json/parse_errors.ts b/src/languages/json/parse_errors.ts
new file mode 100644
index 0000000..17b10ee
--- /dev/null
+++ b/src/languages/json/parse_errors.ts
@@ -0,0 +1,5 @@
+import type { CodePoint, CodePointSpan } from 'source-region';
+
+/** Placeholder: no concrete JSON parse errors are defined yet. */
+export type ParseError =
+| {} // TODO
diff --git a/src/languages/json/syntax.ts b/src/languages/json/syntax.ts
index e69de29..0c4aeb2 100644
--- a/src/languages/json/syntax.ts
+++ b/src/languages/json/syntax.ts
@@ -0,0 +1,111 @@
+import type { CodePointSpan } from 'source-region';
+import type { ParseError } from './parse_errors.ts';
+
+/** Source span carried by every syntax node type declared in this file. */
+export type ConcreteInfo = { span: CodePointSpan };
+
+/** The parse errors recorded on a node. */
+export type ConcreteError = ConcreteErrorNode[] // Convention: can't be empty.
+/** One parse error together with the span it is reported at. */
+export type ConcreteErrorNode = {
+  span: CodePointSpan,
+  error: ParseError,
+  // NOTE(review): presumably the input skipped during error recovery; confirm against the parser.
+  panickedOver?: CodePointSpan,
+}
+
+export namespace ConcreteError {
+  /** Builds a one-element `ConcreteError`, so the result is never empty. */
+  export function single(node: ConcreteErrorNode): ConcreteError {
+    return [node];
+  }
+}
+
+/** The JSON grouping symbols `{`, `}`, `[` and `]`, each with its span. */
+export type DelimiterToken =
+  | { tag: "open-brace", span: CodePointSpan }
+  | { tag: "close-brace", span: CodePointSpan }
+  | { tag: "open-bracket", span: CodePointSpan }
+  | { tag: "close-bracket", span: CodePointSpan }
+
+/** Root node: a list of values plus any errors recorded on the root itself. */
+export type Program = {
+  tag: "program",
+  expressions: JsonValue[],
+  error?: ConcreteError,
+} & ConcreteInfo
+
+/** A value node, or an `error-expression` node that carries only errors and a span. */
+export type JsonValue =
+| JsonObject
+| JsonArray
+| JsonScalar
+| { tag: "error-expression", error: ConcreteError } & ConcreteInfo
+
+/** An object node. `close` is optional, so a tree without a closing delimiter is representable. */
+export type JsonObject = {
+  tag: "object",
+  open: DelimiterToken,
+  members: MemberItem[],
+  close?: DelimiterToken,
+  error?: ConcreteError
+} & ConcreteInfo
+
+/** One entry of `JsonObject.members`: a member or an `error-object-separator` node. */
+export type MemberItem =
+| { tag: "member" } & Member
+| { tag: "error-object-separator", error: ConcreteError } & ConcreteInfo
+
+/** A key/value pair. `colon` is optional, so a member without a `:` token is representable. */
+export type Member = {
+  key: StringLiteral,
+  colon?: { tag: "colon", span: CodePointSpan },
+  value: JsonValue,
+  error?: ConcreteError
+} & ConcreteInfo
+
+/** An array node. `close` is optional, so a tree without a closing delimiter is representable. */
+export type JsonArray = {
+  tag: "array",
+  open: DelimiterToken,
+  items: ArrayItem[],
+  close?: DelimiterToken,
+  error?: ConcreteError
+} & ConcreteInfo
+
+/** One entry of `JsonArray.items`: a value or an `error-array-separator` node. */
+export type ArrayItem =
+| JsonValue
+| { tag: "error-array-separator", error: ConcreteError } & ConcreteInfo
+
+/** A string node, or an `error-string` node that carries only errors and a span. */
+export type StringLiteral =
+| {
+  tag: "string",
+  // TODO: There are various possibilities of storing the actual literal value. But I don't care about this right now.
+  value: string,
+  error?: ConcreteError,
+} & ConcreteInfo
+| { tag: "error-string", error: ConcreteError } & ConcreteInfo
+
+/** A number node, or an `error-number` node that carries only errors and a span. */
+export type NumberLiteral =
+| {
+  tag: "number",
+  // TODO: There are various possibilities of storing the actual literal value. But I don't care about this right now.
+  value: number,
+  error?: ConcreteError,
+} & ConcreteInfo
+| { tag: "error-number", error: ConcreteError } & ConcreteInfo
+
+/** The non-container values: numbers, strings and the three constants. */
+export type JsonScalar =
+// === number ===
+| NumberLiteral
+// === string ===
+| StringLiteral
+// === constants ===
+| { tag: "null", error?: ConcreteError } & ConcreteInfo
+| { tag: "true", error?: ConcreteError } & ConcreteInfo
+| { tag: "false", error?: ConcreteError } & ConcreteInfo
+
diff --git a/src/languages/lisp/syntax.ts b/src/languages/lisp/syntax.ts
index d377730..8310bdf 100644
--- a/src/languages/lisp/syntax.ts
+++ b/src/languages/lisp/syntax.ts
@@ -17,10 +17,10 @@ export namespace ConcreteError {
 }
 
 export type DelimiterToken =
-  | { tag: "open-paren"; span: CodePointSpan }
-  | { tag: "close-paren"; span: CodePointSpan }
-  | { tag: "open-bracket"; span: CodePointSpan }
-  | { tag: "close-bracket"; span: CodePointSpan };
+  | { tag: "open-paren", span: CodePointSpan }
+  | { tag: "close-paren", span: CodePointSpan }
+  | { tag: "open-bracket", span: CodePointSpan }
+  | { tag: "close-bracket", span: CodePointSpan }
 
 export namespace DelimiterToken {
   export function openParen(span: CodePointSpan): DelimiterToken {