From d1491ec5e68e0e96a278263c377cbec364dde9f5 Mon Sep 17 00:00:00 2001 From: Yura Dupyn <2153100+omedusyo@users.noreply.github.com> Date: Sat, 25 Apr 2026 16:42:57 +0200 Subject: [PATCH] Stub stuff out. Include markdown discussion --- PARTIAL_SYNTAX.md | 286 ++++++++++++++++++++++++ SOURCE_REGION_EXPERIMENTS.md | 16 ++ src/languages/json/SYNTAX.md | 0 src/languages/json/index.ts | 0 src/languages/json/syntax.ts | 0 src/ui/languages/json/App.tsx | 0 src/ui/languages/json/StructurePane.tsx | 0 src/ui/languages/json/format.ts | 0 8 files changed, 302 insertions(+) create mode 100644 PARTIAL_SYNTAX.md create mode 100644 SOURCE_REGION_EXPERIMENTS.md create mode 100644 src/languages/json/SYNTAX.md create mode 100644 src/languages/json/index.ts create mode 100644 src/languages/json/syntax.ts create mode 100644 src/ui/languages/json/App.tsx create mode 100644 src/ui/languages/json/StructurePane.tsx create mode 100644 src/ui/languages/json/format.ts diff --git a/PARTIAL_SYNTAX.md b/PARTIAL_SYNTAX.md new file mode 100644 index 0000000..3c9ef65 --- /dev/null +++ b/PARTIAL_SYNTAX.md @@ -0,0 +1,286 @@ +# Partial Syntax Notes + +This document records the current candidate design for partial concrete syntax in the Lisp parser experiment. + +The goal is not a generic parser framework. The goal is to make the current toy Lisp syntax rich enough to represent recovered malformed syntax while preserving the ability to distinguish valid trees from partial trees. + +## Core Constraints + +- `ValidConcreteSyntax` should be a subtype of `PartialConcreteSyntax`. +- If a `PartialConcreteSyntax` contains no errors, it should be safe to coerce it to `ValidConcreteSyntax` without rebuilding the tree. +- Concrete syntax may preserve syntactic choices that are semantically irrelevant. +- Error payloads should remain structured and span-aware. + +The `never` parameter is the main trick: when `Error = never`, error branches and `error?: never` fields become unconstructable. 
+ +## Current Candidate Types + +```ts +export type ConcreteSyntaxResult = + | { tag: "valid"; value: ValidConcreteSyntax } + | { tag: "invalid"; value: PartialConcreteSyntax }; + +export type ValidConcreteSyntax = + Program<{ span: CodePointSpan }, never>; + +export type PartialConcreteSyntax = + Program<{ span: CodePointSpan }, ConcreteError>; + +export type ConcreteError = ConcreteErrorNode[]; // Convention: non-empty. + +export namespace ConcreteError { + export function single(node: ConcreteErrorNode): ConcreteError { + return [node]; + } +} + +export type ConcreteErrorNode = { + span: CodePointSpan; + error: ParseError; + panickedOver?: CodePointSpan; +}; + +export type DelimiterToken = + | { tag: "open-paren"; span: CodePointSpan } + | { tag: "close-paren"; span: CodePointSpan } + | { tag: "open-bracket"; span: CodePointSpan } + | { tag: "close-bracket"; span: CodePointSpan }; + +export type Program<Info, Error> = ({ + expressions: Expr<Info, Error>[]; + error?: Error; +} & Info); + +export type Expr<Info, Error> = + | Literal<Info, Error> + | List<Info, Error> + | ({ tag: "error-expression"; error: Error } & Info); + +export type List<Info, Error> = ({ + tag: "list"; + open: DelimiterToken; + items: ListItem<Info, Error>[]; + close?: DelimiterToken; + error?: Error; +} & Info); + +export type ListItem<Info, Error> = + | Expr<Info, Error> + | ({ tag: "error-list-separator"; error: Error } & Info); + +export type Literal<Info, Error> = + | ({ tag: "number"; value: number } & Info) + | ({ tag: "error-number"; error: Error } & Info) + | ({ tag: "identifier"; value: Identifier } & Info) + | ({ tag: "error-identifier"; error: Error } & Info); + +export type Identifier = string; +``` + +## Error Ownership + +Errors are owned by the smallest useful syntax node. + +- `error-expression`: syntax that cannot reasonably be interpreted as any expression node. +- `error-number`: malformed numeric literal, such as `123fasd`. +- `error-identifier`: malformed identifier, if the language later has such cases. +- `error-list-separator`: malformed relationship between neighboring list items. 
+- `list.error`: structural error about the whole list, such as missing or mismatched close delimiter. +- `program.error`: top-level recovery errors that do not belong to one expression. + +`ConcreteErrorNode.span` is the primary diagnostic focus. `panickedOver` is recovery/debug metadata showing what source region was skipped while recovering. + +## Delimiters + +Delimiter tokens are stored explicitly because this is concrete syntax. + +Even if round and square lists are semantically equivalent later, the concrete tree should preserve whether the source used: + +```lisp +(a b c) +[a, b, c] +``` + +This is useful for UI, formatting, recovery diagnostics, and syntax experiments. A later semantic AST can erase this distinction. + +## Lisp Syntax Under Test + +The experiment now has two list syntaxes. + +Round lists have no separators: + +```lisp +(a b c d) +``` + +Square lists require commas between neighboring elements: + +```lisp +[a, b, c, d] +``` + +Square lists allow optional leading and trailing commas: + +```lisp +[,a, b, c, d] +[a, b, c, d,] +[,a, b, c, d,] +``` + +Adjacent top-level expressions are allowed: + +```lisp +foo(bar) +``` + +This is equivalent to: + +```lisp +foo (bar) +``` + +But malformed token fragments should not silently split into valid expressions: + +```lisp +123fasd +``` + +This should probably become an `error-number`, not `number 123` followed by `identifier fasd`. + +## Examples To Drive Implementation + +### Valid Program + +```lisp +foo 123 (a b) [c, d, e] +``` + +Expected: `ConcreteSyntaxResult.valid`. + +### Unexpected Top-Level Close + +```lisp +foo ) +``` + +Likely: valid `foo` plus `program.error`, or invalid program containing a top-level recovery error. + +### Unknown Expression In Round List + +```lisp +(foo @@@ 1) +``` + +Likely: `error-expression` item inside the list, with recovery continuing at `1`. + +### Missing Close Delimiter + +```lisp +(foo 1 +``` + +Likely: list node with `open`, no `close`, and `list.error`. 
+ +### Mismatched Close Delimiter + +```lisp +[foo) +``` + +Likely: list node preserving `open-bracket` and `close-paren`, plus `list.error`. + +### Missing Square List Separator + +```lisp +[a, b c, d] +``` + +Likely: `error-list-separator` between `b` and `c`. + +### Extra Square List Separator + +```lisp +[a,, b] +``` + +Possible interpretations: + +- allow repeated commas as empty separators +- produce `error-list-separator` +- produce `error-expression` for a missing element + +This needs a deliberate choice. + +### Malformed Number + +```lisp +123fasd +``` + +Likely: `error-number` covering the full malformed fragment. + +## Recovery Strategies To Compare + +### Panic Until Expression Start + +Skip until a plausible expression start appears. + +Good for simple garbage recovery, but may split malformed token fragments too aggressively. + +### Panic Until Delimiter Or Expression Start + +Inside a list, skip until: + +- close delimiter +- expression start +- EOF + +Good for preserving list structure. + +### Panic Until Whitespace Boundary + +For token-like errors, skip the rest of the non-whitespace fragment. + +Useful for: + +```lisp +123fasd +``` + +### Separator-Aware Recovery + +Inside square lists, use commas and close brackets as synchronization points. + +Useful for: + +```lisp +[a, b c, d] +[a,, b] +``` + +### Delimiter-Aware Recovery + +Preserve exact open and close delimiter tokens, even if they mismatch. + +Useful for: + +```lisp +[foo) +(foo] +``` + +## Current Recommendation + +The current type design is good enough to try. + +Implementation should focus on concrete examples rather than further type abstraction: + +```lisp +123fasd +(foo @@@ 1) +(foo 1 +[a, b c, d] +[foo) +``` + +After implementing those, the UI should reveal whether node-owned errors, `error-list-separator`, and explicit delimiter tokens feel useful or too heavy. 
diff --git a/SOURCE_REGION_EXPERIMENTS.md b/SOURCE_REGION_EXPERIMENTS.md new file mode 100644 index 0000000..e0e5393 --- /dev/null +++ b/SOURCE_REGION_EXPERIMENTS.md @@ -0,0 +1,16 @@ +# Source Region Parser Experiment Notes + +## Current Parser Experiment + +- The parser works directly with `CodePointIndex` and `CodePointSpan`; rich `Span` conversion is intentionally left to rendering or reporting code. +- Structured errors are currently enough to identify the failing region and the syntactic fact that failed, without committing to message text. +- Empty documents are valid because the document grammar is a sequence of expressions. + +## Source Region Observations + +- `SourceText.getSpan(CodePointSpan)` is the key boundary operation: parsers can stay low-level, while diagnostics can opt into line and column information later. +- Zero-width EOF spans are important for parse errors such as an unclosed list. The existing `eofSpan` support fits this well. +- Parser clients still need a few tiny local span helpers, such as current-code-point span and EOF cursor span. +- `renderSpan` returning structured `LineView` data works well for UI rendering because the application can choose its own DOM and styling without reimplementing line slicing. 
+ +## Potential Nice-To-Haves diff --git a/src/languages/json/SYNTAX.md b/src/languages/json/SYNTAX.md new file mode 100644 index 0000000..e69de29 diff --git a/src/languages/json/index.ts b/src/languages/json/index.ts new file mode 100644 index 0000000..e69de29 diff --git a/src/languages/json/syntax.ts b/src/languages/json/syntax.ts new file mode 100644 index 0000000..e69de29 diff --git a/src/ui/languages/json/App.tsx b/src/ui/languages/json/App.tsx new file mode 100644 index 0000000..e69de29 diff --git a/src/ui/languages/json/StructurePane.tsx b/src/ui/languages/json/StructurePane.tsx new file mode 100644 index 0000000..e69de29 diff --git a/src/ui/languages/json/format.ts b/src/ui/languages/json/format.ts new file mode 100644 index 0000000..e69de29