diff --git a/src/languages/lisp/experiments.ts b/src/languages/lisp/experiments.ts new file mode 100644 index 0000000..8875f9c --- /dev/null +++ b/src/languages/lisp/experiments.ts @@ -0,0 +1,78 @@ +import { CodePointString, sourceText } from 'source-region'; +import { parseDocument, programOf } from './parser'; +import { matchCodePointString } from '../../recognizers'; +import { Program } from './syntax'; + +// === Experiments === + +function experiment00_emptyDocument(): void { + logParse("empty document", ""); +} + +function experiment01_topLevelExpressions(): void { + logParse("top-level expressions", "foo 123 (bar baz_1 qux-2) [a, b, c]"); +} + +function experiment02_nestedLists(): void { + logParse("nested lists", "(define square (_ x) (* x x))"); +} + +function experiment03_unclosedList(): void { + logParse("unclosed list", "(foo 123\n (bar 456)"); +} + +function experiment04_recoverAtDocumentLevel(): void { + logParse("document recovery", "foo ) @@@ (bar 1) 99"); +} + +function experiment05_recoverInsideList(): void { + logParse("list recovery", "(foo @@@ 1 (bar # 2) baz)"); +} + +function experiment06_unicodeSpans(): void { + logParse("unicode spans", "alpha 💥 (beta 2)"); +} + +function experiment07_matchCodePointString(): void { + const region = sourceText("λx").fullRegion(); + const cursor = region.makeCursor(); + const lambda = CodePointString.makeFromString("λ"); + console.log("==== recognizer:match code point string ===="); + console.dir(matchCodePointString(cursor, lambda), { depth: null }); + console.log("cursor", cursor.current()); +} + +function experiment08_squareListSeparator(): void { + logParse("square list separator", "[a, b c, d]"); +} + +function experiment09_invalidNumberFragment(): void { + logParse("invalid number fragment", "123fasd"); +} + +function experiment10_repeatedLeadingComma(): void { + logParse("repeated leading comma", "[, , foo, bar]"); +} + +function logParse(name: string, input: string): void { + const region = sourceText(input).fullRegion(); + const result = parseDocument(region); + console.log(`==== parser:${name} ====`); + console.log(input); + console.log(result.syntax.tag, Program.show(programOf(result.syntax))); + console.dir(result.errors, { depth: null }); +} + +[ + experiment00_emptyDocument, + experiment01_topLevelExpressions, + experiment02_nestedLists, + experiment03_unclosedList, + experiment04_recoverAtDocumentLevel, + experiment05_recoverInsideList, + experiment06_unicodeSpans, + experiment07_matchCodePointString, + experiment08_squareListSeparator, + experiment09_invalidNumberFragment, + experiment10_repeatedLeadingComma, +].forEach((experiment) => experiment()); diff --git a/src/languages/lisp/index.ts b/src/languages/lisp/index.ts new file mode 100644 index 0000000..36a0d85 --- /dev/null +++ b/src/languages/lisp/index.ts @@ -0,0 +1,22 @@ +export { parseDocument, programOf } from './parser'; +export type { + ConcreteSyntaxResult, + ParseDocumentResult, + PartialConcreteSyntax, + ValidConcreteSyntax, + PartialExpr, + PartialList, + PartialListItem, +} from './parser'; +export type { FoundSyntax, ParseError } from './parse_errors'; +export type { + ConcreteError, + ConcreteErrorNode, + ConcreteInfo, + DelimiterToken, + Expr, + List, + ListItem, + Program, +} from './syntax'; +export { Expr as LispExpr } from './syntax'; diff --git a/src/languages/lisp/parse_errors.ts b/src/languages/lisp/parse_errors.ts new file mode 100644 index 0000000..d53f775 --- /dev/null +++ b/src/languages/lisp/parse_errors.ts @@ -0,0 +1,40 @@ +import type { CodePoint, CodePointSpan } from 'source-region'; + +export type ParseError = +| { + tag: "expected-expression"; + span: CodePointSpan; + found: FoundSyntax; + } +| { + tag: "expected-close-delimiter"; + span: CodePointSpan; + open: CodePointSpan; + expected: "paren" | "bracket"; + found: FoundSyntax; + } +| { + tag: "unexpected-close-delimiter"; + span: CodePointSpan; + delimiter: "paren" | "bracket"; + } +| { + tag: "expected-list-separator"; + span: CodePointSpan; + found: FoundSyntax; + } +| { + tag: "unexpected-code-point"; + span: CodePointSpan; + found: FoundSyntax; + } +| { + tag: "invalid-number"; + span: CodePointSpan; + text: string; + reason: "unsafe-integer" | "identifier-suffix"; + }; + +export type FoundSyntax = +| { tag: "code-point"; value: CodePoint; span: CodePointSpan } +| { tag: "eof"; span: CodePointSpan }; diff --git a/src/languages/lisp/parser.ts b/src/languages/lisp/parser.ts new file mode 100644 index 0000000..8b5680f --- /dev/null +++ b/src/languages/lisp/parser.ts @@ -0,0 +1,402 @@ +import { + SourceCursor, + char, + isAsciiAlpha, + isAsciiAlphanumeric, + isAsciiWhitespace, + isDigit, +} from 'source-region'; +import type { + CodePoint, + CodePointIndex, + CodePointSpan, + SourceRegion, +} from 'source-region'; +import type { FoundSyntax, ParseError } from './parse_errors'; +import { consumeWhile, consumeWhile1, skipWhile } from '../../recognizers'; +import { + ConcreteError, + DelimiterToken, + Expr, + ListItem, + Program, +} from './syntax'; +import type { + ConcreteInfo, + ListItem as ListItemType, + List as ListType, + Expr as ExprType, +} from './syntax'; + +// Whitespace convention: +// - parseDocument consumes leading whitespace before each top-level expression. +// - parseExpr assumes leading whitespace has already been consumed. +// - Successful expression parsers stop immediately after the expression. +// - list parsers own whitespace between list items and before the closing delimiter. +// +// Recovery policy: +// - Unknown expressions consume at least one code point, then panic until a +// delimiter, whitespace, or plausible expression start. +// - Round lists do not require separators. +// - Square lists require commas between neighboring expressions, but allow +// optional leading and trailing commas. +// +// Span convention: +// - Parser internals and diagnostics use CodePointSpan. +// - Rendering can convert these later with SourceText.getSpan. + +const OPEN_PAREN = char('('); +const CLOSE_PAREN = char(')'); +const OPEN_BRACKET = char('['); +const CLOSE_BRACKET = char(']'); +const COMMA = char(','); +const DASH = char('-'); +const UNDERSCORE = char('_'); + + +export type ConcreteSyntaxResult = +| { tag: "valid", value: ValidConcreteSyntax } +| { tag: "invalid", value: PartialConcreteSyntax } + +export type ParseDocumentResult = { + syntax: ConcreteSyntaxResult; + errors: ParseError[]; +}; + +// The main constraints are +// - `ValidConcreteSyntax` should be a subtype of `PartialConcreteSyntax` +// - if `PartialConcreteSyntax` doesn't contain any sort of error nodes, we should be able to coerce it to `ValidConcreteSyntax` without rebuilding the whole tree +export type ValidConcreteSyntax = Program +export type PartialConcreteSyntax = Program +export type PartialExpr = ExprType; +export type PartialList = ListType; +export type PartialListItem = ListItemType; + +export namespace ConcreteSyntaxResult { + export function valid(value: ValidConcreteSyntax): ConcreteSyntaxResult { + return { tag: "valid", value }; + } + + export function invalid(value: PartialConcreteSyntax): ConcreteSyntaxResult { + return { tag: "invalid", value }; + } +} + +export function programOf(result: ConcreteSyntaxResult): PartialConcreteSyntax { + return result.value; +} + + +export function parseDocument(region: SourceRegion): ParseDocumentResult { + return new Parser(region).parseDocument(); +} + +class Parser { + private readonly cursor: SourceCursor; + private readonly errors: ParseError[] = []; + + constructor(private readonly region: SourceRegion) { + this.cursor = region.makeCursor(); + } + + parseDocument(): ParseDocumentResult { + const expressions: PartialExpr[] = []; + + while (true) { + this.skipWhitespace(); + if (this.cursor.isAtEnd()) break; + + expressions.push(this.parseExpr()); + } + + const program = Program.make(expressions, { span: this.region.codePointSpan }); + return { + syntax: this.errors.length === 0 + ? ConcreteSyntaxResult.valid(program as ValidConcreteSyntax) + : ConcreteSyntaxResult.invalid(program as PartialConcreteSyntax), + errors: this.errors, + }; + } + + private parseExpr(): PartialExpr { + const cp = this.cursor.peek(); + + if (cp === undefined) { + return this.errorExpression(this.makeError({ + tag: "expected-expression", + span: this.cursor.eofSpan(), + found: this.found(), + })); + } + + if (cp === CLOSE_PAREN || cp === CLOSE_BRACKET) { + const delimiter = cp === CLOSE_PAREN ? "paren" : "bracket"; + const span = this.cursor.currentSpan(); + this.cursor.advance(); + return this.errorExpression(this.makeError({ + tag: "unexpected-close-delimiter", + span, + delimiter, + })); + } + + if (cp === OPEN_PAREN) return this.parseRoundList(); + if (cp === OPEN_BRACKET) return this.parseSquareList(); + if (isDigit(cp)) return this.parseNumber(); + if (isIdentifierStart(cp)) return this.parseIdentifier(); + + return this.parseUnknownExpression(); + } + + private parseRoundList(): PartialExpr { + const start = this.cursor.checkpoint(); + const open = DelimiterToken.openParen(this.cursor.currentSpan()); + this.cursor.advance(); + + const items: PartialListItem[] = []; + + while (true) { + this.skipWhitespace(); + + const cp = this.cursor.peek(); + if (cp === CLOSE_PAREN) { + const close = DelimiterToken.closeParen(this.cursor.currentSpan()); + this.cursor.advance(); + return Expr.list(open, items, this.cursor.spanFrom(start), close); + } + + if (cp === CLOSE_BRACKET) { + const close = DelimiterToken.closeBracket(this.cursor.currentSpan()); + const error = this.makeError({ + tag: "expected-close-delimiter", + span: this.cursor.currentSpan(), + open: open.span, + expected: "paren", + found: this.found(), + }); + this.cursor.advance(); + return Expr.list(open, items, this.cursor.spanFrom(start), close, error); + } + + if (cp === undefined) { + const error = this.makeError({ + tag: "expected-close-delimiter", + span: this.cursor.eofSpan(), + open: open.span, + expected: "paren", + found: this.found(), + }); + return Expr.list(open, items, this.cursor.spanFrom(start), undefined, error); + } + + items.push(this.parseExpr()); + } + } + + private parseSquareList(): PartialExpr { + const start = this.cursor.checkpoint(); + const open = DelimiterToken.openBracket(this.cursor.currentSpan()); + this.cursor.advance(); + + const items: PartialListItem[] = []; + let sawExpression = false; + let sawLeadingComma = false; + let needsSeparator = false; + + while (true) { + this.skipWhitespace(); + + const cp = this.cursor.peek(); + if (cp === CLOSE_BRACKET) { + const close = DelimiterToken.closeBracket(this.cursor.currentSpan()); + this.cursor.advance(); + return Expr.list(open, items, this.cursor.spanFrom(start), close); + } + + if (cp === CLOSE_PAREN) { + const close = DelimiterToken.closeParen(this.cursor.currentSpan()); + const error = this.makeError({ + tag: "expected-close-delimiter", + span: this.cursor.currentSpan(), + open: open.span, + expected: "bracket", + found: this.found(), + }); + this.cursor.advance(); + return Expr.list(open, items, this.cursor.spanFrom(start), close, error); + } + + if (cp === undefined) { + const error = this.makeError({ + tag: "expected-close-delimiter", + span: this.cursor.eofSpan(), + open: open.span, + expected: "bracket", + found: this.found(), + }); + return Expr.list(open, items, this.cursor.spanFrom(start), undefined, error); + } + + if (needsSeparator) { + if (cp === COMMA) { + this.cursor.advance(); + needsSeparator = false; + continue; + } + + const error = this.makeError({ + tag: "expected-list-separator", + span: this.cursor.currentSpan(), + found: this.found(), + }); + items.push(ListItem.errorSeparator(error, this.cursor.currentSpan())); + needsSeparator = false; + continue; + } + + if (cp === COMMA) { + const commaSpan = this.cursor.currentSpan(); + this.cursor.advance(); + + if (sawExpression) { + const error = this.makeError({ + tag: "expected-expression", + span: commaSpan, + found: { tag: "code-point", value: COMMA, span: commaSpan }, + }); + items.push(this.errorExpression(error, commaSpan)); + } else if (sawLeadingComma) { + const error = this.makeError({ + tag: "expected-expression", + span: commaSpan, + found: { tag: "code-point", value: COMMA, span: commaSpan }, + }); + items.push(this.errorExpression(error, commaSpan)); + } else { + sawLeadingComma = true; + } + + continue; + } + + items.push(this.parseExpr()); + sawExpression = true; + needsSeparator = true; + } + } + + private parseNumber(): PartialExpr { + const start = this.cursor.checkpoint(); + const match = consumeWhile1(this.cursor, isDigit); + if (match.tag === "none") { + throw new Error("parseNumber called when cursor is not at a number"); + } + + if (isIdentifierStart(this.cursor.peek() ?? -1)) { + consumeWhile(this.cursor, isIdentifierPart); + const span = this.cursor.spanFrom(start); + const text = this.cursor.slice(span); + return Expr.errorNumber(this.makeError({ + tag: "invalid-number", + span, + text, + reason: "identifier-suffix", + }), span); + } + + const { span, text } = match; + const value = Number(text); + + if (!Number.isSafeInteger(value)) { + return Expr.errorNumber(this.makeError({ + tag: "invalid-number", + span, + text, + reason: "unsafe-integer", + }), span); + } + + return Expr.number(value, span); + } + + private parseIdentifier(): PartialExpr { + const start = this.cursor.checkpoint(); + this.cursor.advance(); + + consumeWhile(this.cursor, isIdentifierPart); + const span = this.cursor.spanFrom(start); + return Expr.identifier(this.cursor.slice(span), span); + } + + private parseUnknownExpression(): PartialExpr { + const start = this.cursor.checkpoint(); + const focus = this.cursor.currentSpan(); + + this.cursor.advance(); + while (true) { + const cp = this.cursor.peek(); + if ( + cp === undefined + || isAsciiWhitespace(cp) + || isClosingDelimiter(cp) + || isExpressionStart(cp) + ) { + break; + } + this.cursor.advance(); + } + + const panickedOver = this.cursor.spanFrom(start); + const error = this.makeError({ + tag: "expected-expression", + span: focus, + found: { tag: "code-point", value: this.region.source.codePointAt(focus.start), span: focus }, + }, panickedOver); + + return this.errorExpression(error, panickedOver); + } + + private skipWhitespace(): void { + skipWhile(this.cursor, isAsciiWhitespace); + } + + private found(): FoundSyntax { + const cp = this.cursor.peek(); + if (cp === undefined) return { tag: "eof", span: this.cursor.eofSpan() }; + return { tag: "code-point", value: cp, span: this.cursor.currentSpan() }; + } + + private makeError(error: ParseError, panickedOver?: CodePointSpan): ConcreteError { + this.errors.push(error); + return ConcreteError.single({ + span: error.span, + error, + panickedOver, + }); + } + + private errorExpression(error: ConcreteError, span?: CodePointSpan): PartialExpr { + return Expr.errorExpression(error, span ?? error[0].span); + } +} + +function isExpressionStart(cp: CodePoint | undefined): boolean { + return cp !== undefined && ( + cp === OPEN_PAREN + || cp === OPEN_BRACKET + || isDigit(cp) + || isIdentifierStart(cp) + ); +} + +function isClosingDelimiter(cp: CodePoint): boolean { + return cp === CLOSE_PAREN || cp === CLOSE_BRACKET; +} + +function isIdentifierStart(cp: CodePoint): boolean { + return isAsciiAlpha(cp) || cp === DASH || cp === UNDERSCORE; +} + +function isIdentifierPart(cp: CodePoint): boolean { + return isAsciiAlphanumeric(cp) || cp === DASH || cp === UNDERSCORE; +} diff --git a/src/languages/lisp/syntax.ts b/src/languages/lisp/syntax.ts new file mode 100644 index 0000000..d377730 --- /dev/null +++ b/src/languages/lisp/syntax.ts @@ -0,0 +1,152 @@ +import type { CodePointSpan } from 'source-region'; +import type { ParseError } from './parse_errors'; + +export type ConcreteInfo = { span: CodePointSpan }; + +export type ConcreteError = ConcreteErrorNode[] // Convention: can't be empty. +export type ConcreteErrorNode = { + span: CodePointSpan, + error: ParseError, + panickedOver?: CodePointSpan, +} + +export namespace ConcreteError { + export function single(node: ConcreteErrorNode): ConcreteError { + return [node]; + } +} + +export type DelimiterToken = + | { tag: "open-paren"; span: CodePointSpan } + | { tag: "close-paren"; span: CodePointSpan } + | { tag: "open-bracket"; span: CodePointSpan } + | { tag: "close-bracket"; span: CodePointSpan }; + +export namespace DelimiterToken { + export function openParen(span: CodePointSpan): DelimiterToken { + return { tag: "open-paren", span }; + } + + export function closeParen(span: CodePointSpan): DelimiterToken { + return { tag: "close-paren", span }; + } + + export function openBracket(span: CodePointSpan): DelimiterToken { + return { tag: "open-bracket", span }; + } + + export function closeBracket(span: CodePointSpan): DelimiterToken { + return { tag: "close-bracket", span }; + } +} + +export type Program = { + tag: "program", + expressions: Expr[], + error?: Error, +} & Info + +export type Expr = +| Literal +| List +| { tag: "error-expression", error: Error } & Info // This is for errors that don't really correspond to any sort of node. Unknown errors. + +export type List = + { tag: "list", open: DelimiterToken, items: ListItem[], close?: DelimiterToken, error?: Error } & Info + +export type ListItem = +| Expr +| { tag: "error-list-separator", error: Error } & Info + +export type Literal = +// === number === +| { tag: "number", value: number } & Info +| { tag: "error-number", error: Error } & Info +// === identifier === +| { tag: "identifier", value: Identifier } & Info +| { tag: "error-identifier", error: Error } & Info + +export type Identifier = string + +export namespace Program { + export function make( + expressions: Expr[], + info: Info, + error?: Error, + ): Program { + return error === undefined + ? { tag: "program", expressions, ...info } + : { tag: "program", expressions, error, ...info }; + } + + export function show(program: Program): string { + return program.expressions.map(Expr.show).join(" "); + } +} + +export namespace Expr { + export function number(value: number, span: CodePointSpan): Expr { + return { tag: "number", value, span }; + } + + export function errorNumber(error: ConcreteError, span: CodePointSpan): Expr { + return { tag: "error-number", error, span }; + } + + export function identifier(value: Identifier, span: CodePointSpan): Expr { + return { tag: "identifier", value, span }; + } + + export function errorIdentifier(error: ConcreteError, span: CodePointSpan): Expr { + return { tag: "error-identifier", error, span }; + } + + export function list( + open: DelimiterToken, + items: ListItem[], + span: CodePointSpan, + close?: DelimiterToken, + error?: ConcreteError, + ): Expr { + return { tag: "list", open, items, close, error, span }; + } + + export function errorExpression(error: ConcreteError, span: CodePointSpan): Expr { + return { tag: "error-expression", error, span }; + } + + export function show(expr: Expr): string { + switch (expr.tag) { + case "number": + return `${expr.value}`; + case "identifier": + return expr.value; + case "error-number": + return ""; + case "error-identifier": + return ""; + case "error-expression": + return ""; + case "list": + return showList(expr); + } + } + + function showList(list: List): string { + const open = list.open.tag === "open-bracket" ? "[" : "("; + const close = list.open.tag === "open-bracket" ? "]" : ")"; + const sep = list.open.tag === "open-bracket" ? ", " : " "; + return `${open}${list.items.map(ListItem.show).join(sep)}${close}`; + } +} + +export namespace ListItem { + export function errorSeparator(error: ConcreteError, span: CodePointSpan): ListItem { + return { tag: "error-list-separator", error, span }; + } + + export function show(item: ListItem): string { + if (item.tag === "error-list-separator") return ""; + return Expr.show(item); + } +} diff --git a/src/parse_errors.ts b/src/parse_errors.ts index d53f775..e25c7c1 100644 --- a/src/parse_errors.ts +++ b/src/parse_errors.ts @@ -1,40 +1 @@ -import type { CodePoint, CodePointSpan } from 'source-region'; - -export type ParseError = -| { - tag: "expected-expression"; - span: CodePointSpan; - found: FoundSyntax; - } -| { - tag: "expected-close-delimiter"; - span: CodePointSpan; - open: CodePointSpan; - expected: "paren" | "bracket"; - found: FoundSyntax; - } -| { - tag: "unexpected-close-delimiter"; - span: CodePointSpan; - delimiter: "paren" | "bracket"; - } -| { - tag: "expected-list-separator"; - span: CodePointSpan; - found: FoundSyntax; - } -| { - tag: "unexpected-code-point"; - span: CodePointSpan; - found: FoundSyntax; - } -| { - tag: "invalid-number"; - span: CodePointSpan; - text: string; - reason: "unsafe-integer" | "identifier-suffix"; - }; - -export type FoundSyntax = -| { tag: "code-point"; value: CodePoint; span: CodePointSpan } -| { tag: "eof"; span: CodePointSpan }; +export * from './languages/lisp/parse_errors'; diff --git a/src/parser.experiments.ts b/src/parser.experiments.ts index 21b0678..83262b4 100644 --- a/src/parser.experiments.ts +++ b/src/parser.experiments.ts @@ -1,78 +1 @@ -import { CodePointString, sourceText } from 'source-region'; -import { parseDocument, programOf } from './parser'; -import { matchCodePointString } from './recognizers'; -import { Program } from './syntax'; - -// === Experiments === - -function experiment00_emptyDocument(): void { - logParse("empty document", ""); -} - -function experiment01_topLevelExpressions(): void { - logParse("top-level expressions", "foo 123 (bar baz_1 qux-2) [a, b, c]"); -} - -function experiment02_nestedLists(): void { - logParse("nested lists", "(define square (_ x) (* x x))"); -} - -function experiment03_unclosedList(): void { - logParse("unclosed list", "(foo 123\n (bar 456)"); -} - -function experiment04_recoverAtDocumentLevel(): void { - logParse("document recovery", "foo ) @@@ (bar 1) 99"); -} - -function experiment05_recoverInsideList(): void { - logParse("list recovery", "(foo @@@ 1 (bar # 2) baz)"); -} - -function experiment06_unicodeSpans(): void { - logParse("unicode spans", "alpha 💥 (beta 2)"); -} - -function experiment07_matchCodePointString(): void { - const region = sourceText("λx").fullRegion(); - const cursor = region.makeCursor(); - const lambda = CodePointString.makeFromString("λ"); - console.log("==== recognizer:match code point string ===="); - console.dir(matchCodePointString(cursor, lambda), { depth: null }); - console.log("cursor", cursor.current()); -} - -function experiment08_squareListSeparator(): void { - logParse("square list separator", "[a, b c, d]"); -} - -function experiment09_invalidNumberFragment(): void { - logParse("invalid number fragment", "123fasd"); -} - -function experiment10_repeatedLeadingComma(): void { - logParse("repeated leading comma", "[, , foo, bar]"); -} - -function logParse(name: string, input: string): void { - const region = sourceText(input).fullRegion(); - const result = parseDocument(region); - console.log(`==== parser:${name} ====`); - console.log(input); - console.log(result.syntax.tag, Program.show(programOf(result.syntax))); - console.dir(result.errors, { depth: null }); -} - -[ - experiment00_emptyDocument, - experiment01_topLevelExpressions, - experiment02_nestedLists, - experiment03_unclosedList, - experiment04_recoverAtDocumentLevel, - experiment05_recoverInsideList, - experiment06_unicodeSpans, - experiment07_matchCodePointString, - experiment08_squareListSeparator, - experiment09_invalidNumberFragment, - experiment10_repeatedLeadingComma, -].forEach((experiment) => experiment()); +import './languages/lisp/experiments'; diff --git a/src/parser.ts b/src/parser.ts index ea33ed8..692a212 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -1,400 +1 @@ -import { - SourceCursor, - char, - isAsciiAlpha, - isAsciiAlphanumeric, - isAsciiWhitespace, - isDigit, -} from 'source-region'; -import type { - CodePoint, - CodePointIndex, - CodePointSpan, - SourceRegion, -} from 'source-region'; -import type { FoundSyntax, ParseError } from './parse_errors'; -import { consumeWhile, consumeWhile1, skipWhile } from './recognizers'; -import { - ConcreteError, - DelimiterToken, - Expr, - ListItem, - Program, -} from './syntax'; -import type { - ConcreteInfo, - ListItem as ListItemType, - Expr as ExprType, -} from './syntax'; - -// Whitespace convention: -// - parseDocument consumes leading whitespace before each top-level expression. -// - parseExpr assumes leading whitespace has already been consumed. -// - Successful expression parsers stop immediately after the expression. -// - list parsers own whitespace between list items and before the closing delimiter. -// -// Recovery policy: -// - Unknown expressions consume at least one code point, then panic until a -// delimiter, whitespace, or plausible expression start. -// - Round lists do not require separators. -// - Square lists require commas between neighboring expressions, but allow -// optional leading and trailing commas. -// -// Span convention: -// - Parser internals and diagnostics use CodePointSpan. -// - Rendering can convert these later with SourceText.getSpan. - -const OPEN_PAREN = char('('); -const CLOSE_PAREN = char(')'); -const OPEN_BRACKET = char('['); -const CLOSE_BRACKET = char(']'); -const COMMA = char(','); -const DASH = char('-'); -const UNDERSCORE = char('_'); - - -export type ConcreteSyntaxResult = -| { tag: "valid", value: ValidConcreteSyntax } -| { tag: "invalid", value: PartialConcreteSyntax } - -export type ParseDocumentResult = { - syntax: ConcreteSyntaxResult; - errors: ParseError[]; -}; - -// The main constraints are -// - `ValidConcreteSyntax` should be a subtype of `PartialConcreteSyntax` -// - if `PartialConcreteSyntax` doesn't contain any sort of error nodes, we should be able to coerce it to `ValidConcreteSyntax` without rebuilding the whole tree -export type ValidConcreteSyntax = Program -export type PartialConcreteSyntax = Program -type PartialExpr = ExprType; -type PartialListItem = ListItemType; - -export namespace ConcreteSyntaxResult { - export function valid(value: ValidConcreteSyntax): ConcreteSyntaxResult { - return { tag: "valid", value }; - } - - export function invalid(value: PartialConcreteSyntax): ConcreteSyntaxResult { - return { tag: "invalid", value }; - } -} - -export function programOf(result: ConcreteSyntaxResult): PartialConcreteSyntax { - return result.value; -} - - -export function parseDocument(region: SourceRegion): ParseDocumentResult { - return new Parser(region).parseDocument(); -} - -class Parser { - private readonly cursor: SourceCursor; - private readonly errors: ParseError[] = []; - - constructor(private readonly region: SourceRegion) { - this.cursor = region.makeCursor(); - } - - parseDocument(): ParseDocumentResult { - const expressions: PartialExpr[] = []; - - while (true) { - this.skipWhitespace(); - if (this.cursor.isAtEnd()) break; - - expressions.push(this.parseExpr()); - } - - const program = Program.make(expressions, { span: this.region.codePointSpan }); - return { - syntax: this.errors.length === 0 - ? ConcreteSyntaxResult.valid(program as ValidConcreteSyntax) - : ConcreteSyntaxResult.invalid(program as PartialConcreteSyntax), - errors: this.errors, - }; - } - - private parseExpr(): PartialExpr { - const cp = this.cursor.peek(); - - if (cp === undefined) { - return this.errorExpression(this.makeError({ - tag: "expected-expression", - span: this.cursor.eofSpan(), - found: this.found(), - })); - } - - if (cp === CLOSE_PAREN || cp === CLOSE_BRACKET) { - const delimiter = cp === CLOSE_PAREN ? "paren" : "bracket"; - const span = this.cursor.currentSpan(); - this.cursor.advance(); - return this.errorExpression(this.makeError({ - tag: "unexpected-close-delimiter", - span, - delimiter, - })); - } - - if (cp === OPEN_PAREN) return this.parseRoundList(); - if (cp === OPEN_BRACKET) return this.parseSquareList(); - if (isDigit(cp)) return this.parseNumber(); - if (isIdentifierStart(cp)) return this.parseIdentifier(); - - return this.parseUnknownExpression(); - } - - private parseRoundList(): PartialExpr { - const start = this.cursor.checkpoint(); - const open = DelimiterToken.openParen(this.cursor.currentSpan()); - this.cursor.advance(); - - const items: PartialListItem[] = []; - - while (true) { - this.skipWhitespace(); - - const cp = this.cursor.peek(); - if (cp === CLOSE_PAREN) { - const close = DelimiterToken.closeParen(this.cursor.currentSpan()); - this.cursor.advance(); - return Expr.list(open, items, this.cursor.spanFrom(start), close); - } - - if (cp === CLOSE_BRACKET) { - const close = DelimiterToken.closeBracket(this.cursor.currentSpan()); - const error = this.makeError({ - tag: "expected-close-delimiter", - span: this.cursor.currentSpan(), - open: open.span, - expected: "paren", - found: this.found(), - }); - this.cursor.advance(); - return Expr.list(open, items, this.cursor.spanFrom(start), close, error); - } - - if (cp === undefined) { - const error = this.makeError({ - tag: "expected-close-delimiter", - span: this.cursor.eofSpan(), - open: open.span, - expected: "paren", - found: this.found(), - }); - return Expr.list(open, items, this.cursor.spanFrom(start), undefined, error); - } - - items.push(this.parseExpr()); - } - } - - private parseSquareList(): PartialExpr { - const start = this.cursor.checkpoint(); - const open = DelimiterToken.openBracket(this.cursor.currentSpan()); - this.cursor.advance(); - - const items: PartialListItem[] = []; - let sawExpression = false; - let sawLeadingComma = false; - let needsSeparator = false; - - while (true) { - this.skipWhitespace(); - - const cp = this.cursor.peek(); - if (cp === CLOSE_BRACKET) { - const close = DelimiterToken.closeBracket(this.cursor.currentSpan()); - this.cursor.advance(); - return Expr.list(open, items, this.cursor.spanFrom(start), close); - } - - if (cp === CLOSE_PAREN) { - const close = DelimiterToken.closeParen(this.cursor.currentSpan()); - const error = this.makeError({ - tag: "expected-close-delimiter", - span: this.cursor.currentSpan(), - open: open.span, - expected: "bracket", - found: this.found(), - }); - this.cursor.advance(); - return Expr.list(open, items, this.cursor.spanFrom(start), close, error); - } - - if (cp === undefined) { - const error = this.makeError({ - tag: "expected-close-delimiter", - span: this.cursor.eofSpan(), - open: open.span, - expected: "bracket", - found: this.found(), - }); - return Expr.list(open, items, this.cursor.spanFrom(start), undefined, error); - } - - if (needsSeparator) { - if (cp === COMMA) { - this.cursor.advance(); - needsSeparator = false; - continue; - } - - const error = this.makeError({ - tag: "expected-list-separator", - span: this.cursor.currentSpan(), - found: this.found(), - }); - items.push(ListItem.errorSeparator(error, this.cursor.currentSpan())); - needsSeparator = false; - continue; - } - - if (cp === COMMA) { - const commaSpan = this.cursor.currentSpan(); - this.cursor.advance(); - - if (sawExpression) { - const error = this.makeError({ - tag: "expected-expression", - span: commaSpan, - found: { tag: "code-point", value: COMMA, span: commaSpan }, - }); - items.push(this.errorExpression(error, commaSpan)); - } else if (sawLeadingComma) { - const error = this.makeError({ - tag: "expected-expression", - span: commaSpan, - found: { tag: "code-point", value: COMMA, span: commaSpan }, - }); - items.push(this.errorExpression(error, commaSpan)); - } else { - sawLeadingComma = true; - } - - continue; - } - - items.push(this.parseExpr()); - sawExpression = true; - needsSeparator = true; - } - } - - private parseNumber(): PartialExpr { - const start = this.cursor.checkpoint(); - const match = consumeWhile1(this.cursor, isDigit); - if (match.tag === "none") { - throw new Error("parseNumber called when cursor is not at a number"); - } - - if (isIdentifierStart(this.cursor.peek() ?? -1)) { - consumeWhile(this.cursor, isIdentifierPart); - const span = this.cursor.spanFrom(start); - const text = this.cursor.slice(span); - return Expr.errorNumber(this.makeError({ - tag: "invalid-number", - span, - text, - reason: "identifier-suffix", - }), span); - } - - const { span, text } = match; - const value = Number(text); - - if (!Number.isSafeInteger(value)) { - return Expr.errorNumber(this.makeError({ - tag: "invalid-number", - span, - text, - reason: "unsafe-integer", - }), span); - } - - return Expr.number(value, span); - } - - private parseIdentifier(): PartialExpr { - const start = this.cursor.checkpoint(); - this.cursor.advance(); - - consumeWhile(this.cursor, isIdentifierPart); - const span = this.cursor.spanFrom(start); - return Expr.identifier(this.cursor.slice(span), span); - } - - private parseUnknownExpression(): PartialExpr { - const start = this.cursor.checkpoint(); - const focus = this.cursor.currentSpan(); - - this.cursor.advance(); - while (true) { - const cp = this.cursor.peek(); - if ( - cp === undefined - || isAsciiWhitespace(cp) - || isClosingDelimiter(cp) - || isExpressionStart(cp) - ) { - break; - } - this.cursor.advance(); - } - - const panickedOver = this.cursor.spanFrom(start); - const error = this.makeError({ - tag: "expected-expression", - span: focus, - found: { tag: "code-point", value: this.region.source.codePointAt(focus.start), span: focus }, - }, panickedOver); - - return this.errorExpression(error, panickedOver); - } - - private skipWhitespace(): void { - skipWhile(this.cursor, isAsciiWhitespace); - } - - private found(): FoundSyntax { - const cp = this.cursor.peek(); - if (cp === undefined) return { tag: "eof", span: this.cursor.eofSpan() }; - return { tag: "code-point", value: cp, span: this.cursor.currentSpan() }; - } - - private makeError(error: ParseError, panickedOver?: CodePointSpan): ConcreteError { - this.errors.push(error); - return ConcreteError.single({ - span: error.span, - error, - panickedOver, - }); - } - - private errorExpression(error: ConcreteError, span?: CodePointSpan): PartialExpr { - return Expr.errorExpression(error, span ?? error[0].span); - } -} - -function isExpressionStart(cp: CodePoint | undefined): boolean { - return cp !== undefined && ( - cp === OPEN_PAREN - || cp === OPEN_BRACKET - || isDigit(cp) - || isIdentifierStart(cp) - ); -} - -function isClosingDelimiter(cp: CodePoint): boolean { - return cp === CLOSE_PAREN || cp === CLOSE_BRACKET; -} - -function isIdentifierStart(cp: CodePoint): boolean { - return isAsciiAlpha(cp) || cp === DASH || cp === UNDERSCORE; -} - -function isIdentifierPart(cp: CodePoint): boolean { - return isAsciiAlphanumeric(cp) || cp === DASH || cp === UNDERSCORE; -} +export * from './languages/lisp/parser'; diff --git a/src/styles/layout.css b/src/styles/layout.css index 3763aa0..369a43c 100644 --- a/src/styles/layout.css +++ b/src/styles/layout.css @@ -1,8 +1,33 @@ +.app-root { + height: 100vh; + display: grid; + grid-template-rows: auto 1fr; + min-width: 1200px; +} + +.language-bar { + display: flex; + align-items: center; + gap: var(--gap-2); + padding: var(--gap-2) var(--gap-4); + border-bottom: 1px solid var(--border); + background: var(--panel); + color: var(--text-muted); + font-size: var(--text-sm); +} + +.language-bar select { + border: 1px solid var(--border); + color: var(--text); + background: var(--panel-raised); + padding: var(--gap-1) var(--gap-2); +} + .app-shell { display: grid; grid-template-columns: var(--left-width) 0.45rem var(--middle-width) 0.45rem minmax(360px, 1fr); gap: var(--gap-2); - height: 100vh; + min-height: 0; padding: var(--gap-4); } diff --git a/src/syntax.ts b/src/syntax.ts index d377730..b7e3214 100644 --- a/src/syntax.ts +++ b/src/syntax.ts @@ -1,152 +1 @@ -import type { CodePointSpan } from 'source-region'; -import type { ParseError } from './parse_errors'; - -export type ConcreteInfo = { span: CodePointSpan }; - -export type ConcreteError = ConcreteErrorNode[] // Convention: can't be empty. -export type ConcreteErrorNode = { - span: CodePointSpan, - error: ParseError, - panickedOver?: CodePointSpan, -} - -export namespace ConcreteError { - export function single(node: ConcreteErrorNode): ConcreteError { - return [node]; - } -} - -export type DelimiterToken = - | { tag: "open-paren"; span: CodePointSpan } - | { tag: "close-paren"; span: CodePointSpan } - | { tag: "open-bracket"; span: CodePointSpan } - | { tag: "close-bracket"; span: CodePointSpan }; - -export namespace DelimiterToken { - export function openParen(span: CodePointSpan): DelimiterToken { - return { tag: "open-paren", span }; - } - - export function closeParen(span: CodePointSpan): DelimiterToken { - return { tag: "close-paren", span }; - } - - export function openBracket(span: CodePointSpan): DelimiterToken { - return { tag: "open-bracket", span }; - } - - export function closeBracket(span: CodePointSpan): DelimiterToken { - return { tag: "close-bracket", span }; - } -} - -export type Program = { - tag: "program", - expressions: Expr[], - error?: Error, -} & Info - -export type Expr = -| Literal -| List -| { tag: "error-expression", error: Error } & Info // This is for errors that don't really correspond to any sort of node. Unknown errors. - -export type List = - { tag: "list", open: DelimiterToken, items: ListItem[], close?: DelimiterToken, error?: Error } & Info - -export type ListItem = -| Expr -| { tag: "error-list-separator", error: Error } & Info - -export type Literal = -// === number === -| { tag: "number", value: number } & Info -| { tag: "error-number", error: Error } & Info -// === identifier === -| { tag: "identifier", value: Identifier } & Info -| { tag: "error-identifier", error: Error } & Info - -export type Identifier = string - -export namespace Program { - export function make( - expressions: Expr[], - info: Info, - error?: Error, - ): Program { - return error === undefined - ? { tag: "program", expressions, ...info } - : { tag: "program", expressions, error, ...info }; - } - - export function show(program: Program): string { - return program.expressions.map(Expr.show).join(" "); - } -} - -export namespace Expr { - export function number(value: number, span: CodePointSpan): Expr { - return { tag: "number", value, span }; - } - - export function errorNumber(error: ConcreteError, span: CodePointSpan): Expr { - return { tag: "error-number", error, span }; - } - - export function identifier(value: Identifier, span: CodePointSpan): Expr { - return { tag: "identifier", value, span }; - } - - export function errorIdentifier(error: ConcreteError, span: CodePointSpan): Expr { - return { tag: "error-identifier", error, span }; - } - - export function list( - open: DelimiterToken, - items: ListItem[], - span: CodePointSpan, - close?: DelimiterToken, - error?: ConcreteError, - ): Expr { - return { tag: "list", open, items, close, error, span }; - } - - export function errorExpression(error: ConcreteError, span: CodePointSpan): Expr { - return { tag: "error-expression", error, span }; - } - - export function show(expr: Expr): string { - switch (expr.tag) { - case "number": - return `${expr.value}`; - case "identifier": - return expr.value; - case "error-number": - return ""; - case "error-identifier": - return ""; - case "error-expression": - return ""; - case "list": - return showList(expr); - } - } - - function showList(list: List): string { - const open = list.open.tag === "open-bracket" ? "[" : "("; - const close = list.open.tag === "open-bracket" ? "]" : ")"; - const sep = list.open.tag === "open-bracket" ? ", " : " "; - return `${open}${list.items.map(ListItem.show).join(sep)}${close}`; - } -} - -export namespace ListItem { - export function errorSeparator(error: ConcreteError, span: CodePointSpan): ListItem { - return { tag: "error-list-separator", error, span }; - } - - export function show(item: ListItem): string { - if (item.tag === "error-list-separator") return ""; - return Expr.show(item); - } -} +export * from './languages/lisp/syntax'; diff --git a/src/ui/App.tsx b/src/ui/App.tsx index 611f605..a9363b9 100644 --- a/src/ui/App.tsx +++ b/src/ui/App.tsx @@ -1,121 +1,29 @@ -import { createMemo, createSignal } from 'solid-js'; -import { sourceText } from 'source-region'; -import type { CodePointSpan, SourceRegion, SourceText } from 'source-region'; -import { parseDocument, programOf } from '../parser'; -import type { ConcreteSyntaxResult, PartialConcreteSyntax } from '../parser'; -import type { ParseError } from '../parse_errors'; -import { spanLabel } from './format'; -import { PaneHeader, PaneSplitter } from './Pane'; -import { SourceGrid } from './SourceGrid'; -import type { SourceGridAnnotation } from './SourceGrid'; -import { StructureTree } from './SyntaxPane'; -import type { HoverTarget } from './types'; +import { createSignal, Switch, Match } from 'solid-js'; +import { LispApp } from './languages/lisp/LispApp'; -type ParsedDocument = { - source: SourceText; - region: SourceRegion; - syntax: ConcreteSyntaxResult; - program: PartialConcreteSyntax; - errors: ParseError[]; -}; - -const SAMPLE_INPUT = `(define square (_ x) (mul x x)) - -[add, 1, 2] - -(define pyth (_ x y) (+ (square x) (square y))) - -foo ) @@@ (bar 1) -(nested [list, 123, abc_9, name-with-dash]) -[a, b c, d] -123fasd`; +type LanguageId = "lisp"; export function App() { - const [input, setInput] = createSignal(SAMPLE_INPUT); - const [hovered, setHovered] = createSignal(); - const [leftWidth, setLeftWidth] = createSignal(420); - const [middleWidth, setMiddleWidth] = createSignal(420); - - const parsed = createMemo(() => { - const source = sourceText(input()); - const region = source.fullRegion(); - const result = parseDocument(region); - return { source, region, syntax: result.syntax, program: programOf(result.syntax), errors: result.errors }; - }); + const [language, setLanguage] = createSignal("lisp"); return ( -
-
- -