diff --git a/libs/source-region b/libs/source-region
index 9c72959..3ec7005 160000
--- a/libs/source-region
+++ b/libs/source-region
@@ -1 +1 @@
-Subproject commit 9c72959cd398909139137b0831a19c2e05161fe2
+Subproject commit 3ec70051987a74bcc3e885e59a19536fc9c77772
diff --git a/src/parser.experiments.ts b/src/parser.experiments.ts
index 96f283c..9c5469f 100644
--- a/src/parser.experiments.ts
+++ b/src/parser.experiments.ts
@@ -1,5 +1,6 @@
-import { sourceText } from 'source-region';
+import { CodePointString, sourceText } from 'source-region';
 import { parseDocument } from './parser';
+import { matchCodePointString } from './recognizers';
 import { Expr } from './syntax';
 
 // === Experiments ===
@@ -32,6 +33,15 @@ function experiment06_unicodeSpans(): void {
   logParse("unicode spans", "alpha 💥 (beta 2)");
 }
 
+function experiment07_matchCodePointString(): void {
+  const region = sourceText("λx").fullRegion();
+  const cursor = region.makeCursor();
+  const lambda = CodePointString.makeFromString("λ");
+  console.log("==== recognizer:match code point string ====");
+  console.dir(matchCodePointString(cursor, lambda), { depth: null });
+  console.log("cursor", cursor.current());
+}
+
 function logParse(name: string, input: string): void {
   const region = sourceText(input).fullRegion();
   const result = parseDocument(region);
@@ -49,4 +59,5 @@ function logParse(name: string, input: string): void {
   experiment04_recoverAtDocumentLevel,
   experiment05_recoverInsideList,
   experiment06_unicodeSpans,
+  experiment07_matchCodePointString,
 ].forEach((experiment) => experiment());
diff --git a/src/parser.ts b/src/parser.ts
index 85f49cd..b967d30 100644
--- a/src/parser.ts
+++ b/src/parser.ts
@@ -12,6 +12,7 @@ import type {
   CodePointSpan,
   SourceRegion,
 } from 'source-region';
+import { consumeWhile, consumeWhile1, skipWhile } from './recognizers';
 import { ConcreteSyntax } from './syntax';
 
 // Whitespace convention:
@@ -82,7 +83,7 @@ class Parser {
   private readonly errors: ParseError[] = [];
 
   constructor(private readonly region: SourceRegion) {
-    this.cursor = new SourceCursor(region);
+    this.cursor = region.makeCursor();
   }
 
   parseDocument(): ParseDocumentResult {
@@ -176,14 +177,12 @@ class Parser {
   }
 
   private parseNumber(): ConcreteSyntax {
-    const start = this.cursor.checkpoint();
-
-    while (isDigit(this.cursor.peek() ?? -1)) {
-      this.cursor.advance();
+    const match = consumeWhile1(this.cursor, isDigit);
+    if (match.tag === "none") {
+      throw new Error("parseNumber called when cursor is not at a number");
     }
 
-    const span = this.cursor.spanFrom(start);
-    const text = this.cursor.slice(span);
+    const { span, text } = match;
     const value = Number(text);
 
     if (!Number.isSafeInteger(value)) {
@@ -202,10 +201,7 @@ class Parser {
     const start = this.cursor.checkpoint();
     this.cursor.advance();
 
-    while (isIdentifierPart(this.cursor.peek() ?? -1)) {
-      this.cursor.advance();
-    }
-
+    consumeWhile(this.cursor, isIdentifierPart);
     const span = this.cursor.spanFrom(start);
     return ConcreteSyntax.identifier(this.cursor.slice(span), span);
   }
@@ -240,9 +236,7 @@ class Parser {
   }
 
   private skipWhitespace(): void {
-    while (isAsciiWhitespace(this.cursor.peek() ?? -1)) {
-      this.cursor.advance();
-    }
+    skipWhile(this.cursor, isAsciiWhitespace);
   }
 
   private found(): FoundSyntax {
diff --git a/src/recognizers.ts b/src/recognizers.ts
new file mode 100644
index 0000000..51d42d8
--- /dev/null
+++ b/src/recognizers.ts
@@ -0,0 +1,70 @@
+import type { CodePoint, CodePointSpan, CodePointString, SourceCursor } from 'source-region';
+
+export type TextMatch =
+| { tag: "match"; span: CodePointSpan; text: string }
+| { tag: "none" };
+
+export namespace TextMatch {
+  export function match(span: CodePointSpan, text: string): TextMatch {
+    return { tag: "match", span, text };
+  }
+
+  export function none(): TextMatch {
+    return { tag: "none" };
+  }
+}
+
+export function consumeWhile(
+  cursor: SourceCursor,
+  predicate: (cp: CodePoint) => boolean,
+): CodePointSpan {
+  const start = cursor.checkpoint();
+
+  while (true) {
+    const cp = cursor.peek();
+    if (cp === undefined || !predicate(cp)) break;
+    cursor.advance();
+  }
+
+  return cursor.spanFrom(start);
+}
+
+export function consumeWhile1(
+  cursor: SourceCursor,
+  predicate: (cp: CodePoint) => boolean,
+): TextMatch {
+  const start = cursor.checkpoint();
+  const span = consumeWhile(cursor, predicate);
+
+  if (span.start === span.end) {
+    cursor.restore(start);
+    return TextMatch.none();
+  }
+
+  return TextMatch.match(span, cursor.slice(span));
+}
+
+export function skipWhile(
+  cursor: SourceCursor,
+  predicate: (cp: CodePoint) => boolean,
+): CodePointSpan {
+  return consumeWhile(cursor, predicate);
+}
+
+export function matchCodePointString(
+  cursor: SourceCursor,
+  pattern: CodePointString,
+): TextMatch {
+  const start = cursor.checkpoint();
+
+  for (const expected of pattern.codePoints) {
+    if (cursor.peek() !== expected) {
+      cursor.restore(start);
+      return TextMatch.none();
+    }
+    cursor.advance();
+  }
+
+  const span = cursor.spanFrom(start);
+  return TextMatch.match(span, cursor.slice(span));
+}