Introduce recognizers

This commit is contained in:
Yura Dupyn 2026-04-25 11:55:46 +02:00
parent a56020cd9f
commit bb9ca93f4e
4 changed files with 91 additions and 16 deletions

View file

@ -1,5 +1,6 @@
import { sourceText } from 'source-region';
import { CodePointString, sourceText } from 'source-region';
import { parseDocument } from './parser';
import { matchCodePointString } from './recognizers';
import { Expr } from './syntax';
// === Experiments ===
@ -32,6 +33,15 @@ function experiment06_unicodeSpans(): void {
logParse("unicode spans", "alpha 💥 (beta 2)");
}
/**
 * Experiment 07: run `matchCodePointString` against a small Unicode input.
 *
 * Creates a cursor over the full region of the text "λx", attempts to match
 * the pattern "λ" at that cursor, and prints the resulting match value
 * followed by whatever `cursor.current()` reports after the attempt.
 */
function experiment07_matchCodePointString(): void {
  const cursor = sourceText("λx").fullRegion().makeCursor();
  const pattern = CodePointString.makeFromString("λ");

  console.log("==== recognizer:match code point string ====");

  // Run the recognizer only after the header is printed, so the header
  // appears even if matching throws.
  const outcome = matchCodePointString(cursor, pattern);
  console.dir(outcome, { depth: null });
  console.log("cursor", cursor.current());
}
function logParse(name: string, input: string): void {
const region = sourceText(input).fullRegion();
const result = parseDocument(region);
@ -49,4 +59,5 @@ function logParse(name: string, input: string): void {
experiment04_recoverAtDocumentLevel,
experiment05_recoverInsideList,
experiment06_unicodeSpans,
experiment07_matchCodePointString,
].forEach((experiment) => experiment());

View file

@ -12,6 +12,7 @@ import type {
CodePointSpan,
SourceRegion,
} from 'source-region';
import { consumeWhile, consumeWhile1, skipWhile } from './recognizers';
import { ConcreteSyntax } from './syntax';
// Whitespace convention:
@ -82,7 +83,7 @@ class Parser {
private readonly errors: ParseError[] = [];
constructor(private readonly region: SourceRegion) {
this.cursor = new SourceCursor(region);
this.cursor = region.makeCursor();
}
parseDocument(): ParseDocumentResult {
@ -176,14 +177,12 @@ class Parser {
}
private parseNumber(): ConcreteSyntax {
const start = this.cursor.checkpoint();
while (isDigit(this.cursor.peek() ?? -1)) {
this.cursor.advance();
const match = consumeWhile1(this.cursor, isDigit);
if (match.tag === "none") {
throw new Error("parseNumber called when cursor is not at a number");
}
const span = this.cursor.spanFrom(start);
const text = this.cursor.slice(span);
const { span, text } = match;
const value = Number(text);
if (!Number.isSafeInteger(value)) {
@ -202,10 +201,7 @@ class Parser {
const start = this.cursor.checkpoint();
this.cursor.advance();
while (isIdentifierPart(this.cursor.peek() ?? -1)) {
this.cursor.advance();
}
consumeWhile(this.cursor, isIdentifierPart);
const span = this.cursor.spanFrom(start);
return ConcreteSyntax.identifier(this.cursor.slice(span), span);
}
@ -240,9 +236,7 @@ class Parser {
}
private skipWhitespace(): void {
while (isAsciiWhitespace(this.cursor.peek() ?? -1)) {
this.cursor.advance();
}
skipWhile(this.cursor, isAsciiWhitespace);
}
private found(): FoundSyntax {

70
src/recognizers.ts Normal file
View file

@ -0,0 +1,70 @@
import type { CodePoint, CodePointSpan, CodePointString, SourceCursor } from 'source-region';
/**
 * Outcome of a recognizer attempt.
 *
 * - `"match"`: the recognizer succeeded; `span` is the matched range and
 *   `text` is the string supplied for that range.
 * - `"none"`: the recognizer did not match.
 */
export type TextMatch =
  | { tag: "match"; span: CodePointSpan; text: string }
  | { tag: "none" };

/** Constructors for the two `TextMatch` variants. */
export namespace TextMatch {
  /**
   * Builds the `"match"` variant.
   *
   * @param span - Range covered by the match.
   * @param text - Text associated with that range.
   */
  export function match(span: CodePointSpan, text: string): TextMatch {
    const matched: TextMatch = { tag: "match", span, text };
    return matched;
  }

  /** Builds the `"none"` variant; every call returns a fresh object. */
  export function none(): TextMatch {
    const noMatch: TextMatch = { tag: "none" };
    return noMatch;
  }
}
/**
 * Advances `cursor` past every leading code point accepted by `predicate`.
 *
 * Scanning stops at the first code point the predicate rejects, or as soon as
 * `cursor.peek()` returns `undefined`. Consuming nothing is not an error: the
 * result is then whatever `spanFrom` yields for a cursor that has not moved.
 *
 * @param cursor - Cursor to advance; left positioned just past the consumed run.
 * @param predicate - Test applied to each upcoming code point before it is consumed.
 * @returns The span from the starting checkpoint to the cursor's final position.
 */
export function consumeWhile(
  cursor: SourceCursor,
  predicate: (cp: CodePoint) => boolean,
): CodePointSpan {
  const origin = cursor.checkpoint();
  for (
    let upcoming = cursor.peek();
    upcoming !== undefined && predicate(upcoming);
    upcoming = cursor.peek()
  ) {
    cursor.advance();
  }
  return cursor.spanFrom(origin);
}
/**
 * Variant of `consumeWhile` that requires at least one accepted code point.
 *
 * @param cursor - Cursor to advance on success.
 * @param predicate - Test applied to each upcoming code point.
 * @returns A `"match"` carrying the consumed span and `cursor.slice(span)`
 *   when the span is non-empty. When the span is empty (`start === end`), the
 *   cursor is restored to its starting checkpoint and `"none"` is returned.
 */
export function consumeWhile1(
  cursor: SourceCursor,
  predicate: (cp: CodePoint) => boolean,
): TextMatch {
  const origin = cursor.checkpoint();
  const consumed = consumeWhile(cursor, predicate);

  const consumedSomething = consumed.start !== consumed.end;
  if (consumedSomething) {
    return TextMatch.match(consumed, cursor.slice(consumed));
  }

  cursor.restore(origin);
  return TextMatch.none();
}
/**
 * Skips every leading code point accepted by `predicate`.
 *
 * This is a straight delegate to `consumeWhile`; it performs the same cursor
 * movement and returns the same span.
 *
 * @param cursor - Cursor to advance.
 * @param predicate - Test applied to each upcoming code point.
 * @returns The span that `consumeWhile` reports for the skipped run.
 */
export function skipWhile(
  cursor: SourceCursor,
  predicate: (cp: CodePoint) => boolean,
): CodePointSpan {
  const skipped = consumeWhile(cursor, predicate);
  return skipped;
}
/**
 * Tries to match `pattern` code point by code point at the cursor.
 *
 * Each code point of `pattern.codePoints` is compared, in order, with
 * `cursor.peek()`. On the first difference the cursor is restored to where it
 * started and `"none"` is returned, so a failed attempt consumes nothing.
 * If the pattern yields no code points, the loop body never runs and the
 * result is a match over the span taken at the unmoved cursor.
 *
 * @param cursor - Cursor to match at; advanced past the pattern on success.
 * @param pattern - Sequence of code points that must appear next.
 * @returns A `"match"` with the covered span and `cursor.slice(span)`, or `"none"`.
 */
export function matchCodePointString(
  cursor: SourceCursor,
  pattern: CodePointString,
): TextMatch {
  const origin = cursor.checkpoint();

  for (const wanted of pattern.codePoints) {
    const actual = cursor.peek();
    if (actual === wanted) {
      cursor.advance();
      continue;
    }
    // Mismatch (including `peek()` returning undefined): undo partial progress.
    cursor.restore(origin);
    return TextMatch.none();
  }

  const matched = cursor.spanFrom(origin);
  return TextMatch.match(matched, cursor.slice(matched));
}