Introduce recognizers
This commit is contained in:
parent
a56020cd9f
commit
bb9ca93f4e
4 changed files with 91 additions and 16 deletions
|
|
@ -1,5 +1,6 @@
|
|||
import { sourceText } from 'source-region';
|
||||
import { CodePointString, sourceText } from 'source-region';
|
||||
import { parseDocument } from './parser';
|
||||
import { matchCodePointString } from './recognizers';
|
||||
import { Expr } from './syntax';
|
||||
|
||||
// === Experiments ===
|
||||
|
|
@ -32,6 +33,15 @@ function experiment06_unicodeSpans(): void {
|
|||
logParse("unicode spans", "alpha 💥 (beta 2)");
|
||||
}
|
||||
|
||||
/**
 * Experiment: run `matchCodePointString` with the pattern "λ" against a cursor
 * created over the text "λx", then print the match result followed by the
 * value of `cursor.current()` after the attempt.
 */
function experiment07_matchCodePointString(): void {
  const cursor = sourceText("λx").fullRegion().makeCursor();
  const pattern = CodePointString.makeFromString("λ");

  console.log("==== recognizer:match code point string ====");

  // Run the recognizer after the header is printed, as the original ordering does.
  const outcome = matchCodePointString(cursor, pattern);
  console.dir(outcome, { depth: null });

  console.log("cursor", cursor.current());
}
|
||||
|
||||
function logParse(name: string, input: string): void {
|
||||
const region = sourceText(input).fullRegion();
|
||||
const result = parseDocument(region);
|
||||
|
|
@ -49,4 +59,5 @@ function logParse(name: string, input: string): void {
|
|||
experiment04_recoverAtDocumentLevel,
|
||||
experiment05_recoverInsideList,
|
||||
experiment06_unicodeSpans,
|
||||
experiment07_matchCodePointString,
|
||||
].forEach((experiment) => experiment());
|
||||
|
|
|
|||
|
|
@ -12,6 +12,7 @@ import type {
|
|||
CodePointSpan,
|
||||
SourceRegion,
|
||||
} from 'source-region';
|
||||
import { consumeWhile, consumeWhile1, skipWhile } from './recognizers';
|
||||
import { ConcreteSyntax } from './syntax';
|
||||
|
||||
// Whitespace convention:
|
||||
|
|
@ -82,7 +83,7 @@ class Parser {
|
|||
private readonly errors: ParseError[] = [];
|
||||
|
||||
constructor(private readonly region: SourceRegion) {
|
||||
this.cursor = new SourceCursor(region);
|
||||
this.cursor = region.makeCursor();
|
||||
}
|
||||
|
||||
parseDocument(): ParseDocumentResult {
|
||||
|
|
@ -176,14 +177,12 @@ class Parser {
|
|||
}
|
||||
|
||||
private parseNumber(): ConcreteSyntax {
|
||||
const start = this.cursor.checkpoint();
|
||||
|
||||
while (isDigit(this.cursor.peek() ?? -1)) {
|
||||
this.cursor.advance();
|
||||
const match = consumeWhile1(this.cursor, isDigit);
|
||||
if (match.tag === "none") {
|
||||
throw new Error("parseNumber called when cursor is not at a number");
|
||||
}
|
||||
|
||||
const span = this.cursor.spanFrom(start);
|
||||
const text = this.cursor.slice(span);
|
||||
const { span, text } = match;
|
||||
const value = Number(text);
|
||||
|
||||
if (!Number.isSafeInteger(value)) {
|
||||
|
|
@ -202,10 +201,7 @@ class Parser {
|
|||
const start = this.cursor.checkpoint();
|
||||
this.cursor.advance();
|
||||
|
||||
while (isIdentifierPart(this.cursor.peek() ?? -1)) {
|
||||
this.cursor.advance();
|
||||
}
|
||||
|
||||
consumeWhile(this.cursor, isIdentifierPart);
|
||||
const span = this.cursor.spanFrom(start);
|
||||
return ConcreteSyntax.identifier(this.cursor.slice(span), span);
|
||||
}
|
||||
|
|
@ -240,9 +236,7 @@ class Parser {
|
|||
}
|
||||
|
||||
private skipWhitespace(): void {
|
||||
while (isAsciiWhitespace(this.cursor.peek() ?? -1)) {
|
||||
this.cursor.advance();
|
||||
}
|
||||
skipWhile(this.cursor, isAsciiWhitespace);
|
||||
}
|
||||
|
||||
private found(): FoundSyntax {
|
||||
|
|
|
|||
70
src/recognizers.ts
Normal file
70
src/recognizers.ts
Normal file
|
|
@ -0,0 +1,70 @@
|
|||
import type { CodePoint, CodePointSpan, CodePointString, SourceCursor } from 'source-region';
|
||||
|
||||
/**
 * Outcome of a recognizer attempt: either the matched span together with its
 * text (`tag: "match"`), or an explicit marker that nothing matched
 * (`tag: "none"`).
 */
export type TextMatch =
  | { tag: "match"; span: CodePointSpan; text: string }
  | { tag: "none" };

/** Factory helpers for building `TextMatch` values. */
export namespace TextMatch {
  /** Builds the successful variant carrying the given `span` and `text`. */
  export function match(span: CodePointSpan, text: string): TextMatch {
    const matched: TextMatch = { tag: "match", span: span, text: text };
    return matched;
  }

  /** Builds a fresh value of the variant signalling that nothing matched. */
  export function none(): TextMatch {
    const unmatched: TextMatch = { tag: "none" };
    return unmatched;
  }
}
|
||||
|
||||
/**
 * Advances `cursor` for as long as `cursor.peek()` yields a code point that
 * `predicate` accepts. Stops as soon as `peek()` returns `undefined` or the
 * predicate rejects the peeked code point.
 *
 * @param cursor - Cursor to advance; it is mutated in place.
 * @param predicate - Test applied to each peeked code point.
 * @returns `cursor.spanFrom(...)` of the checkpoint taken on entry; nothing is
 *   advanced when the very first peek fails the test.
 */
export function consumeWhile(
  cursor: SourceCursor,
  predicate: (cp: CodePoint) => boolean,
): CodePointSpan {
  const origin = cursor.checkpoint();

  for (
    let next = cursor.peek();
    next !== undefined && predicate(next);
    next = cursor.peek()
  ) {
    cursor.advance();
  }

  return cursor.spanFrom(origin);
}
|
||||
|
||||
/**
 * Like `consumeWhile`, but requires a non-empty result.
 *
 * @param cursor - Cursor to advance; it is mutated in place.
 * @param predicate - Test applied to each peeked code point.
 * @returns A `"match"` carrying the consumed span and `cursor.slice(span)`
 *   when the span's `start` and `end` differ. Otherwise the cursor is restored
 *   to the checkpoint taken on entry and `"none"` is returned.
 */
export function consumeWhile1(
  cursor: SourceCursor,
  predicate: (cp: CodePoint) => boolean,
): TextMatch {
  const origin = cursor.checkpoint();
  const consumed = consumeWhile(cursor, predicate);
  const isEmpty = consumed.start === consumed.end;

  if (!isEmpty) {
    return TextMatch.match(consumed, cursor.slice(consumed));
  }

  // Nothing was consumed: put the cursor back where it started and report no match.
  cursor.restore(origin);
  return TextMatch.none();
}
|
||||
|
||||
/**
 * Advances `cursor` past every leading code point accepted by `predicate`.
 * This simply delegates to `consumeWhile` and hands back its span.
 *
 * @param cursor - Cursor to advance; it is mutated in place.
 * @param predicate - Test applied to each peeked code point.
 * @returns The span returned by `consumeWhile` for the skipped code points.
 */
export function skipWhile(
  cursor: SourceCursor,
  predicate: (cp: CodePoint) => boolean,
): CodePointSpan {
  const skipped = consumeWhile(cursor, predicate);
  return skipped;
}
|
||||
|
||||
/**
 * Tries to match every code point of `pattern`, in order, at the cursor.
 *
 * @param cursor - Cursor to match against; advanced past the pattern on success.
 * @param pattern - Code points that must appear consecutively at the cursor.
 * @returns A `"match"` carrying the span from the entry checkpoint and
 *   `cursor.slice(span)` when every code point matched. On the first mismatch
 *   the cursor is restored to the entry checkpoint and `"none"` is returned.
 */
export function matchCodePointString(
  cursor: SourceCursor,
  pattern: CodePointString,
): TextMatch {
  const origin = cursor.checkpoint();

  for (const wanted of pattern.codePoints) {
    if (cursor.peek() === wanted) {
      cursor.advance();
      continue;
    }

    // Mismatch (or `peek()` returned something other than the expected code
    // point): undo any partial progress before reporting failure.
    cursor.restore(origin);
    return TextMatch.none();
  }

  const covered = cursor.spanFrom(origin);
  return TextMatch.match(covered, cursor.slice(covered));
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue