Introduce recognizers
This commit is contained in:
parent
a56020cd9f
commit
bb9ca93f4e
4 changed files with 91 additions and 16 deletions
|
|
@ -1 +1 @@
|
||||||
Subproject commit 9c72959cd398909139137b0831a19c2e05161fe2
|
Subproject commit 3ec70051987a74bcc3e885e59a19536fc9c77772
|
||||||
|
|
@ -1,5 +1,6 @@
|
||||||
import { sourceText } from 'source-region';
|
import { CodePointString, sourceText } from 'source-region';
|
||||||
import { parseDocument } from './parser';
|
import { parseDocument } from './parser';
|
||||||
|
import { matchCodePointString } from './recognizers';
|
||||||
import { Expr } from './syntax';
|
import { Expr } from './syntax';
|
||||||
|
|
||||||
// === Experiments ===
|
// === Experiments ===
|
||||||
|
|
@ -32,6 +33,15 @@ function experiment06_unicodeSpans(): void {
|
||||||
logParse("unicode spans", "alpha 💥 (beta 2)");
|
logParse("unicode spans", "alpha 💥 (beta 2)");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Experiment 07: exercise `matchCodePointString` directly against a cursor.
 *
 * Builds a cursor over the full region of the text "λx", asks the recognizer
 * to match the single-code-point pattern "λ", and prints both the recognizer's
 * result and the value of `cursor.current()` afterwards so the effect of the
 * match on the cursor can be inspected.
 */
function experiment07_matchCodePointString(): void {
  const cursor = sourceText("λx").fullRegion().makeCursor();
  const pattern = CodePointString.makeFromString("λ");

  console.log("==== recognizer:match code point string ====");

  // Run the recognizer first, then dump its result with unlimited depth.
  const outcome = matchCodePointString(cursor, pattern);
  console.dir(outcome, { depth: null });

  console.log("cursor", cursor.current());
}
|
||||||
|
|
||||||
function logParse(name: string, input: string): void {
|
function logParse(name: string, input: string): void {
|
||||||
const region = sourceText(input).fullRegion();
|
const region = sourceText(input).fullRegion();
|
||||||
const result = parseDocument(region);
|
const result = parseDocument(region);
|
||||||
|
|
@ -49,4 +59,5 @@ function logParse(name: string, input: string): void {
|
||||||
experiment04_recoverAtDocumentLevel,
|
experiment04_recoverAtDocumentLevel,
|
||||||
experiment05_recoverInsideList,
|
experiment05_recoverInsideList,
|
||||||
experiment06_unicodeSpans,
|
experiment06_unicodeSpans,
|
||||||
|
experiment07_matchCodePointString,
|
||||||
].forEach((experiment) => experiment());
|
].forEach((experiment) => experiment());
|
||||||
|
|
|
||||||
|
|
@ -12,6 +12,7 @@ import type {
|
||||||
CodePointSpan,
|
CodePointSpan,
|
||||||
SourceRegion,
|
SourceRegion,
|
||||||
} from 'source-region';
|
} from 'source-region';
|
||||||
|
import { consumeWhile, consumeWhile1, skipWhile } from './recognizers';
|
||||||
import { ConcreteSyntax } from './syntax';
|
import { ConcreteSyntax } from './syntax';
|
||||||
|
|
||||||
// Whitespace convention:
|
// Whitespace convention:
|
||||||
|
|
@ -82,7 +83,7 @@ class Parser {
|
||||||
private readonly errors: ParseError[] = [];
|
private readonly errors: ParseError[] = [];
|
||||||
|
|
||||||
constructor(private readonly region: SourceRegion) {
|
constructor(private readonly region: SourceRegion) {
|
||||||
this.cursor = new SourceCursor(region);
|
this.cursor = region.makeCursor();
|
||||||
}
|
}
|
||||||
|
|
||||||
parseDocument(): ParseDocumentResult {
|
parseDocument(): ParseDocumentResult {
|
||||||
|
|
@ -176,14 +177,12 @@ class Parser {
|
||||||
}
|
}
|
||||||
|
|
||||||
private parseNumber(): ConcreteSyntax {
|
private parseNumber(): ConcreteSyntax {
|
||||||
const start = this.cursor.checkpoint();
|
const match = consumeWhile1(this.cursor, isDigit);
|
||||||
|
if (match.tag === "none") {
|
||||||
while (isDigit(this.cursor.peek() ?? -1)) {
|
throw new Error("parseNumber called when cursor is not at a number");
|
||||||
this.cursor.advance();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const span = this.cursor.spanFrom(start);
|
const { span, text } = match;
|
||||||
const text = this.cursor.slice(span);
|
|
||||||
const value = Number(text);
|
const value = Number(text);
|
||||||
|
|
||||||
if (!Number.isSafeInteger(value)) {
|
if (!Number.isSafeInteger(value)) {
|
||||||
|
|
@ -202,10 +201,7 @@ class Parser {
|
||||||
const start = this.cursor.checkpoint();
|
const start = this.cursor.checkpoint();
|
||||||
this.cursor.advance();
|
this.cursor.advance();
|
||||||
|
|
||||||
while (isIdentifierPart(this.cursor.peek() ?? -1)) {
|
consumeWhile(this.cursor, isIdentifierPart);
|
||||||
this.cursor.advance();
|
|
||||||
}
|
|
||||||
|
|
||||||
const span = this.cursor.spanFrom(start);
|
const span = this.cursor.spanFrom(start);
|
||||||
return ConcreteSyntax.identifier(this.cursor.slice(span), span);
|
return ConcreteSyntax.identifier(this.cursor.slice(span), span);
|
||||||
}
|
}
|
||||||
|
|
@ -240,9 +236,7 @@ class Parser {
|
||||||
}
|
}
|
||||||
|
|
||||||
private skipWhitespace(): void {
|
private skipWhitespace(): void {
|
||||||
while (isAsciiWhitespace(this.cursor.peek() ?? -1)) {
|
skipWhile(this.cursor, isAsciiWhitespace);
|
||||||
this.cursor.advance();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private found(): FoundSyntax {
|
private found(): FoundSyntax {
|
||||||
|
|
|
||||||
70
src/recognizers.ts
Normal file
70
src/recognizers.ts
Normal file
|
|
@ -0,0 +1,70 @@
|
||||||
|
import type { CodePoint, CodePointSpan, CodePointString, SourceCursor } from 'source-region';
|
||||||
|
|
||||||
|
/**
 * Outcome of a text recognizer.
 *
 * - `"match"`: the recognizer succeeded; `span` is the matched span and
 *   `text` is the matched text.
 * - `"none"`: the recognizer did not match.
 */
export type TextMatch =
  | { tag: "none" }
  | { tag: "match"; span: CodePointSpan; text: string };

/** Constructors for the two `TextMatch` variants. */
export namespace TextMatch {
  /** Builds the `"none"` variant. */
  export function none(): TextMatch {
    const miss: TextMatch = { tag: "none" };
    return miss;
  }

  /**
   * Builds the `"match"` variant.
   *
   * @param span - Span stored on the result.
   * @param text - Text stored on the result.
   */
  export function match(span: CodePointSpan, text: string): TextMatch {
    const hit: TextMatch = { tag: "match", span: span, text: text };
    return hit;
  }
}
|
||||||
|
|
||||||
|
/**
 * Advances `cursor` for as long as the next code point satisfies `predicate`.
 *
 * The loop stops as soon as `cursor.peek()` returns `undefined` (presumably
 * end of input) or a code point the predicate rejects; that code point is
 * not consumed.
 *
 * @param cursor - Cursor to advance; it is mutated in place.
 * @param predicate - Test applied to each peeked code point.
 * @returns The span from the cursor's position on entry to its position on
 *   exit, as produced by `cursor.spanFrom`. Nothing is consumed when the very
 *   first code point fails the predicate.
 */
export function consumeWhile(
  cursor: SourceCursor,
  predicate: (cp: CodePoint) => boolean,
): CodePointSpan {
  const origin = cursor.checkpoint();

  for (
    let next = cursor.peek();
    next !== undefined && predicate(next);
    next = cursor.peek()
  ) {
    cursor.advance();
  }

  return cursor.spanFrom(origin);
}
|
||||||
|
|
||||||
|
/**
 * Like `consumeWhile`, but requires at least one code point to be consumed.
 *
 * @param cursor - Cursor to advance; it is mutated in place on success.
 * @param predicate - Test applied to each peeked code point.
 * @returns A `"match"` carrying the consumed span and `cursor.slice(span)`
 *   when the span is non-empty; otherwise `"none"`, after restoring the cursor
 *   to the checkpoint taken on entry.
 */
export function consumeWhile1(
  cursor: SourceCursor,
  predicate: (cp: CodePoint) => boolean,
): TextMatch {
  const rewindPoint = cursor.checkpoint();
  const consumed = consumeWhile(cursor, predicate);

  // A span whose start and end differ means something was consumed.
  if (consumed.start !== consumed.end) {
    return TextMatch.match(consumed, cursor.slice(consumed));
  }

  cursor.restore(rewindPoint);
  return TextMatch.none();
}
|
||||||
|
|
||||||
|
/**
 * Skips code points that satisfy `predicate`.
 *
 * This is `consumeWhile` under a name that signals the caller does not intend
 * to use the consumed text; the skipped span is still returned.
 *
 * @param cursor - Cursor to advance; it is mutated in place.
 * @param predicate - Test applied to each peeked code point.
 * @returns The span returned by `consumeWhile` for the skipped code points.
 */
export function skipWhile(
  cursor: SourceCursor,
  predicate: (cp: CodePoint) => boolean,
): CodePointSpan {
  const skipped = consumeWhile(cursor, predicate);
  return skipped;
}
|
||||||
|
|
||||||
|
/**
 * Matches the exact code point sequence `pattern` at the cursor.
 *
 * Each code point of `pattern.codePoints` is compared, in order, with
 * `cursor.peek()`; the cursor advances past every code point that agrees.
 *
 * @param cursor - Cursor to match against; advanced past the pattern on
 *   success, restored to its entry checkpoint on failure.
 * @param pattern - Code point sequence that must appear at the cursor.
 * @returns A `"match"` carrying the span from the entry checkpoint and
 *   `cursor.slice(span)` when every code point agrees; `"none"` at the first
 *   disagreement. A pattern with no code points never enters the loop, so it
 *   yields a `"match"` without advancing.
 */
export function matchCodePointString(
  cursor: SourceCursor,
  pattern: CodePointString,
): TextMatch {
  const origin = cursor.checkpoint();

  for (const expected of pattern.codePoints) {
    if (cursor.peek() === expected) {
      cursor.advance();
      continue;
    }

    // First mismatch: undo any partial progress and report failure.
    cursor.restore(origin);
    return TextMatch.none();
  }

  const matched = cursor.spanFrom(origin);
  return TextMatch.match(matched, cursor.slice(matched));
}
|
||||||
Loading…
Add table
Add a link
Reference in a new issue