diff --git a/libs/source-region b/libs/source-region index 8471c60..9c72959 160000 --- a/libs/source-region +++ b/libs/source-region @@ -1 +1 @@ -Subproject commit 8471c60967eca6178d25fa3221035286a19856f2 +Subproject commit 9c72959cd398909139137b0831a19c2e05161fe2 diff --git a/src/parser.ts b/src/parser.ts index 2673f74..85f49cd 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -1,9 +1,9 @@ import { - CARRIAGE_RETURN, - NEW_LINE, - SPACE, - TAB, + SourceCursor, char, + isAsciiAlpha, + isAsciiAlphanumeric, + isAsciiWhitespace, isDigit, } from 'source-region'; import type { @@ -35,10 +35,6 @@ const OPEN_PAREN = char('('); const CLOSE_PAREN = char(')'); const DASH = char('-'); const UNDERSCORE = char('_'); -const LOWERCASE_A = char('a'); -const LOWERCASE_Z = char('z'); -const UPPERCASE_A = char('A'); -const UPPERCASE_Z = char('Z'); export type ParseDocumentResult = { values: ConcreteSyntax[]; @@ -82,11 +78,11 @@ export function parseDocument(region: SourceRegion): ParseDocumentResult { } class Parser { - private index: CodePointIndex; + private readonly cursor: SourceCursor; private readonly errors: ParseError[] = []; constructor(private readonly region: SourceRegion) { - this.index = region.span.start.index; + this.cursor = new SourceCursor(region); } parseDocument(): ParseDocumentResult { @@ -94,9 +90,9 @@ class Parser { while (true) { this.skipWhitespace(); - if (this.isAtEnd()) break; + if (this.cursor.isAtEnd()) break; - const before = this.index; + const before = this.cursor.checkpoint(); const value = this.parseExpr(); if (value) { values.push(value); @@ -110,12 +106,12 @@ class Parser { } private parseExpr(): ConcreteSyntax | undefined { - const cp = this.peek(); + const cp = this.cursor.peek(); if (cp === undefined) { this.errors.push({ tag: "expected-expression", - span: this.eofSpan(), + span: this.cursor.eofSpan(), found: this.found(), }); return undefined; @@ -124,7 +120,7 @@ class Parser { if (cp === CLOSE_PAREN) { this.errors.push({ tag: "unexpected-close-paren", - span: this.currentSpan(), + span: this.cursor.currentSpan(), }); return undefined; } @@ -135,16 +131,16 @@ class Parser { this.errors.push({ tag: "expected-expression", - span: this.currentSpan(), + span: this.cursor.currentSpan(), found: this.found(), }); return undefined; } private parseList(): ConcreteSyntax | undefined { - const start = this.index; - const openParen = this.currentSpan(); - this.advance(); + const start = this.cursor.checkpoint(); + const openParen = this.cursor.currentSpan(); + this.cursor.advance(); const values: ConcreteSyntax[] = []; @@ -152,23 +148,23 @@ class Parser { while (true) { this.skipWhitespace(); - const cp = this.peek(); + const cp = this.cursor.peek(); if (cp === CLOSE_PAREN) { - this.advance(); - return ConcreteSyntax.list(values, this.spanFrom(start)); + this.cursor.advance(); + return ConcreteSyntax.list(values, this.cursor.spanFrom(start)); } if (cp === undefined) { this.errors.push({ tag: "expected-close-paren", - span: this.eofSpan(), + span: this.cursor.eofSpan(), openParen, found: this.found(), }); - return ConcreteSyntax.list(values, this.spanFrom(start)); + return ConcreteSyntax.list(values, this.cursor.spanFrom(start)); } - const before = this.index; + const before = this.cursor.checkpoint(); const value = this.parseExpr(); if (value) { values.push(value); @@ -180,14 +176,14 @@ class Parser { } private parseNumber(): ConcreteSyntax { - const start = this.index; + const start = this.cursor.checkpoint(); - while (isDigit(this.peekOrInvalid())) { - this.advance(); + while (isDigit(this.cursor.peek() ?? -1)) { + this.cursor.advance(); } - const span = this.spanFrom(start); - const text = this.slice(span); + const span = this.cursor.spanFrom(start); + const text = this.cursor.slice(span); const value = Number(text); if (!Number.isSafeInteger(value)) { @@ -203,99 +199,57 @@ class Parser { } private parseIdentifier(): ConcreteSyntax { - const start = this.index; - this.advance(); + const start = this.cursor.checkpoint(); + this.cursor.advance(); - while (isIdentifierPart(this.peekOrInvalid())) { - this.advance(); + while (isIdentifierPart(this.cursor.peek() ?? -1)) { + this.cursor.advance(); } - const span = this.spanFrom(start); - return ConcreteSyntax.identifier(this.slice(span), span); + const span = this.cursor.spanFrom(start); + return ConcreteSyntax.identifier(this.cursor.slice(span), span); } private recoverDocument(failedAt: CodePointIndex): void { - if (this.index === failedAt) this.advance(); + if (this.cursor.current() === failedAt) this.cursor.advance(); - while (!this.isAtEnd()) { - const cp = this.peek(); + while (!this.cursor.isAtEnd()) { + const cp = this.cursor.peek(); if (cp === CLOSE_PAREN) { this.errors.push({ tag: "unexpected-close-paren", - span: this.currentSpan(), + span: this.cursor.currentSpan(), }); - this.advance(); + this.cursor.advance(); return; } if (isExpressionStart(cp)) return; - this.advance(); + this.cursor.advance(); } } private recoverList(failedAt: CodePointIndex): void { - if (this.index === failedAt) this.advance(); + if (this.cursor.current() === failedAt) this.cursor.advance(); - while (!this.isAtEnd()) { - const cp = this.peek(); + while (!this.cursor.isAtEnd()) { + const cp = this.cursor.peek(); if (cp === CLOSE_PAREN || isExpressionStart(cp)) return; - this.advance(); + this.cursor.advance(); } } private skipWhitespace(): void { - while (isWhitespace(this.peekOrInvalid())) { - this.advance(); + while (isAsciiWhitespace(this.cursor.peek() ?? -1)) { + this.cursor.advance(); } } - private peek(): CodePoint | undefined { - if (this.index >= this.region.span.end.index) return undefined; - return this.region.codePointAt(this.index); - } - - private peekOrInvalid(): CodePoint { - return this.peek() ?? -1; - } - - private advance(): CodePoint | undefined { - const cp = this.peek(); - if (cp === undefined) return undefined; - this.index += 1; - return cp; - } - - private isAtEnd(): boolean { - return this.index >= this.region.span.end.index; - } - - private spanFrom(start: CodePointIndex): CodePointSpan { - return { start, end: this.index }; - } - - private currentSpan(): CodePointSpan { - const start = this.index; - const end = this.isAtEnd() ? start : start + 1; - return { start, end }; - } - - private eofSpan(): CodePointSpan { - return { start: this.region.span.end.index, end: this.region.span.end.index }; - } - private found(): FoundSyntax { - const cp = this.peek(); - if (cp === undefined) return { tag: "eof", span: this.eofSpan() }; - return { tag: "code-point", value: cp, span: this.currentSpan() }; + const cp = this.cursor.peek(); + if (cp === undefined) return { tag: "eof", span: this.cursor.eofSpan() }; + return { tag: "code-point", value: cp, span: this.cursor.currentSpan() }; } - - private slice(span: CodePointSpan): string { - return this.region.source.sliceByCp(span.start, span.end); - } -} - -function isWhitespace(cp: CodePoint): boolean { - return cp === SPACE || cp === TAB || cp === NEW_LINE || cp === CARRIAGE_RETURN; } function isExpressionStart(cp: CodePoint | undefined): boolean { @@ -303,14 +257,9 @@ function isExpressionStart(cp: CodePoint | undefined): boolean { } function isIdentifierStart(cp: CodePoint): boolean { - return isAsciiLetter(cp) || cp === DASH || cp === UNDERSCORE; + return isAsciiAlpha(cp) || cp === DASH || cp === UNDERSCORE; } function isIdentifierPart(cp: CodePoint): boolean { - return isIdentifierStart(cp) || isDigit(cp); -} - -function isAsciiLetter(cp: CodePoint): boolean { - return (LOWERCASE_A <= cp && cp <= LOWERCASE_Z) - || (UPPERCASE_A <= cp && cp <= UPPERCASE_Z); + return isAsciiAlphanumeric(cp) || cp === DASH || cp === UNDERSCORE; } diff --git a/src/ui/SourceGrid.tsx b/src/ui/SourceGrid.tsx index f560ac3..0e59f60 100644 --- a/src/ui/SourceGrid.tsx +++ b/src/ui/SourceGrid.tsx @@ -5,6 +5,7 @@ import { NEW_LINE, SPACE, TAB, + containsIndex, } from 'source-region'; import type { CodePoint, @@ -123,8 +124,8 @@ function makeSourceGrid(source: SourceText, region: SourceRegion): SourceGridMod for (let lineNo = region.span.start.line; lineNo <= region.span.end.line; lineNo++) { const range = source.getLineRange(lineNo); - const start = Math.max(range.start, region.span.start.index); - const end = Math.min(range.end, region.span.end.index); + const start = Math.max(range.start, region.codePointSpan.start); + const end = Math.min(range.end, region.codePointSpan.end); const cells: SourceGridCell[] = []; for (let index = start; index < end; index++) { @@ -183,8 +184,7 @@ function cellTitle(cell: SourceGridCell, annotations: SourceGridAnnotation[]): s function annotationsForCell(cell: SourceGridCell, annotations: SourceGridAnnotation[]): SourceGridAnnotation[] { return annotations.filter((annotation) => annotation.span.start < annotation.span.end - && annotation.span.start <= cell.index - && cell.index < annotation.span.end + && containsIndex(annotation.span, cell.index) ); }