diff --git a/README.md b/README.md index 5d92fc7..7c9a8fd 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,5 @@ TypeScript library for handling source code strings without having to deal with intricacies of JS's UTF16 encoding. -# CodePointString -A wrapper for a string that's just an array of codepoints. There's no newline or offset tracking to the original string. - # SourceText A sane, UTF-16-safe string wrapper specifically designed for parsing source code, tracking line numbers, and generating CLI error messages. Think of it as a fat wrapper for a string that understand more info about the string like line structure. @@ -22,8 +19,6 @@ It also allows for Spatial Tracking or various sub-regions within the source. It - `SourceLocation` is basically a smart 2D coordinate equivalent to `(line, col)` (but also tracks `CodePointIndex`) - `Span` an interval determined by `start` and `end` SourceLocations -# Source Cursor -- `SourceCursor` is a mutable cursor over `SourceRegion`. Primarily useful to build parsers on top of `SourceRegion`. It is line-aware. # Rendering CLI Errors Secondary functionality is `function renderSpan(region: SourceRegion, span: Span, contextLines = 1): LineView[]` which is able to render spans of source-code as follows diff --git a/src/index.ts b/src/index.ts index 2db4378..82ed23a 100644 --- a/src/index.ts +++ b/src/index.ts @@ -21,14 +21,13 @@ export const DIGIT_9: CodePoint = char('9'); export const DOT: CodePoint = char('.'); // Hex Boundaries -export const LOWERCASE_A: CodePoint = char('a'); +export const LOWERCASE_a: CodePoint = char('a'); export const UPPERCASE_A: CodePoint = char('A'); -export const LOWERCASE_F: CodePoint = char('f'); +export const LOWERCASE_f: CodePoint = char('f'); export const UPPERCASE_F: CodePoint = char('F'); -export const LOWERCASE_Z: CodePoint = char('z'); +export const LOWERCASE_z: CodePoint = char('z'); export const UPPERCASE_Z: CodePoint = char('Z'); -// === Predicates === export function isBetween(a: CodePoint, x: CodePoint, b: CodePoint): boolean { return a <= x && x <= b; @@ -39,7 +38,7 @@ export function isDigit(x: CodePoint): boolean { } export function isAsciiAlpha(x: CodePoint): boolean { - return isBetween(LOWERCASE_A, x, LOWERCASE_Z) + return isBetween(LOWERCASE_a, x, LOWERCASE_z) || isBetween(UPPERCASE_A, x, UPPERCASE_Z); } @@ -47,17 +46,6 @@ export function isAsciiAlphanumeric(x: CodePoint): boolean { return isAsciiAlpha(x) || isDigit(x); } -export function isAsciiWhitespace(cp: CodePoint): boolean { - return cp === SPACE - || cp === TAB - || cp === NEW_LINE - || cp === CARRIAGE_RETURN; -} - -export function isAsciiInlineWhitespace(cp: CodePoint): boolean { - return cp === SPACE || cp === TAB; -} - export type CodePointRef = { char: CodePoint, offset: StringIndex, @@ -68,51 +56,7 @@ export type CodePointSpan = { end: CodePointIndex, } -// === CodePointString === -export class CodePointString { - readonly codePoints: readonly CodePoint[]; - - constructor(source: string) { - const codePointsInternal: CodePoint[] = []; - let i = 0; - while (i < source.length) { - const char = source.codePointAt(i) as CodePoint; - codePointsInternal.push(char); - - const size =(char > 0xFFFF ? 2 : 1); - i += size; - } - this.codePoints = Object.freeze(codePointsInternal); - } - - static makeFromString(s: string): CodePointString { - return new CodePointString(s); - } - - codePointAt(index: CodePointIndex): CodePoint { - return this.codePoints[index]; - } - - get length(): CodePointIndex { - return this.codePoints.length; - } - - toString(): string { - let result = ""; - for (const cp of this.codePoints) { - result += String.fromCodePoint(cp); - } - return result; - } -} - // === Source Text === -// TODO: -// @deprecated and say to use `SourceText.makeFromString` instead. -export function sourceText(s: string): SourceText { - return SourceText.makeFromString(s); -} - export class SourceText { readonly source: string; // TODO: Later you can try to change this to two `Uint32Array`s - one for codepoints (each 20 bit but whatever), the other for pointers to original string. @@ -121,10 +65,6 @@ export class SourceText { // Stores the CodePointIndex where each line begins readonly lineStarts: CodePointIndex[]; - static makeFromString(s: string): SourceText { - return new SourceText(s); - } - constructor(rawSource: string) { // TODO: This shouldn't really be a concern of the library. // const source = rawSource.normalize('NFC'); @@ -288,29 +228,24 @@ export class SourceText { return this.sliceByCp(startCp, endCp); } - - tryGetLineRange(line: number): CodePointSpan | undefined { + getLineRange(line: number): { start: CodePointIndex, end: CodePointIndex } { const lineIndex = line - 1; - if (lineIndex < 0 || lineIndex >= this.lineStarts.length) { - return undefined; + // TODO: This is a bit suspicious. Maybe return undefined? + return { start: 0, end: 0 }; } const start = this.lineStarts[lineIndex]; const end = (lineIndex + 1 < this.lineStarts.length) ? this.lineStarts[lineIndex + 1] : this.#chars.length; - - return rawSpan(start, end); + + return { start, end }; } +} - getLineRange(line: number): { start: CodePointIndex, end: CodePointIndex } { - const range = this.tryGetLineRange(line); - if (range === undefined) { - throw new Error(`Line ${line} is out of bounds (line count: ${this.lineCount})`); - } - return range; - } +export function sourceText(s: string): SourceText { + return new SourceText(s); } // Creates a Span from two SourceLocations. @@ -361,10 +296,6 @@ export class SourceRegion { return span(loc, loc); } - get codePointSpan(): CodePointSpan { - return rawSpan(this.span.start.index, this.span.end.index); - } - *codePoints(): IterableIterator<[CodePointIndex, CodePoint]> { const start = this.span.start.index; const end = this.span.end.index; @@ -435,90 +366,6 @@ export type SourceLocation = { column: number; // 1-based } -export function containsSpan(outer: CodePointSpan, inner: CodePointSpan): boolean { - return outer.start <= inner.start && inner.end <= outer.end; -} - -export function containsIndex(span: CodePointSpan, index: CodePointIndex): boolean { - return span.start <= index && index < span.end; -} - -// === Cursor === - -export class SourceCursor { - private index: CodePointIndex; - - constructor(public readonly region: SourceRegion) { - this.index = region.span.start.index; - } - - current(): CodePointIndex { - return this.index; - } - - checkpoint(): CodePointIndex { - return this.index; - } - - restore(index: CodePointIndex) { - this.index = index; - } - - peek(): CodePoint | undefined { - if (this.index >= this.region.span.end.index) return undefined; - return this.region.codePointAt(this.index); - } - - advance(): CodePoint | undefined { - const cp = this.peek(); - if (cp === undefined) return undefined; - this.index += 1; - return cp; - } - - isAtEnd(): boolean { - return this.index >= this.region.span.end.index; - } - - spanFrom(start: CodePointIndex): CodePointSpan { - return rawSpan(start, this.index); - } - - currentSpan(): CodePointSpan { - return this.isAtEnd() - ? pointSpan(this.index) - : rawSpan(this.index, this.index + 1); - } - - eofSpan(): CodePointSpan { - return pointSpan(this.region.span.end.index); - } - - slice(span: CodePointSpan): string { - return this.region.slice(span); - } - - - location(): SourceLocation { - return this.region.source.getLocation(this.index); - } - - moveToNextLineStart(): void { - const loc = this.region.source.getLocation(this.index); - const nextLine = loc.line + 1; - - if (nextLine > this.region.span.end.line) { - this.index = this.region.span.end.index; - return; - } - - const range = this.region.source.getLineRange(nextLine); - this.index = Math.min(range.start, this.region.span.end.index); - } -} - - - // === Rendering Utilities === export type LineView = {