Compare commits
4 commits
f72575ae54
...
3ec7005198
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
3ec7005198 | ||
|
|
ec6ba36220 | ||
|
|
9c72959cd3 | ||
|
|
85bc9b05e1 |
2 changed files with 171 additions and 13 deletions
|
|
@ -1,5 +1,8 @@
|
|||
TypeScript library for handling source code strings without having to deal with intricacies of JS's UTF16 encoding.
|
||||
|
||||
# CodePointString
|
||||
A wrapper around a string that stores it as a plain array of code points. It does not track newlines or offsets back into the original string.
|
||||
|
||||
# SourceText
|
||||
A sane, UTF-16-safe string wrapper specifically designed for parsing source code, tracking line numbers, and generating CLI error messages.
|
||||
Think of it as a fat wrapper around a string that understands more about the string, such as its line structure.
|
||||
|
|
@ -19,6 +22,8 @@ It also allows for Spatial Tracking or various sub-regions within the source. It
|
|||
- `SourceLocation` is basically a smart 2D coordinate equivalent to `(line, col)` (but also tracks `CodePointIndex`)
|
||||
- `Span` is an interval determined by `start` and `end` `SourceLocation`s
|
||||
|
||||
# Source Cursor
|
||||
- `SourceCursor` is a mutable cursor over `SourceRegion`. Primarily useful to build parsers on top of `SourceRegion`. It is line-aware.
|
||||
|
||||
# Rendering CLI Errors
|
||||
Secondary functionality is provided by `function renderSpan(region: SourceRegion, span: Span, contextLines = 1): LineView[]`, which renders spans of source code as follows:
|
||||
|
|
|
|||
179
src/index.ts
179
src/index.ts
|
|
@ -21,13 +21,14 @@ export const DIGIT_9: CodePoint = char('9');
|
|||
export const DOT: CodePoint = char('.');
|
||||
|
||||
// Hex Boundaries
|
||||
export const LOWERCASE_a: CodePoint = char('a');
|
||||
export const LOWERCASE_A: CodePoint = char('a');
|
||||
export const UPPERCASE_A: CodePoint = char('A');
|
||||
export const LOWERCASE_f: CodePoint = char('f');
|
||||
export const LOWERCASE_F: CodePoint = char('f');
|
||||
export const UPPERCASE_F: CodePoint = char('F');
|
||||
export const LOWERCASE_z: CodePoint = char('z');
|
||||
export const LOWERCASE_Z: CodePoint = char('z');
|
||||
export const UPPERCASE_Z: CodePoint = char('Z');
|
||||
|
||||
// === Predicates ===
|
||||
|
||||
export function isBetween(a: CodePoint, x: CodePoint, b: CodePoint): boolean {
|
||||
return a <= x && x <= b;
|
||||
|
|
@ -38,7 +39,7 @@ export function isDigit(x: CodePoint): boolean {
|
|||
}
|
||||
|
||||
/**
 * Tests whether a code point is an ASCII letter.
 *
 * @param x - The code point to classify.
 * @returns `true` when `x` lies in the inclusive range
 *   `LOWERCASE_A..LOWERCASE_Z` or `UPPERCASE_A..UPPERCASE_Z`.
 */
export function isAsciiAlpha(x: CodePoint): boolean {
    return isBetween(LOWERCASE_A, x, LOWERCASE_Z)
        || isBetween(UPPERCASE_A, x, UPPERCASE_Z);
}
|
||||
|
||||
|
|
@ -46,6 +47,17 @@ export function isAsciiAlphanumeric(x: CodePoint): boolean {
|
|||
return isAsciiAlpha(x) || isDigit(x);
|
||||
}
|
||||
|
||||
/**
 * Tests whether a code point is one of the ASCII whitespace characters
 * recognised by this library.
 *
 * @param cp - The code point to classify.
 * @returns `true` when `cp` equals `SPACE`, `TAB`, `NEW_LINE` or
 *   `CARRIAGE_RETURN`; `false` for every other code point.
 */
export function isAsciiWhitespace(cp: CodePoint): boolean {
    return cp === SPACE
        || cp === TAB
        || cp === NEW_LINE
        || cp === CARRIAGE_RETURN;
}
|
||||
|
||||
/**
 * Tests whether a code point is whitespace that stays on the current line.
 *
 * @param cp - The code point to classify.
 * @returns `true` only when `cp` equals `SPACE` or `TAB`.
 */
export function isAsciiInlineWhitespace(cp: CodePoint): boolean {
    return cp === SPACE || cp === TAB;
}
|
||||
|
||||
export type CodePointRef = {
|
||||
char: CodePoint,
|
||||
offset: StringIndex,
|
||||
|
|
@ -56,7 +68,51 @@ export type CodePointSpan = {
|
|||
end: CodePointIndex,
|
||||
}
|
||||
|
||||
// === CodePointString ===
|
||||
/**
 * An immutable string representation stored as an array of code points,
 * so that indexing and `length` count code points rather than UTF-16 code units.
 */
export class CodePointString {
    /** The decoded code points, frozen in the constructor. */
    readonly codePoints: readonly CodePoint[];

    /**
     * Decodes `source` into its code points.
     *
     * @param source - The UTF-16 string to decode.
     */
    constructor(source: string) {
        const codePointsInternal: CodePoint[] = [];
        // String iteration yields one element per code point: a surrogate pair
        // comes out as a single two-unit string, a lone surrogate as itself.
        for (const ch of source) {
            codePointsInternal.push(ch.codePointAt(0) as CodePoint);
        }
        this.codePoints = Object.freeze(codePointsInternal);
    }

    /**
     * Factory equivalent of `new CodePointString(s)`.
     *
     * @param s - The UTF-16 string to decode.
     * @returns A new `CodePointString` for `s`.
     */
    static makeFromString(s: string): CodePointString {
        return new CodePointString(s);
    }

    /**
     * Reads the code point stored at a code point index.
     *
     * @param index - Zero-based position in `codePoints`.
     * @returns The code point at `index`. No bounds check is performed, so an
     *   out-of-range index yields `undefined` at runtime.
     */
    codePointAt(index: CodePointIndex): CodePoint {
        return this.codePoints[index];
    }

    /** The number of code points in the string. */
    get length(): CodePointIndex {
        return this.codePoints.length;
    }

    /**
     * Re-encodes the code points as a JavaScript string.
     *
     * @returns The concatenation of `String.fromCodePoint` applied to each code point.
     */
    toString(): string {
        let result = "";
        for (const cp of this.codePoints) {
            result += String.fromCodePoint(cp);
        }
        return result;
    }
}
|
||||
|
||||
// === Source Text ===
|
||||
/**
 * Convenience factory that wraps a string in a `SourceText`.
 *
 * @deprecated Use `SourceText.makeFromString` instead.
 * @param s - The raw source string.
 * @returns The result of `SourceText.makeFromString(s)`.
 */
export function sourceText(s: string): SourceText {
    return SourceText.makeFromString(s);
}
|
||||
|
||||
export class SourceText {
|
||||
readonly source: string;
|
||||
// TODO: Later you can try to change this to two `Uint32Array`s - one for codepoints (each 20 bit but whatever), the other for pointers to original string.
|
||||
|
|
@ -65,6 +121,10 @@ export class SourceText {
|
|||
// Stores the CodePointIndex where each line begins
|
||||
readonly lineStarts: CodePointIndex[];
|
||||
|
||||
/**
 * Factory equivalent of `new SourceText(s)`.
 *
 * @param s - The raw source string.
 * @returns A new `SourceText` built from `s`.
 */
static makeFromString(s: string): SourceText {
    return new SourceText(s);
}
|
||||
|
||||
constructor(rawSource: string) {
|
||||
// TODO: This shouldn't really be a concern of the library.
|
||||
// const source = rawSource.normalize('NFC');
|
||||
|
|
@ -228,24 +288,29 @@ export class SourceText {
|
|||
return this.sliceByCp(startCp, endCp);
|
||||
}
|
||||
|
||||
/**
 * Looks up the code point range of a line without throwing.
 *
 * @param line - 1-based line number.
 * @returns A span from the line's entry in `lineStarts` to the next line's
 *   entry (or to `this.#chars.length` for the last line), or `undefined`
 *   when `line` is outside `1..lineStarts.length`.
 */
tryGetLineRange(line: number): CodePointSpan | undefined {
    const lineIndex = line - 1;
    if (lineIndex < 0 || lineIndex >= this.lineStarts.length) {
        return undefined;
    }

    const start = this.lineStarts[lineIndex];
    // A line ends where the next one starts; the last line has no successor.
    const end = (lineIndex + 1 < this.lineStarts.length)
        ? this.lineStarts[lineIndex + 1]
        : this.#chars.length;

    return rawSpan(start, end);
}

/**
 * Looks up the code point range of a line, throwing when it does not exist.
 *
 * @param line - 1-based line number.
 * @returns The range produced by `tryGetLineRange(line)`.
 * @throws Error when `tryGetLineRange(line)` returns `undefined`.
 */
getLineRange(line: number): { start: CodePointIndex, end: CodePointIndex } {
    const range = this.tryGetLineRange(line);
    if (range === undefined) {
        throw new Error(`Line ${line} is out of bounds (line count: ${this.lineCount})`);
    }
    return range;
}
|
||||
}
|
||||
|
||||
// Creates a Span from two SourceLocations.
|
||||
|
|
@ -296,6 +361,10 @@ export class SourceRegion {
|
|||
return span(loc, loc);
|
||||
}
|
||||
|
||||
/**
 * The region's extent as a `CodePointSpan`, built with `rawSpan` from the
 * start and end indices of `this.span`.
 */
get codePointSpan(): CodePointSpan {
    return rawSpan(this.span.start.index, this.span.end.index);
}
|
||||
|
||||
*codePoints(): IterableIterator<[CodePointIndex, CodePoint]> {
|
||||
const start = this.span.start.index;
|
||||
const end = this.span.end.index;
|
||||
|
|
@ -366,6 +435,90 @@ export type SourceLocation = {
|
|||
column: number; // 1-based
|
||||
}
|
||||
|
||||
/**
 * Tests whether one span lies entirely within another.
 *
 * @param outer - The enclosing candidate.
 * @param inner - The span to look for inside `outer`.
 * @returns `true` when `outer.start <= inner.start` and `inner.end <= outer.end`;
 *   a span therefore contains itself.
 */
export function containsSpan(outer: CodePointSpan, inner: CodePointSpan): boolean {
    return outer.start <= inner.start && inner.end <= outer.end;
}
|
||||
|
||||
/**
 * Tests whether a code point index falls inside a span, treating the span as
 * half-open: `start` is included, `end` is excluded.
 *
 * @param span - The span to test against.
 * @param index - The code point index to look for.
 * @returns `true` when `span.start <= index < span.end`; always `false` for a
 *   zero-width span.
 */
export function containsIndex(span: CodePointSpan, index: CodePointIndex): boolean {
    return span.start <= index && index < span.end;
}
|
||||
|
||||
// === Cursor ===
|
||||
|
||||
/**
 * A mutable position inside a `SourceRegion`, starting at the region's first
 * code point and never reading at or past the region's end index.
 */
export class SourceCursor {
    /** Code point index of the next code point to be read. */
    private index: CodePointIndex;

    /**
     * Creates a cursor positioned at the start of `region`.
     *
     * @param region - The region the cursor walks over.
     */
    constructor(public readonly region: SourceRegion) {
        this.index = region.span.start.index;
    }

    /** @returns The cursor's current code point index. */
    current(): CodePointIndex {
        return this.index;
    }

    /**
     * @returns The current code point index, to be passed to `restore` later.
     */
    checkpoint(): CodePointIndex {
        return this.index;
    }

    /**
     * Moves the cursor to a given index. The index is stored as-is, without
     * any bounds check.
     *
     * @param index - The code point index to jump to.
     */
    restore(index: CodePointIndex) {
        this.index = index;
    }

    /**
     * Reads the code point under the cursor without moving.
     *
     * @returns The code point at the cursor, or `undefined` at the end of the region.
     */
    peek(): CodePoint | undefined {
        if (this.isAtEnd()) return undefined;
        return this.region.codePointAt(this.index);
    }

    /**
     * Reads the code point under the cursor and steps past it.
     *
     * @returns The consumed code point, or `undefined` (without moving) at the
     *   end of the region.
     */
    advance(): CodePoint | undefined {
        const cp = this.peek();
        if (cp === undefined) return undefined;
        this.index += 1;
        return cp;
    }

    /** @returns `true` once the cursor has reached the region's end index. */
    isAtEnd(): boolean {
        return this.index >= this.region.span.end.index;
    }

    /**
     * Builds the span between an earlier position and the cursor.
     *
     * @param start - The code point index where the span begins.
     * @returns `rawSpan(start, <current index>)`.
     */
    spanFrom(start: CodePointIndex): CodePointSpan {
        return rawSpan(start, this.index);
    }

    /**
     * @returns A one-code-point span at the cursor, or `pointSpan` of the
     *   current index when the cursor is at the end of the region.
     */
    currentSpan(): CodePointSpan {
        return this.isAtEnd()
            ? pointSpan(this.index)
            : rawSpan(this.index, this.index + 1);
    }

    /** @returns `pointSpan` of the region's end index. */
    eofSpan(): CodePointSpan {
        return pointSpan(this.region.span.end.index);
    }

    /**
     * Extracts text through the underlying region.
     *
     * @param span - The span to extract.
     * @returns Whatever `region.slice(span)` returns.
     */
    slice(span: CodePointSpan): string {
        return this.region.slice(span);
    }

    /** @returns The source location of the cursor's current index. */
    location(): SourceLocation {
        return this.region.source.getLocation(this.index);
    }

    /**
     * Moves the cursor to the start of the line after the current one. If that
     * line lies beyond the region's last line, the cursor goes to the region's
     * end index instead.
     */
    moveToNextLineStart(): void {
        const loc = this.region.source.getLocation(this.index);
        const nextLine = loc.line + 1;

        if (nextLine > this.region.span.end.line) {
            this.index = this.region.span.end.index;
            return;
        }

        const range = this.region.source.getLineRange(nextLine);
        // Clamp so the cursor never lands past the end of the region.
        this.index = Math.min(range.start, this.region.span.end.index);
    }
}
|
||||
|
||||
|
||||
|
||||
// === Rendering Utilities ===
|
||||
|
||||
export type LineView = {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue