Compare commits
4 commits
f72575ae54
...
3ec7005198
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
3ec7005198 | ||
|
|
ec6ba36220 | ||
|
|
9c72959cd3 | ||
|
|
85bc9b05e1 |
2 changed files with 171 additions and 13 deletions
|
|
@ -1,5 +1,8 @@
|
||||||
TypeScript library for handling source code strings without having to deal with the intricacies of JS's UTF-16 encoding.
|
TypeScript library for handling source code strings without having to deal with the intricacies of JS's UTF-16 encoding.
|
||||||
|
|
||||||
|
# CodePointString
|
||||||
|
A thin wrapper that stores a string as an array of code points. It does not track newlines or offsets back into the original string.
|
||||||
|
|
||||||
# SourceText
|
# SourceText
|
||||||
A sane, UTF-16-safe string wrapper specifically designed for parsing source code, tracking line numbers, and generating CLI error messages.
|
A sane, UTF-16-safe string wrapper specifically designed for parsing source code, tracking line numbers, and generating CLI error messages.
|
||||||
Think of it as a fat wrapper for a string that understands more about the string, such as its line structure.
|
Think of it as a fat wrapper for a string that understands more about the string, such as its line structure.
|
||||||
|
|
@ -19,6 +22,8 @@ It also allows for Spatial Tracking of various sub-regions within the source. It
|
||||||
- `SourceLocation` is basically a smart 2D coordinate equivalent to `(line, col)` (but also tracks `CodePointIndex`)
|
- `SourceLocation` is basically a smart 2D coordinate equivalent to `(line, col)` (but also tracks `CodePointIndex`)
|
||||||
- `Span` is an interval determined by `start` and `end` SourceLocations
|
- `Span` is an interval determined by `start` and `end` SourceLocations
|
||||||
|
|
||||||
|
# Source Cursor
|
||||||
|
- `SourceCursor` is a mutable cursor over `SourceRegion`. Primarily useful to build parsers on top of `SourceRegion`. It is line-aware.
|
||||||
|
|
||||||
# Rendering CLI Errors
|
# Rendering CLI Errors
|
||||||
Secondary functionality is `function renderSpan(region: SourceRegion, span: Span, contextLines = 1): LineView[]`, which renders spans of source code as follows
|
Secondary functionality is `function renderSpan(region: SourceRegion, span: Span, contextLines = 1): LineView[]`, which renders spans of source code as follows
|
||||||
|
|
|
||||||
175
src/index.ts
175
src/index.ts
|
|
@ -21,13 +21,14 @@ export const DIGIT_9: CodePoint = char('9');
|
||||||
export const DOT: CodePoint = char('.');
|
export const DOT: CodePoint = char('.');
|
||||||
|
|
||||||
// Hex Boundaries
|
// Hex Boundaries
|
||||||
export const LOWERCASE_a: CodePoint = char('a');
|
export const LOWERCASE_A: CodePoint = char('a');
|
||||||
export const UPPERCASE_A: CodePoint = char('A');
|
export const UPPERCASE_A: CodePoint = char('A');
|
||||||
export const LOWERCASE_f: CodePoint = char('f');
|
export const LOWERCASE_F: CodePoint = char('f');
|
||||||
export const UPPERCASE_F: CodePoint = char('F');
|
export const UPPERCASE_F: CodePoint = char('F');
|
||||||
export const LOWERCASE_z: CodePoint = char('z');
|
export const LOWERCASE_Z: CodePoint = char('z');
|
||||||
export const UPPERCASE_Z: CodePoint = char('Z');
|
export const UPPERCASE_Z: CodePoint = char('Z');
|
||||||
|
|
||||||
|
// === Predicates ===
|
||||||
|
|
||||||
export function isBetween(a: CodePoint, x: CodePoint, b: CodePoint): boolean {
|
export function isBetween(a: CodePoint, x: CodePoint, b: CodePoint): boolean {
|
||||||
return a <= x && x <= b;
|
return a <= x && x <= b;
|
||||||
|
|
@ -38,7 +39,7 @@ export function isDigit(x: CodePoint): boolean {
|
||||||
}
|
}
|
||||||
|
|
||||||
export function isAsciiAlpha(x: CodePoint): boolean {
|
export function isAsciiAlpha(x: CodePoint): boolean {
|
||||||
return isBetween(LOWERCASE_a, x, LOWERCASE_z)
|
return isBetween(LOWERCASE_A, x, LOWERCASE_Z)
|
||||||
|| isBetween(UPPERCASE_A, x, UPPERCASE_Z);
|
|| isBetween(UPPERCASE_A, x, UPPERCASE_Z);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -46,6 +47,17 @@ export function isAsciiAlphanumeric(x: CodePoint): boolean {
|
||||||
return isAsciiAlpha(x) || isDigit(x);
|
return isAsciiAlpha(x) || isDigit(x);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export function isAsciiWhitespace(cp: CodePoint): boolean {
|
||||||
|
return cp === SPACE
|
||||||
|
|| cp === TAB
|
||||||
|
|| cp === NEW_LINE
|
||||||
|
|| cp === CARRIAGE_RETURN;
|
||||||
|
}
|
||||||
|
|
||||||
|
export function isAsciiInlineWhitespace(cp: CodePoint): boolean {
|
||||||
|
return cp === SPACE || cp === TAB;
|
||||||
|
}
|
||||||
|
|
||||||
export type CodePointRef = {
|
export type CodePointRef = {
|
||||||
char: CodePoint,
|
char: CodePoint,
|
||||||
offset: StringIndex,
|
offset: StringIndex,
|
||||||
|
|
@ -56,7 +68,51 @@ export type CodePointSpan = {
|
||||||
end: CodePointIndex,
|
end: CodePointIndex,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// === CodePointString ===
|
||||||
|
export class CodePointString {
|
||||||
|
readonly codePoints: readonly CodePoint[];
|
||||||
|
|
||||||
|
constructor(source: string) {
|
||||||
|
const codePointsInternal: CodePoint[] = [];
|
||||||
|
let i = 0;
|
||||||
|
while (i < source.length) {
|
||||||
|
const char = source.codePointAt(i) as CodePoint;
|
||||||
|
codePointsInternal.push(char);
|
||||||
|
|
||||||
|
const size =(char > 0xFFFF ? 2 : 1);
|
||||||
|
i += size;
|
||||||
|
}
|
||||||
|
this.codePoints = Object.freeze(codePointsInternal);
|
||||||
|
}
|
||||||
|
|
||||||
|
static makeFromString(s: string): CodePointString {
|
||||||
|
return new CodePointString(s);
|
||||||
|
}
|
||||||
|
|
||||||
|
codePointAt(index: CodePointIndex): CodePoint {
|
||||||
|
return this.codePoints[index];
|
||||||
|
}
|
||||||
|
|
||||||
|
get length(): CodePointIndex {
|
||||||
|
return this.codePoints.length;
|
||||||
|
}
|
||||||
|
|
||||||
|
toString(): string {
|
||||||
|
let result = "";
|
||||||
|
for (const cp of this.codePoints) {
|
||||||
|
result += String.fromCodePoint(cp);
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// === Source Text ===
|
// === Source Text ===
|
||||||
|
// TODO:
|
||||||
|
// Mark this function as @deprecated and direct callers to `SourceText.makeFromString` instead.
|
||||||
|
export function sourceText(s: string): SourceText {
|
||||||
|
return SourceText.makeFromString(s);
|
||||||
|
}
|
||||||
|
|
||||||
export class SourceText {
|
export class SourceText {
|
||||||
readonly source: string;
|
readonly source: string;
|
||||||
// TODO: Later you can try to change this to two `Uint32Array`s - one for codepoints (each 20 bit but whatever), the other for pointers to original string.
|
// TODO: Later you can try to change this to two `Uint32Array`s - one for codepoints (each 20 bit but whatever), the other for pointers to original string.
|
||||||
|
|
@ -65,6 +121,10 @@ export class SourceText {
|
||||||
// Stores the CodePointIndex where each line begins
|
// Stores the CodePointIndex where each line begins
|
||||||
readonly lineStarts: CodePointIndex[];
|
readonly lineStarts: CodePointIndex[];
|
||||||
|
|
||||||
|
static makeFromString(s: string): SourceText {
|
||||||
|
return new SourceText(s);
|
||||||
|
}
|
||||||
|
|
||||||
constructor(rawSource: string) {
|
constructor(rawSource: string) {
|
||||||
// TODO: This shouldn't really be a concern of the library.
|
// TODO: This shouldn't really be a concern of the library.
|
||||||
// const source = rawSource.normalize('NFC');
|
// const source = rawSource.normalize('NFC');
|
||||||
|
|
@ -228,11 +288,12 @@ export class SourceText {
|
||||||
return this.sliceByCp(startCp, endCp);
|
return this.sliceByCp(startCp, endCp);
|
||||||
}
|
}
|
||||||
|
|
||||||
getLineRange(line: number): { start: CodePointIndex, end: CodePointIndex } {
|
|
||||||
|
tryGetLineRange(line: number): CodePointSpan | undefined {
|
||||||
const lineIndex = line - 1;
|
const lineIndex = line - 1;
|
||||||
|
|
||||||
if (lineIndex < 0 || lineIndex >= this.lineStarts.length) {
|
if (lineIndex < 0 || lineIndex >= this.lineStarts.length) {
|
||||||
// TODO: This is a bit suspicious. Maybe return undefined?
|
return undefined;
|
||||||
return { start: 0, end: 0 };
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const start = this.lineStarts[lineIndex];
|
const start = this.lineStarts[lineIndex];
|
||||||
|
|
@ -240,12 +301,16 @@ export class SourceText {
|
||||||
? this.lineStarts[lineIndex + 1]
|
? this.lineStarts[lineIndex + 1]
|
||||||
: this.#chars.length;
|
: this.#chars.length;
|
||||||
|
|
||||||
return { start, end };
|
return rawSpan(start, end);
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
export function sourceText(s: string): SourceText {
|
getLineRange(line: number): { start: CodePointIndex, end: CodePointIndex } {
|
||||||
return new SourceText(s);
|
const range = this.tryGetLineRange(line);
|
||||||
|
if (range === undefined) {
|
||||||
|
throw new Error(`Line ${line} is out of bounds (line count: ${this.lineCount})`);
|
||||||
|
}
|
||||||
|
return range;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Creates a Span from two SourceLocations.
|
// Creates a Span from two SourceLocations.
|
||||||
|
|
@ -296,6 +361,10 @@ export class SourceRegion {
|
||||||
return span(loc, loc);
|
return span(loc, loc);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
get codePointSpan(): CodePointSpan {
|
||||||
|
return rawSpan(this.span.start.index, this.span.end.index);
|
||||||
|
}
|
||||||
|
|
||||||
*codePoints(): IterableIterator<[CodePointIndex, CodePoint]> {
|
*codePoints(): IterableIterator<[CodePointIndex, CodePoint]> {
|
||||||
const start = this.span.start.index;
|
const start = this.span.start.index;
|
||||||
const end = this.span.end.index;
|
const end = this.span.end.index;
|
||||||
|
|
@ -366,6 +435,90 @@ export type SourceLocation = {
|
||||||
column: number; // 1-based
|
column: number; // 1-based
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export function containsSpan(outer: CodePointSpan, inner: CodePointSpan): boolean {
|
||||||
|
return outer.start <= inner.start && inner.end <= outer.end;
|
||||||
|
}
|
||||||
|
|
||||||
|
export function containsIndex(span: CodePointSpan, index: CodePointIndex): boolean {
|
||||||
|
return span.start <= index && index < span.end;
|
||||||
|
}
|
||||||
|
|
||||||
|
// === Cursor ===
|
||||||
|
|
||||||
|
export class SourceCursor {
|
||||||
|
private index: CodePointIndex;
|
||||||
|
|
||||||
|
constructor(public readonly region: SourceRegion) {
|
||||||
|
this.index = region.span.start.index;
|
||||||
|
}
|
||||||
|
|
||||||
|
current(): CodePointIndex {
|
||||||
|
return this.index;
|
||||||
|
}
|
||||||
|
|
||||||
|
checkpoint(): CodePointIndex {
|
||||||
|
return this.index;
|
||||||
|
}
|
||||||
|
|
||||||
|
restore(index: CodePointIndex) {
|
||||||
|
this.index = index;
|
||||||
|
}
|
||||||
|
|
||||||
|
peek(): CodePoint | undefined {
|
||||||
|
if (this.index >= this.region.span.end.index) return undefined;
|
||||||
|
return this.region.codePointAt(this.index);
|
||||||
|
}
|
||||||
|
|
||||||
|
advance(): CodePoint | undefined {
|
||||||
|
const cp = this.peek();
|
||||||
|
if (cp === undefined) return undefined;
|
||||||
|
this.index += 1;
|
||||||
|
return cp;
|
||||||
|
}
|
||||||
|
|
||||||
|
isAtEnd(): boolean {
|
||||||
|
return this.index >= this.region.span.end.index;
|
||||||
|
}
|
||||||
|
|
||||||
|
spanFrom(start: CodePointIndex): CodePointSpan {
|
||||||
|
return rawSpan(start, this.index);
|
||||||
|
}
|
||||||
|
|
||||||
|
currentSpan(): CodePointSpan {
|
||||||
|
return this.isAtEnd()
|
||||||
|
? pointSpan(this.index)
|
||||||
|
: rawSpan(this.index, this.index + 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
eofSpan(): CodePointSpan {
|
||||||
|
return pointSpan(this.region.span.end.index);
|
||||||
|
}
|
||||||
|
|
||||||
|
slice(span: CodePointSpan): string {
|
||||||
|
return this.region.slice(span);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
location(): SourceLocation {
|
||||||
|
return this.region.source.getLocation(this.index);
|
||||||
|
}
|
||||||
|
|
||||||
|
moveToNextLineStart(): void {
|
||||||
|
const loc = this.region.source.getLocation(this.index);
|
||||||
|
const nextLine = loc.line + 1;
|
||||||
|
|
||||||
|
if (nextLine > this.region.span.end.line) {
|
||||||
|
this.index = this.region.span.end.index;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
const range = this.region.source.getLineRange(nextLine);
|
||||||
|
this.index = Math.min(range.start, this.region.span.end.index);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// === Rendering Utilities ===
|
// === Rendering Utilities ===
|
||||||
|
|
||||||
export type LineView = {
|
export type LineView = {
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue