Compare commits
No commits in common. "3ec70051987a74bcc3e885e59a19536fc9c77772" and "f72575ae54ef53e76662e25b5b5cb26400f09b46" have entirely different histories.
3ec7005198
...
f72575ae54
2 changed files with 12 additions and 170 deletions
|
|
@ -1,8 +1,5 @@
|
||||||
TypeScript library for handling source code strings without having to deal with intricacies of JS's UTF16 encoding.
|
TypeScript library for handling source code strings without having to deal with intricacies of JS's UTF16 encoding.
|
||||||
|
|
||||||
# CodePointString
|
|
||||||
A wrapper for a string that's just an array of codepoints. There's no newline or offset tracking to the original string.
|
|
||||||
|
|
||||||
# SourceText
|
# SourceText
|
||||||
A sane, UTF-16-safe string wrapper specifically designed for parsing source code, tracking line numbers, and generating CLI error messages.
|
A sane, UTF-16-safe string wrapper specifically designed for parsing source code, tracking line numbers, and generating CLI error messages.
|
||||||
Think of it as a fat wrapper for a string that understand more info about the string like line structure.
|
Think of it as a fat wrapper for a string that understand more info about the string like line structure.
|
||||||
|
|
@ -22,8 +19,6 @@ It also allows for Spatial Tracking or various sub-regions within the source. It
|
||||||
- `SourceLocation` is basically a smart 2D coordinate equivalent to `(line, col)` (but also tracks `CodePointIndex`)
|
- `SourceLocation` is basically a smart 2D coordinate equivalent to `(line, col)` (but also tracks `CodePointIndex`)
|
||||||
- `Span` an interval determined by `start` and `end` SourceLocations
|
- `Span` an interval determined by `start` and `end` SourceLocations
|
||||||
|
|
||||||
# Source Cursor
|
|
||||||
- `SourceCursor` is a mutable cursor over `SourceRegion`. Primarily useful to build parsers on top of `SourceRegion`. It is line-aware.
|
|
||||||
|
|
||||||
# Rendering CLI Errors
|
# Rendering CLI Errors
|
||||||
Secondary functionality is `function renderSpan(region: SourceRegion, span: Span, contextLines = 1): LineView[]` which is able to render spans of source-code as follows
|
Secondary functionality is `function renderSpan(region: SourceRegion, span: Span, contextLines = 1): LineView[]` which is able to render spans of source-code as follows
|
||||||
|
|
|
||||||
175
src/index.ts
175
src/index.ts
|
|
@ -21,14 +21,13 @@ export const DIGIT_9: CodePoint = char('9');
|
||||||
export const DOT: CodePoint = char('.');
|
export const DOT: CodePoint = char('.');
|
||||||
|
|
||||||
// Hex Boundaries
|
// Hex Boundaries
|
||||||
export const LOWERCASE_A: CodePoint = char('a');
|
export const LOWERCASE_a: CodePoint = char('a');
|
||||||
export const UPPERCASE_A: CodePoint = char('A');
|
export const UPPERCASE_A: CodePoint = char('A');
|
||||||
export const LOWERCASE_F: CodePoint = char('f');
|
export const LOWERCASE_f: CodePoint = char('f');
|
||||||
export const UPPERCASE_F: CodePoint = char('F');
|
export const UPPERCASE_F: CodePoint = char('F');
|
||||||
export const LOWERCASE_Z: CodePoint = char('z');
|
export const LOWERCASE_z: CodePoint = char('z');
|
||||||
export const UPPERCASE_Z: CodePoint = char('Z');
|
export const UPPERCASE_Z: CodePoint = char('Z');
|
||||||
|
|
||||||
// === Predicates ===
|
|
||||||
|
|
||||||
export function isBetween(a: CodePoint, x: CodePoint, b: CodePoint): boolean {
|
export function isBetween(a: CodePoint, x: CodePoint, b: CodePoint): boolean {
|
||||||
return a <= x && x <= b;
|
return a <= x && x <= b;
|
||||||
|
|
@ -39,7 +38,7 @@ export function isDigit(x: CodePoint): boolean {
|
||||||
}
|
}
|
||||||
|
|
||||||
export function isAsciiAlpha(x: CodePoint): boolean {
|
export function isAsciiAlpha(x: CodePoint): boolean {
|
||||||
return isBetween(LOWERCASE_A, x, LOWERCASE_Z)
|
return isBetween(LOWERCASE_a, x, LOWERCASE_z)
|
||||||
|| isBetween(UPPERCASE_A, x, UPPERCASE_Z);
|
|| isBetween(UPPERCASE_A, x, UPPERCASE_Z);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -47,17 +46,6 @@ export function isAsciiAlphanumeric(x: CodePoint): boolean {
|
||||||
return isAsciiAlpha(x) || isDigit(x);
|
return isAsciiAlpha(x) || isDigit(x);
|
||||||
}
|
}
|
||||||
|
|
||||||
export function isAsciiWhitespace(cp: CodePoint): boolean {
|
|
||||||
return cp === SPACE
|
|
||||||
|| cp === TAB
|
|
||||||
|| cp === NEW_LINE
|
|
||||||
|| cp === CARRIAGE_RETURN;
|
|
||||||
}
|
|
||||||
|
|
||||||
export function isAsciiInlineWhitespace(cp: CodePoint): boolean {
|
|
||||||
return cp === SPACE || cp === TAB;
|
|
||||||
}
|
|
||||||
|
|
||||||
export type CodePointRef = {
|
export type CodePointRef = {
|
||||||
char: CodePoint,
|
char: CodePoint,
|
||||||
offset: StringIndex,
|
offset: StringIndex,
|
||||||
|
|
@ -68,51 +56,7 @@ export type CodePointSpan = {
|
||||||
end: CodePointIndex,
|
end: CodePointIndex,
|
||||||
}
|
}
|
||||||
|
|
||||||
// === CodePointString ===
|
|
||||||
export class CodePointString {
|
|
||||||
readonly codePoints: readonly CodePoint[];
|
|
||||||
|
|
||||||
constructor(source: string) {
|
|
||||||
const codePointsInternal: CodePoint[] = [];
|
|
||||||
let i = 0;
|
|
||||||
while (i < source.length) {
|
|
||||||
const char = source.codePointAt(i) as CodePoint;
|
|
||||||
codePointsInternal.push(char);
|
|
||||||
|
|
||||||
const size =(char > 0xFFFF ? 2 : 1);
|
|
||||||
i += size;
|
|
||||||
}
|
|
||||||
this.codePoints = Object.freeze(codePointsInternal);
|
|
||||||
}
|
|
||||||
|
|
||||||
static makeFromString(s: string): CodePointString {
|
|
||||||
return new CodePointString(s);
|
|
||||||
}
|
|
||||||
|
|
||||||
codePointAt(index: CodePointIndex): CodePoint {
|
|
||||||
return this.codePoints[index];
|
|
||||||
}
|
|
||||||
|
|
||||||
get length(): CodePointIndex {
|
|
||||||
return this.codePoints.length;
|
|
||||||
}
|
|
||||||
|
|
||||||
toString(): string {
|
|
||||||
let result = "";
|
|
||||||
for (const cp of this.codePoints) {
|
|
||||||
result += String.fromCodePoint(cp);
|
|
||||||
}
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// === Source Text ===
|
// === Source Text ===
|
||||||
// TODO:
|
|
||||||
// @deprecated and say to use `SourceText.makeFromString` instead.
|
|
||||||
export function sourceText(s: string): SourceText {
|
|
||||||
return SourceText.makeFromString(s);
|
|
||||||
}
|
|
||||||
|
|
||||||
export class SourceText {
|
export class SourceText {
|
||||||
readonly source: string;
|
readonly source: string;
|
||||||
// TODO: Later you can try to change this to two `Uint32Array`s - one for codepoints (each 20 bit but whatever), the other for pointers to original string.
|
// TODO: Later you can try to change this to two `Uint32Array`s - one for codepoints (each 20 bit but whatever), the other for pointers to original string.
|
||||||
|
|
@ -121,10 +65,6 @@ export class SourceText {
|
||||||
// Stores the CodePointIndex where each line begins
|
// Stores the CodePointIndex where each line begins
|
||||||
readonly lineStarts: CodePointIndex[];
|
readonly lineStarts: CodePointIndex[];
|
||||||
|
|
||||||
static makeFromString(s: string): SourceText {
|
|
||||||
return new SourceText(s);
|
|
||||||
}
|
|
||||||
|
|
||||||
constructor(rawSource: string) {
|
constructor(rawSource: string) {
|
||||||
// TODO: This shouldn't really be a concern of the library.
|
// TODO: This shouldn't really be a concern of the library.
|
||||||
// const source = rawSource.normalize('NFC');
|
// const source = rawSource.normalize('NFC');
|
||||||
|
|
@ -288,12 +228,11 @@ export class SourceText {
|
||||||
return this.sliceByCp(startCp, endCp);
|
return this.sliceByCp(startCp, endCp);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
getLineRange(line: number): { start: CodePointIndex, end: CodePointIndex } {
|
||||||
tryGetLineRange(line: number): CodePointSpan | undefined {
|
|
||||||
const lineIndex = line - 1;
|
const lineIndex = line - 1;
|
||||||
|
|
||||||
if (lineIndex < 0 || lineIndex >= this.lineStarts.length) {
|
if (lineIndex < 0 || lineIndex >= this.lineStarts.length) {
|
||||||
return undefined;
|
// TODO: This is a bit suspicious. Maybe return undefined?
|
||||||
|
return { start: 0, end: 0 };
|
||||||
}
|
}
|
||||||
|
|
||||||
const start = this.lineStarts[lineIndex];
|
const start = this.lineStarts[lineIndex];
|
||||||
|
|
@ -301,16 +240,12 @@ export class SourceText {
|
||||||
? this.lineStarts[lineIndex + 1]
|
? this.lineStarts[lineIndex + 1]
|
||||||
: this.#chars.length;
|
: this.#chars.length;
|
||||||
|
|
||||||
return rawSpan(start, end);
|
return { start, end };
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
getLineRange(line: number): { start: CodePointIndex, end: CodePointIndex } {
|
export function sourceText(s: string): SourceText {
|
||||||
const range = this.tryGetLineRange(line);
|
return new SourceText(s);
|
||||||
if (range === undefined) {
|
|
||||||
throw new Error(`Line ${line} is out of bounds (line count: ${this.lineCount})`);
|
|
||||||
}
|
|
||||||
return range;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Creates a Span from two SourceLocations.
|
// Creates a Span from two SourceLocations.
|
||||||
|
|
@ -361,10 +296,6 @@ export class SourceRegion {
|
||||||
return span(loc, loc);
|
return span(loc, loc);
|
||||||
}
|
}
|
||||||
|
|
||||||
get codePointSpan(): CodePointSpan {
|
|
||||||
return rawSpan(this.span.start.index, this.span.end.index);
|
|
||||||
}
|
|
||||||
|
|
||||||
*codePoints(): IterableIterator<[CodePointIndex, CodePoint]> {
|
*codePoints(): IterableIterator<[CodePointIndex, CodePoint]> {
|
||||||
const start = this.span.start.index;
|
const start = this.span.start.index;
|
||||||
const end = this.span.end.index;
|
const end = this.span.end.index;
|
||||||
|
|
@ -435,90 +366,6 @@ export type SourceLocation = {
|
||||||
column: number; // 1-based
|
column: number; // 1-based
|
||||||
}
|
}
|
||||||
|
|
||||||
export function containsSpan(outer: CodePointSpan, inner: CodePointSpan): boolean {
|
|
||||||
return outer.start <= inner.start && inner.end <= outer.end;
|
|
||||||
}
|
|
||||||
|
|
||||||
export function containsIndex(span: CodePointSpan, index: CodePointIndex): boolean {
|
|
||||||
return span.start <= index && index < span.end;
|
|
||||||
}
|
|
||||||
|
|
||||||
// === Cursor ===
|
|
||||||
|
|
||||||
export class SourceCursor {
|
|
||||||
private index: CodePointIndex;
|
|
||||||
|
|
||||||
constructor(public readonly region: SourceRegion) {
|
|
||||||
this.index = region.span.start.index;
|
|
||||||
}
|
|
||||||
|
|
||||||
current(): CodePointIndex {
|
|
||||||
return this.index;
|
|
||||||
}
|
|
||||||
|
|
||||||
checkpoint(): CodePointIndex {
|
|
||||||
return this.index;
|
|
||||||
}
|
|
||||||
|
|
||||||
restore(index: CodePointIndex) {
|
|
||||||
this.index = index;
|
|
||||||
}
|
|
||||||
|
|
||||||
peek(): CodePoint | undefined {
|
|
||||||
if (this.index >= this.region.span.end.index) return undefined;
|
|
||||||
return this.region.codePointAt(this.index);
|
|
||||||
}
|
|
||||||
|
|
||||||
advance(): CodePoint | undefined {
|
|
||||||
const cp = this.peek();
|
|
||||||
if (cp === undefined) return undefined;
|
|
||||||
this.index += 1;
|
|
||||||
return cp;
|
|
||||||
}
|
|
||||||
|
|
||||||
isAtEnd(): boolean {
|
|
||||||
return this.index >= this.region.span.end.index;
|
|
||||||
}
|
|
||||||
|
|
||||||
spanFrom(start: CodePointIndex): CodePointSpan {
|
|
||||||
return rawSpan(start, this.index);
|
|
||||||
}
|
|
||||||
|
|
||||||
currentSpan(): CodePointSpan {
|
|
||||||
return this.isAtEnd()
|
|
||||||
? pointSpan(this.index)
|
|
||||||
: rawSpan(this.index, this.index + 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
eofSpan(): CodePointSpan {
|
|
||||||
return pointSpan(this.region.span.end.index);
|
|
||||||
}
|
|
||||||
|
|
||||||
slice(span: CodePointSpan): string {
|
|
||||||
return this.region.slice(span);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
location(): SourceLocation {
|
|
||||||
return this.region.source.getLocation(this.index);
|
|
||||||
}
|
|
||||||
|
|
||||||
moveToNextLineStart(): void {
|
|
||||||
const loc = this.region.source.getLocation(this.index);
|
|
||||||
const nextLine = loc.line + 1;
|
|
||||||
|
|
||||||
if (nextLine > this.region.span.end.line) {
|
|
||||||
this.index = this.region.span.end.index;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
const range = this.region.source.getLineRange(nextLine);
|
|
||||||
this.index = Math.min(range.start, this.region.span.end.index);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// === Rendering Utilities ===
|
// === Rendering Utilities ===
|
||||||
|
|
||||||
export type LineView = {
|
export type LineView = {
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue