Skip to content

Instantly share code, notes, and snippets.

@Momijiichigo
Last active May 22, 2022 05:02
Show Gist options
  • Select an option

  • Save Momijiichigo/a04c6546c8d168b74f1bbd38531a259b to your computer and use it in GitHub Desktop.

Select an option

Save Momijiichigo/a04c6546c8d168b74f1bbd38531a259b to your computer and use it in GitHub Desktop.
MyString
/**
 * A string stored as UTF-8 bytes in an `ArrayBuffer`.
 *
 * Substrings (`substring`, `charAt`) do not copy any bytes: they share the
 * parent's buffer and character-boundary table and only narrow the visible
 * window. Concatenation (`add`) allocates a new buffer.
 *
 * Indices are counted in Unicode code points (one UTF-8 sequence each),
 * not in UTF-16 code units as with the built-in `string`.
 */
class MString {
  private static encoder = new TextEncoder()
  private static decoder = new TextDecoder()
  private static DEFAULT_BUFFER = new ArrayBuffer(0)
  private static DEFAULT_VIEW = new DataView(MString.DEFAULT_BUFFER)
  // Offset 0 is always a boundary, even for the empty string, so that
  // zero-length ranges can be resolved without special-casing.
  private static DEFAULT_BORDER_INDEXES: Array<number> = [0]
  /**
   * Memory buffer where raw string byte data are stored
   */
  private buffer: ArrayBuffer = MString.DEFAULT_BUFFER
  /**
   * The interface that interacts with `this.buffer`
   * - read from buffer
   * - write to buffer
   * - make only a portion of buffer accessible
   */
  private view: DataView = MString.DEFAULT_VIEW
  /**
   * Each `UTF-8` character occupies a different number of bytes.
   *
   * Therefore, this string implementation keeps an array used as a dictionary
   * in which
   * - the key is the character index (relative to the start of the buffer)
   * - the value is the number of bytes from the beginning of the buffer.
   *
   * The array holds one more entry than there are characters: the last entry
   * is the total byte length. It is shared between a string and its
   * substrings and is never mutated after being built.
   */
  private border_indexes = MString.DEFAULT_BORDER_INDEXES
  /** Index into `border_indexes` of this string's first character. */
  private start_char_index = 0
  /** Index into `border_indexes` one past this string's last character. */
  private end_char_index = 0

  /**
   * @param value Initial content. Omitted or empty yields an empty string.
   */
  constructor(value?: string) {
    if (value) {
      // A UTF-16 code unit expands to at most 3 UTF-8 bytes, so `length * 3`
      // is always enough room; anything smaller lets `encodeInto` truncate.
      const scratch = new ArrayBuffer(value.length * 3)
      const { written } = MString.encoder.encodeInto(value, new Uint8Array(scratch))
      // Keep exactly the bytes that were written so no zero padding ends up
      // in the string's content.
      this.initializeData(scratch.slice(0, written))
    }
  }

  /**
   * Number of bytes in the UTF-8 sequence introduced by `lead`.
   * https://www.unicode.org/versions/Unicode9.0.0/ch03.pdf#page=54
   */
  private static sequenceLength(lead: number): number {
    if (lead >> 7 === 0) return 1
    if (lead >> 5 === 0b110) return 2
    if (lead >> 4 === 0b1110) return 3
    if (lead >> 3 === 0b11110) return 4
    // Not a valid lead byte. Buffers built by this class always start each
    // step on a lead byte, so this is only a safeguard that keeps the scan
    // moving forward.
    return 1
  }

  /** Exposes the bytes visible through `view` without copying them. */
  private static bytesOf(view: DataView): Uint8Array {
    return new Uint8Array(view.buffer, view.byteOffset, view.byteLength)
  }

  /**
   * Takes ownership of `buffer` (which must contain only the string's UTF-8
   * bytes) and builds the character-boundary table for it.
   */
  private initializeData(buffer: ArrayBuffer) {
    const view = new DataView(buffer)
    const total = view.byteLength
    const borders = [0]
    let offset = 0
    while (offset < total) {
      // Never step past the end, even if the final sequence is incomplete.
      offset = Math.min(offset + MString.sequenceLength(view.getUint8(offset)), total)
      borders.push(offset)
    }
    this.buffer = buffer
    this.view = view
    this.border_indexes = borders
    this.start_char_index = 0
    this.end_char_index = borders.length - 1
  }

  /**
   * uses `buffer` and `border_indexes` dictionary from another instance
   *
   * instead of creating a new one.
   */
  private setDataRef(
    buffer: ArrayBuffer,
    view: DataView,
    border_indexes: Array<number>,
    start_char_index: number,
    end_char_index: number
  ) {
    this.buffer = buffer
    this.view = view
    this.border_indexes = border_indexes
    this.start_char_index = start_char_index
    this.end_char_index = end_char_index
  }

  /**
   * Byte offset (from the start of the buffer) of the character at
   * `charIndex`, where `charIndex` is relative to this string.
   */
  private getByteIndex(charIndex: number): number {
    const offset = this.border_indexes[charIndex + this.start_char_index]
    if (offset === undefined) {
      throw new RangeError(`character index ${charIndex} is out of range`)
    }
    return offset
  }

  /**
   * Creates a string covering characters `[start, end)` of this one,
   * sharing the underlying buffer.
   *
   * Like `String.prototype.substring`, indices are clamped to
   * `[0, length]`, `NaN` is treated as 0, and the bounds are swapped when
   * `start > end`.
   */
  private getRefRange(start: number, end: number): MString {
    const limit = this.length
    const clamp = (n: number) =>
      Number.isNaN(n) ? 0 : Math.min(Math.max(Math.trunc(n), 0), limit)
    let from = clamp(start)
    let to = clamp(end)
    if (from > to) {
      [from, to] = [to, from]
    }
    const startOffset = this.getByteIndex(from)
    const ns = new MString()
    // passing in the original data buffer
    // no copy of memory data is happening
    ns.setDataRef(
      this.buffer,
      new DataView(this.buffer, startOffset, this.getByteIndex(to) - startOffset),
      this.border_indexes,
      from + this.start_char_index,
      to + this.start_char_index
    )
    return ns
  }

  /** Decodes the visible bytes into a built-in `string`. */
  toString() {
    return MString.decoder.decode(this.view)
  }

  /** Number of characters (code points) in this string. */
  get length() {
    return this.end_char_index - this.start_char_index
  }

  /**
   * Returns characters `[start, end)` without copying data.
   *
   * @param start Index of the first character to include.
   * @param end Index one past the last character; defaults to `length`.
   */
  substring(start: number, end?: number): MString {
    // Compare against `undefined` explicitly: an `end` of 0 is a valid bound.
    return this.getRefRange(start, end === undefined ? this.length : end)
  }

  /**
   * Returns the character at `index` as a one-character string, or an empty
   * string when `index` is out of range.
   */
  charAt(index: number): MString {
    return this.getRefRange(index, index + 1)
  }

  /**
   * Returns the concatenation of this string and `value`.
   * This allocates a new buffer and copies both operands into it.
   */
  add(value: MString) {
    const left = MString.bytesOf(this.view)
    const right = MString.bytesOf(value.view)
    const buffer = new ArrayBuffer(left.byteLength + right.byteLength)
    const joined = new Uint8Array(buffer)
    joined.set(left, 0)
    joined.set(right, left.byteLength)
    const ns = new MString()
    ns.initializeData(buffer)
    return ns
  }
}
// Demo: build a string that mixes 1-, 3- and 4-byte UTF-8 characters.
const original = new MString("hello 日本語!💩")
console.log(original.toString());

// Print one character per line, for every index below the reported length.
for (let charIndex = 0; charIndex < original.length; charIndex += 1) {
  const single = original.charAt(charIndex)
  console.log(single.toString());
}

// `substring` reuses the buffer that already exists;
// `add` then produces a string backed by a new buffer.
const tail = original.substring(6)
const combined = original.add(tail)
console.log(combined.toString());
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment