Last active
May 22, 2022 05:02
-
-
Save Momijiichigo/a04c6546c8d168b74f1bbd38531a259b to your computer and use it in GitHub Desktop.
MyString
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * A string stored as UTF-8 bytes, indexed by Unicode code point.
 *
 * Slicing (`substring`, `charAt`) returns a new `MString` that shares the
 * parent's byte buffer and character index; only `add` allocates and copies.
 * Instances are never mutated after construction, which is what makes the
 * sharing safe.
 */
class MString {
  private static encoder = new TextEncoder()
  private static decoder = new TextDecoder()
  private static DEFAULT_BUFFER = new ArrayBuffer(0)
  private static DEFAULT_VIEW = new DataView(MString.DEFAULT_BUFFER)
  // An empty string still has one border (byte offset 0) so that
  // zero-length ranges on it resolve to a valid offset.
  private static DEFAULT_BORDER_INDEXES: number[] = [0]

  /**
   * Memory buffer where raw string byte data are stored.
   * It is sized exactly to the encoded bytes (no padding, no terminator).
   */
  private buffer: ArrayBuffer = MString.DEFAULT_BUFFER

  /**
   * Window over `this.buffer` covering exactly the bytes of this string
   * (the whole buffer for a root string, a sub-range for a slice).
   */
  private view: DataView = MString.DEFAULT_VIEW

  /**
   * Each UTF-8 character occupies a different number of bytes, so this array
   * maps
   * - key: character index within the ROOT string that owns the buffer
   * - value: byte offset of that character from the beginning of the buffer
   *
   * It has one extra trailing entry equal to the total byte length, so the
   * end offset of the last character can be looked up too.
   */
  private border_indexes: number[] = MString.DEFAULT_BORDER_INDEXES

  /** Index into `border_indexes` of this string's first character. */
  private start_char_index = 0

  /** Index into `border_indexes` one past this string's last character. */
  private end_char_index = 0

  /**
   * @param value text to store; omitted or empty yields the empty string.
   */
  constructor(value?: string) {
    if (value) {
      // UTF-8 needs at most 3 bytes per UTF-16 code unit, so this scratch
      // buffer can never truncate the encoded output.
      const scratch = new ArrayBuffer(value.length * 3)
      const written =
        MString.encoder.encodeInto(value, new Uint8Array(scratch)).written ?? 0
      // Keep only the bytes actually written so no padding leaks into
      // `toString()` or `add()`.
      this.initializeData(scratch.slice(0, written))
    }
  }

  /**
   * Number of bytes in the UTF-8 sequence introduced by `lead`.
   * https://www.unicode.org/versions/Unicode9.0.0/ch03.pdf#page=54
   */
  private static sequenceLength(lead: number): number {
    if (lead >> 7 === 0) return 1
    if (lead >> 5 === 0b110) return 2
    if (lead >> 4 === 0b1110) return 3
    return 4
  }

  /** Clamps `value` to an integer in `[0, max]`; `NaN` becomes 0. */
  private static clampIndex(value: number, max: number): number {
    return Math.min(Math.max(Math.trunc(value) || 0, 0), max)
  }

  /** Byte-array window over exactly the bytes `view` exposes (no copy). */
  private static bytesOf(view: DataView): Uint8Array {
    return new Uint8Array(view.buffer, view.byteOffset, view.byteLength)
  }

  /**
   * Takes ownership of `buffer` (which must hold exactly the string's UTF-8
   * bytes) and builds the character-to-byte-offset index by scanning it.
   */
  private initializeData(buffer: ArrayBuffer) {
    const view = new DataView(buffer)
    const borders = [0]
    let offset = 0
    while (offset < view.byteLength) {
      const size = MString.sequenceLength(view.getUint8(offset))
      // Clamp so a truncated trailing sequence cannot push a border past
      // the end of the buffer.
      offset = Math.min(offset + size, view.byteLength)
      borders.push(offset)
    }
    this.buffer = buffer
    this.view = view
    this.border_indexes = borders
    this.start_char_index = 0
    this.end_char_index = borders.length - 1
  }

  /**
   * Uses `buffer` and the `border_indexes` dictionary from another instance
   * instead of creating new ones.
   */
  private setDataRef(
    buffer: ArrayBuffer,
    view: DataView,
    border_indexes: number[],
    start_char_index: number,
    end_char_index: number
  ) {
    this.buffer = buffer
    this.view = view
    this.border_indexes = border_indexes
    this.start_char_index = start_char_index
    this.end_char_index = end_char_index
  }

  /**
   * Byte offset (from the start of the buffer) of the character at
   * `charIndex`, where `charIndex` is relative to this string.
   *
   * @throws RangeError if the index has no entry in the border table.
   */
  private getByteIndex(charIndex: number): number {
    const offset = this.border_indexes[charIndex + this.start_char_index]
    if (offset === undefined) {
      throw new RangeError(`character index ${charIndex} is out of range`)
    }
    return offset
  }

  /**
   * Builds a slice for characters `[start, end)` (relative to this string).
   * Callers must pass `0 <= start <= end <= this.length`.
   */
  private getRefRange(start: number, end: number): MString {
    const ns = new MString()
    const startOffset = this.getByteIndex(start)
    const endOffset = this.getByteIndex(end)
    // passing in the original data buffer
    // no copy of memory data is happening
    ns.setDataRef(
      this.buffer,
      new DataView(this.buffer, startOffset, endOffset - startOffset),
      this.border_indexes,
      start + this.start_char_index,
      end + this.start_char_index
    )
    return ns
  }

  /** Decodes this string's bytes back into a native string. */
  toString(): string {
    return MString.decoder.decode(this.view)
  }

  /** Number of characters (Unicode code points) in this string. */
  get length(): number {
    return this.end_char_index - this.start_char_index
  }

  /**
   * Returns the characters in `[start, end)` without copying any bytes.
   *
   * Mirrors `String.prototype.substring`: indices are truncated to integers
   * and clamped to `[0, length]`, `end` defaults to `length`, and the two are
   * swapped when `start > end`.
   */
  substring(start: number, end?: number): MString {
    const len = this.length
    const from = MString.clampIndex(start, len)
    // `end` is relative to this string, and an explicit 0 is a real bound.
    const to = end === undefined ? len : MString.clampIndex(end, len)
    return from <= to ? this.getRefRange(from, to) : this.getRefRange(to, from)
  }

  /**
   * Returns the single character at `index` without copying any bytes, or
   * an empty string when `index` is out of range.
   */
  charAt(index: number): MString {
    const at = Math.trunc(index) || 0
    if (at < 0 || at >= this.length) {
      return new MString()
    }
    return this.getRefRange(at, at + 1)
  }

  /**
   * Returns the concatenation of this string and `value`.
   * This allocates a new buffer and copies both operands into it.
   */
  add(value: MString): MString {
    const selfByteLen = this.view.byteLength
    const buffer = new ArrayBuffer(selfByteLen + value.view.byteLength)
    const bytes = new Uint8Array(buffer)
    bytes.set(MString.bytesOf(this.view), 0)
    bytes.set(MString.bytesOf(value.view), selfByteLen)
    const ns = new MString()
    ns.initializeData(buffer)
    return ns
  }
}
// Demo: build a string, print it, print each character on its own line,
// then print it concatenated with its own tail.
const original = new MString("hello 日本語!💩")
console.log(original.toString())

// Walk the string one character at a time, as reported by `length`.
for (let charIndex = 0; charIndex < original.length; charIndex += 1) {
  const single = original.charAt(charIndex)
  console.log(single.toString())
}

{
  // `substring` reuses the buffer that already exists...
  const tail = original.substring(6)
  // ...while `add` creates a string with a new buffer.
  const combined = original.add(tail)
  console.log(combined.toString())
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment