diff --git a/doc/ideas/Buffer.v3 b/doc/ideas/Buffer.v3 new file mode 100644 index 000000000..110c2197d --- /dev/null +++ b/doc/ideas/Buffer.v3 @@ -0,0 +1,539 @@ +// Represents a buffer over an underlying range of data, offers a number of utilities +// for reading, writing, and advancing through the data, and has configurable +// out-of-bounds behavior. +class Buffer { + var startAddress: u64; // Address of the start of {data}. + def var data: Range; // Complete data. + def var range: Range; // Current range in the data. + def var pos: u32; // Position of the range in the data. + + var onOob = Buffer.defaultOob; // Out-of-bounds behavior. + + // Returns {true} if there is more data to read, or more space to write. + def more() -> bool { + return range.length > 0; + } + // Gets the address of the current position, i.e. adding the {startAddress}. + def posAddress() -> u64 { + return pos + startAddress; + } + // Reset {data} with {new_data}, resetting {pos = 0} and {range = data}. + def resetData(new_data: Range) -> this { + this.data = this.range = data; + this.pos = 0; + } + // Resets the range in this window to start at {new_pos} and have length {new_len}. + // If the new range is out of bounds, it will be truncated to fit into {data}. + def resetRange(new_pos: u32, new_len: u32) -> this { + if (new_pos > data.length) { + pos = u32.!(data.length); + range = null; + return; + } + var remain = data.length - long.!(new_pos); + if (remain < 0) { + pos = new_pos; + range = data[new_pos ...]; + return; + } + pos = new_pos; + range = data[new_pos ..+ new_len]; + } + // Resets the position to {new_pos}. + def resetPos(new_pos: u32) -> this { + if (new_pos > data.length) { + pos = u32.!(data.length); + range = null; + return; + } + pos = new_pos; + range = data[new_pos ...]; + } + // Peek at the contiguous range of data starting at {pos}, i.e. {range[0 ... n]}, but + // do not advance the position. The resulting range aliases {data}. + def peek(n: u32) -> Range { + if (checkOob(n)) return null; // TODO: partial peek? + return range[0 ... n]; + } + // Peek at one element of data, but do not advance the position. + def peekOne() -> T { + var d: T; + if (checkOob(1)) return d; + return range[0]; + } + // View a contiguous range of data starting at {pos}, i.e. {range[0 ... n]} and advance + // the position. The resulting range aliases {data}. + def read(n: u32) -> Range { + if (checkOob(n)) return null; // TODO: partial read? + var result = range[0 ... n]; + range = range[n ...]; + pos += n; + return result; + } + // Read one element of data and advance the position. + def readOne() -> T { + var d: T; + if (checkOob(1)) return d; + var result = range[0]; + range = range[1 ...]; + pos++; + return result; + } + // Skip {n} elements of data and advance the position. + def skip(n: u32) -> this { + if (checkOob(n)) return; + pos += n; + range = data[pos ...]; + } + // Copy a contiguous range of data starting at {pos}, i.e. {range[0 ... n]} and advance + // the position. The result is a fresh copy. + def readCopy(n: u32) -> Array { + if (checkOob(n)) return null; // TODO: partial read? + var result = Array.new(int.!(n)); + for (i < result.length) result[i] = range[i]; + range = range[n ...]; + pos += n; + return result; + } + // Copy a contiguous range of data starting at {pos}, i.e. {range[0 ... n]} into {out} + // and advance the position. + def readInto(out: Range) -> u32 { + var n = u32.!(out.length); + if (checkOob(n)) return 0; // TODO: partial read? + for (i < out.length) out[i] = range[i]; + range = range[n ...]; + pos += n; + return n; + } + // Write a contiguous range of data starting at {pos}, i.e. {range[0 ...n]} elements + // and advance the position. + def write(input: Range) -> u32 { + var n = u32.!(input.length); + if (checkOob(n)) return 0; // TODO: partial write? + for (i < input.length) range[i] = input[i]; + range = range[n ...]; + pos += n; + return n; + } + // Write a single element of data and advance the position. + def writeOne(e: T) -> this { + var n = 1u; + if (checkOob(n)) return; + range[0] = e; + pos++; + } + + //======================================================================================== + // Out-of-bounds behavior. + //======================================================================================== + private def checkOob(n: u32) -> bool { + while (range.length < n) { + var retry = onOob(this, n, u32.!(range.length) - n); + if (!retry) return true; + } + return false; + } + def defaultOob(tried: u32, over: u32) -> bool { + var x = range[range.length]; // intentionally trigger BCE + return false; + } +} + +component Buffers { + // Creates a new buffer with a buffer size of {n} whose behavior is to internally + // grow when overflow (out-of-bounds) occurs. + def newGrowingOutputBuffer(n: u32) -> Buffer { + var w = Buffer.new().resetData(Array.new(int.!(n))); + w.onOob = growBuffer; + return w; + } + // Creates a new buffer with a buffer size of {n} whose behavior is to flush + // the data to {flush} when overflow (out-of-bounds) occurs. + def newFlushingOutputBuffer(n: u32, flush: Range -> R) -> Buffer { + var w = Buffer.new().resetData(Array.new(int.!(n))); + w.onOob = flushBuffer(_, _, _, flush); + return w; + } +} + +def growBuffer(w: Buffer, tried: u32, over: u32) -> bool { + var ppos = w.pos; + var nsize = u64.!(w.data.length) * 2 + tried; + if (nsize > int.max) return false; + var ndata = Array.new(int.!(nsize)); + for (i < w.data.length) ndata[i] = w.data[i]; + w.resetData(ndata).resetPos(ppos); + return true; +} + +def flushBuffer(w: Buffer, tried: u32, over: u32, flush: Range -> R) -> bool { + if (w.pos == 0) return false; + var ppos = w.pos; + flush(w.data[0 ... w.pos]); + var d = w.data, r = w.range; + for (i < r.length) d[i] = r[i]; + if (tried > d.length) { + w.resetRange(0, u32.!(d.length)); + return false; + } + w.resetRange(0, tried); + return true; +} + +class DataReader extends Buffer { + def var ok: bool = true; // true if no error + def var error_pos: int = int.max; // first error position + def var error_msg: string; // error message + + var onEOF = DataReader.setEOFError; // EOF callback + var onError = DataReader.setFirstError; // error callback + + // Read a 4-byte little-endian unsigned integer. + def read_u32() -> u32 { + var range = read(4); + if (range.length != 4) return 0; + return DataReaders.read_range_u32(range); + } + // Read an 8-byte little-endian unsigned integer. + def read_u64() -> u64 { + var range = read(8); + if (range.length != 8) return 0; + return DataReaders.read_range_u64(range); + } + // Read an unsigned 32-bit LEB-encoded integer. + def read_uleb32() -> u32 { + var b = readOne(); + if (b < 0x80) return b; + return read_xleb32(b, false, "LEB out of unsigned 32-bit range"); + } + // Read a signed 32-bit LEB-encoded integer. + def read_sleb32() -> i32 { + var b = readOne(); + if (b < 0x80) return i7.view(b); + return int.view(read_xleb32(b, true, "LEB out of signed 32-bit range")); + } + private def check_leb_ext(b: byte, signed: bool, shift: u3, msg: string) { + if ((b & 0x80) != 0) { + fail("overlong LEB"); + return; + } + if (signed) { + var upper = i7.view(b) >> u3.view(shift - 1); + if (upper != 0 && upper != -1) fail(msg); + } else { + var upper = i7.view(b) >> shift; + if (upper != 0) fail(msg); + } + } + private def read_xleb32(first: byte, signed: bool, msg: string) -> u32 { + var result = u32.view(first) & 0x7Fu, p = pos, shift = 7; + while (p < range.length) { + var b = range[p++]; + result = result | u32.view(b & 0x7F) << u5.view(shift); + shift += 7; + if (shift == 35) { // consumed (special) 5th byte + // check upper bits of last byte and extension + check_leb_ext(b, signed, 4, msg); + resetPos(p); + return result; + } + if (b < 0x80) { // no continuation byte + if (signed) { // perform sign-extension + var rem = u5.view(0 - shift); + result = u32.view(i32.view(result << rem) >> rem); + } + resetPos(p); + return result; + } + } + eof("unterminated LEB"); + return 0; + } + // Read an unsigned 64-bit LEB-encoded integer. + def read_uleb64() -> u64 { + var b = readOne(); + if (b < 0x80) return b; + return read_xleb64(b, false, "LEB out of unsigned 64-bit range"); + } + // Read a signed 64-bit LEB-encoded integer. + def read_sleb64() -> i64 { + var b = readOne(); + if (b < 0x80) return i7.view(b); + return i64.view(read_xleb64(b, true, "LEB out of signed 64-bit range")); + } + private def read_xleb64(first: byte, signed: bool, msg: string) -> u64 { + var result: u64 = u32.view(first) & 0x7Fu, p = pos, shift = 7; + while (p < range.length) { + var b = range[p++]; + result = result | u64.view(b & 0x7F) << u6.view(shift); + shift += 7; + if (shift == 70) { // consumed (special) 10th byte + // check upper bits of last byte and extension + check_leb_ext(b, signed, 1, msg); + resetPos(p); + return result; + } + if (b < 0x80) { // no continuation byte + if (signed) { // perform sign-extension + var rem = u6.view(0 - shift); + result = u64.view(i64.view(result << rem) >> rem); + } + resetPos(p); + return result; + } + } + eof("unterminated LEB"); + return 0; + + } + def eof(msg: string) { + } + def fail(msg: string) { + } + // Set an error on EOF. + def setEOFError(abspos: int, size: int, msg: string) { + onError(this, abspos, msg); + } + // Record the first error for this reader. + def setFirstError(abspos: int, msg: string) { + if (abspos < error_pos) { + error_pos = abspos; + error_msg = msg; + ok = false; + } + } +} + +class DataWriter extends Buffer { + private var max: u32; + + // Write a byte. + def putb(data: int) -> this { + writeOne(byte.view(data)); + } + // Write two bytes. + def putbb(b0: int, b1: int) -> this { + var a = read(2), p = pos; + a[p + 0] = byte.view(b0); + a[p + 1] = byte.view(b1); + pos = p + 2; + } + // Write three bytes. + def putbbb(b0: int, b1: int, b2: int) -> this { + var a = read(3), p = pos; + a[p + 0] = byte.view(b0); + a[p + 1] = byte.view(b1); + a[p + 2] = byte.view(b2); + pos = p + 3; + } + // Write a 16-bit little-endian integer. + def put_b16(data: int) -> this { + var a = read(2), p = pos; + a[p + 0] = byte.view(data); + a[p + 1] = byte.view(data >> 8); + pos = p + 2; + } + // Write a 32-bit little-endian integer. + def put_b32(data: int) -> this { + var a = read(4), p = pos; + a[p + 0] = byte.view(data); + a[p + 1] = byte.view(data >> 8); + a[p + 2] = byte.view(data >> 16); + a[p + 3] = byte.view(data >> 24); + pos = p + 4; + } + // Write a 64-bit little-endian integer. + def put_b64(data: long) -> this { + put_b32(int.view(data)); + put_b32(int.view(data >> 32)); + } + // Write a 16-bit big-endian integer. + def put_b16be(data: int) -> this { + var a = read(2), p = pos; + a[p + 0] = byte.view(data >> 8); + a[p + 1] = byte.view(data); + pos = p + 2; + } + // Write a 32-bit big-endian integer. + def put_b32be(data: int) -> this { + var a = read(4), p = pos; + a[p + 0] = byte.view(data >> 24); + a[p + 1] = byte.view(data >> 16); + a[p + 2] = byte.view(data >> 8); + a[p + 3] = byte.view(data); + pos = p + 4; + } + // Write a 64-bit big-endian integer. + def put_b64be(data: long) -> this { + put_b32be(int.view(data >> 32)); + put_b32be(int.view(data)); + } + // Write a signed 32-bit LEB. + def put_sleb32(data: int) -> this { + if (data >= 0) { + while (data >= 0x40) { + putb(0x80 | (data & 0x7F)); + data = data >> 7; + } + } else { + while ((data >> 6) != -1) { + putb(0x80 | (data & 0x7F)); + data = data >> 7; + } + } + putb(data & 0x7F); + } + // Write a signed 64-bit LEB. + def put_sleb64(data: long) -> this { + if (data >= 0) { + while (data >= 0x40) { + putb(0x80 | int.view(data & 0x7F)); + data = data >> 7; + } + } else { + while ((data >> 6) != -1) { + putb(0x80 | int.view(data & 0x7F)); + data = data >> 7; + } + } + putb(int.view(data & 0x7F)); + } + // Write an unsigned 32-bit LEB. + def put_uleb32(data: u32) -> this { + while (data >= 0x80) { + putb(int.view(0x80u | (data & 0x7F))); + data = data >> 7; + } + putb(int.view(data)); + } + // Write all the bytes of the given array to this writer. + def puta(data: Array) -> this { + if (data == null) return; + write(data); + } + // Write the bytes from the given range in the array to this writer. + def putk(data: Array, start: int, end: int) -> this { + if (data == null) return; + write(data[start ... end]); + } + // Write the bytes from the given range in the array to this writer and return {void}. + def putkv(data: Array, start: int, end: int) { + if (data == null) return; + write(data[start ... end]); + } + // Write the bytes from the given range to this writer. + def putr(data: Range) -> this { + write(data); + } + // Write {n} zero bytes. + def zero(n: u32) -> this { + var a = read(n); + for (i < a.length) a[i] = 0; + } + // Skip 5 bytes for a 32-bit LEB that will be written later. + def skip_leb32() -> u32 { + var oldpos = pos; + skip(5); + return oldpos; + } + // Overwrite a 32-bit LEB at the current position. + def overwrite_uleb32(val: int) -> this { + read(5); + for (i < 4) { + putb(val | 0x80); + val >>= 7; + } + putb(val); + } + // Set the current position to {npos}. + def resetPos(new_pos: u32) -> this { + if (pos > max) max = pos; // remember the maximum pos + if (new_pos > data.length) { + pos = u32.!(data.length); + range = null; + return; + } + pos = new_pos; + range = data[new_pos ...]; + } + // Align the current address to a multiple of {size} bytes. + def align(size: u32) -> this { + var rem = posAddress() & (size - 1); + if (rem > 0) skip(u32.!(size - rem)); + } + // Return the maximum position ever written. + def end() -> u32 { + if (pos > max) max = pos; + return max; + } + // Set the position to be the end. + def atEnd() -> this { + pos = end(); + range = data[pos ...]; + } + // Grow the internal storage of this data writer to the new length. + def grow(nlength: u32) -> this { + if (data == null) data = Array.new(int.!(nlength)); + else if (nlength > data.length) data = Ranges.grow(data, nlength); + } + // Copy this data into a new, appropriately-sized array. + def copy() -> Array { + if (data == null) return []; + return Ranges.copy(data, Array.new(int.!(end()))); + } + // Extract all data written into this writer, leaving it empty. + def extract() -> Range { + if (data == null) return null; + var result = data[0 ... end()]; + data = null; + range = null; + pos = 0; + max = 0; + return result; + } + // Send the data of this writer to the given function, avoiding an intermediate copy. + // Note that it is implementation dependent if {f} is called multiple times, e.g. if + // the internal storage is fragmented. + def send(f: Range -> R) -> R { + return if(data != null, f(data[0 ... end()])); + } + // Clear all bytes to 0 and reset the position and end. + def clear() -> this { + pos = max = 0; + var d = data; + range = d; + for (i < d.length) d[i] = 0; + } +} + +class TextReader extends Buffer { + def var line: int; // current line number + def var column: int; // current column + var skipWs = TextReader.skipWhitespace; + + def readToken(n: u32) -> Token; + def matchString(s: string) -> int; + def matchChar(c: byte) -> int; + def matchMultiple(f: byte -> bool) -> (int, u32); + def requireString(s: string) -> int; + def requireChar(c: byte) -> int; + + def skipWhitespace(); +} + +component DataReaders { + def read_range_u32(r: Range) -> u32; + def read_range_u64(r: Range) -> u64; +} + +component Arrays { +} + +component Ranges { + def dup(r: Range) -> Array; + def copy(src: Range, dst: Array) -> Array; + def grow(array: Range, n: u32) -> Range; +} + +class Token { +} \ No newline at end of file diff --git a/doc/ideas/Window.v3 b/doc/ideas/Window.v3 index 350052f4a..80972de5a 100644 --- a/doc/ideas/Window.v3 +++ b/doc/ideas/Window.v3 @@ -6,8 +6,6 @@ class Window { def var range: Range; // Current range in the data. def var pos: u32; // Position of the range in the data. - var onOob = Window.defaultOob; // Out-of-bounds behavior. - // Returns {true} if there is more data to read, or more space to write. def more() -> bool { return range.length > 0; @@ -22,7 +20,8 @@ class Window { this.pos = 0; } // Resets the range in this window to start at {new_pos} and have length {new_len}. - // If the new range is out of bounds, it will be truncated to fit into {data}. + // If the new range is out of bounds of {data}, it will be truncated to fit into + // {data}. def resetRange(new_pos: u32, new_len: u32) -> this { if (new_pos > data.length) { pos = u32.!(data.length); @@ -38,104 +37,48 @@ class Window { pos = new_pos; range = data[new_pos ..+ new_len]; } - // Resets the position to {new_pos}. + // Resets the position to {new_pos}. If the new position is out of bounds of {data}, + // it will be truncated. def resetPos(new_pos: u32) -> this { if (new_pos > data.length) { pos = u32.!(data.length); range = null; - return; + } else { + pos = new_pos; + range = data[new_pos ...]; } - pos = new_pos; - range = data[new_pos ...]; } // Peek at the contiguous range of data starting at {pos}, i.e. {range[0 ... n]}, but - // do not advance the position. The resulting range aliases {data}. + // do not advance the position. If the current range is smaller than {n}, then return + // only the remaining range. def peek(n: u32) -> Range { - if (checkOob(n)) return null; // TODO: partial peek? + if (range.length < n) return range; return range[0 ... n]; } - // Peek at one element of data, but do not advance the psotion. - def peekOne() -> T { - var d: T; - if (checkOob(1)) return d; - return range[0]; - } // View a contiguous range of data starting at {pos}, i.e. {range[0 ... n]} and advance - // the position. The resulting range aliases {data}. - def read(n: u32) -> Range { - if (checkOob(n)) return null; // TODO: partial read? + // the position. If the current range is smaller than {n}, then return only the remaining + // range. + def advance(n: u32) -> Range { + if (range.length < n) { + var result = range; + pos += u32.!(range.length); + range = null; + return result; + } var result = range[0 ... n]; range = range[n ...]; pos += n; return result; } - // Copy a contiguous range of data starting at {pos}, i.e. {range[0 ... n]} and advance - // the position. The result is a fresh copy. - def readCopy(n: u32) -> Array { - if (checkOob(n)) return null; // TODO: partial read? - var result = Array.new(int.!(n)); - for (i < result.length) result[i] = range[i]; - range = range[n ...]; - pos += n; - return result; - } - // Copy a contiguous range of data starting at {pos}, i.e. {range[0 ... n]} into {out} - // and advance the position. - def readInto(out: Range) -> u32 { - var n = u32.!(out.length); - if (checkOob(n)) return 0; // TODO: partial read? - for (i < out.length) out[i] = range[i]; - range = range[n ...]; - pos += n; - return n; - } - // Write a contiguous range of data starting at {pos}, i.e. {range[0 ...n]} elements - // and advance the position. - def write(input: Range) -> u32 { - var n = u32.!(input.length); - if (checkOob(n)) return 0; // TODO: partial write? - for (i < input.length) range[i] = input[i]; - range = range[n ...]; - pos += n; - return n; - } - // Write a single element of data and advance the position. - def writeOne(e: T) -> this { - var n = 1u; - if (checkOob(n)) return; - range[0] = e; - pos++; - } - - //======================================================================================== - // Out-of-bounds behavior. - //======================================================================================== - private def checkOob(n: u32) -> bool { - while (range.length < n) { - var retry = onOob(this, n, u32.!(range.length) - n); - if (!retry) return true; + // Skip {n} elements of data, advancing the position. If the current range is smaller than + // {n}, advance by its length. + def skip(n: u32) -> this { + if (range.length < n) { + pos += u32.!(range.length); + range = null; + } else { + pos += n; + range = data[n ...]; } - return false; } - - def defaultOob(tried: u32, over: u32) -> bool { - var x = range[range.length]; // trigger BCE - return false; - } -} - -def newGrowingOutputBuffer(n: u32) -> Window { - var w = Window.new().resetData(Array.new(int.!(n))); - w.onOob = growWindow; - return w; -} - -def growWindow(w: Window, tried: u32, over: u32) -> bool { - var ppos = w.pos; - var nsize = u64.!(w.data.length) * 2 + tried; - if (nsize > int.max) return false; - var ndata = Array.new(int.!(nsize)); - for (i < w.data.length) ndata[i] = w.data[i]; - w.resetData(ndata).resetPos(ppos); - return true; }