From 64557bf9ab78545c6bdb30154702252b7b5415a7 Mon Sep 17 00:00:00 2001 From: Tim Culverhouse Date: Mon, 22 Jan 2024 10:26:33 -0600 Subject: [PATCH] parser: use a ring buffer to store raw text Signed-off-by: Tim Culverhouse --- src/GraphemeCache.zig | 69 ++++----------------------------------- src/Key.zig | 11 +++++-- src/Tty.zig | 7 +--- src/parser.zig | 34 +++++++++---------- src/vaxis.zig | 6 ---- src/widgets/TextInput.zig | 26 ++++++++++----- 6 files changed, 49 insertions(+), 104 deletions(-) diff --git a/src/GraphemeCache.zig b/src/GraphemeCache.zig index 2d3dc29..ee7b323 100644 --- a/src/GraphemeCache.zig +++ b/src/GraphemeCache.zig @@ -9,70 +9,13 @@ buf: [1024 * 8]u8 = undefined, // the start index of the next grapheme idx: usize = 0, -/// the cache of graphemes. This allows up to 2048 graphemes with 4 codepoints -/// each -grapheme_buf: [1024 * 8 / 4]Grapheme = undefined, - -// index of our next grapheme -g_idx: u21 = 0, - -pub const UNICODE_MAX = 1_114_112; - -const Grapheme = struct { - // codepoint is an index into the internal storage - codepoint: u21, - start: usize, - end: usize, -}; - /// put a slice of bytes in the cache as a grapheme -pub fn put(self: *GraphemeCache, bytes: []const u8) !u21 { - // See if we already have these bytes. It's a likely case that if we get one - // grapheme, we'll get it again. 
So this will save a lot of storage and is - // most likely worth the cost as it's pretty rare - for (self.grapheme_buf) |grapheme| { - const g_bytes = self.buf[grapheme.start..grapheme.end]; - if (std.mem.eql(u8, g_bytes, bytes)) { - return grapheme.codepoint; - } - } - if (self.idx + bytes.len > self.buf.len) return error.OutOfGraphemeBufferMemory; - if (self.g_idx + 1 > self.grapheme_buf.len) return error.OutOfGraphemeMemory; - +pub fn put(self: *GraphemeCache, bytes: []const u8) []u8 { + // reset the idx to 0 if we would overflow + if (self.idx + bytes.len > self.buf.len) self.idx = 0; + defer self.idx += bytes.len; // copy the grapheme to our storage @memcpy(self.buf[self.idx .. self.idx + bytes.len], bytes); - - const g = Grapheme{ - // assign a codepoint that is always outside of valid unicode - .codepoint = self.g_idx + UNICODE_MAX + 1, - .start = self.idx, - .end = self.idx + bytes.len, - }; - self.grapheme_buf[self.g_idx] = g; - self.g_idx += 1; - self.idx += bytes.len; - - return g.codepoint; -} - -/// get the slice of bytes for a given grapheme -pub fn get(self: *GraphemeCache, cp: u21) ![]const u8 { - if (cp < (UNICODE_MAX + 1)) return error.InvalidGraphemeIndex; - const idx: usize = cp - UNICODE_MAX - 1; - if (idx > self.g_idx) return error.InvalidGraphemeIndex; - const g = self.grapheme_buf[idx]; - return self.buf[g.start..g.end]; -} - -test "GraphemeCache: roundtrip" { - var cache: GraphemeCache = .{}; - const cp = try cache.put("abc"); - const bytes = try cache.get(cp); - try testing.expectEqualStrings("abc", bytes); - - const cp_2 = try cache.put("abc"); - try testing.expectEqual(cp, cp_2); - - const cp_3 = try cache.put("def"); - try testing.expectEqual(cp + 1, cp_3); + // return the slice + return self.buf[self.idx .. 
self.idx + bytes.len]; } diff --git a/src/Key.zig b/src/Key.zig index 6bace5a..debc737 100644 --- a/src/Key.zig +++ b/src/Key.zig @@ -14,9 +14,10 @@ pub const Modifiers = packed struct(u8) { /// the unicode codepoint of the key event. codepoint: u21, -/// the text generated from the key event. This will only contain a value if the -/// event generated a multi-codepoint grapheme. If there was only a single -/// codepoint, library users can encode the codepoint directly +/// the text generated from the key event. The underlying slice has a limited +/// lifetime. Vaxis maintains an internal ring buffer to temporarily store text. +/// If the application needs these values longer than the lifetime of the event +/// it must copy the data. text: ?[]const u8 = null, /// the shifted codepoint of this key event. This will only be present if the @@ -36,6 +37,10 @@ pub const escape: u21 = 0x1B; pub const space: u21 = 0x20; pub const backspace: u21 = 0x7F; +// multicodepoint is a key which generated text but cannot be expressed as a +// single codepoint. The value is the maximum unicode codepoint (0x10FFFF) + 2 +pub const multicodepoint: u21 = 1_114_112 + 1; + // kitty encodes these keys directly in the private use area. We reuse those // mappings pub const insert: u21 = 57348; diff --git a/src/Tty.zig b/src/Tty.zig index 6c9b562..6244b33 100644 --- a/src/Tty.zig +++ b/src/Tty.zig @@ -143,12 +143,7 @@ pub fn run( switch (event) { .key_press => |key| { if (@hasField(EventType, "key_press")) { - // HACK: yuck. 
there has to be a better way - var mut_key = key; - if (key.text) |text| { - mut_key.codepoint = try vx.g_cache.put(text); - } - vx.postEvent(.{ .key_press = mut_key }); + vx.postEvent(.{ .key_press = key }); } }, .focus_in => { diff --git a/src/parser.zig b/src/parser.zig index ca5d191..06f4ed3 100644 --- a/src/parser.zig +++ b/src/parser.zig @@ -4,7 +4,6 @@ const Event = @import("event.zig").Event; const Key = @import("Key.zig"); const CodePointIterator = @import("ziglyph").CodePointIterator; const graphemeBreak = @import("ziglyph").graphemeBreak; -const UNICODE_MAX = @import("GraphemeCache.zig").UNICODE_MAX; const log = std.log.scoped(.parser); @@ -88,26 +87,18 @@ pub fn parse(input: []const u8) !Result { var cp = iter.next() orelse return .{ .event = null, .n = 0 }; var code = cp.code; - const g_start = i; i += cp.len - 1; // subtract one for the loop iter var g_state: u3 = 0; while (iter.next()) |next_cp| { if (graphemeBreak(cp.code, next_cp.code, &g_state)) { break; } - code = UNICODE_MAX + 1; + code = Key.multicodepoint; i += next_cp.len; cp = next_cp; } - const text: ?[]const u8 = multi: { - if (code > UNICODE_MAX) { - break :multi input[g_start .. i + 1]; - } else { - break :multi null; - } - }; - break :blk .{ .codepoint = code, .text = text }; + break :blk .{ .codepoint = code, .text = input[start .. 
i + 1] }; }, }; return .{ @@ -366,7 +357,10 @@ pub fn parse(input: []const u8) !Result { test "parse: single xterm keypress" { const input = "a"; const result = try parse(input); - const expected_key: Key = .{ .codepoint = 'a' }; + const expected_key: Key = .{ + .codepoint = 'a', + .text = "a", + }; const expected_event: Event = .{ .key_press = expected_key }; try testing.expectEqual(1, result.n); @@ -376,11 +370,15 @@ test "parse: single xterm keypress" { test "parse: single xterm keypress with more buffer" { const input = "ab"; const result = try parse(input); - const expected_key: Key = .{ .codepoint = 'a' }; + const expected_key: Key = .{ + .codepoint = 'a', + .text = "a", + }; const expected_event: Event = .{ .key_press = expected_key }; try testing.expectEqual(1, result.n); - try testing.expectEqual(expected_event, result.event); + try testing.expectEqualStrings(expected_key.text.?, result.event.?.key_press.text.?); + try testing.expectEqualDeep(expected_event, result.event); } test "parse: xterm escape keypress" { @@ -546,6 +544,7 @@ test "parse: single codepoint" { const result = try parse(input); const expected_key: Key = .{ .codepoint = 0x1F642, + .text = input, }; const expected_event: Event = .{ .key_press = expected_key }; @@ -558,11 +557,12 @@ test "parse: single codepoint with more in buffer" { const result = try parse(input); const expected_key: Key = .{ .codepoint = 0x1F642, + .text = "🙂", }; const expected_event: Event = .{ .key_press = expected_key }; try testing.expectEqual(4, result.n); - try testing.expectEqual(expected_event, result.event); + try testing.expectEqualDeep(expected_event, result.event); } test "parse: multiple codepoint grapheme" { @@ -571,7 +571,7 @@ test "parse: multiple codepoint grapheme" { const input = "👩‍🚀"; const result = try parse(input); const expected_key: Key = .{ - .codepoint = UNICODE_MAX + 1, + .codepoint = Key.multicodepoint, .text = input, }; const expected_event: Event = .{ .key_press = expected_key }; @@ 
-586,7 +586,7 @@ test "parse: multiple codepoint grapheme with more after" { const input = "👩‍🚀abc"; const result = try parse(input); const expected_key: Key = .{ - .codepoint = UNICODE_MAX + 1, + .codepoint = Key.multicodepoint, .text = "👩‍🚀", }; diff --git a/src/vaxis.zig b/src/vaxis.zig index 7f231d1..68bc82b 100644 --- a/src/vaxis.zig +++ b/src/vaxis.zig @@ -82,12 +82,6 @@ pub fn Vaxis(comptime T: type) type { const tpr = @divTrunc(self.render_dur, self.renders); log.info("total renders = {d}", .{self.renders}); log.info("microseconds per render = {d}", .{tpr}); - log.info("cached graphemes n = {d} / {d}, bytes = {d} / {d}", .{ - self.g_cache.g_idx, - self.g_cache.grapheme_buf.len, - self.g_cache.idx, - self.g_cache.buf.len, - }); } } diff --git a/src/widgets/TextInput.zig b/src/widgets/TextInput.zig index 7a6ee9f..3f10803 100644 --- a/src/widgets/TextInput.zig +++ b/src/widgets/TextInput.zig @@ -2,6 +2,8 @@ const std = @import("std"); const Cell = @import("../cell.zig").Cell; const Key = @import("../Key.zig"); const Window = @import("../Window.zig"); +const GraphemeIterator = @import("ziglyph").GraphemeIterator; +const strWidth = @import("ziglyph").display_width.strWidth; const log = std.log.scoped(.text_input); @@ -22,12 +24,12 @@ buffer_idx: usize = 0, pub fn update(self: *TextInput, event: Event) void { switch (event) { .key_press => |key| { + log.info("key : {}", .{key}); + if (key.text) |text| { + @memcpy(self.buffer[self.buffer_idx .. self.buffer_idx + text.len], text); + self.buffer_idx += text.len; + } switch (key.codepoint) { - 0x20...0x7E => { - self.buffer[self.buffer_idx] = @truncate(key.codepoint); - self.buffer_idx += 1; - self.cursor_idx += 1; - }, Key.backspace => { // TODO: this only works at the end of the array. 
Then // again, we don't have any means to move the cursor yet @@ -41,12 +43,18 @@ pub fn update(self: *TextInput, event: Event) void { } pub fn draw(self: *TextInput, win: Window) void { - for (0.., self.buffer[0..self.buffer_idx]) |i, b| { - win.writeCell(i, 0, .{ + const input = self.buffer[0..self.buffer_idx]; + var iter = GraphemeIterator.init(input); + var col: usize = 0; + while (iter.next()) |grapheme| { + const g = grapheme.slice(input); + const w = strWidth(g, .full) catch 1; + win.writeCell(col, 0, .{ .char = .{ - .grapheme = &[_]u8{b}, - .width = 1, + .grapheme = g, + .width = w, }, }); + col += w; } }