parser: use a ring buffer to store raw text

Signed-off-by: Tim Culverhouse <tim@timculverhouse.com>
This commit is contained in:
Tim Culverhouse 2024-01-22 10:26:33 -06:00
parent 393279d978
commit 64557bf9ab
6 changed files with 49 additions and 104 deletions

View file

@ -9,70 +9,13 @@ buf: [1024 * 8]u8 = undefined,
// the start index of the next grapheme // the start index of the next grapheme
idx: usize = 0, idx: usize = 0,
/// the cache of graphemes. This allows up to 2048 graphemes with 4 codepoints
/// each
grapheme_buf: [1024 * 8 / 4]Grapheme = undefined,
// index of our next grapheme
g_idx: u21 = 0,
pub const UNICODE_MAX = 1_114_112;
const Grapheme = struct {
// codepoint is an index into the internal storage
codepoint: u21,
start: usize,
end: usize,
};
/// put a slice of bytes in the cache as a grapheme /// put a slice of bytes in the cache as a grapheme
pub fn put(self: *GraphemeCache, bytes: []const u8) !u21 { pub fn put(self: *GraphemeCache, bytes: []const u8) []u8 {
// See if we already have these bytes. It's a likely case that if we get one // reset the idx to 0 if we would overflow
// grapheme, we'll get it again. So this will save a lot of storage and is if (self.idx + bytes.len > self.buf.len) self.idx = 0;
// most likely worth the cost as it's pretty rare defer self.idx += bytes.len;
for (self.grapheme_buf) |grapheme| {
const g_bytes = self.buf[grapheme.start..grapheme.end];
if (std.mem.eql(u8, g_bytes, bytes)) {
return grapheme.codepoint;
}
}
if (self.idx + bytes.len > self.buf.len) return error.OutOfGraphemeBufferMemory;
if (self.g_idx + 1 > self.grapheme_buf.len) return error.OutOfGraphemeMemory;
// copy the grapheme to our storage // copy the grapheme to our storage
@memcpy(self.buf[self.idx .. self.idx + bytes.len], bytes); @memcpy(self.buf[self.idx .. self.idx + bytes.len], bytes);
// return the slice
const g = Grapheme{ return self.buf[self.idx .. self.idx + bytes.len];
// assign a codepoint that is always outside of valid unicode
.codepoint = self.g_idx + UNICODE_MAX + 1,
.start = self.idx,
.end = self.idx + bytes.len,
};
self.grapheme_buf[self.g_idx] = g;
self.g_idx += 1;
self.idx += bytes.len;
return g.codepoint;
}
/// get the slice of bytes for a given grapheme
pub fn get(self: *GraphemeCache, cp: u21) ![]const u8 {
if (cp < (UNICODE_MAX + 1)) return error.InvalidGraphemeIndex;
const idx: usize = cp - UNICODE_MAX - 1;
if (idx > self.g_idx) return error.InvalidGraphemeIndex;
const g = self.grapheme_buf[idx];
return self.buf[g.start..g.end];
}
test "GraphemeCache: roundtrip" {
var cache: GraphemeCache = .{};
const cp = try cache.put("abc");
const bytes = try cache.get(cp);
try testing.expectEqualStrings("abc", bytes);
const cp_2 = try cache.put("abc");
try testing.expectEqual(cp, cp_2);
const cp_3 = try cache.put("def");
try testing.expectEqual(cp + 1, cp_3);
} }

View file

@ -14,9 +14,10 @@ pub const Modifiers = packed struct(u8) {
/// the unicode codepoint of the key event. /// the unicode codepoint of the key event.
codepoint: u21, codepoint: u21,
/// the text generated from the key event. This will only contain a value if the /// the text generated from the key event. The underlying slice has a limited
/// event generated a multi-codepoint grapheme. If there was only a single /// lifetime. Vaxis maintains an internal ring buffer to temporarily store text.
/// codepoint, library users can encode the codepoint directly /// If the application needs these values longer than the lifetime of the event
/// it must copy the data.
text: ?[]const u8 = null, text: ?[]const u8 = null,
/// the shifted codepoint of this key event. This will only be present if the /// the shifted codepoint of this key event. This will only be present if the
@ -36,6 +37,10 @@ pub const escape: u21 = 0x1B;
pub const space: u21 = 0x20; pub const space: u21 = 0x20;
pub const backspace: u21 = 0x7F; pub const backspace: u21 = 0x7F;
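// multicodepoint is a key which generated text but cannot be expressed as a
// single codepoint. The value is deliberately outside the valid Unicode range:
// the largest codepoint is 0x10FFFF (1_114_111), so 1_114_112 + 1 can never
// collide with a real codepoint.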
pub const multicodepoint: u21 = 1_114_112 + 1;
// kitty encodes these keys directly in the private use area. We reuse those // kitty encodes these keys directly in the private use area. We reuse those
// mappings // mappings
pub const insert: u21 = 57348; pub const insert: u21 = 57348;

View file

@ -143,12 +143,7 @@ pub fn run(
switch (event) { switch (event) {
.key_press => |key| { .key_press => |key| {
if (@hasField(EventType, "key_press")) { if (@hasField(EventType, "key_press")) {
// HACK: yuck. there has to be a better way vx.postEvent(.{ .key_press = key });
var mut_key = key;
if (key.text) |text| {
mut_key.codepoint = try vx.g_cache.put(text);
}
vx.postEvent(.{ .key_press = mut_key });
} }
}, },
.focus_in => { .focus_in => {

View file

@ -4,7 +4,6 @@ const Event = @import("event.zig").Event;
const Key = @import("Key.zig"); const Key = @import("Key.zig");
const CodePointIterator = @import("ziglyph").CodePointIterator; const CodePointIterator = @import("ziglyph").CodePointIterator;
const graphemeBreak = @import("ziglyph").graphemeBreak; const graphemeBreak = @import("ziglyph").graphemeBreak;
const UNICODE_MAX = @import("GraphemeCache.zig").UNICODE_MAX;
const log = std.log.scoped(.parser); const log = std.log.scoped(.parser);
@ -88,26 +87,18 @@ pub fn parse(input: []const u8) !Result {
var cp = iter.next() orelse return .{ .event = null, .n = 0 }; var cp = iter.next() orelse return .{ .event = null, .n = 0 };
var code = cp.code; var code = cp.code;
const g_start = i;
i += cp.len - 1; // subtract one for the loop iter i += cp.len - 1; // subtract one for the loop iter
var g_state: u3 = 0; var g_state: u3 = 0;
while (iter.next()) |next_cp| { while (iter.next()) |next_cp| {
if (graphemeBreak(cp.code, next_cp.code, &g_state)) { if (graphemeBreak(cp.code, next_cp.code, &g_state)) {
break; break;
} }
code = UNICODE_MAX + 1; code = Key.multicodepoint;
i += next_cp.len; i += next_cp.len;
cp = next_cp; cp = next_cp;
} }
const text: ?[]const u8 = multi: {
if (code > UNICODE_MAX) {
break :multi input[g_start .. i + 1];
} else {
break :multi null;
}
};
break :blk .{ .codepoint = code, .text = text }; break :blk .{ .codepoint = code, .text = input[start .. i + 1] };
}, },
}; };
return .{ return .{
@ -366,7 +357,10 @@ pub fn parse(input: []const u8) !Result {
test "parse: single xterm keypress" { test "parse: single xterm keypress" {
const input = "a"; const input = "a";
const result = try parse(input); const result = try parse(input);
const expected_key: Key = .{ .codepoint = 'a' }; const expected_key: Key = .{
.codepoint = 'a',
.text = "a",
};
const expected_event: Event = .{ .key_press = expected_key }; const expected_event: Event = .{ .key_press = expected_key };
try testing.expectEqual(1, result.n); try testing.expectEqual(1, result.n);
@ -376,11 +370,15 @@ test "parse: single xterm keypress" {
test "parse: single xterm keypress with more buffer" { test "parse: single xterm keypress with more buffer" {
const input = "ab"; const input = "ab";
const result = try parse(input); const result = try parse(input);
const expected_key: Key = .{ .codepoint = 'a' }; const expected_key: Key = .{
.codepoint = 'a',
.text = "a",
};
const expected_event: Event = .{ .key_press = expected_key }; const expected_event: Event = .{ .key_press = expected_key };
try testing.expectEqual(1, result.n); try testing.expectEqual(1, result.n);
try testing.expectEqual(expected_event, result.event); try testing.expectEqualStrings(expected_key.text.?, result.event.?.key_press.text.?);
try testing.expectEqualDeep(expected_event, result.event);
} }
test "parse: xterm escape keypress" { test "parse: xterm escape keypress" {
@ -546,6 +544,7 @@ test "parse: single codepoint" {
const result = try parse(input); const result = try parse(input);
const expected_key: Key = .{ const expected_key: Key = .{
.codepoint = 0x1F642, .codepoint = 0x1F642,
.text = input,
}; };
const expected_event: Event = .{ .key_press = expected_key }; const expected_event: Event = .{ .key_press = expected_key };
@ -558,11 +557,12 @@ test "parse: single codepoint with more in buffer" {
const result = try parse(input); const result = try parse(input);
const expected_key: Key = .{ const expected_key: Key = .{
.codepoint = 0x1F642, .codepoint = 0x1F642,
.text = "🙂",
}; };
const expected_event: Event = .{ .key_press = expected_key }; const expected_event: Event = .{ .key_press = expected_key };
try testing.expectEqual(4, result.n); try testing.expectEqual(4, result.n);
try testing.expectEqual(expected_event, result.event); try testing.expectEqualDeep(expected_event, result.event);
} }
test "parse: multiple codepoint grapheme" { test "parse: multiple codepoint grapheme" {
@ -571,7 +571,7 @@ test "parse: multiple codepoint grapheme" {
const input = "👩‍🚀"; const input = "👩‍🚀";
const result = try parse(input); const result = try parse(input);
const expected_key: Key = .{ const expected_key: Key = .{
.codepoint = UNICODE_MAX + 1, .codepoint = Key.multicodepoint,
.text = input, .text = input,
}; };
const expected_event: Event = .{ .key_press = expected_key }; const expected_event: Event = .{ .key_press = expected_key };
@ -586,7 +586,7 @@ test "parse: multiple codepoint grapheme with more after" {
const input = "👩🚀abc"; const input = "👩🚀abc";
const result = try parse(input); const result = try parse(input);
const expected_key: Key = .{ const expected_key: Key = .{
.codepoint = UNICODE_MAX + 1, .codepoint = Key.multicodepoint,
.text = "👩‍🚀", .text = "👩‍🚀",
}; };

View file

@ -82,12 +82,6 @@ pub fn Vaxis(comptime T: type) type {
const tpr = @divTrunc(self.render_dur, self.renders); const tpr = @divTrunc(self.render_dur, self.renders);
log.info("total renders = {d}", .{self.renders}); log.info("total renders = {d}", .{self.renders});
log.info("microseconds per render = {d}", .{tpr}); log.info("microseconds per render = {d}", .{tpr});
log.info("cached graphemes n = {d} / {d}, bytes = {d} / {d}", .{
self.g_cache.g_idx,
self.g_cache.grapheme_buf.len,
self.g_cache.idx,
self.g_cache.buf.len,
});
} }
} }

View file

@ -2,6 +2,8 @@ const std = @import("std");
const Cell = @import("../cell.zig").Cell; const Cell = @import("../cell.zig").Cell;
const Key = @import("../Key.zig"); const Key = @import("../Key.zig");
const Window = @import("../Window.zig"); const Window = @import("../Window.zig");
const GraphemeIterator = @import("ziglyph").GraphemeIterator;
const strWidth = @import("ziglyph").display_width.strWidth;
const log = std.log.scoped(.text_input); const log = std.log.scoped(.text_input);
@ -22,12 +24,12 @@ buffer_idx: usize = 0,
pub fn update(self: *TextInput, event: Event) void { pub fn update(self: *TextInput, event: Event) void {
switch (event) { switch (event) {
.key_press => |key| { .key_press => |key| {
log.info("key : {}", .{key});
if (key.text) |text| {
@memcpy(self.buffer[self.buffer_idx .. self.buffer_idx + text.len], text);
self.buffer_idx += text.len;
}
switch (key.codepoint) { switch (key.codepoint) {
0x20...0x7E => {
self.buffer[self.buffer_idx] = @truncate(key.codepoint);
self.buffer_idx += 1;
self.cursor_idx += 1;
},
Key.backspace => { Key.backspace => {
// TODO: this only works at the end of the array. Then // TODO: this only works at the end of the array. Then
// again, we don't have any means to move the cursor yet // again, we don't have any means to move the cursor yet
@ -41,12 +43,18 @@ pub fn update(self: *TextInput, event: Event) void {
} }
pub fn draw(self: *TextInput, win: Window) void { pub fn draw(self: *TextInput, win: Window) void {
for (0.., self.buffer[0..self.buffer_idx]) |i, b| { const input = self.buffer[0..self.buffer_idx];
win.writeCell(i, 0, .{ var iter = GraphemeIterator.init(input);
var col: usize = 0;
while (iter.next()) |grapheme| {
const g = grapheme.slice(input);
const w = strWidth(g, .full) catch 1;
win.writeCell(col, 0, .{
.char = .{ .char = .{
.grapheme = &[_]u8{b}, .grapheme = g,
.width = 1, .width = w,
}, },
}); });
col += w;
} }
} }