window(wrap): refactor word wrap
Commit 74fb130797
"fix: `Window.printSegments` correctly prints all
non-trailing whitespace" fixed some bugs with word wrapping, but also
introduced a bug with printing leading whitespace in a segment.
Refactor the entire word wrap logic to use a custom LineIterator and a
tokenizer that emits whitespace runs as tokens alongside word tokens.
This commit is contained in:
parent
7c03077177
commit
935c5a54bc
1 changed files with 183 additions and 81 deletions
192
src/Window.zig
192
src/Window.zig
|
@ -328,34 +328,31 @@ pub fn print(self: Window, segments: []const Segment, opts: PrintOptions) !Print
|
||||||
.word => {
|
.word => {
|
||||||
var col: usize = opts.col_offset;
|
var col: usize = opts.col_offset;
|
||||||
var overflow: bool = false;
|
var overflow: bool = false;
|
||||||
var soft_wrapped = false;
|
var soft_wrapped: bool = false;
|
||||||
for (segments) |segment| {
|
outer: for (segments) |segment| {
|
||||||
var start: usize = 0;
|
var line_iter: LineIterator = .{ .buf = segment.text };
|
||||||
var tokenizer = std.mem.tokenizeAny(u8, segment.text, "\r\n");
|
while (line_iter.next()) |line| {
|
||||||
while (tokenizer.peek() != null) {
|
defer {
|
||||||
|
// We only set soft_wrapped to false if a segment actually contains a linebreak
|
||||||
|
if (line_iter.has_break) {
|
||||||
soft_wrapped = false;
|
soft_wrapped = false;
|
||||||
const returns = segment.text[start..tokenizer.index];
|
|
||||||
const line = tokenizer.next().?;
|
|
||||||
start = tokenizer.index;
|
|
||||||
var i: usize = 0;
|
|
||||||
while (i < returns.len) : (i += 1) {
|
|
||||||
const b = returns[i];
|
|
||||||
if (b == '\r' and i + 1 < returns.len and returns[i + 1] == '\n') {
|
|
||||||
i += 1;
|
|
||||||
}
|
|
||||||
row += 1;
|
row += 1;
|
||||||
col = 0;
|
col = 0;
|
||||||
}
|
}
|
||||||
var iter = std.mem.tokenizeScalar(u8, line, ' ');
|
}
|
||||||
var ws_start: usize = 0;
|
var iter: WhitespaceTokenizer = .{ .buf = line };
|
||||||
while (iter.peek() != null) {
|
while (iter.next()) |token| {
|
||||||
const whitespace = line[ws_start..iter.index];
|
switch (token) {
|
||||||
const word = iter.next().?;
|
.whitespace => |len| {
|
||||||
ws_start = iter.index;
|
if (soft_wrapped) continue;
|
||||||
var j: usize = 0;
|
for (0..len) |_| {
|
||||||
if (soft_wrapped) soft_wrapped = false else {
|
if (col >= self.width) {
|
||||||
while (j < whitespace.len) : (j += 1) {
|
col = 0;
|
||||||
if (opts.commit) self.writeCell(col, row, .{
|
row += 1;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (opts.commit) {
|
||||||
|
self.writeCell(col, row, .{
|
||||||
.char = .{
|
.char = .{
|
||||||
.grapheme = " ",
|
.grapheme = " ",
|
||||||
.width = 1,
|
.width = 1,
|
||||||
|
@ -363,27 +360,24 @@ pub fn print(self: Window, segments: []const Segment, opts: PrintOptions) !Print
|
||||||
.style = segment.style,
|
.style = segment.style,
|
||||||
.link = segment.link,
|
.link = segment.link,
|
||||||
});
|
});
|
||||||
|
}
|
||||||
col += 1;
|
col += 1;
|
||||||
}
|
}
|
||||||
}
|
},
|
||||||
if (col >= self.width) {
|
.word => |word| {
|
||||||
col = 0;
|
|
||||||
row += 1;
|
|
||||||
soft_wrapped = true;
|
|
||||||
}
|
|
||||||
const width = self.gwidth(word);
|
const width = self.gwidth(word);
|
||||||
if (width + col > self.width and width < self.width) {
|
if (width + col > self.width and width < self.width) {
|
||||||
row += 1;
|
row += 1;
|
||||||
col = 0;
|
col = 0;
|
||||||
}
|
}
|
||||||
if (row >= self.height) {
|
|
||||||
overflow = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
var grapheme_iterator = self.screen.unicode.graphemeIterator(word);
|
var grapheme_iterator = self.screen.unicode.graphemeIterator(word);
|
||||||
while (grapheme_iterator.next()) |grapheme| {
|
while (grapheme_iterator.next()) |grapheme| {
|
||||||
soft_wrapped = false;
|
soft_wrapped = false;
|
||||||
|
if (row >= self.height) {
|
||||||
|
overflow = true;
|
||||||
|
break :outer;
|
||||||
|
}
|
||||||
const s = grapheme.bytes(word);
|
const s = grapheme.bytes(word);
|
||||||
const w = self.gwidth(s);
|
const w = self.gwidth(s);
|
||||||
if (opts.commit) self.writeCell(col, row, .{
|
if (opts.commit) self.writeCell(col, row, .{
|
||||||
|
@ -401,18 +395,8 @@ pub fn print(self: Window, segments: []const Segment, opts: PrintOptions) !Print
|
||||||
soft_wrapped = true;
|
soft_wrapped = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
},
|
||||||
}
|
}
|
||||||
} else {
|
|
||||||
const returns = segment.text[start..tokenizer.index];
|
|
||||||
start = tokenizer.index;
|
|
||||||
var i: usize = 0;
|
|
||||||
while (i < returns.len) : (i += 1) {
|
|
||||||
const b = returns[i];
|
|
||||||
if (b == '\r' and i + 1 < returns.len and returns[i + 1] == '\n') {
|
|
||||||
i += 1;
|
|
||||||
}
|
|
||||||
row += 1;
|
|
||||||
col = 0;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -637,6 +621,24 @@ test "print: word" {
|
||||||
try std.testing.expectEqual(0, result.row);
|
try std.testing.expectEqual(0, result.row);
|
||||||
try std.testing.expectEqual(false, result.overflow);
|
try std.testing.expectEqual(false, result.overflow);
|
||||||
}
|
}
|
||||||
|
{
|
||||||
|
var segments = [_]Segment{
|
||||||
|
.{ .text = " " },
|
||||||
|
};
|
||||||
|
const result = try win.print(&segments, opts);
|
||||||
|
try std.testing.expectEqual(1, result.col);
|
||||||
|
try std.testing.expectEqual(0, result.row);
|
||||||
|
try std.testing.expectEqual(false, result.overflow);
|
||||||
|
}
|
||||||
|
{
|
||||||
|
var segments = [_]Segment{
|
||||||
|
.{ .text = " a" },
|
||||||
|
};
|
||||||
|
const result = try win.print(&segments, opts);
|
||||||
|
try std.testing.expectEqual(2, result.col);
|
||||||
|
try std.testing.expectEqual(0, result.row);
|
||||||
|
try std.testing.expectEqual(false, result.overflow);
|
||||||
|
}
|
||||||
{
|
{
|
||||||
var segments = [_]Segment{
|
var segments = [_]Segment{
|
||||||
.{ .text = "a b" },
|
.{ .text = "a b" },
|
||||||
|
@ -750,4 +752,104 @@ test "print: word" {
|
||||||
try std.testing.expectEqual(1, result.row);
|
try std.testing.expectEqual(1, result.row);
|
||||||
try std.testing.expectEqual(false, result.overflow);
|
try std.testing.expectEqual(false, result.overflow);
|
||||||
}
|
}
|
||||||
|
{
|
||||||
|
var segments = [_]Segment{
|
||||||
|
.{ .text = "note" },
|
||||||
|
.{ .text = " now" },
|
||||||
|
};
|
||||||
|
const result = try win.print(&segments, opts);
|
||||||
|
try std.testing.expectEqual(3, result.col);
|
||||||
|
try std.testing.expectEqual(1, result.row);
|
||||||
|
try std.testing.expectEqual(false, result.overflow);
|
||||||
}
|
}
|
||||||
|
{
|
||||||
|
var segments = [_]Segment{
|
||||||
|
.{ .text = "note " },
|
||||||
|
.{ .text = "now" },
|
||||||
|
};
|
||||||
|
const result = try win.print(&segments, opts);
|
||||||
|
try std.testing.expectEqual(3, result.col);
|
||||||
|
try std.testing.expectEqual(1, result.row);
|
||||||
|
try std.testing.expectEqual(false, result.overflow);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Walks a byte slice one line at a time. A line ends at '\r', '\n', or the
/// pair '\r\n'; the terminator is never part of the returned slice.
const LineIterator = struct {
    buf: []const u8,
    index: usize = 0,
    /// Starts out true. It is cleared only when the first call to `next`
    /// (the one that begins at index 0) finds no '\r' or '\n' in `buf`.
    has_break: bool = true,

    /// Returns the next line, or null once `buf` has been fully consumed.
    fn next(self: *LineIterator) ?[]const u8 {
        const line_start = self.index;
        if (line_start >= self.buf.len) return null;

        if (std.mem.indexOfAnyPos(u8, self.buf, line_start, "\r\n")) |line_end| {
            self.index = line_end;
            // Step over the terminator: an optional '\r', then an optional '\n'.
            self.consumeCR();
            self.consumeLF();
            return self.buf[line_start..line_end];
        }

        // No terminator remains, so the rest of the buffer is the last line.
        if (line_start == 0) self.has_break = false;
        self.index = self.buf.len;
        return self.buf[line_start..];
    }

    // Advances past a single '\n' if one sits at the current index.
    fn consumeLF(self: *LineIterator) void {
        if (self.index < self.buf.len and self.buf[self.index] == '\n') {
            self.index += 1;
        }
    }

    // Advances past a single '\r' if one sits at the current index.
    fn consumeCR(self: *LineIterator) void {
        if (self.index < self.buf.len and self.buf[self.index] == '\r') {
            self.index += 1;
        }
    }
};
|
||||||
|
|
||||||
|
/// Splits a buffer into consecutive tokens: runs of whitespace (' ' and '\t')
/// and runs of everything else (words).
const WhitespaceTokenizer = struct {
    buf: []const u8,
    index: usize = 0,

    const Token = union(enum) {
        // summed width of a whitespace run: each space adds 1, each tab adds 8
        whitespace: usize,
        word: []const u8,
    };

    /// Returns the next token, or null when the whole buffer has been consumed.
    /// The byte at the current index decides which kind of run is scanned.
    fn next(self: *WhitespaceTokenizer) ?Token {
        const bytes = self.buf;
        var pos = self.index;
        if (pos >= bytes.len) return null;

        const run_start = pos;
        if (bytes[pos] == ' ' or bytes[pos] == '\t') {
            // Whitespace run: accumulate its width until a non-blank byte.
            var width: usize = 0;
            while (pos < bytes.len) : (pos += 1) {
                if (bytes[pos] == ' ') {
                    width += 1;
                } else if (bytes[pos] == '\t') {
                    width += 8;
                } else break;
            }
            self.index = pos;
            return .{ .whitespace = width };
        }

        // Word run: extend until the next space or tab, or the end of the buffer.
        while (pos < bytes.len and bytes[pos] != ' ' and bytes[pos] != '\t') {
            pos += 1;
        }
        self.index = pos;
        return .{ .word = bytes[run_start..pos] };
    }
};
|
||||||
|
|
Loading…
Reference in a new issue