Fix utf8 length handling for shellwords (#5738)

If the last argument to shellwords ends in a multibyte utf8 character
the entire argument will be dropped.
e.g. `:sh echo test1 test2𒀀` will only output `test1`

Add additional tests based on the code review feedback
This commit is contained in:
Mike Trinkala 2023-02-01 14:07:42 -08:00 committed by GitHub
parent 685cd383a3
commit 62d046fa21
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -129,8 +129,9 @@ impl<'a> From<&'a str> for Shellwords<'a> {
DquoteEscaped => Dquoted,
};
if i >= input.len() - 1 && end == 0 {
end = i + 1;
let c_len = c.len_utf8();
if i == input.len() - c_len && end == 0 {
end = i + c_len;
}
if end > 0 {
@ -333,4 +334,17 @@ mod test {
assert_eq!(Shellwords::from(":o a").parts(), &[":o", "a"]);
assert_eq!(Shellwords::from(":o a\\ ").parts(), &[":o", "a\\"]);
}
#[test]
fn test_multibyte_at_end() {
assert_eq!(Shellwords::from("𒀀").parts(), &["𒀀"]);
assert_eq!(
Shellwords::from(":sh echo 𒀀").parts(),
&[":sh", "echo", "𒀀"]
);
assert_eq!(
Shellwords::from(":sh echo 𒀀 hello world𒀀").parts(),
&[":sh", "echo", "𒀀", "hello", "world𒀀"]
);
}
}