From 90348b889f397f2753473764c2d7a02986beddeb Mon Sep 17 00:00:00 2001
From: Pascal Kuthe <pascal.kuthe@semimod.de>
Date: Mon, 13 Mar 2023 19:27:54 +0100
Subject: [PATCH] revamped snippet text element parsing

Snippet text elements can contain escape sequences
that must be treated properly. Furthermore, snippets
must always escape certain characters (like `}`
or `\`). The `text` parser has been updated to
account for that. `text` is now also included with
`anything` to match the grammar and can also
match empty text. To avoid infinite loops, the
`non_empty` combinator has been added, which is
automatically used in the `one_or_more` and
`zero_or_more` combinators where the problem would
occur.
---
 helix-lsp/src/snippet.rs | 117 ++++++++++++++++++++++++---------------
 helix-parsec/src/lib.rs  |  13 +++++
 2 files changed, 85 insertions(+), 45 deletions(-)

diff --git a/helix-lsp/src/snippet.rs b/helix-lsp/src/snippet.rs
index 77f44d4e..f64f29f2 100644
--- a/helix-lsp/src/snippet.rs
+++ b/helix-lsp/src/snippet.rs
@@ -12,7 +12,7 @@ pub enum CaseChange {
 
 #[derive(Debug, PartialEq, Eq)]
 pub enum FormatItem<'a> {
-    Text(&'a str),
+    Text(Tendril),
     Capture(usize),
     CaseChange(usize, CaseChange),
     Conditional(usize, Option<&'a str>, Option<&'a str>),
@@ -20,9 +20,9 @@ pub enum FormatItem<'a> {
 
 #[derive(Debug, PartialEq, Eq)]
 pub struct Regex<'a> {
-    value: &'a str,
+    value: Tendril,
     replacement: Vec<FormatItem<'a>>,
-    options: Option<&'a str>,
+    options: Tendril,
 }
 
 #[derive(Debug, PartialEq, Eq)]
@@ -36,14 +36,14 @@ pub enum SnippetElement<'a> {
     },
     Choice {
         tabstop: usize,
-        choices: Vec<&'a str>,
+        choices: Vec<Tendril>,
     },
     Variable {
         name: &'a str,
         default: Option<&'a str>,
         regex: Option<Regex<'a>>,
     },
-    Text(&'a str),
+    Text(Tendril),
 }
 
 #[derive(Debug, PartialEq, Eq)]
@@ -67,12 +67,12 @@ fn render_elements(
 
     for element in snippet_elements {
         match element {
-            &Text(text) => {
+            Text(text) => {
                 // small optimization to avoid calling replace when it's unnecessary
                 let text = if text.contains('\n') {
                     Cow::Owned(text.replace('\n', newline_with_offset))
                 } else {
-                    Cow::Borrowed(text)
+                    Cow::Borrowed(text.as_str())
                 };
                 *offset += text.chars().count();
                 insert.push_str(&text);
@@ -160,6 +160,7 @@ pub fn render(
 }
 
 mod parser {
+    use helix_core::Tendril;
     use helix_parsec::*;
 
     use super::{CaseChange, FormatItem, Regex, Snippet, SnippetElement};
@@ -210,8 +211,32 @@ mod parser {
         }
     }
 
-    fn text<'a, const SIZE: usize>(cs: [char; SIZE]) -> impl Parser<'a, Output = &'a str> {
-        take_while(move |c| cs.into_iter().all(|c1| c != c1))
+    const TEXT_ESCAPE_CHARS: &[char] = &['\\', '}', '$'];
+    const REPLACE_ESCAPE_CHARS: &[char] = &['\\', '}', '$', '/'];
+    const CHOICE_TEXT_ESCAPE_CHARS: &[char] = &['\\', '}', '$', '|', ','];
+
+    fn text<'a>(escape_chars: &'static [char]) -> impl Parser<'a, Output = Tendril> {
+        move |input: &'a str| {
+            let mut chars = input.char_indices();
+            let mut res = Tendril::new();
+            while let Some((i, c)) = chars.next() {
+                match c {
+                    '\\' => {
+                        if let Some((_, c)) = chars.next() {
+                            if escape_chars.contains(&c) {
+                                res.push(c);
+                                continue;
+                            }
+                        }
+                        return Ok((&input[i..], res));
+                    }
+                    c if escape_chars.contains(&c) => return Ok((&input[i..], res)),
+                    c => res.push(c),
+                }
+            }
+
+            Ok(("", res))
+        }
     }
 
     fn digit<'a>() -> impl Parser<'a, Output = usize> {
@@ -274,20 +299,18 @@ mod parser {
     }
 
     fn regex<'a>() -> impl Parser<'a, Output = Regex<'a>> {
-        let text = map(text(['$', '/']), FormatItem::Text);
-        let replacement = reparse_as(
-            take_until(|c| c == '/'),
-            one_or_more(choice!(format(), text)),
-        );
-
         map(
             seq!(
                 "/",
-                take_until(|c| c == '/'),
+                // TODO parse as ECMAScript and convert to rust regex
+                non_empty(text(&['/', '\\'])),
                 "/",
-                replacement,
+                one_or_more(choice!(
+                    format(),
+                    map(text(REPLACE_ESCAPE_CHARS), FormatItem::Text)
+                )),
                 "/",
-                optional(take_until(|c| c == '}')),
+                text(&['}', '\\',]),
             ),
             |(_, value, _, replacement, _, options)| Regex {
                 value,
@@ -308,13 +331,12 @@ mod parser {
     }
 
     fn placeholder<'a>() -> impl Parser<'a, Output = SnippetElement<'a>> {
-        let text = map(text(['$', '}']), SnippetElement::Text);
         map(
             seq!(
                 "${",
                 digit(),
                 ":",
-                one_or_more(choice!(anything(), text)),
+                one_or_more(anything(TEXT_ESCAPE_CHARS)),
                 "}"
             ),
             |seq| SnippetElement::Placeholder {
@@ -330,7 +352,7 @@ mod parser {
                 "${",
                 digit(),
                 "|",
-                sep(take_until(|c| c == ',' || c == '|'), ","),
+                sep(text(CHOICE_TEXT_ESCAPE_CHARS), ","),
                 "|}",
             ),
             |seq| SnippetElement::Choice {
@@ -368,17 +390,21 @@ mod parser {
         )
     }
 
-    fn anything<'a>() -> impl Parser<'a, Output = SnippetElement<'a>> {
-        // The parser has to be constructed lazily to avoid infinite opaque type recursion
-        |input: &'a str| {
-            let parser = choice!(tabstop(), placeholder(), choice(), variable());
+    fn anything<'a>(escape_chars: &'static [char]) -> impl Parser<'a, Output = SnippetElement<'a>> {
+        move |input: &'a str| {
+            let parser = choice!(
+                tabstop(),
+                placeholder(),
+                choice(),
+                variable(),
+                map(text(escape_chars), SnippetElement::Text)
+            );
             parser.parse(input)
         }
     }
 
     fn snippet<'a>() -> impl Parser<'a, Output = Snippet<'a>> {
-        let text = map(text(['$']), SnippetElement::Text);
-        map(one_or_more(choice!(anything(), text)), |parts| Snippet {
+        map(one_or_more(anything(TEXT_ESCAPE_CHARS)), |parts| Snippet {
             elements: parts,
         })
     }
@@ -392,6 +418,7 @@ mod parser {
             }
         })
     }
+
     #[cfg(test)]
     mod test {
         use super::SnippetElement::*;
@@ -407,12 +434,12 @@ mod parser {
             assert_eq!(
                 Ok(Snippet {
                     elements: vec![
-                        Text("match("),
+                        Text("match(".into()),
                         Placeholder {
                             tabstop: 1,
-                            value: vec!(Text("Arg1")),
+                            value: vec!(Text("Arg1".into())),
                         },
-                        Text(")")
+                        Text(")".into())
                     ]
                 }),
                 parse("match(${1:Arg1})")
@@ -446,15 +473,15 @@ mod parser {
             assert_eq!(
                 Ok(Snippet {
                     elements: vec![
-                        Text("local "),
+                        Text("local ".into()),
                         Placeholder {
                             tabstop: 1,
-                            value: vec!(Text("var")),
+                            value: vec!(Text("var".into())),
                         },
-                        Text(" = "),
+                        Text(" = ".into()),
                         Placeholder {
                             tabstop: 1,
-                            value: vec!(Text("value")),
+                            value: vec!(Text("value".into())),
                         },
                     ]
                 }),
@@ -468,7 +495,7 @@ mod parser {
                 Ok(Snippet {
                     elements: vec![Placeholder {
                         tabstop: 1,
-                        value: vec!(Text("var, "), Tabstop { tabstop: 2 },),
+                        value: vec!(Text("var, ".into()), Tabstop { tabstop: 2 },),
                     },]
                 }),
                 parse("${1:var, $2}")
@@ -482,10 +509,10 @@ mod parser {
                     elements: vec![Placeholder {
                         tabstop: 1,
                         value: vec!(
-                            Text("foo "),
+                            Text("foo ".into()),
                             Placeholder {
                                 tabstop: 2,
-                                value: vec!(Text("bar")),
+                                value: vec!(Text("bar".into())),
                             },
                         ),
                     },]
@@ -499,27 +526,27 @@ mod parser {
             assert_eq!(
                 Ok(Snippet {
                     elements: vec![
-                        Text("hello "),
+                        Text("hello ".into()),
                         Tabstop { tabstop: 1 },
                         Tabstop { tabstop: 2 },
-                        Text(" "),
+                        Text(" ".into()),
                         Choice {
                             tabstop: 1,
-                            choices: vec!["one", "two", "three"]
+                            choices: vec!["one".into(), "two".into(), "three".into()]
                         },
-                        Text(" "),
+                        Text(" ".into()),
                         Variable {
                             name: "name",
                             default: Some("foo"),
                             regex: None
                         },
-                        Text(" "),
+                        Text(" ".into()),
                         Variable {
                             name: "var",
                             default: None,
                             regex: None
                         },
-                        Text(" "),
+                        Text(" ".into()),
                         Variable {
                             name: "TM",
                             default: None,
@@ -539,9 +566,9 @@ mod parser {
                         name: "TM_FILENAME",
                         default: None,
                         regex: Some(Regex {
-                            value: "(.*).+$",
+                            value: "(.*).+$".into(),
                             replacement: vec![FormatItem::Capture(1)],
-                            options: None,
+                            options: Tendril::new(),
                         }),
                     }]
                 }),
diff --git a/helix-parsec/src/lib.rs b/helix-parsec/src/lib.rs
index e09814b8..846d02d6 100644
--- a/helix-parsec/src/lib.rs
+++ b/helix-parsec/src/lib.rs
@@ -459,6 +459,7 @@ pub fn zero_or_more<'a, P, T>(parser: P) -> impl Parser<'a, Output = Vec<T>>
 where
     P: Parser<'a, Output = T>,
 {
+    let parser = non_empty(parser);
     move |mut input| {
         let mut values = Vec::new();
 
@@ -491,6 +492,7 @@ pub fn one_or_more<'a, P, T>(parser: P) -> impl Parser<'a, Output = Vec<T>>
 where
     P: Parser<'a, Output = T>,
 {
+    let parser = non_empty(parser);
     move |mut input| {
         let mut values = Vec::new();
 
@@ -559,3 +561,14 @@ where
         Ok((input, values))
     }
 }
+
+pub fn non_empty<'a, T>(p: impl Parser<'a, Output = T>) -> impl Parser<'a, Output = T> {
+    move |input| {
+        let (new_input, res) = p.parse(input)?;
+        if new_input.len() == input.len() {
+            Err(input)
+        } else {
+            Ok((new_input, res))
+        }
+    }
+}