From 3f88a3f4e6f75bf04246a8015652931e640e0821 Mon Sep 17 00:00:00 2001
From: woojiq <122799969+woojiq@users.noreply.github.com>
Date: Sun, 14 Jan 2024 16:46:32 +0200
Subject: [PATCH] Change path normalization strategy to not resolve symlinks
 (#9330)

---
 Cargo.lock               |   1 +
 helix-core/Cargo.toml    |   1 +
 helix-core/src/path.rs   |  71 ++++++++++++++--------
 helix-core/tests/path.rs | 124 +++++++++++++++++++++++++++++++++++++++
 4 files changed, 171 insertions(+), 26 deletions(-)
 create mode 100644 helix-core/tests/path.rs

diff --git a/Cargo.lock b/Cargo.lock
index a7e06b6e..3643b953 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1074,6 +1074,7 @@ dependencies = [
  "slotmap",
  "smallvec",
  "smartstring",
+ "tempfile",
  "textwrap",
  "toml",
  "tree-sitter",
diff --git a/helix-core/Cargo.toml b/helix-core/Cargo.toml
index d7fff6c6..be41fa01 100644
--- a/helix-core/Cargo.toml
+++ b/helix-core/Cargo.toml
@@ -55,3 +55,4 @@ parking_lot = "0.12"
 [dev-dependencies]
 quickcheck = { version = "1", default-features = false }
 indoc = "2.0.4"
+tempfile = "3.7.0"
diff --git a/helix-core/src/path.rs b/helix-core/src/path.rs
index ede37e04..0cf6f812 100644
--- a/helix-core/src/path.rs
+++ b/helix-core/src/path.rs
@@ -30,31 +30,10 @@ pub fn expand_tilde(path: &Path) -> PathBuf {
     path.to_path_buf()
 }
 
-/// Normalize a path, removing things like `.` and `..`.
-///
-/// CAUTION: This does not resolve symlinks (unlike
-/// [`std::fs::canonicalize`]). This may cause incorrect or surprising
-/// behavior at times. This should be used carefully. Unfortunately,
-/// [`std::fs::canonicalize`] can be hard to use correctly, since it can often
-/// fail, or on Windows returns annoying device paths. This is a problem Cargo
-/// needs to improve on.
-/// Copied from cargo: <https://github.com/rust-lang/cargo/blob/070e459c2d8b79c5b2ac5218064e7603329c92ae/crates/cargo-util/src/paths.rs#L81>
+/// Normalize a path without resolving symlinks.
+// Strategy: start from the first component and move up. Cannonicalize previous path,
+// join component, cannonicalize new path, strip prefix and join to the final result.
 pub fn get_normalized_path(path: &Path) -> PathBuf {
-    // normalization strategy is to canonicalize first ancestor path that exists (i.e., canonicalize as much as possible),
-    // then run handrolled normalization on the non-existent remainder
-    let (base, path) = path
-        .ancestors()
-        .find_map(|base| {
-            let canonicalized_base = dunce::canonicalize(base).ok()?;
-            let remainder = path.strip_prefix(base).ok()?.into();
-            Some((canonicalized_base, remainder))
-        })
-        .unwrap_or_else(|| (PathBuf::new(), PathBuf::from(path)));
-
-    if path.as_os_str().is_empty() {
-        return base;
-    }
-
     let mut components = path.components().peekable();
     let mut ret = if let Some(c @ Component::Prefix(..)) = components.peek().cloned() {
         components.next();
@@ -70,20 +49,60 @@ pub fn get_normalized_path(path: &Path) -> PathBuf {
                 ret.push(component.as_os_str());
             }
             Component::CurDir => {}
+            #[cfg(not(windows))]
             Component::ParentDir => {
                 ret.pop();
             }
+            #[cfg(windows)]
+            Component::ParentDir => {
+                if let Some(head) = ret.components().next_back() {
+                    match head {
+                        Component::Prefix(_) | Component::RootDir => {}
+                        Component::CurDir => unreachable!(),
+                        // If we left previous component as ".." it means we met a symlink before and we can't pop path.
+                        Component::ParentDir => {
+                            ret.push("..");
+                        }
+                        Component::Normal(_) => {
+                            if ret.is_symlink() {
+                                ret.push("..");
+                            } else {
+                                ret.pop();
+                            }
+                        }
+                    }
+                }
+            }
+            #[cfg(not(windows))]
             Component::Normal(c) => {
                 ret.push(c);
             }
+            #[cfg(windows)]
+            Component::Normal(c) => 'normal: {
+                use std::fs::canonicalize;
+
+                let new_path = ret.join(c);
+                if new_path.is_symlink() {
+                    ret = new_path;
+                    break 'normal;
+                }
+                let (can_new, can_old) = (canonicalize(&new_path), canonicalize(&ret));
+                match (can_new, can_old) {
+                    (Ok(can_new), Ok(can_old)) => {
+                        let striped = can_new.strip_prefix(can_old);
+                        ret.push(striped.unwrap_or_else(|_| c.as_ref()));
+                    }
+                    _ => ret.push(c),
+                }
+            }
         }
     }
-    base.join(ret)
+    dunce::simplified(&ret).to_path_buf()
 }
 
 /// Returns the canonical, absolute form of a path with all intermediate components normalized.
 ///
-/// This function is used instead of `std::fs::canonicalize` because we don't want to verify
+/// This function is used instead of [`std::fs::canonicalize`] because we don't want to verify
 /// here if the path exists, just normalize it's components.
 pub fn get_canonicalized_path(path: &Path) -> PathBuf {
     let path = expand_tilde(path);
diff --git a/helix-core/tests/path.rs b/helix-core/tests/path.rs
new file mode 100644
index 00000000..cbda5e1a
--- /dev/null
+++ b/helix-core/tests/path.rs
@@ -0,0 +1,124 @@
+#![cfg(windows)]
+
+use std::{
+    env::set_current_dir,
+    error::Error,
+    path::{Component, Path, PathBuf},
+};
+
+use helix_core::path::get_normalized_path;
+use tempfile::Builder;
+
+// Paths on Windows are almost always case-insensitive.
+// Normalization should return the original path.
+// E.g. mkdir `CaSe`, normalize(`case`) = `CaSe`.
+#[test]
+fn test_case_folding_windows() -> Result<(), Box<dyn Error>> {
+    // tmp/root/case
+    let tmp_prefix = std::env::temp_dir();
+    set_current_dir(&tmp_prefix)?;
+
+    let root = Builder::new().prefix("root-").tempdir()?;
+    let case = Builder::new().prefix("CaSe-").tempdir_in(&root)?;
+
+    let root_without_prefix = root.path().strip_prefix(&tmp_prefix)?;
+
+    let lowercase_case = format!(
+        "case-{}",
+        case.path()
+            .file_name()
+            .unwrap()
+            .to_string_lossy()
+            .split_at(5)
+            .1
+    );
+    let test_path = root_without_prefix.join(lowercase_case);
+    assert_eq!(
+        get_normalized_path(&test_path),
+        case.path().strip_prefix(&tmp_prefix)?
+    );
+
+    Ok(())
+}
+
+#[test]
+fn test_normalize_path() -> Result<(), Box<dyn Error>> {
+    /*
+    tmp/root/
+    ├── link -> dir1/orig_file
+    ├── dir1/
+    │   └── orig_file
+    └── dir2/
+        └── dir_link -> ../dir1/
+    */
+
+    let tmp_prefix = std::env::temp_dir();
+    set_current_dir(&tmp_prefix)?;
+
+    // Create a tree structure as shown above
+    let root = Builder::new().prefix("root-").tempdir()?;
+    let dir1 = Builder::new().prefix("dir1-").tempdir_in(&root)?;
+    let orig_file = Builder::new().prefix("orig_file-").tempfile_in(&dir1)?;
+    let dir2 = Builder::new().prefix("dir2-").tempdir_in(&root)?;
+
+    // Create path and delete existing file
+    let dir_link = Builder::new()
+        .prefix("dir_link-")
+        .tempfile_in(&dir2)?
+        .path()
+        .to_owned();
+    let link = Builder::new()
+        .prefix("link-")
+        .tempfile_in(&root)?
+        .path()
+        .to_owned();
+
+    use std::os::windows;
+    windows::fs::symlink_dir(&dir1, &dir_link)?;
+    windows::fs::symlink_file(&orig_file, &link)?;
+
+    // root/link
+    let path = link.strip_prefix(&tmp_prefix)?;
+    assert_eq!(
+        get_normalized_path(path),
+        path,
+        "input {:?} and symlink last component shouldn't be resolved",
+        path
+    );
+
+    // root/dir2/dir_link/orig_file/../..
+    let path = dir_link
+        .strip_prefix(&tmp_prefix)
+        .unwrap()
+        .join(orig_file.path().file_name().unwrap())
+        .join(Component::ParentDir)
+        .join(Component::ParentDir);
+    let expected = dir_link
+        .strip_prefix(&tmp_prefix)
+        .unwrap()
+        .join(Component::ParentDir);
+    assert_eq!(
+        get_normalized_path(&path),
+        expected,
+        "input {:?} and \"..\" should not erase the simlink that goes ahead",
+        &path
+    );
+
+    // root/link/.././../dir2/../
+    let path = link
+        .strip_prefix(&tmp_prefix)
+        .unwrap()
+        .join(Component::ParentDir)
+        .join(Component::CurDir)
+        .join(Component::ParentDir)
+        .join(dir2.path().file_name().unwrap())
+        .join(Component::ParentDir);
+    let expected = link
+        .strip_prefix(&tmp_prefix)
+        .unwrap()
+        .join(Component::ParentDir)
+        .join(Component::ParentDir);
+    assert_eq!(get_normalized_path(&path), expected, "input {:?}", &path);
+
+    Ok(())
+}