From eb639eb2e4610ed2b440c8d95217f125005288fd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bla=C5=BE=20Hrastnik?= <blaz@mxxn.io>
Date: Tue, 22 Sep 2020 18:23:48 +0900
Subject: [PATCH] More robust syntax detection/grammar loading.

---
 .gitmodules                             |   4 +
 Cargo.lock                              |  30 +--
 TODO.md                                 |   5 +-
 helix-core/Cargo.toml                   |   1 +
 helix-core/src/lib.rs                   |   1 -
 helix-core/src/state.rs                 |  37 ++--
 helix-core/src/syntax.rs                | 237 +++++++++++++++---------
 helix-syntax/languages/tree-sitter-toml |   1 +
 helix-syntax/src/lib.rs                 |   1 +
 helix-term/src/editor.rs                |  27 ++-
 helix-view/src/view.rs                  |   3 +-
 11 files changed, 206 insertions(+), 141 deletions(-)
 create mode 160000 helix-syntax/languages/tree-sitter-toml

diff --git a/.gitmodules b/.gitmodules
index 70fac3b6..f4d6456c 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -78,3 +78,7 @@
 	path = helix-syntax/languages/tree-sitter-swift
 	url = https://github.com/tree-sitter/tree-sitter-swift
 	shallow = true
+[submodule "helix-syntax/languages/tree-sitter-toml"]
+	path = helix-syntax/languages/tree-sitter-toml
+	url = https://github.com/ikatyang/tree-sitter-toml
+	shallow = true
diff --git a/Cargo.lock b/Cargo.lock
index f37c951c..def2673a 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -73,15 +73,16 @@ dependencies = [
 
 [[package]]
 name = "async-executor"
-version = "1.1.1"
+version = "1.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a831e74aa1937d3bbd3a356f34c23dbc6b6f0abc5160bd5484a9f75d5e76aea8"
+checksum = "d373d78ded7d0b3fa8039375718cde0aace493f2e34fb60f51cbf567562ca801"
 dependencies = [
  "async-task",
  "concurrent-queue",
  "fastrand",
  "futures-lite",
  "once_cell",
+ "vec-arena",
 ]
 
 [[package]]
@@ -135,12 +136,13 @@ dependencies = [
 
 [[package]]
 name = "async-net"
-version = "1.3.0"
+version = "1.4.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a48af5438be856056bdeb6c5d895148a715be5915fccee49d1e5b50851dc9b8b"
+checksum = "fb04482b77baa38d59d56aee0a7b4266600ab28e2b8be7af03508f6a30ecbdcf"
 dependencies = [
  "async-io",
  "blocking",
+ "fastrand",
  "futures-lite",
 ]
 
@@ -162,9 +164,9 @@ dependencies = [
 
 [[package]]
 name = "async-rwlock"
-version = "1.1.0"
+version = "1.2.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6f8978b5ae008b5177da07a1bf1bfbe428f9bdb970c3fca0e92ed1c1930d7f34"
+checksum = "806b1cc0828c2b1611ccbdd743fc0cc7af09009e62c95a0501c1e5da7b142a22"
 dependencies = [
  "async-mutex",
  "event-listener",
@@ -181,9 +183,9 @@ dependencies = [
 
 [[package]]
 name = "async-task"
-version = "3.0.0"
+version = "4.0.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c17772156ef2829aadc587461c7753af20b7e8db1529bc66855add962a3b35d3"
+checksum = "6725e96011a83fae25074a8734932e8d67763522839be7473dcfe8a0d6a378b1"
 
 [[package]]
 name = "atomic-waker"
@@ -428,9 +430,9 @@ dependencies = [
 
 [[package]]
 name = "hermit-abi"
-version = "0.1.15"
+version = "0.1.16"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3deed196b6e7f9e44a2ae8d94225d80302d81208b1bb673fd21fe634645c85a9"
+checksum = "4c30f6d0bc6b00693347368a67d41b58f2fb851215ff1da49e90fe2c5c667151"
 dependencies = [
  "libc",
 ]
@@ -615,9 +617,9 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
 
 [[package]]
 name = "polling"
-version = "1.0.3"
+version = "1.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0307b8c7f438902536321f63c28cab0362f6ee89f1c7da47e3642ff956641c8b"
+checksum = "e0720e0b9ea9d52451cf29d3413ba8a9303f8815d9d9653ef70e03ff73e65566"
 dependencies = [
  "cfg-if",
  "libc",
@@ -712,9 +714,9 @@ checksum = "fbee7696b84bbf3d89a1c2eccff0850e3047ed46bfcd2e92c29a2d074d57e252"
 
 [[package]]
 name = "smol"
-version = "1.0.1"
+version = "1.2.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "712d02afa6ac9e7b8c777fd181aff476d009280b54b8c28703d10fa5d7e80d83"
+checksum = "d41237ba3e3ada55ff3515d37becc8fa90e5e4af2b13a011ec3f932d9f1b2405"
 dependencies = [
  "async-channel",
  "async-executor",
diff --git a/TODO.md b/TODO.md
index ed1484c4..e6b5efd1 100644
--- a/TODO.md
+++ b/TODO.md
@@ -1,5 +1,6 @@
-- Implement backspace/delete
-- Implement marks
+helper methods: iterate over selection spans in the document.
+
+- Implement marks (superset of Selection/Range)
 - Implement style configs, tab settings
 - Visual tab width
 - Refactor tree-sitter-highlight to work like the atom one, recomputing partial tree updates.
diff --git a/helix-core/Cargo.toml b/helix-core/Cargo.toml
index 0d3bd4a4..6a4b09e5 100644
--- a/helix-core/Cargo.toml
+++ b/helix-core/Cargo.toml
@@ -17,3 +17,4 @@ unicode-segmentation = "1.6.0"
 unicode-width = "0.1.7"
 # slab = "0.4.2"
 tree-sitter = "0.16.1"
+once_cell = "1.4.1"
diff --git a/helix-core/src/lib.rs b/helix-core/src/lib.rs
index e443168e..e97c16be 100644
--- a/helix-core/src/lib.rs
+++ b/helix-core/src/lib.rs
@@ -1,5 +1,4 @@
 #![allow(unused)]
-pub mod config;
 pub mod graphemes;
 pub mod macros;
 mod position;
diff --git a/helix-core/src/state.rs b/helix-core/src/state.rs
index 79e15eff..4b610207 100644
--- a/helix-core/src/state.rs
+++ b/helix-core/src/state.rs
@@ -1,4 +1,5 @@
 use crate::graphemes::{nth_next_grapheme_boundary, nth_prev_grapheme_boundary, RopeGraphemes};
+use crate::syntax::LOADER;
 use crate::{Position, Range, Rope, RopeSlice, Selection, Syntax};
 use anyhow::Error;
 
@@ -48,7 +49,8 @@ impl State {
         }
     }
 
-    pub fn load(path: PathBuf) -> Result<Self, Error> {
+    // TODO: passing scopes here is awkward
+    pub fn load(path: PathBuf, scopes: &[String]) -> Result<Self, Error> {
         use std::{env, fs::File, io::BufReader, path::PathBuf};
         let _current_dir = env::current_dir()?;
 
@@ -57,31 +59,18 @@ impl State {
         // TODO: create if not found
 
         let mut state = Self::new(doc);
+
+        if let Some(language_config) = LOADER.language_config_for_file_name(path.as_path()) {
+            let highlight_config = language_config.highlight_config(scopes).unwrap().unwrap();
+            // TODO: config.configure(scopes) is now delayed, is that ok?
+
+            let syntax = Syntax::new(&state.doc, highlight_config.clone());
+
+            state.syntax = Some(syntax);
+        };
+
         state.path = Some(path);
 
-        let language = helix_syntax::get_language(&helix_syntax::LANG::Rust);
-
-        let mut highlight_config = crate::syntax::HighlightConfiguration::new(
-            language,
-            &std::fs::read_to_string(
-                "../helix-syntax/languages/tree-sitter-rust/queries/highlights.scm",
-            )
-            .unwrap(),
-            &std::fs::read_to_string(
-                "../helix-syntax/languages/tree-sitter-rust/queries/injections.scm",
-            )
-            .unwrap(),
-            "", // locals.scm
-        )
-        .unwrap();
-
-        // TODO: config.configure(scopes) is now delayed, is that ok?
-
-        // TODO: get_language is called twice
-        let syntax = Syntax::new(helix_syntax::LANG::Rust, &state.doc, highlight_config);
-
-        state.syntax = Some(syntax);
-
         Ok(state)
     }
 
diff --git a/helix-core/src/syntax.rs b/helix-core/src/syntax.rs
index 8b55fc3e..26897ab3 100644
--- a/helix-core/src/syntax.rs
+++ b/helix-core/src/syntax.rs
@@ -2,21 +2,161 @@ use crate::{Change, Rope, RopeSlice, Transaction};
 pub use helix_syntax::LANG;
 pub use helix_syntax::{get_language, get_language_name};
 
+use std::collections::HashMap;
+use std::path::{Path, PathBuf};
+use std::sync::Arc;
+
+use once_cell::sync::OnceCell;
+
+// largely based on tree-sitter/cli/src/loader.rs
+pub struct LanguageConfiguration {
+    pub(crate) scope: String,           // source.rust
+    pub(crate) file_types: Vec<String>, // filename ends_with? <Gemfile, rb, etc>
+
+    pub(crate) path: PathBuf,
+
+    // content_regex
+    // injection_regex
+    // first_line_regex
+    //
+    // root_path
+    //
+    pub(crate) language_id: LANG,
+    pub(crate) highlight_config: OnceCell<Option<Arc<HighlightConfiguration>>>,
+    // tags_config OnceCell<> https://github.com/tree-sitter/tree-sitter/pull/583
+}
+
+impl LanguageConfiguration {
+    pub fn highlight_config(
+        &self,
+        scopes: &[String],
+    ) -> Result<Option<&Arc<HighlightConfiguration>>, anyhow::Error> {
+        self.highlight_config
+            .get_or_try_init(|| {
+                // let name = get_language_name(&self.language_id);
+
+                let highlights_query =
+                    std::fs::read_to_string(self.path.join("queries/highlights.scm"))
+                        .unwrap_or(String::new());
+
+                let injections_query =
+                    std::fs::read_to_string(self.path.join("queries/injections.scm"))
+                        .unwrap_or(String::new());
+
+                let locals_query = "";
+
+                if highlights_query.is_empty() {
+                    Ok(None)
+                } else {
+                    let language = get_language(&self.language_id);
+                    let mut config = HighlightConfiguration::new(
+                        language,
+                        &highlights_query,
+                        &injections_query,
+                        &locals_query,
+                    )
+                    .unwrap(); // TODO: no unwrap
+                    config.configure(&scopes);
+                    Ok(Some(Arc::new(config)))
+                }
+            })
+            .map(Option::as_ref)
+    }
+}
+
+use once_cell::sync::Lazy;
+
+pub(crate) static LOADER: Lazy<Loader> = Lazy::new(|| Loader::init());
+
+pub struct Loader {
+    // highlight_names ?
+    language_configs: Vec<Arc<LanguageConfiguration>>,
+    language_config_ids_by_file_type: HashMap<String, usize>, // Vec<usize>
+}
+
+impl Loader {
+    fn init() -> Loader {
+        let mut loader = Loader {
+            language_configs: Vec::new(),
+            language_config_ids_by_file_type: HashMap::new(),
+        };
+
+        // hardcoded for now, might load from toml
+        let configs = vec![
+            LanguageConfiguration {
+                scope: "source.rust".to_string(),
+                file_types: vec!["rs".to_string()],
+                language_id: LANG::Rust,
+                highlight_config: OnceCell::new(),
+                //
+                path: "../helix-syntax/languages/tree-sitter-rust".into(),
+            },
+            LanguageConfiguration {
+                scope: "source.toml".to_string(),
+                file_types: vec!["toml".to_string()],
+                language_id: LANG::Toml,
+                highlight_config: OnceCell::new(),
+                //
+                path: "../helix-syntax/languages/tree-sitter-toml".into(),
+            },
+        ];
+
+        for config in configs {
+            // get the next id
+            let language_id = loader.language_configs.len();
+
+            for file_type in &config.file_types {
+                // entry().or_insert(Vec::new).push(language_id);
+                loader
+                    .language_config_ids_by_file_type
+                    .insert(file_type.clone(), language_id);
+            }
+
+            loader.language_configs.push(Arc::new(config));
+        }
+
+        loader
+    }
+
+    pub fn language_config_for_file_name(&self, path: &Path) -> Option<Arc<LanguageConfiguration>> {
+        // Look up the language configuration registered for the exact file
+        // name, falling back to the file's extension when there is no match.
+        let configuration_id = path
+            .file_name()
+            .and_then(|n| n.to_str())
+            .and_then(|file_name| self.language_config_ids_by_file_type.get(file_name))
+            .or_else(|| {
+                path.extension()
+                    .and_then(|extension| extension.to_str())
+                    .and_then(|extension| self.language_config_ids_by_file_type.get(extension))
+            });
+
+        configuration_id.and_then(|&id| self.language_configs.get(id).cloned())
+
+        // TODO: content_regex handling conflict resolution
+    }
+}
+
+//
+
 pub struct Syntax {
-    grammar: Language,
+    // grammar: Grammar,
     parser: Parser,
     cursors: Vec<QueryCursor>,
 
-    config: HighlightConfiguration,
+    config: Arc<HighlightConfiguration>,
 
     root_layer: LanguageLayer,
 }
 
 impl Syntax {
     // buffer, grammar, config, grammars, sync_timeout?
-    pub fn new(language: LANG, source: &Rope, config: HighlightConfiguration) -> Self {
+    pub fn new(
+        /*language: LANG,*/ source: &Rope,
+        config: Arc<HighlightConfiguration>,
+    ) -> Self {
         // fetch grammar for parser based on language string
-        let grammar = get_language(&language);
+        // let grammar = get_language(&language);
         let parser = Parser::new();
 
         let root_layer = LanguageLayer::new();
@@ -25,7 +165,7 @@ impl Syntax {
         // track scope_descriptor: a Vec of scopes for item in tree
 
         let mut syntax = Self {
-            grammar,
+            // grammar,
             parser,
             cursors: Vec::new(),
             config,
@@ -48,10 +188,6 @@ impl Syntax {
         syntax
     }
 
-    pub fn configure(&mut self, scopes: &[String]) {
-        self.config.configure(scopes)
-    }
-
     pub fn update(&mut self, source: &Rope, changeset: &ChangeSet) -> Result<(), Error> {
         self.root_layer
             .update(&mut self.parser, &self.config, source, changeset)
@@ -88,9 +224,9 @@ impl Syntax {
         let mut cursor = QueryCursor::new(); // reuse a pool
         let tree_ref = unsafe { mem::transmute::<_, &'static Tree>(self.tree()) };
         let cursor_ref = unsafe { mem::transmute::<_, &'static mut QueryCursor>(&mut cursor) };
-        let query_ref = unsafe { mem::transmute::<_, &'static mut Query>(&mut self.config.query) };
+        let query_ref = unsafe { mem::transmute::<_, &'static Query>(&self.config.query) };
         let config_ref =
-            unsafe { mem::transmute::<_, &'static HighlightConfiguration>(&self.config) };
+            unsafe { mem::transmute::<_, &'static HighlightConfiguration>(self.config.as_ref()) };
 
         // TODO: if reusing cursors this might need resetting
         if let Some(range) = &range {
@@ -432,8 +568,8 @@ impl LanguageLayer {
 use std::sync::atomic::{AtomicUsize, Ordering};
 use std::{iter, mem, ops, str, usize};
 use tree_sitter::{
-    Language, Node, Parser, Point, Query, QueryCaptures, QueryCursor, QueryError, QueryMatch,
-    Range, Tree,
+    Language as Grammar, Node, Parser, Point, Query, QueryCaptures, QueryCursor, QueryError,
+    QueryMatch, Range, Tree,
 };
 
 const CANCELLATION_CHECK_INTERVAL: usize = 100;
@@ -462,7 +598,7 @@ pub enum HighlightEvent {
 ///
 /// This struct is immutable and can be shared between threads.
 pub struct HighlightConfiguration {
-    pub language: Language,
+    pub language: Grammar,
     pub query: Query,
     combined_injections_query: Option<Query>,
     locals_pattern_index: usize,
@@ -477,16 +613,6 @@ pub struct HighlightConfiguration {
     local_ref_capture_index: Option<u32>,
 }
 
-/// Performs syntax highlighting, recognizing a given list of highlight names.
-///
-/// For the best performance `Highlighter` values should be reused between
-/// syntax highlighting calls. A separate highlighter is needed for each thread that
-/// is performing highlighting.
-pub struct Highlighter {
-    parser: Parser,
-    cursors: Vec<QueryCursor>,
-}
-
 #[derive(Debug)]
 struct LocalDef<'a> {
     name: &'a str,
@@ -527,70 +653,13 @@ struct HighlightIterLayer<'a> {
     depth: usize,
 }
 
-impl Default for Highlighter {
-    fn default() -> Self {
-        Highlighter {
-            parser: Parser::new(),
-            cursors: Vec::new(),
-        }
-    }
-}
-
-impl Highlighter {
-    pub fn new() -> Self {
-        Self::default()
-    }
-
-    pub fn parser(&mut self) -> &mut Parser {
-        &mut self.parser
-    }
-
-    // /// Iterate over the highlighted regions for a given slice of source code.
-    // pub fn highlight<'a>(
-    //     &'a mut self,
-    //     config: &'a HighlightConfiguration,
-    //     source: &'a [u8],
-    //     cancellation_flag: Option<&'a AtomicUsize>,
-    //     mut injection_callback: impl FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a,
-    // ) -> Result<impl Iterator<Item = Result<HighlightEvent, Error>> + 'a, Error> {
-    //     let layers = HighlightIterLayer::new(
-    //         source,
-    //         self,
-    //         cancellation_flag,
-    //         &mut injection_callback,
-    //         config,
-    //         0,
-    //         vec![Range {
-    //             start_byte: 0,
-    //             end_byte: usize::MAX,
-    //             start_point: Point::new(0, 0),
-    //             end_point: Point::new(usize::MAX, usize::MAX),
-    //         }],
-    //     )?;
-    //     assert_ne!(layers.len(), 0);
-    //     let mut result = HighlightIter {
-    //         source,
-    //         byte_offset: 0,
-    //         injection_callback,
-    //         cancellation_flag,
-    //         highlighter: self,
-    //         iter_count: 0,
-    //         layers,
-    //         next_event: None,
-    //         last_highlight_range: None,
-    //     };
-    //     result.sort_layers();
-    //     Ok(result)
-    // }
-}
-
 impl HighlightConfiguration {
-    /// Creates a `HighlightConfiguration` for a given `Language` and set of highlighting
+    /// Creates a `HighlightConfiguration` for a given `Grammar` and set of highlighting
     /// queries.
     ///
     /// # Parameters
     ///
-    /// * `language`  - The Tree-sitter `Language` that should be used for parsing.
+    /// * `language`  - The Tree-sitter `Grammar` that should be used for parsing.
     /// * `highlights_query` - A string containing tree patterns for syntax highlighting. This
     ///   should be non-empty, otherwise no syntax highlights will be added.
     /// * `injections_query` -  A string containing tree patterns for injecting other languages
@@ -600,7 +669,7 @@ impl HighlightConfiguration {
     ///
     /// Returns a `HighlightConfiguration` that can then be used with the `highlight` method.
     pub fn new(
-        language: Language,
+        language: Grammar,
         highlights_query: &str,
         injection_query: &str,
         locals_query: &str,
diff --git a/helix-syntax/languages/tree-sitter-toml b/helix-syntax/languages/tree-sitter-toml
new file mode 160000
index 00000000..42c9ff20
--- /dev/null
+++ b/helix-syntax/languages/tree-sitter-toml
@@ -0,0 +1 @@
+Subproject commit 42c9ff20c0371bed7f514036e823f10793caacec
diff --git a/helix-syntax/src/lib.rs b/helix-syntax/src/lib.rs
index 1ca36ca6..60472fdd 100644
--- a/helix-syntax/src/lib.rs
+++ b/helix-syntax/src/lib.rs
@@ -82,6 +82,7 @@ mk_langs!(
     (Rust, tree_sitter_rust),
     (Scala, tree_sitter_scala),
     (Swift, tree_sitter_swift),
+    (Toml, tree_sitter_toml),
     (Tsx, tree_sitter_tsx),
     (Typescript, tree_sitter_typescript)
 );
diff --git a/helix-term/src/editor.rs b/helix-term/src/editor.rs
index 790c3f16..24e62306 100644
--- a/helix-term/src/editor.rs
+++ b/helix-term/src/editor.rs
@@ -1,10 +1,5 @@
 use crate::Args;
-use helix_core::{
-    state::coords_at_pos,
-    state::Mode,
-    syntax::{HighlightConfiguration, HighlightEvent, Highlighter},
-    State,
-};
+use helix_core::{state::coords_at_pos, state::Mode, syntax::HighlightEvent, State};
 use helix_view::{commands, keymap, View};
 
 use std::{
@@ -107,14 +102,18 @@ impl Editor {
 
                 // TODO: cache highlight results
                 // TODO: only recalculate when state.doc is actually modified
-                let highlights: Vec<_> = view
-                    .state
-                    .syntax
-                    .as_mut()
-                    .unwrap()
-                    .highlight_iter(source_code.as_bytes(), Some(range), None, |_| None)
-                    .unwrap()
-                    .collect(); // TODO: we collect here to avoid double borrow, fix later
+                let highlights: Vec<_> = match view.state.syntax.as_mut() {
+                    Some(syntax) => {
+                        syntax
+                            .highlight_iter(source_code.as_bytes(), Some(range), None, |_| None)
+                            .unwrap()
+                            .collect() // TODO: we collect here to avoid double borrow, fix later
+                    }
+                    None => vec![Ok(HighlightEvent::Source {
+                        start: range.start,
+                        end: range.end,
+                    })],
+                };
 
                 let mut spans = Vec::new();
 
diff --git a/helix-view/src/view.rs b/helix-view/src/view.rs
index 3f7a9974..0900b0ca 100644
--- a/helix-view/src/view.rs
+++ b/helix-view/src/view.rs
@@ -14,9 +14,8 @@ pub struct View {
 
 impl View {
     pub fn open(path: PathBuf, size: (u16, u16)) -> Result<View, Error> {
-        let mut state = State::load(path)?;
         let theme = Theme::default();
-        state.syntax.as_mut().unwrap().configure(theme.scopes());
+        let state = State::load(path, theme.scopes())?;
 
         let view = View {
             state,