From ec528c06260761998ca02476b48216498b44cc86 Mon Sep 17 00:00:00 2001 From: Stefan Holderbach Date: Wed, 20 Mar 2024 19:43:50 +0100 Subject: [PATCH] Refactor source cache into `CachedFile` struct (#12240) # Description Get rid of two parallel `Vec`s in `StateDelta` and `EngineState`, that also duplicated span information. Use a struct with documenting fields. Also use `Arc` and `Arc<[u8]>` for the allocations as they are never modified and cloned often (see #12229 for the first improvement). This also makes the representation more compact as no capacity is necessary. # User-Facing Changes API breakage on `EngineState`/`StateWorkingSet`/`StateDelta` that should not really affect plugin authors. --- .../src/completions/command_completions.rs | 9 +- crates/nu-cli/src/util.rs | 4 +- crates/nu-command/src/debug/view_files.rs | 12 ++- crates/nu-lsp/src/lib.rs | 25 +++-- crates/nu-protocol/src/engine/cached_file.rs | 15 +++ crates/nu-protocol/src/engine/engine_state.rs | 50 +++++----- crates/nu-protocol/src/engine/mod.rs | 3 + crates/nu-protocol/src/engine/state_delta.rs | 9 +- .../src/engine/state_working_set.rs | 94 +++++++++---------- src/ide.rs | 16 ++-- 10 files changed, 133 insertions(+), 104 deletions(-) create mode 100644 crates/nu-protocol/src/engine/cached_file.rs diff --git a/crates/nu-cli/src/completions/command_completions.rs b/crates/nu-cli/src/completions/command_completions.rs index 8cabd21334..5e5c4f06a5 100644 --- a/crates/nu-cli/src/completions/command_completions.rs +++ b/crates/nu-cli/src/completions/command_completions.rs @@ -1,7 +1,7 @@ use crate::completions::{Completer, CompletionOptions, MatchAlgorithm, SortBy}; use nu_parser::FlatShape; use nu_protocol::{ - engine::{EngineState, StateWorkingSet}, + engine::{CachedFile, EngineState, StateWorkingSet}, Span, }; use reedline::Suggestion; @@ -229,8 +229,9 @@ pub fn find_non_whitespace_index(contents: &[u8], start: usize) -> usize { } } -pub fn is_passthrough_command(working_set_file_contents: &[(Arc>, usize, usize)]) -> bool { - for (contents, _, _) in working_set_file_contents { +pub fn is_passthrough_command(working_set_file_contents: &[CachedFile]) -> bool { + for cached_file in working_set_file_contents { + let contents = &cached_file.content; let last_pipe_pos_rev = contents.iter().rev().position(|x| x == &b'|'); let last_pipe_pos = last_pipe_pos_rev.map(|x| contents.len() - x).unwrap_or(0); @@ -295,7 +296,7 @@ mod command_completions_tests { let input = ele.0.as_bytes(); let mut engine_state = EngineState::new(); - engine_state.add_file("test.nu".into(), vec![]); + engine_state.add_file("test.nu".into(), Arc::new([])); let delta = { let mut working_set = StateWorkingSet::new(&engine_state); diff --git a/crates/nu-cli/src/util.rs b/crates/nu-cli/src/util.rs index c2a6743c86..46553b1c1f 100644 --- a/crates/nu-cli/src/util.rs +++ b/crates/nu-cli/src/util.rs @@ -94,8 +94,8 @@ fn gather_env_vars( let span_offset = engine_state.next_span_start(); engine_state.add_file( - "Host Environment Variables".to_string(), - fake_env_file.as_bytes().to_vec(), + "Host Environment Variables".into(), + fake_env_file.as_bytes().into(), ); let (tokens, _) = lex(fake_env_file.as_bytes(), span_offset, &[], &[], true); diff --git a/crates/nu-command/src/debug/view_files.rs b/crates/nu-command/src/debug/view_files.rs index f9214f06a3..914e86c3c8 100644 --- a/crates/nu-command/src/debug/view_files.rs +++ b/crates/nu-command/src/debug/view_files.rs @@ -43,13 +43,15 @@ impl Command for ViewFiles { ) -> Result { let mut records = vec![]; - for (file, start, end) in engine_state.files() { + for file in engine_state.files() { + let start = file.covered_span.start; + let end = file.covered_span.end; records.push(Value::record( record! { - "filename" => Value::string(&**file, call.head), - "start" => Value::int(*start as i64, call.head), - "end" => Value::int(*end as i64, call.head), - "size" => Value::int(*end as i64 - *start as i64, call.head), + "filename" => Value::string(&*file.name, call.head), + "start" => Value::int(start as i64, call.head), + "end" => Value::int(end as i64, call.head), + "size" => Value::int(end as i64 - start as i64, call.head), }, call.head, )); diff --git a/crates/nu-lsp/src/lib.rs b/crates/nu-lsp/src/lib.rs index 2b0e19c868..020c4e5f7c 100644 --- a/crates/nu-lsp/src/lib.rs +++ b/crates/nu-lsp/src/lib.rs @@ -284,11 +284,15 @@ impl LanguageServer { if let Some(block_id) = working_set.get_decl(decl_id).get_block_id() { let block = working_set.get_block(block_id); if let Some(span) = &block.span { - for (file_path, file_start, file_end) in working_set.files() { - if span.start >= *file_start && span.start < *file_end { + for cached_file in working_set.files() { + if cached_file.covered_span.contains(span.start) { return Some(GotoDefinitionResponse::Scalar(Location { - uri: Url::from_file_path(&**file_path).ok()?, - range: Self::span_to_range(span, file, *file_start), + uri: Url::from_file_path(&*cached_file.name).ok()?, + range: Self::span_to_range( + span, + file, + cached_file.covered_span.start, + ), })); } } @@ -297,9 +301,10 @@ impl LanguageServer { } Id::Variable(var_id) => { let var = working_set.get_variable(var_id); - for (_, file_start, file_end) in working_set.files() { - if var.declaration_span.start >= *file_start - && var.declaration_span.start < *file_end + for cached_file in working_set.files() { + if cached_file + .covered_span + .contains(var.declaration_span.start) { return Some(GotoDefinitionResponse::Scalar(Location { uri: params @@ -307,7 +312,11 @@ impl LanguageServer { .text_document .uri .clone(), - range: Self::span_to_range(&var.declaration_span, file, *file_start), + range: Self::span_to_range( + &var.declaration_span, + file, + cached_file.covered_span.start, + ), })); } } diff --git a/crates/nu-protocol/src/engine/cached_file.rs b/crates/nu-protocol/src/engine/cached_file.rs new file mode 100644 index 0000000000..fa93864712 --- /dev/null +++ b/crates/nu-protocol/src/engine/cached_file.rs @@ -0,0 +1,15 @@ +use crate::Span; +use std::sync::Arc; + +/// Unit of cached source code +#[derive(Clone)] +pub struct CachedFile { + // Use Arcs of slice types for more compact representation (capacity less) + // Could possibly become an `Arc` + /// The file name with which the code is associated (also includes REPL input) + pub name: Arc, + /// Source code as raw bytes + pub content: Arc<[u8]>, + /// global span coordinates that are covered by this [`CachedFile`] + pub covered_span: Span, +} diff --git a/crates/nu-protocol/src/engine/engine_state.rs b/crates/nu-protocol/src/engine/engine_state.rs index de713c3059..eb2eb69492 100644 --- a/crates/nu-protocol/src/engine/engine_state.rs +++ b/crates/nu-protocol/src/engine/engine_state.rs @@ -1,6 +1,7 @@ use fancy_regex::Regex; use lru::LruCache; +use super::cached_file::CachedFile; use super::{usage::build_usage, usage::Usage, StateDelta}; use super::{ Command, EnvVars, OverlayFrame, ScopeFrame, Stack, Variable, Visibility, DEFAULT_OVERLAY_NAME, @@ -74,8 +75,7 @@ impl Clone for IsDebugging { /// but they also rely on using IDs rather than full definitions. #[derive(Clone)] pub struct EngineState { - files: Vec<(Arc, usize, usize)>, - file_contents: Vec<(Arc>, usize, usize)>, + files: Vec, pub(super) virtual_paths: Vec<(String, VirtualPath)>, vars: Vec, decls: Arc>>, @@ -119,7 +119,6 @@ impl EngineState { pub fn new() -> Self { Self { files: vec![], - file_contents: vec![], virtual_paths: vec![], vars: vec![ Variable::new(Span::new(0, 0), Type::Any, false), @@ -183,7 +182,6 @@ impl EngineState { pub fn merge_delta(&mut self, mut delta: StateDelta) -> Result<(), ShellError> { // Take the mutable reference and extend the permanent state from the working set self.files.extend(delta.files); - self.file_contents.extend(delta.file_contents); self.virtual_paths.extend(delta.virtual_paths); self.vars.extend(delta.vars); self.blocks.extend(delta.blocks); @@ -628,8 +626,8 @@ impl EngineState { } pub fn print_contents(&self) { - for (contents, _, _) in self.file_contents.iter() { - let string = String::from_utf8_lossy(contents); + for cached_file in self.files.iter() { + let string = String::from_utf8_lossy(&cached_file.content); println!("{string}"); } } @@ -747,9 +745,10 @@ impl EngineState { } pub fn get_span_contents(&self, span: Span) -> &[u8] { - for (contents, start, finish) in &self.file_contents { - if span.start >= *start && span.end <= *finish { - return &contents[(span.start - start)..(span.end - start)]; + for file in &self.files { + if file.covered_span.contains_span(span) { + return &file.content + [(span.start - file.covered_span.start)..(span.end - file.covered_span.start)]; } } &[0u8; 0] @@ -909,26 +908,28 @@ impl EngineState { } pub fn next_span_start(&self) -> usize { - if let Some((_, _, last)) = self.file_contents.last() { - *last + if let Some(cached_file) = self.files.last() { + cached_file.covered_span.end } else { 0 } } - pub fn files(&self) -> impl Iterator, usize, usize)> { + pub fn files(&self) -> impl Iterator { self.files.iter() } - pub fn add_file(&mut self, filename: String, contents: Vec) -> usize { + pub fn add_file(&mut self, filename: Arc, content: Arc<[u8]>) -> FileId { let next_span_start = self.next_span_start(); - let next_span_end = next_span_start + contents.len(); + let next_span_end = next_span_start + content.len(); - self.file_contents - .push((Arc::new(contents), next_span_start, next_span_end)); + let covered_span = Span::new(next_span_start, next_span_end); - self.files - .push((Arc::new(filename), next_span_start, next_span_end)); + self.files.push(CachedFile { + name: filename, + content, + covered_span, + }); self.num_files() - 1 } @@ -968,8 +969,9 @@ impl EngineState { .unwrap_or_default() } - pub fn get_file_contents(&self) -> &[(Arc>, usize, usize)] { - &self.file_contents + // TODO: see if we can completely get rid of this + pub fn get_file_contents(&self) -> &[CachedFile] { + &self.files } pub fn get_startup_time(&self) -> i64 { @@ -1043,7 +1045,7 @@ mod engine_state_tests { #[test] fn add_file_gives_id_including_parent() { let mut engine_state = EngineState::new(); - let parent_id = engine_state.add_file("test.nu".into(), vec![]); + let parent_id = engine_state.add_file("test.nu".into(), Arc::new([])); let mut working_set = StateWorkingSet::new(&engine_state); let working_set_id = working_set.add_file("child.nu".into(), &[]); @@ -1055,7 +1057,7 @@ mod engine_state_tests { #[test] fn merge_states() -> Result<(), ShellError> { let mut engine_state = EngineState::new(); - engine_state.add_file("test.nu".into(), vec![]); + engine_state.add_file("test.nu".into(), Arc::new([])); let delta = { let mut working_set = StateWorkingSet::new(&engine_state); @@ -1066,8 +1068,8 @@ mod engine_state_tests { engine_state.merge_delta(delta)?; assert_eq!(engine_state.num_files(), 2); - assert_eq!(&*engine_state.files[0].0, "test.nu"); - assert_eq!(&*engine_state.files[1].0, "child.nu"); + assert_eq!(&*engine_state.files[0].name, "test.nu"); + assert_eq!(&*engine_state.files[1].name, "child.nu"); Ok(()) } diff --git a/crates/nu-protocol/src/engine/mod.rs b/crates/nu-protocol/src/engine/mod.rs index 7deff38ad4..3c2189e8b0 100644 --- a/crates/nu-protocol/src/engine/mod.rs +++ b/crates/nu-protocol/src/engine/mod.rs @@ -1,3 +1,4 @@ +mod cached_file; mod call_info; mod capture_block; mod command; @@ -11,6 +12,8 @@ mod stdio; mod usage; mod variable; +pub use cached_file::CachedFile; + pub use call_info::*; pub use capture_block::*; pub use command::*; diff --git a/crates/nu-protocol/src/engine/state_delta.rs b/crates/nu-protocol/src/engine/state_delta.rs index d12090b6c2..23309c78f8 100644 --- a/crates/nu-protocol/src/engine/state_delta.rs +++ b/crates/nu-protocol/src/engine/state_delta.rs @@ -1,3 +1,4 @@ +use super::cached_file::CachedFile; use super::{usage::Usage, Command, EngineState, OverlayFrame, ScopeFrame, Variable, VirtualPath}; use crate::ast::Block; use crate::Module; @@ -12,8 +13,7 @@ use crate::RegisteredPlugin; /// can be applied to the global state to update it to contain both previous state and the state held /// within the delta. pub struct StateDelta { - pub(super) files: Vec<(Arc, usize, usize)>, - pub(crate) file_contents: Vec<(Arc>, usize, usize)>, + pub(super) files: Vec, pub(super) virtual_paths: Vec<(String, VirtualPath)>, pub(super) vars: Vec, // indexed by VarId pub(super) decls: Vec>, // indexed by DeclId @@ -38,7 +38,6 @@ impl StateDelta { StateDelta { files: vec![], - file_contents: vec![], virtual_paths: vec![], vars: vec![], decls: vec![], @@ -131,7 +130,7 @@ impl StateDelta { self.scope.pop(); } - pub fn get_file_contents(&self) -> &[(Arc>, usize, usize)] { - &self.file_contents + pub fn get_file_contents(&self) -> &[CachedFile] { + &self.files } } diff --git a/crates/nu-protocol/src/engine/state_working_set.rs b/crates/nu-protocol/src/engine/state_working_set.rs index 2e0811963c..be8368de5f 100644 --- a/crates/nu-protocol/src/engine/state_working_set.rs +++ b/crates/nu-protocol/src/engine/state_working_set.rs @@ -1,3 +1,4 @@ +use super::cached_file::CachedFile; use super::{ usage::build_usage, Command, EngineState, OverlayFrame, StateDelta, Variable, VirtualPath, Visibility, PWD_ENV, @@ -285,8 +286,8 @@ impl<'a> StateWorkingSet<'a> { pub fn next_span_start(&self) -> usize { let permanent_span_start = self.permanent_state.next_span_start(); - if let Some((_, _, last)) = self.delta.file_contents.last() { - *last + if let Some(cached_file) = self.delta.files.last() { + cached_file.covered_span.end } else { permanent_span_start } @@ -296,21 +297,22 @@ impl<'a> StateWorkingSet<'a> { self.permanent_state.next_span_start() } - pub fn files(&'a self) -> impl Iterator, usize, usize)> { + pub fn files(&self) -> impl Iterator { self.permanent_state.files().chain(self.delta.files.iter()) } - pub fn get_contents_of_file(&self, file_id: usize) -> Option<&[u8]> { - for (id, (contents, _, _)) in self.delta.file_contents.iter().enumerate() { - if self.permanent_state.num_files() + id == file_id { - return Some(contents); - } + pub fn get_contents_of_file(&self, file_id: FileId) -> Option<&[u8]> { + if let Some(cached_file) = self.permanent_state.get_file_contents().get(file_id) { + return Some(&cached_file.content); } - - for (id, (contents, _, _)) in self.permanent_state.get_file_contents().iter().enumerate() { - if id == file_id { - return Some(contents); - } + // The index subtraction will not underflow, if we hit the permanent state first. + // Check if you try reordering for locality + if let Some(cached_file) = self + .delta + .get_file_contents() + .get(file_id - self.permanent_state.num_files()) + { + return Some(&cached_file.content); } None @@ -319,27 +321,22 @@ impl<'a> StateWorkingSet<'a> { #[must_use] pub fn add_file(&mut self, filename: String, contents: &[u8]) -> FileId { // First, look for the file to see if we already have it - for (idx, (fname, file_start, file_end)) in self.files().enumerate() { - if **fname == filename { - let prev_contents = self.get_span_contents(Span::new(*file_start, *file_end)); - if prev_contents == contents { - return idx; - } + for (idx, cached_file) in self.files().enumerate() { + if *cached_file.name == filename && &*cached_file.content == contents { + return idx; } } let next_span_start = self.next_span_start(); let next_span_end = next_span_start + contents.len(); - self.delta.file_contents.push(( - Arc::new(contents.to_vec()), - next_span_start, - next_span_end, - )); + let covered_span = Span::new(next_span_start, next_span_end); - self.delta - .files - .push((Arc::new(filename), next_span_start, next_span_end)); + self.delta.files.push(CachedFile { + name: filename.into(), + content: contents.into(), + covered_span, + }); self.num_files() - 1 } @@ -352,35 +349,31 @@ impl<'a> StateWorkingSet<'a> { } pub fn get_span_for_filename(&self, filename: &str) -> Option { - let (file_id, ..) = self - .files() - .enumerate() - .find(|(_, (fname, _, _))| **fname == filename)?; + let file_id = self.files().position(|file| &*file.name == filename)?; Some(self.get_span_for_file(file_id)) } - pub fn get_span_for_file(&self, file_id: usize) -> Span { + /// Panics: + /// On invalid `FileId` + /// + /// Use with care + pub fn get_span_for_file(&self, file_id: FileId) -> Span { let result = self .files() .nth(file_id) .expect("internal error: could not find source for previously parsed file"); - Span::new(result.1, result.2) + result.covered_span } pub fn get_span_contents(&self, span: Span) -> &[u8] { let permanent_end = self.permanent_state.next_span_start(); if permanent_end <= span.start { - for (contents, start, finish) in &self.delta.file_contents { - if (span.start >= *start) && (span.end <= *finish) { - let begin = span.start - start; - let mut end = span.end - start; - if begin > end { - end = *finish - permanent_end; - } - - return &contents[begin..end]; + for cached_file in &self.delta.files { + if cached_file.covered_span.contains_span(span) { + return &cached_file.content[span.start - cached_file.covered_span.start + ..span.end - cached_file.covered_span.start]; } } } @@ -1033,19 +1026,24 @@ impl<'a> miette::SourceCode for &StateWorkingSet<'a> { let finding_span = "Finding span in StateWorkingSet"; dbg!(finding_span, span); } - for (filename, start, end) in self.files() { + for cached_file in self.files() { + let (filename, start, end) = ( + &cached_file.name, + cached_file.covered_span.start, + cached_file.covered_span.end, + ); if debugging { dbg!(&filename, start, end); } - if span.offset() >= *start && span.offset() + span.len() <= *end { + if span.offset() >= start && span.offset() + span.len() <= end { if debugging { let found_file = "Found matching file"; dbg!(found_file); } - let our_span = Span::new(*start, *end); + let our_span = cached_file.covered_span; // We need to move to a local span because we're only reading // the specific file contents via self.get_span_contents. - let local_span = (span.offset() - *start, span.len()).into(); + let local_span = (span.offset() - start, span.len()).into(); if debugging { dbg!(&local_span); } @@ -1066,7 +1064,7 @@ impl<'a> miette::SourceCode for &StateWorkingSet<'a> { } let data = span_contents.data(); - if **filename == "" { + if &**filename == "" { if debugging { let success_cli = "Successfully read CLI span"; dbg!(success_cli, String::from_utf8_lossy(data)); @@ -1084,7 +1082,7 @@ impl<'a> miette::SourceCode for &StateWorkingSet<'a> { dbg!(success_file); } return Ok(Box::new(miette::MietteSpanContents::new_named( - (**filename).clone(), + (**filename).to_owned(), data, retranslated, span_contents.line(), diff --git a/src/ide.rs b/src/ide.rs index 59422c3ddc..399c6578a0 100644 --- a/src/ide.rs +++ b/src/ide.rs @@ -160,14 +160,14 @@ pub fn goto_def(engine_state: &mut EngineState, file_path: &str, location: &Valu let block = working_set.get_block(block_id); if let Some(span) = &block.span { for file in working_set.files() { - if span.start >= file.1 && span.start < file.2 { + if file.covered_span.contains(span.start) { println!( "{}", json!( { - "file": &**file.0, - "start": span.start - file.1, - "end": span.end - file.1 + "file": &*file.name, + "start": span.start - file.covered_span.start, + "end": span.end - file.covered_span.start, } ) ); @@ -180,14 +180,14 @@ pub fn goto_def(engine_state: &mut EngineState, file_path: &str, location: &Valu Some((Id::Variable(var_id), ..)) => { let var = working_set.get_variable(var_id); for file in working_set.files() { - if var.declaration_span.start >= file.1 && var.declaration_span.start < file.2 { + if file.covered_span.contains(var.declaration_span.start) { println!( "{}", json!( { - "file": &**file.0, - "start": var.declaration_span.start - file.1, - "end": var.declaration_span.end - file.1 + "file": &*file.name, + "start": var.declaration_span.start - file.covered_span.start, + "end": var.declaration_span.end - file.covered_span.start, } ) );