diff --git a/Cargo.lock b/Cargo.lock index 93470fcc91..13a0131e5f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2110,6 +2110,15 @@ dependencies = [ "cfg-if 1.0.0", ] +[[package]] +name = "lru" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6e8aaa3f231bb4bd57b84b2d5dc3ae7f350265df8aa96492e0bc394a1571909" +dependencies = [ + "hashbrown", +] + [[package]] name = "lscolors" version = "0.12.0" @@ -2778,6 +2787,7 @@ dependencies = [ "chrono-humanize", "fancy-regex", "indexmap", + "lru", "miette", "nu-json", "nu-test-support", diff --git a/crates/nu-engine/src/eval.rs b/crates/nu-engine/src/eval.rs index 107e8a2fff..cf2bf38d24 100644 --- a/crates/nu-engine/src/eval.rs +++ b/crates/nu-engine/src/eval.rs @@ -402,9 +402,11 @@ pub fn eval_expression( Comparison::NotEqual => lhs.ne(op_span, &rhs, expr.span), Comparison::In => lhs.r#in(op_span, &rhs, expr.span), Comparison::NotIn => lhs.not_in(op_span, &rhs, expr.span), - Comparison::RegexMatch => lhs.regex_match(op_span, &rhs, false, expr.span), + Comparison::RegexMatch => { + lhs.regex_match(engine_state, op_span, &rhs, false, expr.span) + } Comparison::NotRegexMatch => { - lhs.regex_match(op_span, &rhs, true, expr.span) + lhs.regex_match(engine_state, op_span, &rhs, true, expr.span) } Comparison::StartsWith => lhs.starts_with(op_span, &rhs, expr.span), Comparison::EndsWith => lhs.ends_with(op_span, &rhs, expr.span), diff --git a/crates/nu-protocol/Cargo.toml b/crates/nu-protocol/Cargo.toml index a9d21c1953..7807043e96 100644 --- a/crates/nu-protocol/Cargo.toml +++ b/crates/nu-protocol/Cargo.toml @@ -18,6 +18,7 @@ chrono = { version="0.4.23", features= ["serde", "std"], default-features = fals chrono-humanize = "0.2.1" fancy-regex = "0.10.0" indexmap = { version="1.7" } +lru = "0.8.1" miette = { version = "5.1.0", features = ["fancy-no-backtrace"] } num-format = "0.4.3" serde = {version = "1.0.143", default-features = false } diff --git a/crates/nu-protocol/src/engine/engine_state.rs b/crates/nu-protocol/src/engine/engine_state.rs index 5dabda7e66..e6f6b90db1 100644 --- a/crates/nu-protocol/src/engine/engine_state.rs +++ b/crates/nu-protocol/src/engine/engine_state.rs @@ -1,3 +1,6 @@ +use fancy_regex::Regex; +use lru::LruCache; + use super::{Command, EnvVars, OverlayFrame, ScopeFrame, Stack, Visibility, DEFAULT_OVERLAY_NAME}; use crate::Value; use crate::{ @@ -6,6 +9,7 @@ use crate::{ }; use core::panic; use std::borrow::Borrow; +use std::num::NonZeroUsize; use std::path::Path; use std::path::PathBuf; use std::{ @@ -94,8 +98,12 @@ pub struct EngineState { pub history_session_id: i64, // If Nushell was started, e.g., with `nu spam.nu`, the file's parent is stored here pub currently_parsed_cwd: Option, + pub regex_cache: Arc>>, } +// The max number of compiled regexes to keep around in a LRU cache, arbitrarily chosen +const REGEX_CACHE_SIZE: usize = 100; // must be nonzero, otherwise will panic + pub const NU_VARIABLE_ID: usize = 0; pub const IN_VARIABLE_ID: usize = 1; pub const ENV_VARIABLE_ID: usize = 2; @@ -137,6 +145,9 @@ impl EngineState { config_path: HashMap::new(), history_session_id: 0, currently_parsed_cwd: None, + regex_cache: Arc::new(Mutex::new(LruCache::new( + NonZeroUsize::new(REGEX_CACHE_SIZE).expect("tried to create cache of size zero"), + ))), } } diff --git a/crates/nu-protocol/src/value/mod.rs b/crates/nu-protocol/src/value/mod.rs index b744aafd1c..a41f98af65 100644 --- a/crates/nu-protocol/src/value/mod.rs +++ b/crates/nu-protocol/src/value/mod.rs @@ -7,6 +7,7 @@ mod unit; use crate::ast::{Bits, Boolean, CellPath, Comparison, PathMember}; use crate::ast::{Math, Operator}; +use crate::engine::EngineState; use crate::ShellError; use crate::{did_you_mean, BlockId, Config, Span, Spanned, Type, VarId}; use byte_unit::ByteUnit; @@ -2627,6 +2628,7 @@ impl Value { pub fn regex_match( &self, + engine_state: &EngineState, op: Span, rhs: &Value, invert: bool, @@ -2640,18 +2642,36 @@ impl Value { span: rhs_span, }, ) => { - // We are leaving some performance on the table by compiling the regex every time. - // Small regexes compile in microseconds, and the simplicity of this approach currently - // outweighs the performance costs. Revisit this if it ever becomes a bottleneck. - let regex = Regex::new(rhs).map_err(|e| { - ShellError::UnsupportedInput( - format!("{e}"), - "value originated from here".into(), - span, - *rhs_span, - ) - })?; - let is_match = regex.is_match(lhs); + let is_match = match engine_state.regex_cache.try_lock() { + Ok(mut cache) => match cache.get(rhs) { + Some(regex) => regex.is_match(lhs), + None => { + let regex = Regex::new(rhs).map_err(|e| { + ShellError::UnsupportedInput( + format!("{e}"), + "value originated from here".into(), + span, + *rhs_span, + ) + })?; + let ret = regex.is_match(lhs); + cache.put(rhs.clone(), regex); + ret + } + }, + Err(_) => { + let regex = Regex::new(rhs).map_err(|e| { + ShellError::UnsupportedInput( + format!("{e}"), + "value originated from here".into(), + span, + *rhs_span, + ) + })?; + regex.is_match(lhs) + } + }; + Ok(Value::Bool { val: if invert { !is_match.unwrap_or(false)