diff --git a/crates/nu-command/src/strings/parse.rs b/crates/nu-command/src/strings/parse.rs index 8f6d35b0b3..51067a16a2 100644 --- a/crates/nu-command/src/strings/parse.rs +++ b/crates/nu-command/src/strings/parse.rs @@ -1,9 +1,9 @@ -use fancy_regex::Regex; +use fancy_regex::{Captures, Regex}; use nu_engine::command_prelude::*; -use nu_protocol::{ListStream, ValueIterator}; -use std::sync::{ - atomic::{AtomicBool, Ordering}, - Arc, +use nu_protocol::ListStream; +use std::{ + collections::VecDeque, + sync::{atomic::AtomicBool, Arc}, }; #[derive(Clone)] @@ -119,7 +119,6 @@ fn operate( let head = call.head; let pattern: Spanned = call.req(engine_state, stack, 0)?; let regex: bool = call.has_flag(engine_state, stack, "regex")?; - let ctrlc = engine_state.ctrlc.clone(); let pattern_item = pattern.item; let pattern_span = pattern.span; @@ -130,7 +129,7 @@ fn operate( build_regex(&pattern_item, pattern_span)? }; - let regex_pattern = Regex::new(&item_to_parse).map_err(|e| ShellError::GenericError { + let regex = Regex::new(&item_to_parse).map_err(|e| ShellError::GenericError { error: "Error with regular expression".into(), msg: e.to_string(), span: Some(pattern_span), @@ -138,92 +137,108 @@ fn operate( inner: vec![], })?; - let columns = column_names(®ex_pattern); + let columns = regex + .capture_names() + .skip(1) + .enumerate() + .map(|(i, name)| { + name.map(String::from) + .unwrap_or_else(|| format!("capture{i}")) + }) + .collect::>(); + + let ctrlc = engine_state.ctrlc.clone(); match input { PipelineData::Empty => Ok(PipelineData::Empty), - PipelineData::Value(..) => { - let mut parsed: Vec = Vec::new(); + PipelineData::Value(value, ..) => match value { + Value::String { val, .. } => { + let captures = regex + .captures_iter(&val) + .map(|captures| captures_to_value(captures, &columns, head)) + .collect::>()?; - for v in input { - let v_span = v.span(); - match v.coerce_into_string() { - Ok(s) => { - let results = regex_pattern.captures_iter(&s); - - for c in results { - let captures = match c { - Ok(c) => c, - Err(e) => { - return Err(ShellError::GenericError { - error: "Error with regular expression captures".into(), - msg: e.to_string(), - span: None, - help: None, - inner: vec![], - }) - } - }; - - let record = columns - .iter() - .zip(captures.iter().skip(1)) - .map(|(column_name, cap)| { - let cap_string = cap.map(|v| v.as_str()).unwrap_or(""); - (column_name.clone(), Value::string(cap_string, v_span)) - }) - .collect(); - - parsed.push(Value::record(record, head)); - } - } - Err(_) => { - return Err(ShellError::PipelineMismatch { - exp_input_type: "string".into(), - dst_span: head, - src_span: v_span, - }) - } - } + Ok(Value::list(captures, head).into_pipeline_data()) } + Value::List { vals, .. } => { + let iter = vals.into_iter().map(move |val| { + let span = val.span(); + val.into_string().map_err(|_| ShellError::PipelineMismatch { + exp_input_type: "string".into(), + dst_span: head, + src_span: span, + }) + }); - Ok(ListStream::new(parsed.into_iter(), head, ctrlc).into()) - } + let iter = ParseIter { + captures: VecDeque::new(), + regex, + columns, + iter, + span: head, + ctrlc, + }; + + Ok(ListStream::new(iter, head, None).into()) + } + value => Err(ShellError::PipelineMismatch { + exp_input_type: "string".into(), + dst_span: head, + src_span: value.span(), + }), + }, PipelineData::ListStream(stream, ..) => Ok(stream - .modify(|stream| ParseStreamer { - span: head, - excess: Vec::new(), - regex: regex_pattern, - columns, - stream, - ctrlc, + .modify(|stream| { + let iter = stream.map(move |val| { + let span = val.span(); + val.into_string().map_err(|_| ShellError::PipelineMismatch { + exp_input_type: "string".into(), + dst_span: head, + src_span: span, + }) + }); + + ParseIter { + captures: VecDeque::new(), + regex, + columns, + iter, + span: head, + ctrlc, + } }) .into()), - PipelineData::ExternalStream { stdout: None, .. } => Ok(PipelineData::Empty), - PipelineData::ExternalStream { stdout: Some(stream), .. - } => Ok(ListStream::new( - ParseStreamerExternal { - span: head, - excess: Vec::new(), - regex: regex_pattern, + } => { + // Collect all `stream` chunks into a single `chunk` to be able to deal with matches that + // extend across chunk boundaries. + // This is a stop-gap solution until the `regex` crate supports streaming or an alternative + // solution is found. + // See https://github.com/nushell/nushell/issues/9795 + let str = stream.into_string()?.item; + + // let iter = stream.lines(); + + let iter = ParseIter { + captures: VecDeque::new(), + regex, columns, - stream: stream.stream, - }, - head, - ctrlc, - ) - .into()), + iter: std::iter::once(Ok(str)), + span: head, + ctrlc, + }; + + Ok(ListStream::new(iter, head, None).into()) + } } } fn build_regex(input: &str, span: Span) -> Result { let mut output = "(?s)\\A".to_string(); - //let mut loop_input = input; let mut loop_input = input.chars().peekable(); loop { let mut before = String::new(); @@ -274,172 +289,73 @@ fn build_regex(input: &str, span: Span) -> Result { Ok(output) } -fn column_names(regex: &Regex) -> Vec { - regex - .capture_names() - .enumerate() - .skip(1) - .map(|(i, name)| { - name.map(String::from) - .unwrap_or_else(|| format!("capture{}", i - 1)) - }) - .collect() -} - -pub struct ParseStreamer { - span: Span, - excess: Vec, +struct ParseIter>> { + captures: VecDeque, regex: Regex, columns: Vec, - stream: ValueIterator, + iter: I, + span: Span, ctrlc: Option>, } -impl Iterator for ParseStreamer { - type Item = Value; - fn next(&mut self) -> Option { - if !self.excess.is_empty() { - return Some(self.excess.remove(0)); +impl>> ParseIter { + fn populate_captures(&mut self, str: &str) -> Result<(), ShellError> { + for captures in self.regex.captures_iter(str) { + self.captures + .push_back(captures_to_value(captures, &self.columns, self.span)?); } + Ok(()) + } +} +impl>> Iterator for ParseIter { + type Item = Value; + + fn next(&mut self) -> Option { loop { - if let Some(ctrlc) = &self.ctrlc { - if ctrlc.load(Ordering::SeqCst) { - break None; - } + if nu_utils::ctrl_c::was_pressed(&self.ctrlc) { + return None; } - let v = self.stream.next()?; - let span = v.span(); + if let Some(val) = self.captures.pop_front() { + return Some(val); + } - let Ok(s) = v.coerce_into_string() else { - return Some(Value::error( - ShellError::PipelineMismatch { - exp_input_type: "string".into(), - dst_span: self.span, - src_span: span, - }, - span, - )); - }; + let result = self + .iter + .next()? + .and_then(|str| self.populate_captures(&str)); - let parsed = stream_helper( - self.regex.clone(), - span, - s, - self.columns.clone(), - &mut self.excess, - ); - - if parsed.is_none() { - continue; - }; - - return parsed; + if let Err(err) = result { + return Some(Value::error(err, self.span)); + } } } } -pub struct ParseStreamerExternal { +fn captures_to_value( + captures: Result, + columns: &[String], span: Span, - excess: Vec, - regex: Regex, - columns: Vec, - stream: Box, ShellError>> + Send + 'static>, -} +) -> Result { + let captures = captures.map_err(|err| ShellError::GenericError { + error: "Error with regular expression captures".into(), + msg: err.to_string(), + span: Some(span), + help: None, + inner: vec![], + })?; -impl Iterator for ParseStreamerExternal { - type Item = Value; - fn next(&mut self) -> Option { - if !self.excess.is_empty() { - return Some(self.excess.remove(0)); - } + let record = columns + .iter() + .zip(captures.iter().skip(1)) + .map(|(column, match_)| { + let match_str = match_.map(|m| m.as_str()).unwrap_or(""); + (column.clone(), Value::string(match_str, span)) + }) + .collect(); - let mut chunk = self.stream.next(); - - // Collect all `stream` chunks into a single `chunk` to be able to deal with matches that - // extend across chunk boundaries. - // This is a stop-gap solution until the `regex` crate supports streaming or an alternative - // solution is found. - // See https://github.com/nushell/nushell/issues/9795 - while let Some(Ok(chunks)) = &mut chunk { - match self.stream.next() { - Some(Ok(mut next_chunk)) => chunks.append(&mut next_chunk), - error @ Some(Err(_)) => chunk = error, - None => break, - } - } - - let chunk = match chunk { - Some(Ok(chunk)) => chunk, - Some(Err(err)) => return Some(Value::error(err, self.span)), - _ => return None, - }; - - let Ok(chunk) = String::from_utf8(chunk) else { - return Some(Value::error( - ShellError::PipelineMismatch { - exp_input_type: "string".into(), - dst_span: self.span, - src_span: self.span, - }, - self.span, - )); - }; - - stream_helper( - self.regex.clone(), - self.span, - chunk, - self.columns.clone(), - &mut self.excess, - ) - } -} - -fn stream_helper( - regex: Regex, - span: Span, - s: String, - columns: Vec, - excess: &mut Vec, -) -> Option { - let results = regex.captures_iter(&s); - - for c in results { - let captures = match c { - Ok(c) => c, - Err(e) => { - return Some(Value::error( - ShellError::GenericError { - error: "Error with regular expression captures".into(), - msg: e.to_string(), - span: Some(span), - help: Some(e.to_string()), - inner: vec![], - }, - span, - )) - } - }; - - let record = columns - .iter() - .zip(captures.iter().skip(1)) - .map(|(column_name, cap)| { - let cap_string = cap.map(|v| v.as_str()).unwrap_or(""); - (column_name.clone(), Value::string(cap_string, span)) - }) - .collect(); - - excess.push(Value::record(record, span)); - } - - if !excess.is_empty() { - Some(excess.remove(0)) - } else { - None - } + Ok(Value::record(record, span)) } #[cfg(test)]