From d4798d6ee19ddd1fc4cd08093adf022f3797420b Mon Sep 17 00:00:00 2001 From: Gavin Foley <6389719+gavinfoley@users.noreply.github.com> Date: Sun, 6 Nov 2022 14:57:28 -0500 Subject: [PATCH] Fix for escaping backslashes in interpolated strings (fixes #6737) (#7020) Co-authored-by: Gavin Foley --- crates/nu-parser/src/parser.rs | 147 ++++++++++-------- crates/nu-parser/tests/test_parser.rs | 215 +++++++++++++++++++++----- 2 files changed, 257 insertions(+), 105 deletions(-) diff --git a/crates/nu-parser/src/parser.rs b/crates/nu-parser/src/parser.rs index 9cd521cf47..5260e9c194 100644 --- a/crates/nu-parser/src/parser.rs +++ b/crates/nu-parser/src/parser.rs @@ -1604,87 +1604,104 @@ pub fn parse_string_interpolation( let mut b = start; + let mut consecutive_backslashes: usize = 0; + while b != end { - if contents[b - start] == b'(' - && (if double_quote && (b - start) > 0 { - contents[b - start - 1] != b'\\' - } else { - true - }) - && mode == InterpolationMode::String - { - mode = InterpolationMode::Expression; - if token_start < b { - let span = Span { - start: token_start, - end: b, - }; - let str_contents = working_set.get_span_contents(span); + let current_byte = contents[b - start]; - let str_contents = if double_quote { - let (str_contents, err) = unescape_string(str_contents, span); - error = error.or(err); + match mode { + InterpolationMode::String => { + let preceding_consecutive_backslashes = consecutive_backslashes; - str_contents + let is_backslash = current_byte == b'\\'; + consecutive_backslashes = if is_backslash { + preceding_consecutive_backslashes + 1 } else { - str_contents.to_vec() + 0 }; - output.push(Expression { - expr: Expr::String(String::from_utf8_lossy(&str_contents).to_string()), - span, - ty: Type::String, - custom_completion: None, - }); - token_start = b; - } - } - if mode == InterpolationMode::Expression { - let byte = contents[b - start]; - if let Some(b'\'') = delimiter_stack.last() { - if byte == b'\'' { - delimiter_stack.pop(); - } - } else if let Some(b'"') = delimiter_stack.last() { - if byte == b'"' { - delimiter_stack.pop(); - } - } else if let Some(b'`') = delimiter_stack.last() { - if byte == b'`' { - delimiter_stack.pop(); - } - } else if byte == b'\'' { - delimiter_stack.push(b'\'') - } else if byte == b'"' { - delimiter_stack.push(b'"'); - } else if byte == b'`' { - delimiter_stack.push(b'`') - } else if byte == b'(' { - delimiter_stack.push(b')'); - } else if byte == b')' { - if let Some(b')') = delimiter_stack.last() { - delimiter_stack.pop(); - } - if delimiter_stack.is_empty() { - mode = InterpolationMode::String; - + if current_byte == b'(' + && (!double_quote || preceding_consecutive_backslashes % 2 == 0) + { + mode = InterpolationMode::Expression; if token_start < b { let span = Span { start: token_start, - end: b + 1, + end: b, + }; + let str_contents = working_set.get_span_contents(span); + + let str_contents = if double_quote { + let (str_contents, err) = unescape_string(str_contents, span); + error = error.or(err); + + str_contents + } else { + str_contents.to_vec() }; - let (expr, err) = - parse_full_cell_path(working_set, None, span, expand_aliases_denylist); - error = error.or(err); - output.push(expr); + output.push(Expression { + expr: Expr::String(String::from_utf8_lossy(&str_contents).to_string()), + span, + ty: Type::String, + custom_completion: None, + }); + token_start = b; } + } + } + InterpolationMode::Expression => { + let byte = current_byte; + if let Some(b'\'') = delimiter_stack.last() { + if byte == b'\'' { + delimiter_stack.pop(); + } + } else if let Some(b'"') = delimiter_stack.last() { + if byte == b'"' { + delimiter_stack.pop(); + } + } else if let Some(b'`') = delimiter_stack.last() { + if byte == b'`' { + delimiter_stack.pop(); + } + } else if byte == b'\'' { + delimiter_stack.push(b'\'') + } else if byte == b'"' { + delimiter_stack.push(b'"'); + } else if byte == b'`' { + delimiter_stack.push(b'`') + } else if byte == b'(' { + delimiter_stack.push(b')'); + } else if byte == b')' { + if let Some(b')') = delimiter_stack.last() { + delimiter_stack.pop(); + } + if delimiter_stack.is_empty() { + mode = InterpolationMode::String; - token_start = b + 1; - continue; + if token_start < b { + let span = Span { + start: token_start, + end: b + 1, + }; + + let (expr, err) = parse_full_cell_path( + working_set, + None, + span, + expand_aliases_denylist, + ); + error = error.or(err); + output.push(expr); + } + + token_start = b + 1; + continue; + } } } } + b += 1; } diff --git a/crates/nu-parser/tests/test_parser.rs b/crates/nu-parser/tests/test_parser.rs index c75a74c39e..500376059c 100644 --- a/crates/nu-parser/tests/test_parser.rs +++ b/crates/nu-parser/tests/test_parser.rs @@ -175,46 +175,6 @@ pub fn parse_binary_with_multi_byte_char() { assert!(!matches!(&expressions[0].expr, Expr::Binary(_))) } -#[test] -pub fn parse_string() { - let engine_state = EngineState::new(); - let mut working_set = StateWorkingSet::new(&engine_state); - - let (block, err) = parse(&mut working_set, None, b"\"hello nushell\"", true, &[]); - - assert!(err.is_none()); - assert!(block.len() == 1); - let expressions = &block[0]; - assert!(expressions.len() == 1); - assert_eq!( - expressions[0].expr, - Expr::String("hello nushell".to_string()) - ) -} - -#[test] -pub fn parse_escaped_string() { - let engine_state = EngineState::new(); - let mut working_set = StateWorkingSet::new(&engine_state); - - let (block, err) = parse( - &mut working_set, - None, - b"\"hello \\u006e\\u0075\\u0073hell\"", - true, - &[], - ); - - assert!(err.is_none()); - assert!(block.len() == 1); - let expressions = &block[0]; - assert!(expressions.len() == 1); - assert_eq!( - expressions[0].expr, - Expr::String("hello nushell".to_string()) - ) -} - #[test] pub fn parse_call() { let engine_state = EngineState::new(); @@ -364,6 +324,181 @@ fn test_nothing_comparisson_neq() { )) } +mod string { + use super::*; + + #[test] + pub fn parse_string() { + let engine_state = EngineState::new(); + let mut working_set = StateWorkingSet::new(&engine_state); + + let (block, err) = parse(&mut working_set, None, b"\"hello nushell\"", true, &[]); + + assert!(err.is_none()); + assert!(block.len() == 1); + let expressions = &block[0]; + assert!(expressions.len() == 1); + assert_eq!( + expressions[0].expr, + Expr::String("hello nushell".to_string()) + ) + } + + #[test] + pub fn parse_escaped_string() { + let engine_state = EngineState::new(); + let mut working_set = StateWorkingSet::new(&engine_state); + + let (block, err) = parse( + &mut working_set, + None, + b"\"hello \\u006e\\u0075\\u0073hell\"", + true, + &[], + ); + + assert!(err.is_none()); + assert!(block.len() == 1); + let expressions = &block[0]; + assert!(expressions.len() == 1); + assert_eq!( + expressions[0].expr, + Expr::String("hello nushell".to_string()) + ) + } + + mod interpolation { + use super::*; + + #[test] + pub fn parse_string_interpolation() { + let engine_state = EngineState::new(); + let mut working_set = StateWorkingSet::new(&engine_state); + + let (block, err) = parse(&mut working_set, None, b"$\"hello (39 + 3)\"", true, &[]); + + assert!(err.is_none()); + assert!(block.len() == 1); + + let expressions = &block[0]; + assert!(expressions.len() == 1); + + let expr = &expressions[0].expr; + + let subexprs: Vec<&Expr>; + match expr { + Expr::StringInterpolation(expressions) => { + subexprs = expressions.iter().map(|e| &e.expr).collect(); + } + _ => panic!("Expected an `Expr::StringInterpolation`"), + } + + assert_eq!(subexprs.len(), 2); + + assert_eq!(subexprs[0], &Expr::String("hello ".to_string())); + + assert!(matches!(subexprs[1], &Expr::FullCellPath(..))); + } + + #[test] + pub fn parse_string_interpolation_escaped_parenthesis() { + let engine_state = EngineState::new(); + let mut working_set = StateWorkingSet::new(&engine_state); + + let (block, err) = parse(&mut working_set, None, b"$\"hello \\(39 + 3)\"", true, &[]); + + assert!(err.is_none()); + + assert!(block.len() == 1); + let expressions = &block[0]; + + assert!(expressions.len() == 1); + let expr = &expressions[0].expr; + + let subexprs: Vec<&Expr>; + match expr { + Expr::StringInterpolation(expressions) => { + subexprs = expressions.iter().map(|e| &e.expr).collect(); + } + _ => panic!("Expected an `Expr::StringInterpolation`"), + } + + assert_eq!(subexprs.len(), 1); + + assert_eq!(subexprs[0], &Expr::String("hello (39 + 3)".to_string())); + } + + #[test] + pub fn parse_string_interpolation_escaped_backslash_before_parenthesis() { + let engine_state = EngineState::new(); + let mut working_set = StateWorkingSet::new(&engine_state); + + let (block, err) = parse( + &mut working_set, + None, + b"$\"hello \\\\(39 + 3)\"", + true, + &[], + ); + + assert!(err.is_none()); + + assert!(block.len() == 1); + let expressions = &block[0]; + + assert!(expressions.len() == 1); + let expr = &expressions[0].expr; + + let subexprs: Vec<&Expr>; + match expr { + Expr::StringInterpolation(expressions) => { + subexprs = expressions.iter().map(|e| &e.expr).collect(); + } + _ => panic!("Expected an `Expr::StringInterpolation`"), + } + + assert_eq!(subexprs.len(), 2); + + assert_eq!(subexprs[0], &Expr::String("hello \\".to_string())); + + assert!(matches!(subexprs[1], &Expr::FullCellPath(..))); + } + + #[test] + pub fn parse_string_interpolation_backslash_count_reset_by_expression() { + let engine_state = EngineState::new(); + let mut working_set = StateWorkingSet::new(&engine_state); + + let (block, err) = parse( + &mut working_set, + None, + b"$\"\\(1 + 3)\\(7 - 5)\"", + true, + &[], + ); + + assert!(err.is_none()); + + assert!(block.len() == 1); + let expressions = &block[0]; + + assert!(expressions.len() == 1); + let expr = &expressions[0].expr; + + let subexprs: Vec<&Expr>; + match expr { + Expr::StringInterpolation(expressions) => { + subexprs = expressions.iter().map(|e| &e.expr).collect(); + } + _ => panic!("Expected an `Expr::StringInterpolation`"), + } + + assert_eq!(subexprs.len(), 1); + assert_eq!(subexprs[0], &Expr::String("(1 + 3)(7 - 5)".to_string())); + } + } +} + mod range { use super::*; use nu_protocol::ast::{RangeInclusion, RangeOperator};