From cd4d778f058b05e7da27067ca602b24a5efe56b6 Mon Sep 17 00:00:00 2001 From: gabriel Date: Mon, 13 May 2024 22:58:05 +0800 Subject: [PATCH 1/4] attempt to make `lex` parse string interpolation properly --- crates/nu-parser/src/lex.rs | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/crates/nu-parser/src/lex.rs b/crates/nu-parser/src/lex.rs index f9e75d13f0..471ca08f1a 100644 --- a/crates/nu-parser/src/lex.rs +++ b/crates/nu-parser/src/lex.rs @@ -30,7 +30,7 @@ impl Token { } } -#[derive(Clone, Copy, Debug)] +#[derive(Clone, Copy, Debug, PartialEq)] pub enum BlockKind { Paren, CurlyBracket, @@ -94,6 +94,9 @@ pub fn lex_item( let token_start = *curr_offset; + let mut string_interpolation = false; + let mut inter_bracket = false; + // This Vec tracks paired delimiters let mut block_level: Vec = vec![]; @@ -133,12 +136,18 @@ pub fn lex_item( )), ); } + } else if c == b'(' && string_interpolation { + inter_bracket = true; + } else if c == b')' && string_interpolation { + inter_bracket = false; } + // If we encountered the closing quote character for the current // string, we're done with the current string. - if c == start { + if c == start && (!string_interpolation || !inter_bracket) { // Also need to check to make sure we aren't escaped quote_start = None; + string_interpolation = false; } } else if c == b'#' { if is_item_terminator(&block_level, c, additional_whitespace, special_tokens) { @@ -157,7 +166,12 @@ pub fn lex_item( } else if is_special_item(&block_level, c, special_tokens) && token_start == *curr_offset { *curr_offset += 1; break; - } else if c == b'\'' || c == b'"' || c == b'`' { + } else if (c == b'\'' || c == b'"' || c == b'`') && quote_start.is_none() { + if *curr_offset > 0 && input.get(*curr_offset - 1) == Some(&b'$'){ + if c == b'\'' || c == b'"' { + string_interpolation = true; + } + } // We encountered the opening quote of a string literal. quote_start = Some(c); } else if c == b'[' { @@ -266,6 +280,7 @@ pub fn lex_item( // The non-lite parse trims quotes on both sides, so we add the expected quote so that // anyone wanting to consume this partial parse (e.g., completions) will be able to get // correct information from the non-lite parse. + return ( Token { contents: TokenContents::Item, @@ -273,7 +288,7 @@ pub fn lex_item( }, Some(ParseError::UnexpectedEof( (delim as char).to_string(), - Span::new(span.end, span.end), + Span::new(span.start, span.end), )), ); } From 8adc4833fa2d2904871c9e9e7501c4599f409a67 Mon Sep 17 00:00:00 2001 From: gabriel Date: Tue, 14 May 2024 18:14:52 +0800 Subject: [PATCH 2/4] fmt --- crates/nu-parser/src/lex.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/nu-parser/src/lex.rs b/crates/nu-parser/src/lex.rs index 471ca08f1a..f6b70b922c 100644 --- a/crates/nu-parser/src/lex.rs +++ b/crates/nu-parser/src/lex.rs @@ -167,7 +167,7 @@ pub fn lex_item( *curr_offset += 1; break; } else if (c == b'\'' || c == b'"' || c == b'`') && quote_start.is_none() { - if *curr_offset > 0 && input.get(*curr_offset - 1) == Some(&b'$'){ + if *curr_offset > 0 && input.get(*curr_offset - 1) == Some(&b'$') { if c == b'\'' || c == b'"' { string_interpolation = true; } From 6ad7a434a7efe4ce90d9857981591493ff03206f Mon Sep 17 00:00:00 2001 From: gabriel Date: Tue, 14 May 2024 18:16:57 +0800 Subject: [PATCH 3/4] clippy --- crates/nu-parser/src/lex.rs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/crates/nu-parser/src/lex.rs b/crates/nu-parser/src/lex.rs index f6b70b922c..9560c5aec5 100644 --- a/crates/nu-parser/src/lex.rs +++ b/crates/nu-parser/src/lex.rs @@ -167,10 +167,11 @@ pub fn lex_item( *curr_offset += 1; break; } else if (c == b'\'' || c == b'"' || c == b'`') && quote_start.is_none() { - if *curr_offset > 0 && input.get(*curr_offset - 1) == Some(&b'$') { - if c == b'\'' || c == b'"' { - string_interpolation = true; - } + if *curr_offset > 0 + && input.get(*curr_offset - 1) == Some(&b'$') + && (c == b'\'' || c == b'"') + { + string_interpolation = true; } // We encountered the opening quote of a string literal. quote_start = Some(c); From 5832e125807980f43e2b835c0181cba181e25fc0 Mon Sep 17 00:00:00 2001 From: gabriel Date: Wed, 15 May 2024 08:57:42 +0800 Subject: [PATCH 4/4] add test --- crates/nu-parser/tests/test_lex.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/crates/nu-parser/tests/test_lex.rs b/crates/nu-parser/tests/test_lex.rs index 07470a310e..eec7b14b43 100644 --- a/crates/nu-parser/tests/test_lex.rs +++ b/crates/nu-parser/tests/test_lex.rs @@ -187,6 +187,13 @@ fn lex_incomplete_quote() { assert!(matches!(err, ParseError::UnexpectedEof(v, _) if v == "'")); } +#[test] +fn lex_odd_string_interpolations() { + let file = b"$\"('\"')\""; + let output = lex(file, 0, &[], &[], true); + assert!(output.1.is_none()); +} + #[test] fn lex_comments_no_space() { // test for parses that contain tokens that normally introduce comments