diff --git a/crates/nu-parser/src/errors.rs b/crates/nu-parser/src/errors.rs index e5020f9d8c..a2425e9395 100644 --- a/crates/nu-parser/src/errors.rs +++ b/crates/nu-parser/src/errors.rs @@ -442,6 +442,10 @@ pub enum ParseError { )] NotAConstant(#[label = "Value is not a parse-time constant"] Span), + #[error("Invalid literal")] // in . + #[diagnostic()] + InvalidLiteral(String, String, #[label("{0} in {1}")] Span), + #[error("{0}")] #[diagnostic()] LabeledError(String, String, #[label("{1}")] Span), @@ -520,6 +524,7 @@ impl ParseError { ParseError::ShellErrRedirect(s) => *s, ParseError::ShellOutErrRedirect(s) => *s, ParseError::UnknownOperator(_, _, s) => *s, + ParseError::InvalidLiteral(_, _, s) => *s, ParseError::NotAConstant(s) => *s, } } diff --git a/crates/nu-parser/src/parser.rs b/crates/nu-parser/src/parser.rs index 6d40c403dd..3dd3f5d371 100644 --- a/crates/nu-parser/src/parser.rs +++ b/crates/nu-parser/src/parser.rs @@ -1351,9 +1351,9 @@ pub fn parse_int(token: &[u8], span: Span) -> (Expression, Option) { } else { ( garbage(span), - Some(ParseError::Mismatch( + Some(ParseError::InvalidLiteral( + format!("invalid digits for radix {}", radix), "int".into(), - "incompatible int".into(), span, )), ) @@ -1362,6 +1362,13 @@ pub fn parse_int(token: &[u8], span: Span) -> (Expression, Option) { let token = strip_underscores(token); + if token.is_empty() { + return ( + garbage(span), + Some(ParseError::Expected("int".into(), span)), + ); + } + if let Some(num) = token.strip_prefix("0b") { extract_int(num, span, 2) } else if let Some(num) = token.strip_prefix("0o") { @@ -1408,16 +1415,21 @@ pub fn parse_float(token: &[u8], span: Span) -> (Expression, Option) } pub fn parse_number(token: &[u8], span: Span) -> (Expression, Option) { - if let (x, None) = parse_int(token, span) { - (x, None) - } else if let (x, None) = parse_float(token, span) { - (x, None) - } else { - ( - garbage(span), - Some(ParseError::Expected("number".into(), span)), - ) + match parse_int(token, span) { + (x, None) => { + return (x, None); + } + (_, Some(ParseError::Expected(_, _))) => {} + (x, e) => return (x, e), } + if let (x, None) = parse_float(token, span) { + return (x, None); + } + + ( + garbage(span), + Some(ParseError::Expected("number".into(), span)), + ) } pub fn parse_range( @@ -1432,6 +1444,7 @@ pub fn parse_range( // and is ".." or "..<" // and one of the or bounds must be present (just '..' is not allowed since it // looks like parent directory) + //bugbug range cannot be [..] because that looks like parent directory let contents = working_set.get_span_contents(span); @@ -2140,11 +2153,7 @@ pub fn parse_datetime( if bytes.is_empty() || !bytes[0].is_ascii_digit() { return ( garbage(span), - Some(ParseError::Mismatch( - "datetime".into(), - "non-datetime".into(), - span, - )), + Some(ParseError::Expected("datetime".into(), span)), ); } @@ -2192,11 +2201,7 @@ pub fn parse_datetime( ( garbage(span), - Some(ParseError::Mismatch( - "datetime".into(), - "non-datetime".into(), - span, - )), + Some(ParseError::Expected("datetime".into(), span)), ) } @@ -2213,9 +2218,8 @@ pub fn parse_duration( Some(expression) => (expression, None), None => ( garbage(span), - Some(ParseError::Mismatch( - "duration".into(), - "non-duration unit".into(), + Some(ParseError::Expected( + "duration with valid units".into(), span, )), ), @@ -2339,13 +2343,13 @@ pub fn parse_filesize( let bytes = working_set.get_span_contents(span); + //todo: parse_filesize_bytes should distinguish between not-that-type and syntax error in units match parse_filesize_bytes(bytes, span) { Some(expression) => (expression, None), None => ( garbage(span), - Some(ParseError::Mismatch( - "filesize".into(), - "non-filesize unit".into(), + Some(ParseError::Expected( + "filesize with valid units".into(), span, )), ), @@ -2454,7 +2458,7 @@ pub fn parse_glob_pattern( } else { ( garbage(span), - Some(ParseError::Expected("string".into(), span)), + Some(ParseError::Expected("glob pattern string".into(), span)), ) } } @@ -2568,8 +2572,9 @@ pub fn unescape_string(bytes: &[u8], span: Span) -> (Vec, Option cur_idx += 1; } _ => { - err = Some(ParseError::Expected( - "closing '}' in unicode escape `\\u{n..}`".into(), + err = Some(ParseError::InvalidLiteral( + "missing '}' for unicode escape '\\u{X...}'".into(), + "string".into(), Span::new(span.start + idx, span.end), )); break 'us_loop; @@ -2600,16 +2605,18 @@ pub fn unescape_string(bytes: &[u8], span: Span) -> (Vec, Option } } // fall through -- escape not accepted above, must be error. - err = Some(ParseError::Expected( - "unicode escape \\u{n..}".into(), + err = Some(ParseError::InvalidLiteral( + "invalid unicode escape '\\u{X...}', must be 1-6 hex digits, max value 10FFFF".into(), + "string".into(), Span::new(span.start + idx, span.end), )); break 'us_loop; } _ => { - err = Some(ParseError::Expected( - "supported escape character".into(), + err = Some(ParseError::InvalidLiteral( + "unrecognized escape after '\\'".into(), + "string".into(), Span::new(span.start + idx, span.end), )); break 'us_loop; @@ -4539,11 +4546,23 @@ pub fn parse_value( ) } } + + // Be sure to return ParseError::Expected(..) if invoked for one of these shapes, but lex + // stream doesn't start with '{'} -- parsing in SyntaxShape::Any arm depends on this error variant. + SyntaxShape::Block | SyntaxShape::Closure(..) | SyntaxShape::Record => ( + garbage(span), + Some(ParseError::Expected( + "block, closure or record".into(), + span, + )), + ), + SyntaxShape::Any => { if bytes.starts_with(b"[") { //parse_value(working_set, span, &SyntaxShape::Table) parse_full_cell_path(working_set, None, span, expand_aliases_denylist) } else { + /* Parser very sensitive to order of shapes tried. Recording the original order for postierity let shapes = [ SyntaxShape::Binary, SyntaxShape::Int, @@ -4557,11 +4576,34 @@ pub fn parse_value( SyntaxShape::Block, SyntaxShape::String, ]; + */ + let shapes = [ + SyntaxShape::Binary, + SyntaxShape::Filesize, + SyntaxShape::Duration, + SyntaxShape::Range, + SyntaxShape::DateTime, //FIXME requires 3 failed conversion attempts before failing + SyntaxShape::Record, + SyntaxShape::Closure(None), + SyntaxShape::Block, + SyntaxShape::Int, + SyntaxShape::Number, + SyntaxShape::String, + ]; for shape in shapes.iter() { - if let (s, None) = - parse_value(working_set, span, shape, expand_aliases_denylist) - { - return (s, None); + let (s, e) = parse_value(working_set, span, shape, expand_aliases_denylist); + match (s, e) { + (s, None) => { + return (s, None); + } + (_, Some(ParseError::Expected(_, _))) => { + // value didn't parse as this shape, try other options + continue; + } + (s, e) => { + // value did parse, but had syntax issues, don't try any more options. + return (s, e); + } } } ( diff --git a/crates/nu-parser/tests/test_parser.rs b/crates/nu-parser/tests/test_parser.rs index c266fcb02e..7140bb8383 100644 --- a/crates/nu-parser/tests/test_parser.rs +++ b/crates/nu-parser/tests/test_parser.rs @@ -42,6 +42,239 @@ impl Command for Let { } } +fn test_int( + test_tag: &str, // name of sub-test + test: &[u8], // input expression + expected_val: Expr, // (usually Expr::{Int,String, Float}, not ::BinOp... + expected_err: Option<&str>, +) // substring in error text +{ + let engine_state = EngineState::new(); + let mut working_set = StateWorkingSet::new(&engine_state); + + let (block, err) = parse(&mut working_set, None, test, true, &[]); + + if let Some(err_pat) = expected_err { + if let Some(parse_err) = err { + let act_err = format!("{:?}", parse_err); + assert!( + act_err.contains(err_pat), + "{test_tag}: expected err to contain {err_pat}, but actual error was {act_err}" + ); + } else { + assert!( + err.is_some(), + "{test_tag}: expected err containing {err_pat}, but no error returned" + ); + } + } else { + assert!(err.is_none(), "{test_tag}: unexpected error {err:#?}"); + assert_eq!(block.len(), 1, "{test_tag}: result block length > 1"); + let expressions = &block[0]; + assert_eq!( + expressions.len(), + 1, + "{test_tag}: got multiple result expressions, expected 1" + ); + if let PipelineElement::Expression( + _, + Expression { + expr: observed_val, .. + }, + ) = &expressions[0] + { + compare_rhs_binaryOp(test_tag, &expected_val, &observed_val); + } + } +} + +#[allow(non_snake_case)] +fn compare_rhs_binaryOp( + test_tag: &str, + expected: &Expr, // the rhs expr we hope to see (::Int, ::Float, not ::B) + observed: &Expr, // the Expr actually provided: can be ::Int, ::Float, ::String, + // or ::BinOp (in which case rhs is checked), or ::Call (in which case cmd is checked) +) { + match observed { + Expr::Int(..) | Expr::Float(..) | Expr::String(..) => { + assert_eq!( + expected, observed, + "{test_tag}: Expected: {expected:#?}, observed {observed:#?}" + ); + } + Expr::BinaryOp(_, _, e) => { + let observed_expr = &e.expr; + // can't pattern match Box, but can match the box, then deref in separate statement. + assert_eq!( + expected, observed_expr, + "{test_tag}: Expected: {expected:#?}, observed: {observed:#?}" + ) + } + Expr::ExternalCall(e, _, _) => { + let observed_expr = &e.expr; + assert_eq!( + expected, observed_expr, + "{test_tag}: Expected: {expected:#?}, observed: {observed_expr:#?}" + ) + } + _ => { + panic!("{test_tag}: Unexpected Expr:: variant returned, observed {observed:#?}"); + } + } +} + +#[test] +pub fn multi_test_parse_int() { + struct Test<'a>(&'a str, &'a [u8], Expr, Option<&'a str>); + + // use test expression of form '0 + x' to force parse() to parse x as numeric. + // if expression were just 'x', parse() would try other items that would mask the error we're looking for. + let tests = vec![ + Test("binary literal int", b"0 + 0b0", Expr::Int(0), None), + Test( + "binary literal invalid digits", + b"0 + 0b2", + Expr::Int(0), + Some("invalid digits for radix 2"), + ), + Test("octal literal int", b"0 + 0o1", Expr::Int(1), None), + Test( + "octal literal int invalid digits", + b"0 + 0o8", + Expr::Int(0), + Some("invalid digits for radix 8"), + ), + Test( + "octal literal int truncated", + b"0 + 0o", + Expr::Int(0), + Some("invalid digits for radix 8"), + ), + Test("hex literal int", b"0 + 0x2", Expr::Int(2), None), + Test( + "hex literal int invalid digits", + b"0 + 0x0aq", + Expr::Int(0), + Some("invalid digits for radix 16"), + ), + Test( + "hex literal with 'e' not mistaken for float", + b"0 + 0x00e0", + Expr::Int(0xe0), + None, + ), + // decimal (rad10) literal is anything that starts with + // optional sign then a digit. + Test("rad10 literal int", b"0 + 42", Expr::Int(42), None), + Test( + "rad10 with leading + sign", + b"0 + -42", + Expr::Int(-42), + None, + ), + Test("rad10 with leading - sign", b"0 + +42", Expr::Int(42), None), + Test( + "flag char is string, not (invalid) int", + b"-x", + Expr::String("-x".into()), + None, + ), + Test( + "keyword parameter is string", + b"--exact", + Expr::String("--exact".into()), + None, + ), + Test( + "ranges or relative paths not confused for int", + b"./a/b", + Expr::String("./a/b".into()), + None, + ), + Test( + "semver data not confused for int", + b"1.0.1", + Expr::String("1.0.1".into()), + None, + ), + ]; + + for test in tests { + test_int(test.0, test.1, test.2, test.3); + } +} + +#[ignore] +#[test] +pub fn multi_test_parse_number() { + struct Test<'a>(&'a str, &'a [u8], Expr, Option<&'a str>); + + // use test expression of form '0 + x' to force parse() to parse x as numeric. + // if expression were just 'x', parse() would try other items that would mask the error we're looking for. + let tests = vec![ + Test("float decimal", b"0 + 43.5", Expr::Float(43.5), None), + //Test("float with leading + sign", b"0 + +41.7", Expr::Float(-41.7), None), + Test( + "float with leading - sign", + b"0 + -41.7", + Expr::Float(-41.7), + None, + ), + Test( + "float scientific notation", + b"0 + 3e10", + Expr::Float(3.00e10), + None, + ), + Test( + "float decimal literal invalid digits", + b"0 + .3foo", + Expr::Int(0), + Some("invalid digits"), + ), + Test( + "float scientific notation literal invalid digits", + b"0 + 3e0faa", + Expr::Int(0), + Some("invalid digits"), + ), + Test( + // odd that error is unsupportedOperation, but it does fail. + "decimal literal int 2 leading signs", + b"0 + --9", + Expr::Int(0), + Some("UnsupportedOperation"), + ), + //Test( + // ". should not be taken as float", + // b"abc + .foo", + // Expr::String("..".into()), + // None, + //), + ]; + + for test in tests { + test_int(test.0, test.1, test.2, test.3); + } +} +#[ignore] +#[test] +fn test_parse_any() { + let test = b"1..10"; + let engine_state = EngineState::new(); + let mut working_set = StateWorkingSet::new(&engine_state); + + let (block, err) = parse(&mut working_set, None, test, true, &[]); + + match (block, err) { + (_, Some(e)) => { + println!("test: {test:?}, error: {e:#?}"); + } + (b, None) => { + println!("test: {test:?}, parse: {b:#?}"); + } + } +} #[test] pub fn parse_int() { let engine_state = EngineState::new(); diff --git a/crates/nu-parser/tests/test_parser_unicode_escapes.rs b/crates/nu-parser/tests/test_parser_unicode_escapes.rs index f330c11473..50d4e10c21 100644 --- a/crates/nu-parser/tests/test_parser_unicode_escapes.rs +++ b/crates/nu-parser/tests/test_parser_unicode_escapes.rs @@ -74,12 +74,12 @@ pub fn unicode_escapes_in_strings_expected_failures() { // template: Tc(br#"""", "") //deprecated Tc(br#""\u06e""#, "any shape"), // 4digit too short, next char is EOF //deprecatedTc(br#""\u06ex""#, "any shape"), // 4digit too short, next char is non-hex-digit - Tc(br#""hello \u{6e""#, "any shape"), // extended, missing close delim + Tc(br#""hello \u{6e""#, "missing '}'"), // extended, missing close delim Tc( br#""\u{39}8\u{000000000000000000000000000000000000000000000037}""#, - "any shape", + "must be 1-6 hex digits", ), // hex too long, but small value - Tc(br#""\u{110000}""#, "any shape"), // max unicode <= 0x10ffff + Tc(br#""\u{110000}""#, "max value 10FFF"), // max unicode <= 0x10ffff ]; for tci in test_vec {