From d0e0701a88296e17067498c562255c3ea31f29c1 Mon Sep 17 00:00:00 2001 From: Mango The Fourth <40720523+MangoIV@users.noreply.github.com> Date: Wed, 22 Feb 2023 13:59:47 +0100 Subject: [PATCH] fix: fix lexing of comments, such that a#b becomes a coherent Item (#8151) # Description Previously `nix run nixpkgs#hello` was lexed as `Item, Item, Item, Comment`, however, `#hello` is *not* supposed to be a comment here and should be parsed as part of the third `Item`. This change introduces this behavior by not interrupting the parse of the current token upon seeing a `#`. Thank you so much for considering this, I think many `nix` users will be grateful for this change and I think this will lead to more adoption in the ecosystem. - closes #8137 and #6335 # User-Facing Changes - code like `somecode# bla` and `somecode#bla` will not be parsed as `somecode, comment` but as `somecode#bla`, hence this is a breaking change for all users who didn't put a space before a comment-introducing token (`#`) # Tests + Formatting I've added tests that cover this behavior in `test_lex.rs` - [x] `cargo fmt --all -- --check` to check standard code formatting (`cargo fmt --all` applies these changes) - [x] `cargo clippy --workspace -- -D warnings -D clippy::unwrap_used -A clippy::needless_collect` to check that you're using the standard code style - [x] `cargo test --workspace` to check that all tests pass # After Submitting > If your PR had any user-facing changes, update [the documentation](https://github.com/nushell/nushell.github.io) after the PR is merged, if necessary. This will help us keep the docs up to date. 
I think this is the expected behavior in most other shells, so the documentation was incomplete before for not mentioning the previous, unexpected behavior, and is now automatically more complete >D --- crates/nu-parser/src/lex.rs | 1 - crates/nu-parser/tests/test_lex.rs | 51 ++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+), 1 deletion(-) diff --git a/crates/nu-parser/src/lex.rs b/crates/nu-parser/src/lex.rs index cbb94cfaec..a9e3949b72 100644 --- a/crates/nu-parser/src/lex.rs +++ b/crates/nu-parser/src/lex.rs @@ -59,7 +59,6 @@ fn is_item_terminator( || c == b'\r' || c == b'|' || c == b';' - || c == b'#' || additional_whitespace.contains(&c) || special_tokens.contains(&c)) } diff --git a/crates/nu-parser/tests/test_lex.rs b/crates/nu-parser/tests/test_lex.rs index e88eb3d5ce..bbbb33e4af 100644 --- a/crates/nu-parser/tests/test_lex.rs +++ b/crates/nu-parser/tests/test_lex.rs @@ -93,6 +93,57 @@ fn lex_incomplete_quote() { assert!(matches!(err, ParseError::UnexpectedEof(v, _) if v == "'")); } +#[test] +fn lex_comments_no_space() { + // test for parses that contain tokens that normally introduce comments + // Code: + // let z = 42 #the comment + // let x#y = 69 #hello + // let flk = nixpkgs#hello #hello + let file = b"let z = 42 #the comment \n let x#y = 69 #hello \n let flk = nixpkgs#hello #hello"; + let output = lex(file, 0, &[], &[], false); + + assert_eq!( + output.0.get(4).unwrap(), + &Token { + contents: TokenContents::Comment, + span: Span::new(11, 24) + } + ); + + assert_eq!( + output.0.get(7).unwrap(), + &Token { + contents: TokenContents::Item, + span: Span::new(30, 33) + } + ); + + assert_eq!( + output.0.get(10).unwrap(), + &Token { + contents: TokenContents::Comment, + span: Span::new(39, 46) + } + ); + + assert_eq!( + output.0.get(15).unwrap(), + &Token { + contents: TokenContents::Item, + span: Span::new(58, 71) + } + ); + + assert_eq!( + output.0.get(16).unwrap(), + &Token { + contents: TokenContents::Comment, + span: Span::new(72, 78) + } + ); +} + 
#[test] fn lex_comments() { // Comments should keep the end of line token