From d0e0701a88296e17067498c562255c3ea31f29c1 Mon Sep 17 00:00:00 2001
From: Mango The Fourth <40720523+MangoIV@users.noreply.github.com>
Date: Wed, 22 Feb 2023 13:59:47 +0100
Subject: [PATCH] fix: fix lexing of comments, such that a#b becomes a coherent
 Item (#8151)

# Description

Previously `nix run nixpkgs#hello` was lexed as `Item, Item, Item,
Comment`. However, `#hello` is *not* supposed to be a comment here and
should be lexed as part of the third `Item`.

This change introduces this behavior by not interrupting the parse of
the current token upon seeing a `#`.

Thank you so much for considering this. I think many `nix` users will be
grateful for this change, and I think it will lead to more adoption
in the ecosystem.

- closes #8137 and #6335

# User-Facing Changes

- code like `somecode# bla` and `somecode#bla` will no longer be lexed as
`somecode, comment`; the `#` now stays part of the current item (e.g.
`somecode#bla` is a single item). Hence this is a breaking change for all
users who didn't put a space before a comment-introducing token (`#`)

# Tests + Formatting

I've added tests that cover this behavior in `test_lex.rs`

- [x] `cargo fmt --all -- --check` to check standard code formatting
(`cargo fmt --all` applies these changes)
- [x] `cargo clippy --workspace -- -D warnings -D clippy::unwrap_used -A
clippy::needless_collect` to check that you're using the standard code
style
- [x] `cargo test --workspace` to check that all tests pass

# After Submitting

> If your PR had any user-facing changes, update [the
documentation](https://github.com/nushell/nushell.github.io) after the
PR is merged, if necessary. This will help us keep the docs up to date.

I think this is the expected behavior in most other shells. The
documentation never described the old, unexpected behavior, so with this
change it is automatically more complete >D
---
 crates/nu-parser/src/lex.rs        |  1 -
 crates/nu-parser/tests/test_lex.rs | 51 ++++++++++++++++++++++++++++++
 2 files changed, 51 insertions(+), 1 deletion(-)

diff --git a/crates/nu-parser/src/lex.rs b/crates/nu-parser/src/lex.rs
index cbb94cfaec..a9e3949b72 100644
--- a/crates/nu-parser/src/lex.rs
+++ b/crates/nu-parser/src/lex.rs
@@ -59,7 +59,6 @@ fn is_item_terminator(
             || c == b'\r'
             || c == b'|'
             || c == b';'
-            || c == b'#'
             || additional_whitespace.contains(&c)
             || special_tokens.contains(&c))
 }
diff --git a/crates/nu-parser/tests/test_lex.rs b/crates/nu-parser/tests/test_lex.rs
index e88eb3d5ce..bbbb33e4af 100644
--- a/crates/nu-parser/tests/test_lex.rs
+++ b/crates/nu-parser/tests/test_lex.rs
@@ -93,6 +93,57 @@ fn lex_incomplete_quote() {
     assert!(matches!(err, ParseError::UnexpectedEof(v, _) if v == "'"));
 }
 
+#[test]
+fn lex_comments_no_space() {
+    // test for parses that contain tokens that normally introduce comments
+    // Code:
+    // let z = 42 #the comment
+    // let x#y = 69 #hello
+    // let flk = nixpkgs#hello #hello
+    let file = b"let z = 42 #the comment \n let x#y = 69 #hello \n let flk = nixpkgs#hello #hello";
+    let output = lex(file, 0, &[], &[], false);
+
+    assert_eq!(
+        output.0.get(4).unwrap(),
+        &Token {
+            contents: TokenContents::Comment,
+            span: Span::new(11, 24)
+        }
+    );
+
+    assert_eq!(
+        output.0.get(7).unwrap(),
+        &Token {
+            contents: TokenContents::Item,
+            span: Span::new(30, 33)
+        }
+    );
+
+    assert_eq!(
+        output.0.get(10).unwrap(),
+        &Token {
+            contents: TokenContents::Comment,
+            span: Span::new(39, 46)
+        }
+    );
+
+    assert_eq!(
+        output.0.get(15).unwrap(),
+        &Token {
+            contents: TokenContents::Item,
+            span: Span::new(58, 71)
+        }
+    );
+
+    assert_eq!(
+        output.0.get(16).unwrap(),
+        &Token {
+            contents: TokenContents::Comment,
+            span: Span::new(72, 78)
+        }
+    );
+}
+
 #[test]
 fn lex_comments() {
     // Comments should keep the end of line token