From ca224775119e06efd10d48cdce437dfb9526a57a Mon Sep 17 00:00:00 2001
From: Kira
Date: Fri, 19 Jul 2024 23:32:42 +0200
Subject: [PATCH 1/8] draft fix for datetimes in record values

---
 crates/nu-parser/src/lex.rs    | 44 ++++++++++++++++++++++++++++++++++
 crates/nu-parser/src/parser.rs |  5 ++--
 2 files changed, 47 insertions(+), 2 deletions(-)

diff --git a/crates/nu-parser/src/lex.rs b/crates/nu-parser/src/lex.rs
index bad9e94a10..d7b317310b 100644
--- a/crates/nu-parser/src/lex.rs
+++ b/crates/nu-parser/src/lex.rs
@@ -454,6 +454,26 @@ pub fn lex_signature(
         special_tokens,
         skip_comment,
         true,
+        false,
+    )
+}
+
+// temporary name because I can't decide on a better one
+pub fn lex_but_ignore_specials_after_special(
+    input: &[u8],
+    span_offset: usize,
+    additional_whitespace: &[u8],
+    special_tokens: &[u8],
+    skip_comment: bool,
+) -> (Vec<Token>, Option<ParseError>) {
+    lex_internal(
+        input,
+        span_offset,
+        additional_whitespace,
+        special_tokens,
+        skip_comment,
+        false,
+        true,
     )
 }
 
@@ -471,6 +491,7 @@ pub fn lex(
         special_tokens,
         skip_comment,
         false,
+        false,
     )
 }
 
@@ -482,7 +503,12 @@ fn lex_internal(
     skip_comment: bool,
     // within signatures we want to treat `<` and `>` specially
     in_signature: bool,
+    // after lexing a special item, disable special items when lexing the next item.
+    // necessary because colons are special in records, but datetime literals may contain colons
+    ignore_specials_after_special: bool,
 ) -> (Vec<Token>, Option<ParseError>) {
+    let mut specials_disabled = false;
+
     let mut error = None;
 
     let mut curr_offset = 0;
@@ -612,7 +638,22 @@ fn lex_internal(
         } else if c == b' ' || c == b'\t' || additional_whitespace.contains(&c) {
             // If the next character is non-newline whitespace, skip it.
             curr_offset += 1;
+        } else if ignore_specials_after_special && !specials_disabled && special_tokens.contains(&c)
+        {
+            // If disabling special items but they're not currently disabled, handle a special item
+            // character right here, bypassing lex_item
+            output.push(Token::new(
+                TokenContents::Item,
+                Span::new(span_offset + curr_offset, span_offset + curr_offset + 1),
+            ));
+            curr_offset += 1;
+            specials_disabled = true;
         } else {
+            let special_tokens = if specials_disabled {
+                &[]
+            } else {
+                special_tokens
+            };
             let (token, err) = lex_item(
                 input,
                 &mut curr_offset,
@@ -625,6 +666,9 @@ fn lex_internal(
             error = err;
         }
         is_complete = true;
+        if token.contents == TokenContents::Item {
+            specials_disabled = false;
+        }
         output.push(token);
     }
 }
diff --git a/crates/nu-parser/src/parser.rs b/crates/nu-parser/src/parser.rs
index 8332d2f383..18c6367390 100644
--- a/crates/nu-parser/src/parser.rs
+++ b/crates/nu-parser/src/parser.rs
@@ -1,5 +1,5 @@
 use crate::{
-    lex::{is_assignment_operator, lex, lex_signature},
+    lex::{is_assignment_operator, lex, lex_but_ignore_specials_after_special, lex_signature},
     lite_parser::{lite_parse, LiteCommand, LitePipeline, LiteRedirection, LiteRedirectionTarget},
     parse_keywords::*,
     parse_patterns::parse_pattern,
@@ -5601,7 +5601,8 @@ pub fn parse_record(working_set: &mut StateWorkingSet, span: Span) -> Expression
     let inner_span = Span::new(start, end);
     let source = working_set.get_span_contents(inner_span);
 
-    let (tokens, err) = lex(source, start, &[b'\n', b'\r', b','], &[b':'], true);
+    let (tokens, err) =
+        lex_but_ignore_specials_after_special(source, start, &[b'\n', b'\r', b','], &[b':'], true);
     if let Some(err) = err {
         working_set.error(err);
     }
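Context for this first patch: inside a record, `:` is lexed as a special token, but once one special has been emitted, the next item is lexed with specials disabled, so a bare datetime such as `2024-07-19T23:32:42+02:00` comes out as a single token instead of being split at its colons. A minimal sketch of how the new entry point would be driven over one record entry; the expected token list in the comment is my reading of the patch, not output copied from it:

    // Sketch only: exercises the patch-1 lexer with the same byte sets parse_record uses.
    let source = b"a: 2024-07-19T23:32:42+02:00";
    let (tokens, err) = lex_but_ignore_specials_after_special(
        source,
        0,                     // span_offset: where this span starts in the engine state
        &[b'\n', b'\r', b','], // additional whitespace inside records
        &[b':'],               // special tokens: the key-value separator
        true,                  // skip comments
    );
    assert!(err.is_none());
    // expected: Item("a"), Item(":"), Item("2024-07-19T23:32:42+02:00")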
From 671ddb5a2f69f856d34eeb62888d2ed70334d513 Mon Sep 17 00:00:00 2001
From: Kira
Date: Sun, 21 Jul 2024 15:25:38 +0200
Subject: [PATCH 2/8] slightly better names

---
 crates/nu-parser/src/lex.rs    | 8 +++-----
 crates/nu-parser/src/parser.rs | 4 ++--
 2 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/crates/nu-parser/src/lex.rs b/crates/nu-parser/src/lex.rs
index d7b317310b..b7c8ccc118 100644
--- a/crates/nu-parser/src/lex.rs
+++ b/crates/nu-parser/src/lex.rs
@@ -458,8 +458,7 @@ pub fn lex_signature(
     )
 }
 
-// temporary name because I can't decide on a better one
-pub fn lex_but_ignore_specials_after_special(
+pub fn lex_alternating_special_tokens(
     input: &[u8],
     span_offset: usize,
     additional_whitespace: &[u8],
     special_tokens: &[u8],
     skip_comment: bool,
@@ -505,7 +504,7 @@ fn lex_internal(
     in_signature: bool,
     // after lexing a special item, disable special items when lexing the next item.
     // necessary because colons are special in records, but datetime literals may contain colons
-    ignore_specials_after_special: bool,
+    alternate_specials: bool,
 ) -> (Vec<Token>, Option<ParseError>) {
     let mut specials_disabled = false;
@@ -638,8 +637,7 @@ fn lex_internal(
         } else if c == b' ' || c == b'\t' || additional_whitespace.contains(&c) {
             // If the next character is non-newline whitespace, skip it.
             curr_offset += 1;
-        } else if ignore_specials_after_special && !specials_disabled && special_tokens.contains(&c)
-        {
+        } else if alternate_specials && !specials_disabled && special_tokens.contains(&c) {
             // If disabling special items but they're not currently disabled, handle a special item
             // character right here, bypassing lex_item
             output.push(Token::new(
diff --git a/crates/nu-parser/src/parser.rs b/crates/nu-parser/src/parser.rs
index 18c6367390..ca6dc7dffd 100644
--- a/crates/nu-parser/src/parser.rs
+++ b/crates/nu-parser/src/parser.rs
@@ -1,5 +1,5 @@
 use crate::{
-    lex::{is_assignment_operator, lex, lex_but_ignore_specials_after_special, lex_signature},
+    lex::{is_assignment_operator, lex, lex_alternating_special_tokens, lex_signature},
     lite_parser::{lite_parse, LiteCommand, LitePipeline, LiteRedirection, LiteRedirectionTarget},
     parse_keywords::*,
     parse_patterns::parse_pattern,
@@ -5602,7 +5602,7 @@ pub fn parse_record(working_set: &mut StateWorkingSet, span: Span) -> Expression
     let source = working_set.get_span_contents(inner_span);
 
     let (tokens, err) =
-        lex_but_ignore_specials_after_special(source, start, &[b'\n', b'\r', b','], &[b':'], true);
+        lex_alternating_special_tokens(source, start, &[b'\n', b'\r', b','], &[b':'], true);
     if let Some(err) = err {
         working_set.error(err);
     }
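Before the next patch: with specials alternating, a bare word in value position that itself contains a colon now silently lexes as one item, so patch 3 turns that shape into an explicit error instead of letting it parse as something surprising. Illustrative inputs, with assumed behavior under this series (they mirror the test cases added later in patch 7):

    b"{ a: 2024-07-21T15:25:38+02:00 }" // intended to parse: datetime value
    b"{ a: b:c }"                       // intended to error: bare word value containing a colon
    b"{ a: 'b:c' }"                     // fine: quoted values may contain colons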
From b62d9119be5be25824fe494dcf638357543c5e49 Mon Sep 17 00:00:00 2001
From: Kira
Date: Sun, 21 Jul 2024 23:25:22 +0200
Subject: [PATCH 3/8] Explicitly reject record value position bare words
 containing colons

---
 crates/nu-parser/src/parser.rs | 45 ++++++++++++++++++++++++++++++++++
 1 file changed, 45 insertions(+)

diff --git a/crates/nu-parser/src/parser.rs b/crates/nu-parser/src/parser.rs
index ca6dc7dffd..1207088ecb 100644
--- a/crates/nu-parser/src/parser.rs
+++ b/crates/nu-parser/src/parser.rs
@@ -5694,6 +5694,51 @@ pub fn parse_record(working_set: &mut StateWorkingSet, span: Span) -> Expression
             let value = parse_value(working_set, tokens[idx].span, &SyntaxShape::Any);
             idx += 1;
 
+            let bareword_error = |string_value: &Expression| {
+                let string_span = working_set.get_span_contents(string_value.span);
+                let colon_position = string_span
+                    .iter()
+                    .find_position(|b| **b == b':')
+                    .map(|(i, _)| string_value.span.start + i);
+                if let Some(colon_position) = colon_position {
+                    Some(ParseError::InvalidLiteral(
+                        "colon".to_string(),
+                        "bare word specifying record value".to_string(),
+                        Span::new(colon_position, colon_position + 1),
+                    ))
+                } else {
+                    None
+                }
+            };
+            let value_span = working_set.get_span_contents(value.span);
+            let parse_error = match value.expr {
+                Expr::String(_) => {
+                    if ![b'"', b'\'', b'`'].contains(&value_span[0]) {
+                        bareword_error(&value)
+                    } else {
+                        None
+                    }
+                }
+                Expr::StringInterpolation(ref expressions) => {
+                    if value_span[0] != b'$' {
+                        expressions
+                            .iter()
+                            .filter(|expr| matches!(expr.expr, Expr::String(_)))
+                            .filter_map(bareword_error)
+                            .next()
+                    } else {
+                        None
+                    }
+                }
+                _ => None,
+            };
+            let value = if let Some(parse_error) = parse_error {
+                working_set.error(parse_error);
+                garbage(working_set, value.span)
+            } else {
+                value
+            };
+
             if let Some(field) = field.as_string() {
                 if let Some(fields) = &mut field_types {
                     fields.push((field, value.ty.clone()));

From 26c4bbfa41d9fd5f45d5f2f99bf409415ee8b15c Mon Sep 17 00:00:00 2001
From: Kira
Date: Sun, 21 Jul 2024 23:48:05 +0200
Subject: [PATCH 4/8] oh thanks clippy

---
 crates/nu-parser/src/parser.rs | 22 ++++++++++------------
 1 file changed, 10 insertions(+), 12 deletions(-)

diff --git a/crates/nu-parser/src/parser.rs b/crates/nu-parser/src/parser.rs
index 1207088ecb..380d8744f8 100644
--- a/crates/nu-parser/src/parser.rs
+++ b/crates/nu-parser/src/parser.rs
@@ -5695,20 +5695,18 @@ pub fn parse_record(working_set: &mut StateWorkingSet, span: Span) -> Expression
             idx += 1;
 
             let bareword_error = |string_value: &Expression| {
-                let string_span = working_set.get_span_contents(string_value.span);
-                let colon_position = string_span
+                working_set
+                    .get_span_contents(string_value.span)
                     .iter()
                     .find_position(|b| **b == b':')
-                    .map(|(i, _)| string_value.span.start + i);
-                if let Some(colon_position) = colon_position {
-                    Some(ParseError::InvalidLiteral(
-                        "colon".to_string(),
-                        "bare word specifying record value".to_string(),
-                        Span::new(colon_position, colon_position + 1),
-                    ))
-                } else {
-                    None
-                }
+                    .map(|(i, _)| {
+                        let colon_position = i + string_value.span.start;
+                        ParseError::InvalidLiteral(
+                            "colon".to_string(),
+                            "bare word specifying record value".to_string(),
+                            Span::new(colon_position, colon_position + 1),
+                        )
+                    })
             };
             let value_span = working_set.get_span_contents(value.span);
             let parse_error = match value.expr {
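Patches 1-4 toggled the specials inside the lexer itself. The "new attempt" below inverts control: the lexer becomes resumable (a `LexState` plus `lex_n_tokens`), and `parse_record` drives it, asking for two tokens with `:` special (key and colon) and then one token with no specials at all, so a datetime value's colons pass through untouched. This is the driving loop as it appears in the parser.rs half of the patch:

    loop {
        // key and colon: `:` is special here
        if lex_n(&[b'\n', b'\r', b','], &[b':'], 2) < 2 {
            break;
        };
        // value: no specials, so bare datetimes lex as one item
        if lex_n(&[b'\n', b'\r', b','], &[], 1) < 1 {
            break;
        };
    }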
From 4da348f7ca9b6eca0fca55f515985b8da09ef578 Mon Sep 17 00:00:00 2001
From: Kira
Date: Tue, 23 Jul 2024 02:37:17 +0200
Subject: [PATCH 5/8] new attempt

---
 crates/nu-parser/src/lex.rs    | 159 ++++++++++++++++++---------------
 crates/nu-parser/src/parser.rs | 116 ++++++++++++++----------
 2 files changed, 155 insertions(+), 120 deletions(-)

diff --git a/crates/nu-parser/src/lex.rs b/crates/nu-parser/src/lex.rs
index b7c8ccc118..628b84414a 100644
--- a/crates/nu-parser/src/lex.rs
+++ b/crates/nu-parser/src/lex.rs
@@ -447,33 +447,58 @@ pub fn lex_signature(
     special_tokens: &[u8],
     skip_comment: bool,
 ) -> (Vec<Token>, Option<ParseError>) {
-    lex_internal(
+    let mut state = LexState {
         input,
+        output: Vec::new(),
+        error: None,
         span_offset,
+    };
+    lex_internal(
+        &mut state,
         additional_whitespace,
         special_tokens,
         skip_comment,
         true,
-        false,
-    )
+        None,
+    );
+    (state.output, state.error)
 }
 
-pub fn lex_alternating_special_tokens(
-    input: &[u8],
-    span_offset: usize,
+pub struct LexState<'a> {
+    pub input: &'a [u8],
+    pub output: Vec<Token>,
+    pub error: Option<ParseError>,
+    pub span_offset: usize,
+}
+
+// Lex until the output is max_tokens longer than before the call, or until the input is exhausted.
+// The behaviour here is non-obvious (maybe non-useful) iff your additional_whitespace doesn't include newline:
+// If you pass `output` in a state where the last token is an Eol, this might *remove* tokens.
+pub fn lex_n_tokens(
+    state: &mut LexState,
     additional_whitespace: &[u8],
     special_tokens: &[u8],
     skip_comment: bool,
-) -> (Vec<Token>, Option<ParseError>) {
+    max_tokens: usize,
+) -> isize {
+    let n_tokens = state.output.len();
     lex_internal(
-        input,
-        span_offset,
+        state,
         additional_whitespace,
         special_tokens,
         skip_comment,
         false,
-        true,
-    )
+        Some(max_tokens),
+    );
+    // If this lex_internal call reached the end of the input, there may now be fewer tokens
+    // in the output than before.
+    let tokens_n_diff = (state.output.len() as isize) - (n_tokens as isize);
+    let next_offset = state.output.last().map(|token| token.span.end);
+    if let Some(next_offset) = next_offset {
+        state.input = &state.input[next_offset - state.span_offset..];
+        state.span_offset = next_offset;
+    }
+    tokens_n_diff
 }
 
 pub fn lex(
     input: &[u8],
     span_offset: usize,
     additional_whitespace: &[u8],
     special_tokens: &[u8],
     skip_comment: bool,
 ) -> (Vec<Token>, Option<ParseError>) {
-    lex_internal(
+    let mut state = LexState {
         input,
+        output: Vec::new(),
+        error: None,
         span_offset,
+    };
+    lex_internal(
+        &mut state,
         additional_whitespace,
         special_tokens,
         skip_comment,
         false,
-        false,
-    )
+        None,
+    );
+    (state.output, state.error)
 }
 
 fn lex_internal(
-    input: &[u8],
-    span_offset: usize,
+    state: &mut LexState,
     additional_whitespace: &[u8],
     special_tokens: &[u8],
     skip_comment: bool,
     // within signatures we want to treat `<` and `>` specially
     in_signature: bool,
-    // after lexing a special item, disable special items when lexing the next item.
-    // necessary because colons are special in records, but datetime literals may contain colons
-    alternate_specials: bool,
-) -> (Vec<Token>, Option<ParseError>) {
-    let mut specials_disabled = false;
-
-    let mut error = None;
+    max_tokens: Option<usize>,
+) {
+    let initial_output_len = state.output.len();
 
     let mut curr_offset = 0;
 
-    let mut output = vec![];
     let mut is_complete = true;
-
-    while let Some(c) = input.get(curr_offset) {
+    while let Some(c) = state.input.get(curr_offset) {
+        if max_tokens
+            .is_some_and(|max_tokens| state.output.len() >= initial_output_len + max_tokens)
+        {
+            break;
+        }
         let c = *c;
         if c == b'|' {
             // If the next character is `|`, it's either `|` or `||`.
             is_complete = false;
             let idx = curr_offset;
             let prev_idx = idx;
             curr_offset += 1;
 
             // If the next character is `|`, we're looking at a `||`.
-            if let Some(c) = input.get(curr_offset) {
+            if let Some(c) = state.input.get(curr_offset) {
                 if *c == b'|' {
                     let idx = curr_offset;
                     curr_offset += 1;
-                    output.push(Token::new(
+                    state.output.push(Token::new(
                         TokenContents::PipePipe,
-                        Span::new(span_offset + prev_idx, span_offset + idx + 1),
+                        Span::new(state.span_offset + prev_idx, state.span_offset + idx + 1),
                     ));
                     continue;
                 }
             }
 
             // Before we push, check to see if the previous character was a newline.
             // If so, then this is a continuation of the previous line
-            if let Some(prev) = output.last_mut() {
+            if let Some(prev) = state.output.last_mut() {
                 match prev.contents {
                     TokenContents::Eol => {
                         *prev = Token::new(
                             TokenContents::Pipe,
-                            Span::new(span_offset + idx, span_offset + idx + 1),
+                            Span::new(state.span_offset + idx, state.span_offset + idx + 1),
                         );
                         // And this is a continuation of the previous line if previous line is a
                         // comment line (combined with EOL + Comment)
                         //
                         // Initially, the last one token is TokenContents::Pipe, we don't need to
                         // check it, so the beginning offset is 2.
                         let mut offset = 2;
-                        while output.len() > offset {
-                            let index = output.len() - offset;
-                            if output[index].contents == TokenContents::Comment
-                                && output[index - 1].contents == TokenContents::Eol
+                        while state.output.len() > offset {
+                            let index = state.output.len() - offset;
+                            if state.output[index].contents == TokenContents::Comment
+                                && state.output[index - 1].contents == TokenContents::Eol
                             {
-                                output.remove(index - 1);
+                                state.output.remove(index - 1);
                                 offset += 1;
                             } else {
                                 break;
                             }
                         }
                     }
                     _ => {
-                        output.push(Token::new(
+                        state.output.push(Token::new(
                             TokenContents::Pipe,
-                            Span::new(span_offset + idx, span_offset + idx + 1),
+                            Span::new(state.span_offset + idx, state.span_offset + idx + 1),
                         ));
                     }
                 }
             } else {
-                output.push(Token::new(
+                state.output.push(Token::new(
                     TokenContents::Pipe,
-                    Span::new(span_offset + idx, span_offset + idx + 1),
+                    Span::new(state.span_offset + idx, state.span_offset + idx + 1),
                 ));
             }
 
             is_complete = false;
         } else if c == b';' {
             // If the next character is a `;`, we're looking at a semicolon token.
 
-            if !is_complete && error.is_none() {
-                error = Some(ParseError::ExtraTokens(Span::new(
+            if !is_complete && state.error.is_none() {
+                state.error = Some(ParseError::ExtraTokens(Span::new(
                     curr_offset,
                     curr_offset + 1,
                 )));
             }
             let idx = curr_offset;
             curr_offset += 1;
-            output.push(Token::new(
+            state.output.push(Token::new(
                 TokenContents::Semicolon,
-                Span::new(span_offset + idx, span_offset + idx + 1),
+                Span::new(state.span_offset + idx, state.span_offset + idx + 1),
             ));
         } else if c == b'\r' {
             // Ignore a stand-alone carriage return
             curr_offset += 1;
         } else if c == b'\n' {
             // If the next character is a newline, we're looking at an EOL (end of line) token.
             let idx = curr_offset;
             curr_offset += 1;
             if !additional_whitespace.contains(&c) {
-                output.push(Token::new(
+                state.output.push(Token::new(
                     TokenContents::Eol,
-                    Span::new(span_offset + idx, span_offset + idx + 1),
+                    Span::new(state.span_offset + idx, state.span_offset + idx + 1),
                 ));
             }
         } else if c == b'#' {
             // If the next character is a `#` this is the start of a
             // comment. The comment continues until the next newline.
             let mut start = curr_offset;
 
-            while let Some(input) = input.get(curr_offset) {
+            while let Some(input) = state.input.get(curr_offset) {
                 if *input == b'\n' {
                     if !skip_comment {
-                        output.push(Token::new(
+                        state.output.push(Token::new(
                             TokenContents::Comment,
-                            Span::new(span_offset + start, span_offset + curr_offset),
+                            Span::new(state.span_offset + start, state.span_offset + curr_offset),
                         ));
                     }
                     start = curr_offset;
 
                     break;
                 } else {
                     curr_offset += 1;
                 }
             }
             if start != curr_offset && !skip_comment {
-                output.push(Token::new(
+                state.output.push(Token::new(
                     TokenContents::Comment,
-                    Span::new(span_offset + start, span_offset + curr_offset),
+                    Span::new(state.span_offset + start, state.span_offset + curr_offset),
                 ));
             }
         } else if c == b' ' || c == b'\t' || additional_whitespace.contains(&c) {
             // If the next character is non-newline whitespace, skip it.
             curr_offset += 1;
-        } else if alternate_specials && !specials_disabled && special_tokens.contains(&c) {
-            // If disabling special items but they're not currently disabled, handle a special item
-            // character right here, bypassing lex_item
-            output.push(Token::new(
-                TokenContents::Item,
-                Span::new(span_offset + curr_offset, span_offset + curr_offset + 1),
-            ));
-            curr_offset += 1;
-            specials_disabled = true;
         } else {
-            let special_tokens = if specials_disabled {
-                &[]
-            } else {
-                special_tokens
-            };
             let (token, err) = lex_item(
-                input,
+                state.input,
                 &mut curr_offset,
-                span_offset,
+                state.span_offset,
                 additional_whitespace,
                 special_tokens,
                 in_signature,
             );
-            if error.is_none() {
-                error = err;
+            if state.error.is_none() {
+                state.error = err;
             }
             is_complete = true;
-            if token.contents == TokenContents::Item {
-                specials_disabled = false;
-            }
-            output.push(token);
+            state.output.push(token);
         }
     }
-    (output, error)
 }
 
 /// True if this the start of a redirection. Does not match `>>` or `>|` forms.
diff --git a/crates/nu-parser/src/parser.rs b/crates/nu-parser/src/parser.rs
index 380d8744f8..3350364b1d 100644
--- a/crates/nu-parser/src/parser.rs
+++ b/crates/nu-parser/src/parser.rs
@@ -1,5 +1,5 @@
 use crate::{
-    lex::{is_assignment_operator, lex, lex_alternating_special_tokens, lex_signature},
+    lex::{is_assignment_operator, lex, lex_n_tokens, lex_signature, LexState},
     lite_parser::{lite_parse, LiteCommand, LitePipeline, LiteRedirection, LiteRedirectionTarget},
     parse_keywords::*,
     parse_patterns::parse_pattern,
@@ -5599,10 +5599,32 @@ pub fn parse_record(working_set: &mut StateWorkingSet, span: Span) -> Expression
     }
 
     let inner_span = Span::new(start, end);
-    let source = working_set.get_span_contents(inner_span);
 
-    let (tokens, err) =
-        lex_alternating_special_tokens(source, start, &[b'\n', b'\r', b','], &[b':'], true);
+    let mut lex_state = LexState {
+        input: working_set.get_span_contents(inner_span),
+        output: Vec::new(),
+        error: None,
+        span_offset: start,
+    };
+    let mut lex_n = |additional_whitespace, special_tokens, max_tokens| {
+        lex_n_tokens(
+            &mut lex_state,
+            additional_whitespace,
+            special_tokens,
+            true,
+            max_tokens,
+        )
+    };
+    loop {
+        if lex_n(&[b'\n', b'\r', b','], &[b':'], 2) < 2 {
+            break;
+        };
+        if lex_n(&[b'\n', b'\r', b','], &[], 1) < 1 {
+            break;
+        };
+    }
+    let (tokens, err) = (lex_state.output, lex_state.error);
+
     if let Some(err) = err {
         working_set.error(err);
     }
@@ -5694,48 +5716,50 @@ pub fn parse_record(working_set: &mut StateWorkingSet, span: Span) -> Expression
             let value = parse_value(working_set, tokens[idx].span, &SyntaxShape::Any);
             idx += 1;
 
-            let bareword_error = |string_value: &Expression| {
-                working_set
-                    .get_span_contents(string_value.span)
-                    .iter()
-                    .find_position(|b| **b == b':')
-                    .map(|(i, _)| {
-                        let colon_position = i + string_value.span.start;
-                        ParseError::InvalidLiteral(
-                            "colon".to_string(),
-                            "bare word specifying record value".to_string(),
-                            Span::new(colon_position, colon_position + 1),
-                        )
-                    })
-            };
-            let value_span = working_set.get_span_contents(value.span);
-            let parse_error = match value.expr {
-                Expr::String(_) => {
-                    if ![b'"', b'\'', b'`'].contains(&value_span[0]) {
-                        bareword_error(&value)
-                    } else {
-                        None
-                    }
-                }
-                Expr::StringInterpolation(ref expressions) => {
-                    if value_span[0] != b'$' {
-                        expressions
-                            .iter()
-                            .filter(|expr| matches!(expr.expr, Expr::String(_)))
-                            .filter_map(bareword_error)
-                            .next()
-                    } else {
-                        None
-                    }
-                }
-                _ => None,
-            };
-            let value = if let Some(parse_error) = parse_error {
-                working_set.error(parse_error);
-                garbage(working_set, value.span)
-            } else {
-                value
-            };
+            // Disallow colons in bare word values
+
+            // let bareword_error = |string_value: &Expression| {
+            //     working_set
+            //         .get_span_contents(string_value.span)
+            //         .iter()
+            //         .find_position(|b| **b == b':')
+            //         .map(|(i, _)| {
+            //             let colon_position = i + string_value.span.start;
+            //             ParseError::InvalidLiteral(
+            //                 "colon".to_string(),
+            //                 "bare word specifying record value".to_string(),
+            //                 Span::new(colon_position, colon_position + 1),
+            //             )
+            //         })
+            // };
+            // let value_span = working_set.get_span_contents(value.span);
+            // let parse_error = match value.expr {
+            //     Expr::String(_) => {
+            //         if ![b'"', b'\'', b'`'].contains(&value_span[0]) {
+            //             bareword_error(&value)
+            //         } else {
+            //             None
+            //         }
+            //     }
+            //     Expr::StringInterpolation(ref expressions) => {
+            //         if value_span[0] != b'$' {
+            //             expressions
+            //                 .iter()
+            //                 .filter(|expr| matches!(expr.expr, Expr::String(_)))
+            //                 .filter_map(bareword_error)
+            //                 .next()
+            //         } else {
+            //             None
+            //         }
+            //     }
+            //     _ => None,
+            // };
+            // let value = if let Some(parse_error) = parse_error {
+            //     working_set.error(parse_error);
+            //     garbage(working_set, value.span)
+            // } else {
+            //     value
+            // };
 
             if let Some(field) = field.as_string() {
                 if let Some(fields) = &mut field_types {
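Patch 6 below brings the colon check back as a helper shared by key and value position, and additionally rejects tokens that are not plain items in either position. These are the shapes the new checks are meant to refuse (the same inputs the test cases in patch 7 pin down):

    b"{ :: x }"    // key is a bare colon
    b"{ a: x:y }"  // value is a bare word containing a colon
    b"{ ;: x }"    // key is not an Item token
    b"{ a: || }"   // value is not an Item token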
From cc980115fca1df6f9caca008d9eb89cd4107ca9b Mon Sep 17 00:00:00 2001
From: Kira
Date: Tue, 23 Jul 2024 18:21:40 +0200
Subject: [PATCH 6/8] reject non-item-tokens and bare words containing colons
 as record keys and values

---
 crates/nu-parser/src/parser.rs | 127 +++++++++++++++++++++------------
 1 file changed, 80 insertions(+), 47 deletions(-)

diff --git a/crates/nu-parser/src/parser.rs b/crates/nu-parser/src/parser.rs
index 3350364b1d..92db4205c6 100644
--- a/crates/nu-parser/src/parser.rs
+++ b/crates/nu-parser/src/parser.rs
@@ -5579,6 +5579,49 @@ pub fn parse_builtin_commands(
     }
 }
 
+fn check_record_key_or_value(
+    working_set: &StateWorkingSet,
+    expr: &Expression,
+    position: &str,
+) -> Option<ParseError> {
+    let bareword_error = |string_value: &Expression| {
+        working_set
+            .get_span_contents(string_value.span)
+            .iter()
+            .find_position(|b| **b == b':')
+            .map(|(i, _)| {
+                let colon_position = i + string_value.span.start;
+                ParseError::InvalidLiteral(
+                    "colon".to_string(),
+                    format!("bare word specifying record {}", position),
+                    Span::new(colon_position, colon_position + 1),
+                )
+            })
+    };
+    let value_span = working_set.get_span_contents(expr.span);
+    match expr.expr {
+        Expr::String(_) => {
+            if ![b'"', b'\'', b'`'].contains(&value_span[0]) {
+                bareword_error(expr)
+            } else {
+                None
+            }
+        }
+        Expr::StringInterpolation(ref expressions) => {
+            if value_span[0] != b'$' {
+                expressions
+                    .iter()
+                    .filter(|expr| matches!(expr.expr, Expr::String(_)))
+                    .filter_map(bareword_error)
+                    .next()
+            } else {
+                None
+            }
+        }
+        _ => None,
+    }
+}
+
 pub fn parse_record(working_set: &mut StateWorkingSet, span: Span) -> Expression {
     let bytes = working_set.get_span_contents(span);
 
@@ -5668,7 +5711,22 @@ pub fn parse_record(working_set: &mut StateWorkingSet, span: Span) -> Expression
             ));
         } else {
             // Normal key-value pair
-            let field = parse_value(working_set, curr_span, &SyntaxShape::Any);
+            let field_token = &tokens[idx];
+            let field = if field_token.contents != TokenContents::Item {
+                working_set.error(ParseError::Expected(
+                    "item in record key position",
+                    Span::new(field_token.span.start, field_token.span.end),
+                ));
+                garbage(working_set, curr_span)
+            } else {
+                let field = parse_value(working_set, curr_span, &SyntaxShape::Any);
+                if let Some(error) = check_record_key_or_value(working_set, &field, "key") {
+                    working_set.error(error);
+                    garbage(working_set, field.span)
+                } else {
+                    field
+                }
+            };
 
             idx += 1;
             if idx == tokens.len() {
@@ -5716,9 +5774,30 @@ pub fn parse_record(working_set: &mut StateWorkingSet, span: Span) -> Expression
                 ));
                 break;
             }
-            let value = parse_value(working_set, tokens[idx].span, &SyntaxShape::Any);
+
+            let value_token = &tokens[idx];
+            let value = if value_token.contents != TokenContents::Item {
+                working_set.error(ParseError::Expected(
+                    "item in record value position",
+                    Span::new(value_token.span.start, value_token.span.end),
+                ));
+                garbage(
+                    working_set,
+                    Span::new(value_token.span.start, value_token.span.end),
+                )
+            } else {
+                let value = parse_value(working_set, tokens[idx].span, &SyntaxShape::Any);
+                if let Some(parse_error) = check_record_key_or_value(&working_set, &value, "value")
+                {
+                    working_set.error(parse_error);
+                    garbage(working_set, value.span)
+                } else {
+                    value
+                }
+            };
             idx += 1;
 
             if let Some(field) = field.as_string() {
                 if let Some(fields) = &mut field_types {
                     fields.push((field, value.ty.clone()));

From b3bdd4ac6477959baf526c3e092fcc8a530ad315 Mon Sep 17 00:00:00 2001
From: Kira
Date: Wed, 24 Jul 2024 17:14:32 +0200
Subject: [PATCH 7/8] I really don't know what I'm doing with these tests,
 sorry

---
 crates/nu-parser/src/lex.rs           |  1 +
 crates/nu-parser/src/lib.rs           |  2 +-
 crates/nu-parser/src/parser.rs        |  3 +--
 crates/nu-parser/tests/test_lex.rs    | 25 ++++++++++++-
 crates/nu-parser/tests/test_parser.rs | 53 +++++++++++++++++++++++++++
 5 files changed, 80 insertions(+), 4 deletions(-)

diff --git a/crates/nu-parser/src/lex.rs b/crates/nu-parser/src/lex.rs
index 628b84414a..adcb239f4e 100644
--- a/crates/nu-parser/src/lex.rs
+++ b/crates/nu-parser/src/lex.rs
@@ -464,6 +464,7 @@ pub fn lex_signature(
     (state.output, state.error)
 }
 
+#[derive(Debug)]
 pub struct LexState<'a> {
     pub input: &'a [u8],
     pub output: Vec<Token>,
     pub error: Option<ParseError>,
     pub span_offset: usize,
 }
diff --git a/crates/nu-parser/src/lib.rs b/crates/nu-parser/src/lib.rs
index 8f871aa815..c5d69cb270 100644
--- a/crates/nu-parser/src/lib.rs
+++ b/crates/nu-parser/src/lib.rs
@@ -16,7 +16,7 @@ pub use flatten::{
     flatten_block, flatten_expression, flatten_pipeline, flatten_pipeline_element, FlatShape,
 };
 pub use known_external::KnownExternal;
-pub use lex::{lex, lex_signature, Token, TokenContents};
+pub use lex::{lex, lex_n_tokens, lex_signature, LexState, Token, TokenContents};
 pub use lite_parser::{lite_parse, LiteBlock, LiteCommand};
 pub use nu_protocol::parser_path::*;
 pub use parse_keywords::*;
diff --git a/crates/nu-parser/src/parser.rs b/crates/nu-parser/src/parser.rs
index 92db4205c6..22a0f6ab64 100644
--- a/crates/nu-parser/src/parser.rs
+++ b/crates/nu-parser/src/parser.rs
@@ -5784,8 +5784,7 @@ pub fn parse_record(working_set: &mut StateWorkingSet, span: Span) -> Expression
             } else {
                 let value = parse_value(working_set, tokens[idx].span, &SyntaxShape::Any);
-                if let Some(parse_error) = check_record_key_or_value(&working_set, &value, "value")
-                {
+                if let Some(parse_error) = check_record_key_or_value(working_set, &value, "value") {
                     working_set.error(parse_error);
                     garbage(working_set, value.span)
                 } else {
diff --git a/crates/nu-parser/tests/test_lex.rs b/crates/nu-parser/tests/test_lex.rs
index 07470a310e..22fe4c4715 100644
--- a/crates/nu-parser/tests/test_lex.rs
+++ b/crates/nu-parser/tests/test_lex.rs
@@ -1,4 +1,4 @@
-use nu_parser::{lex, lex_signature, Token, TokenContents};
+use nu_parser::{lex, lex_n_tokens, lex_signature, LexState, Token, TokenContents};
 use nu_protocol::{ParseError, Span};
 
 #[test]
@@ -281,3 +281,26 @@ fn lex_comments() {
         }
     );
 }
+
+#[test]
+fn lex_manually() {
+    let file = b"'a'\n#comment\n#comment again\n| continue";
+    let mut lex_state = LexState {
+        input: file,
+        output: Vec::new(),
+        error: None,
+        span_offset: 10,
+    };
+    assert_eq!(lex_n_tokens(&mut lex_state, &[], &[], false, 1), 1);
+    assert_eq!(lex_state.output.len(), 1);
+    assert_eq!(lex_n_tokens(&mut lex_state, &[], &[], false, 5), 5);
+    assert_eq!(lex_state.output.len(), 6);
+    // Next token is the pipe.
+    // This shortens the output because it exhausts the input before it can
+    // compensate for the EOL tokens lost to the line continuation
+    assert_eq!(lex_n_tokens(&mut lex_state, &[], &[], false, 1), -1);
+    assert_eq!(lex_state.output.len(), 5);
+    assert_eq!(file.len(), lex_state.span_offset - 10);
+    let last_span = lex_state.output.last().unwrap().span;
+    assert_eq!(&file[last_span.start - 10..last_span.end - 10], b"continue");
+}
diff --git a/crates/nu-parser/tests/test_parser.rs b/crates/nu-parser/tests/test_parser.rs
index 44cd79a04a..591d783eef 100644
--- a/crates/nu-parser/tests/test_parser.rs
+++ b/crates/nu-parser/tests/test_parser.rs
@@ -2397,3 +2397,56 @@ mod operator {
         );
     }
 }
+
+mod record {
+    use super::*;
+
+    use nu_protocol::ast::RecordItem;
+
+    #[rstest]
+    #[case(b"{ :: x }", "Invalid literal")] // Key is bare colon
+    #[case(b"{ a: x:y }", "Invalid literal")] // Value is bare word with colon
+    #[case(b"{ a: x('y'):z }", "Invalid literal")] // Value is bare string interpolation with colon
+    #[case(b"{ ;: x }", "Parse mismatch during operation.")] // Key is a non-item token
+    #[case(b"{ a: || }", "Parse mismatch during operation.")] // Value is a non-item token
+    fn refuse_confusing_record(#[case] expr: &[u8], #[case] error: &str) {
+        dbg!(String::from_utf8_lossy(expr));
+        let engine_state = EngineState::new();
+        let mut working_set = StateWorkingSet::new(&engine_state);
+        parse(&mut working_set, None, expr, false);
+        assert_eq!(
+            working_set.parse_errors.first().map(|e| e.to_string()),
+            Some(error.to_string())
+        );
+    }
+
+    #[rstest]
+    #[case(b"{ a: 2024-07-23T22:54:54.532100627+02:00 b:xy }")]
+    fn parse_datetime_in_record(#[case] expr: &[u8]) {
+        dbg!(String::from_utf8_lossy(expr));
+        let engine_state = EngineState::new();
+        let mut working_set = StateWorkingSet::new(&engine_state);
+        let block = parse(&mut working_set, None, expr, false);
+        assert!(working_set.parse_errors.first().is_none());
+        let pipeline_el_expr = &block
+            .pipelines
+            .first()
+            .unwrap()
+            .elements
+            .first()
+            .unwrap()
+            .expr
+            .expr;
+        dbg!(pipeline_el_expr);
+        match pipeline_el_expr {
+            Expr::FullCellPath(v) => match &v.head.expr {
+                Expr::Record(fields) => assert!(matches!(
+                    fields[0],
+                    RecordItem::Pair(_, Expression { ty: Type::Date, .. })
+                )),
+                _ => panic!("Expected record head"),
+            },
+            _ => panic!("Expected full cell path"),
+        }
+    }
+}
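The lex_manually test above pins down the subtle part of the resumable API: the return value of `lex_n_tokens` can be negative, because lexing a `|` line continuation can fold a previously emitted Eol token away, leaving the output shorter than before the call. A caller therefore has to treat the result as a signed delta, roughly like this (sketch, not code from the patch):

    let diff = lex_n_tokens(&mut state, &[], &[], false, 1);
    if diff < 0 {
        // a `|` continuation folded away an earlier Eol token
    } else if diff == 0 {
        // input exhausted; nothing was added
    } else {
        // `diff` new tokens were appended to `state.output`
    }

Patch 8 promotes exactly this caveat into a doc comment.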
From b26dd50006571b11296b5af50769d06f474a9707 Mon Sep 17 00:00:00 2001
From: Kira
Date: Fri, 2 Aug 2024 23:26:07 +0200
Subject: [PATCH 8/8] Add doc comment to `lex_n_tokens`

---
 crates/nu-parser/src/lex.rs | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/crates/nu-parser/src/lex.rs b/crates/nu-parser/src/lex.rs
index adcb239f4e..f0802fcd7a 100644
--- a/crates/nu-parser/src/lex.rs
+++ b/crates/nu-parser/src/lex.rs
@@ -472,9 +472,11 @@ pub struct LexState<'a> {
     pub span_offset: usize,
 }
 
-// Lex until the output is max_tokens longer than before the call, or until the input is exhausted.
-// The behaviour here is non-obvious (maybe non-useful) iff your additional_whitespace doesn't include newline:
-// If you pass `output` in a state where the last token is an Eol, this might *remove* tokens.
+/// Lex until the output is `max_tokens` longer than before the call, or until the input is exhausted.
+/// The return value indicates how many tokens the call added to / removed from the output.
+///
+/// The behaviour here is non-obvious when `additional_whitespace` doesn't include newline:
+/// If you pass a `state` where the last token in the output is an Eol, this might *remove* tokens.
 pub fn lex_n_tokens(
     state: &mut LexState,
     additional_whitespace: &[u8],
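End-to-end, the series makes the following hold, as exercised by the tests in patch 7 (illustrative inputs; the first mirrors parse_datetime_in_record, the second refuse_confusing_record):

    b"{ a: 2024-07-23T22:54:54.532100627+02:00 b:xy }" // parses; field `a` has Type::Date
    b"{ a: x:y }"                                      // rejected with "Invalid literal"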