new attempt
This commit is contained in:
parent
26c4bbfa41
commit
4da348f7ca
|
@ -447,33 +447,58 @@ pub fn lex_signature(
|
||||||
special_tokens: &[u8],
|
special_tokens: &[u8],
|
||||||
skip_comment: bool,
|
skip_comment: bool,
|
||||||
) -> (Vec<Token>, Option<ParseError>) {
|
) -> (Vec<Token>, Option<ParseError>) {
|
||||||
lex_internal(
|
let mut state = LexState {
|
||||||
input,
|
input,
|
||||||
|
output: Vec::new(),
|
||||||
|
error: None,
|
||||||
span_offset,
|
span_offset,
|
||||||
|
};
|
||||||
|
lex_internal(
|
||||||
|
&mut state,
|
||||||
additional_whitespace,
|
additional_whitespace,
|
||||||
special_tokens,
|
special_tokens,
|
||||||
skip_comment,
|
skip_comment,
|
||||||
true,
|
true,
|
||||||
false,
|
None,
|
||||||
)
|
);
|
||||||
|
(state.output, state.error)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn lex_alternating_special_tokens(
|
pub struct LexState<'a> {
|
||||||
input: &[u8],
|
pub input: &'a [u8],
|
||||||
span_offset: usize,
|
pub output: Vec<Token>,
|
||||||
|
pub error: Option<ParseError>,
|
||||||
|
pub span_offset: usize,
|
||||||
|
}
|
||||||
|
|
||||||
|
// Lex until the output is max_tokens longer than before the call, or until the input is exhausted.
|
||||||
|
// The behaviour here is non-obvious (maybe non-useful) iff your additional_whitespace doesn't include newline:
|
||||||
|
// If you pass a `state` whose `output` ends with an Eol token, this might *remove* tokens.
|
||||||
|
pub fn lex_n_tokens(
|
||||||
|
state: &mut LexState,
|
||||||
additional_whitespace: &[u8],
|
additional_whitespace: &[u8],
|
||||||
special_tokens: &[u8],
|
special_tokens: &[u8],
|
||||||
skip_comment: bool,
|
skip_comment: bool,
|
||||||
) -> (Vec<Token>, Option<ParseError>) {
|
max_tokens: usize,
|
||||||
|
) -> isize {
|
||||||
|
let n_tokens = state.output.len();
|
||||||
lex_internal(
|
lex_internal(
|
||||||
input,
|
state,
|
||||||
span_offset,
|
|
||||||
additional_whitespace,
|
additional_whitespace,
|
||||||
special_tokens,
|
special_tokens,
|
||||||
skip_comment,
|
skip_comment,
|
||||||
false,
|
false,
|
||||||
true,
|
Some(max_tokens),
|
||||||
)
|
);
|
||||||
|
// If this lex_internal call reached the end of the input, there may now be fewer tokens
|
||||||
|
// in the output than before.
|
||||||
|
let tokens_n_diff = (state.output.len() as isize) - (n_tokens as isize);
|
||||||
|
let next_offset = state.output.last().map(|token| token.span.end);
|
||||||
|
if let Some(next_offset) = next_offset {
|
||||||
|
state.input = &state.input[next_offset - state.span_offset..];
|
||||||
|
state.span_offset = next_offset;
|
||||||
|
}
|
||||||
|
tokens_n_diff
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn lex(
|
pub fn lex(
|
||||||
|
@ -483,39 +508,43 @@ pub fn lex(
|
||||||
special_tokens: &[u8],
|
special_tokens: &[u8],
|
||||||
skip_comment: bool,
|
skip_comment: bool,
|
||||||
) -> (Vec<Token>, Option<ParseError>) {
|
) -> (Vec<Token>, Option<ParseError>) {
|
||||||
lex_internal(
|
let mut state = LexState {
|
||||||
input,
|
input,
|
||||||
|
output: Vec::new(),
|
||||||
|
error: None,
|
||||||
span_offset,
|
span_offset,
|
||||||
|
};
|
||||||
|
lex_internal(
|
||||||
|
&mut state,
|
||||||
additional_whitespace,
|
additional_whitespace,
|
||||||
special_tokens,
|
special_tokens,
|
||||||
skip_comment,
|
skip_comment,
|
||||||
false,
|
false,
|
||||||
false,
|
None,
|
||||||
)
|
);
|
||||||
|
(state.output, state.error)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn lex_internal(
|
fn lex_internal(
|
||||||
input: &[u8],
|
state: &mut LexState,
|
||||||
span_offset: usize,
|
|
||||||
additional_whitespace: &[u8],
|
additional_whitespace: &[u8],
|
||||||
special_tokens: &[u8],
|
special_tokens: &[u8],
|
||||||
skip_comment: bool,
|
skip_comment: bool,
|
||||||
// within signatures we want to treat `<` and `>` specially
|
// within signatures we want to treat `<` and `>` specially
|
||||||
in_signature: bool,
|
in_signature: bool,
|
||||||
// after lexing a special item, disable special items when lexing the next item.
|
max_tokens: Option<usize>,
|
||||||
// necessary because colons are special in records, but datetime literals may contain colons
|
) {
|
||||||
alternate_specials: bool,
|
let initial_output_len = state.output.len();
|
||||||
) -> (Vec<Token>, Option<ParseError>) {
|
|
||||||
let mut specials_disabled = false;
|
|
||||||
|
|
||||||
let mut error = None;
|
|
||||||
|
|
||||||
let mut curr_offset = 0;
|
let mut curr_offset = 0;
|
||||||
|
|
||||||
let mut output = vec![];
|
|
||||||
let mut is_complete = true;
|
let mut is_complete = true;
|
||||||
|
while let Some(c) = state.input.get(curr_offset) {
|
||||||
while let Some(c) = input.get(curr_offset) {
|
if max_tokens
|
||||||
|
.is_some_and(|max_tokens| state.output.len() >= initial_output_len + max_tokens)
|
||||||
|
{
|
||||||
|
break;
|
||||||
|
}
|
||||||
let c = *c;
|
let c = *c;
|
||||||
if c == b'|' {
|
if c == b'|' {
|
||||||
// If the next character is `|`, it's either `|` or `||`.
|
// If the next character is `|`, it's either `|` or `||`.
|
||||||
|
@ -524,13 +553,13 @@ fn lex_internal(
|
||||||
curr_offset += 1;
|
curr_offset += 1;
|
||||||
|
|
||||||
// If the next character is `|`, we're looking at a `||`.
|
// If the next character is `|`, we're looking at a `||`.
|
||||||
if let Some(c) = input.get(curr_offset) {
|
if let Some(c) = state.input.get(curr_offset) {
|
||||||
if *c == b'|' {
|
if *c == b'|' {
|
||||||
let idx = curr_offset;
|
let idx = curr_offset;
|
||||||
curr_offset += 1;
|
curr_offset += 1;
|
||||||
output.push(Token::new(
|
state.output.push(Token::new(
|
||||||
TokenContents::PipePipe,
|
TokenContents::PipePipe,
|
||||||
Span::new(span_offset + prev_idx, span_offset + idx + 1),
|
Span::new(state.span_offset + prev_idx, state.span_offset + idx + 1),
|
||||||
));
|
));
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
@ -540,12 +569,12 @@ fn lex_internal(
|
||||||
|
|
||||||
// Before we push, check to see if the previous character was a newline.
|
// Before we push, check to see if the previous character was a newline.
|
||||||
// If so, then this is a continuation of the previous line
|
// If so, then this is a continuation of the previous line
|
||||||
if let Some(prev) = output.last_mut() {
|
if let Some(prev) = state.output.last_mut() {
|
||||||
match prev.contents {
|
match prev.contents {
|
||||||
TokenContents::Eol => {
|
TokenContents::Eol => {
|
||||||
*prev = Token::new(
|
*prev = Token::new(
|
||||||
TokenContents::Pipe,
|
TokenContents::Pipe,
|
||||||
Span::new(span_offset + idx, span_offset + idx + 1),
|
Span::new(state.span_offset + idx, state.span_offset + idx + 1),
|
||||||
);
|
);
|
||||||
// And this is a continuation of the previous line if the previous line is a
|
// And this is a continuation of the previous line if the previous line is a
|
||||||
// comment line (combined with EOL + Comment)
|
// comment line (combined with EOL + Comment)
|
||||||
|
@ -553,12 +582,12 @@ fn lex_internal(
|
||||||
// Initially, the last token is TokenContents::Pipe; we don't need to
|
// Initially, the last token is TokenContents::Pipe; we don't need to
|
||||||
// check it, so the beginning offset is 2.
|
// check it, so the beginning offset is 2.
|
||||||
let mut offset = 2;
|
let mut offset = 2;
|
||||||
while output.len() > offset {
|
while state.output.len() > offset {
|
||||||
let index = output.len() - offset;
|
let index = state.output.len() - offset;
|
||||||
if output[index].contents == TokenContents::Comment
|
if state.output[index].contents == TokenContents::Comment
|
||||||
&& output[index - 1].contents == TokenContents::Eol
|
&& state.output[index - 1].contents == TokenContents::Eol
|
||||||
{
|
{
|
||||||
output.remove(index - 1);
|
state.output.remove(index - 1);
|
||||||
offset += 1;
|
offset += 1;
|
||||||
} else {
|
} else {
|
||||||
break;
|
break;
|
||||||
|
@ -566,16 +595,16 @@ fn lex_internal(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
_ => {
|
_ => {
|
||||||
output.push(Token::new(
|
state.output.push(Token::new(
|
||||||
TokenContents::Pipe,
|
TokenContents::Pipe,
|
||||||
Span::new(span_offset + idx, span_offset + idx + 1),
|
Span::new(state.span_offset + idx, state.span_offset + idx + 1),
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
output.push(Token::new(
|
state.output.push(Token::new(
|
||||||
TokenContents::Pipe,
|
TokenContents::Pipe,
|
||||||
Span::new(span_offset + idx, span_offset + idx + 1),
|
Span::new(state.span_offset + idx, state.span_offset + idx + 1),
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -583,17 +612,17 @@ fn lex_internal(
|
||||||
} else if c == b';' {
|
} else if c == b';' {
|
||||||
// If the next character is a `;`, we're looking at a semicolon token.
|
// If the next character is a `;`, we're looking at a semicolon token.
|
||||||
|
|
||||||
if !is_complete && error.is_none() {
|
if !is_complete && state.error.is_none() {
|
||||||
error = Some(ParseError::ExtraTokens(Span::new(
|
state.error = Some(ParseError::ExtraTokens(Span::new(
|
||||||
curr_offset,
|
curr_offset,
|
||||||
curr_offset + 1,
|
curr_offset + 1,
|
||||||
)));
|
)));
|
||||||
}
|
}
|
||||||
let idx = curr_offset;
|
let idx = curr_offset;
|
||||||
curr_offset += 1;
|
curr_offset += 1;
|
||||||
output.push(Token::new(
|
state.output.push(Token::new(
|
||||||
TokenContents::Semicolon,
|
TokenContents::Semicolon,
|
||||||
Span::new(span_offset + idx, span_offset + idx + 1),
|
Span::new(state.span_offset + idx, state.span_offset + idx + 1),
|
||||||
));
|
));
|
||||||
} else if c == b'\r' {
|
} else if c == b'\r' {
|
||||||
// Ignore a stand-alone carriage return
|
// Ignore a stand-alone carriage return
|
||||||
|
@ -603,9 +632,9 @@ fn lex_internal(
|
||||||
let idx = curr_offset;
|
let idx = curr_offset;
|
||||||
curr_offset += 1;
|
curr_offset += 1;
|
||||||
if !additional_whitespace.contains(&c) {
|
if !additional_whitespace.contains(&c) {
|
||||||
output.push(Token::new(
|
state.output.push(Token::new(
|
||||||
TokenContents::Eol,
|
TokenContents::Eol,
|
||||||
Span::new(span_offset + idx, span_offset + idx + 1),
|
Span::new(state.span_offset + idx, state.span_offset + idx + 1),
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
} else if c == b'#' {
|
} else if c == b'#' {
|
||||||
|
@ -613,12 +642,12 @@ fn lex_internal(
|
||||||
// comment. The comment continues until the next newline.
|
// comment. The comment continues until the next newline.
|
||||||
let mut start = curr_offset;
|
let mut start = curr_offset;
|
||||||
|
|
||||||
while let Some(input) = input.get(curr_offset) {
|
while let Some(input) = state.input.get(curr_offset) {
|
||||||
if *input == b'\n' {
|
if *input == b'\n' {
|
||||||
if !skip_comment {
|
if !skip_comment {
|
||||||
output.push(Token::new(
|
state.output.push(Token::new(
|
||||||
TokenContents::Comment,
|
TokenContents::Comment,
|
||||||
Span::new(span_offset + start, span_offset + curr_offset),
|
Span::new(state.span_offset + start, state.span_offset + curr_offset),
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
start = curr_offset;
|
start = curr_offset;
|
||||||
|
@ -629,48 +658,30 @@ fn lex_internal(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if start != curr_offset && !skip_comment {
|
if start != curr_offset && !skip_comment {
|
||||||
output.push(Token::new(
|
state.output.push(Token::new(
|
||||||
TokenContents::Comment,
|
TokenContents::Comment,
|
||||||
Span::new(span_offset + start, span_offset + curr_offset),
|
Span::new(state.span_offset + start, state.span_offset + curr_offset),
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
} else if c == b' ' || c == b'\t' || additional_whitespace.contains(&c) {
|
} else if c == b' ' || c == b'\t' || additional_whitespace.contains(&c) {
|
||||||
// If the next character is non-newline whitespace, skip it.
|
// If the next character is non-newline whitespace, skip it.
|
||||||
curr_offset += 1;
|
curr_offset += 1;
|
||||||
} else if alternate_specials && !specials_disabled && special_tokens.contains(&c) {
|
|
||||||
// If disabling special items but if they're not currently disabled, handle a special item
|
|
||||||
// character right here, bypassing lex_item
|
|
||||||
output.push(Token::new(
|
|
||||||
TokenContents::Item,
|
|
||||||
Span::new(span_offset + curr_offset, span_offset + curr_offset + 1),
|
|
||||||
));
|
|
||||||
curr_offset += 1;
|
|
||||||
specials_disabled = true;
|
|
||||||
} else {
|
} else {
|
||||||
let special_tokens = if specials_disabled {
|
|
||||||
&[]
|
|
||||||
} else {
|
|
||||||
special_tokens
|
|
||||||
};
|
|
||||||
let (token, err) = lex_item(
|
let (token, err) = lex_item(
|
||||||
input,
|
state.input,
|
||||||
&mut curr_offset,
|
&mut curr_offset,
|
||||||
span_offset,
|
state.span_offset,
|
||||||
additional_whitespace,
|
additional_whitespace,
|
||||||
special_tokens,
|
special_tokens,
|
||||||
in_signature,
|
in_signature,
|
||||||
);
|
);
|
||||||
if error.is_none() {
|
if state.error.is_none() {
|
||||||
error = err;
|
state.error = err;
|
||||||
}
|
}
|
||||||
is_complete = true;
|
is_complete = true;
|
||||||
if token.contents == TokenContents::Item {
|
state.output.push(token);
|
||||||
specials_disabled = false;
|
|
||||||
}
|
|
||||||
output.push(token);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
(output, error)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// True if this is the start of a redirection. Does not match `>>` or `>|` forms.
|
/// True if this is the start of a redirection. Does not match `>>` or `>|` forms.
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
use crate::{
|
use crate::{
|
||||||
lex::{is_assignment_operator, lex, lex_alternating_special_tokens, lex_signature},
|
lex::{is_assignment_operator, lex, lex_n_tokens, lex_signature, LexState},
|
||||||
lite_parser::{lite_parse, LiteCommand, LitePipeline, LiteRedirection, LiteRedirectionTarget},
|
lite_parser::{lite_parse, LiteCommand, LitePipeline, LiteRedirection, LiteRedirectionTarget},
|
||||||
parse_keywords::*,
|
parse_keywords::*,
|
||||||
parse_patterns::parse_pattern,
|
parse_patterns::parse_pattern,
|
||||||
|
@ -5599,10 +5599,32 @@ pub fn parse_record(working_set: &mut StateWorkingSet, span: Span) -> Expression
|
||||||
}
|
}
|
||||||
|
|
||||||
let inner_span = Span::new(start, end);
|
let inner_span = Span::new(start, end);
|
||||||
let source = working_set.get_span_contents(inner_span);
|
|
||||||
|
|
||||||
let (tokens, err) =
|
let mut lex_state = LexState {
|
||||||
lex_alternating_special_tokens(source, start, &[b'\n', b'\r', b','], &[b':'], true);
|
input: working_set.get_span_contents(inner_span),
|
||||||
|
output: Vec::new(),
|
||||||
|
error: None,
|
||||||
|
span_offset: start,
|
||||||
|
};
|
||||||
|
let mut lex_n = |additional_whitespace, special_tokens, max_tokens| {
|
||||||
|
lex_n_tokens(
|
||||||
|
&mut lex_state,
|
||||||
|
additional_whitespace,
|
||||||
|
special_tokens,
|
||||||
|
true,
|
||||||
|
max_tokens,
|
||||||
|
)
|
||||||
|
};
|
||||||
|
loop {
|
||||||
|
if lex_n(&[b'\n', b'\r', b','], &[b':'], 2) < 2 {
|
||||||
|
break;
|
||||||
|
};
|
||||||
|
if lex_n(&[b'\n', b'\r', b','], &[], 1) < 1 {
|
||||||
|
break;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
let (tokens, err) = (lex_state.output, lex_state.error);
|
||||||
|
|
||||||
if let Some(err) = err {
|
if let Some(err) = err {
|
||||||
working_set.error(err);
|
working_set.error(err);
|
||||||
}
|
}
|
||||||
|
@ -5694,48 +5716,50 @@ pub fn parse_record(working_set: &mut StateWorkingSet, span: Span) -> Expression
|
||||||
let value = parse_value(working_set, tokens[idx].span, &SyntaxShape::Any);
|
let value = parse_value(working_set, tokens[idx].span, &SyntaxShape::Any);
|
||||||
idx += 1;
|
idx += 1;
|
||||||
|
|
||||||
let bareword_error = |string_value: &Expression| {
|
// Disallow colons in bare word values
|
||||||
working_set
|
|
||||||
.get_span_contents(string_value.span)
|
// let bareword_error = |string_value: &Expression| {
|
||||||
.iter()
|
// working_set
|
||||||
.find_position(|b| **b == b':')
|
// .get_span_contents(string_value.span)
|
||||||
.map(|(i, _)| {
|
// .iter()
|
||||||
let colon_position = i + string_value.span.start;
|
// .find_position(|b| **b == b':')
|
||||||
ParseError::InvalidLiteral(
|
// .map(|(i, _)| {
|
||||||
"colon".to_string(),
|
// let colon_position = i + string_value.span.start;
|
||||||
"bare word specifying record value".to_string(),
|
// ParseError::InvalidLiteral(
|
||||||
Span::new(colon_position, colon_position + 1),
|
// "colon".to_string(),
|
||||||
)
|
// "bare word specifying record value".to_string(),
|
||||||
})
|
// Span::new(colon_position, colon_position + 1),
|
||||||
};
|
// )
|
||||||
let value_span = working_set.get_span_contents(value.span);
|
// })
|
||||||
let parse_error = match value.expr {
|
// };
|
||||||
Expr::String(_) => {
|
// let value_span = working_set.get_span_contents(value.span);
|
||||||
if ![b'"', b'\'', b'`'].contains(&value_span[0]) {
|
// let parse_error = match value.expr {
|
||||||
bareword_error(&value)
|
// Expr::String(_) => {
|
||||||
} else {
|
// if ![b'"', b'\'', b'`'].contains(&value_span[0]) {
|
||||||
None
|
// bareword_error(&value)
|
||||||
}
|
// } else {
|
||||||
}
|
// None
|
||||||
Expr::StringInterpolation(ref expressions) => {
|
// }
|
||||||
if value_span[0] != b'$' {
|
// }
|
||||||
expressions
|
// Expr::StringInterpolation(ref expressions) => {
|
||||||
.iter()
|
// if value_span[0] != b'$' {
|
||||||
.filter(|expr| matches!(expr.expr, Expr::String(_)))
|
// expressions
|
||||||
.filter_map(bareword_error)
|
// .iter()
|
||||||
.next()
|
// .filter(|expr| matches!(expr.expr, Expr::String(_)))
|
||||||
} else {
|
// .filter_map(bareword_error)
|
||||||
None
|
// .next()
|
||||||
}
|
// } else {
|
||||||
}
|
// None
|
||||||
_ => None,
|
// }
|
||||||
};
|
// }
|
||||||
let value = if let Some(parse_error) = parse_error {
|
// _ => None,
|
||||||
working_set.error(parse_error);
|
// };
|
||||||
garbage(working_set, value.span)
|
// let value = if let Some(parse_error) = parse_error {
|
||||||
} else {
|
// working_set.error(parse_error);
|
||||||
value
|
// garbage(working_set, value.span)
|
||||||
};
|
// } else {
|
||||||
|
// value
|
||||||
|
// };
|
||||||
|
|
||||||
if let Some(field) = field.as_string() {
|
if let Some(field) = field.as_string() {
|
||||||
if let Some(fields) = &mut field_types {
|
if let Some(fields) = &mut field_types {
|
||||||
|
|
Loading…
Reference in New Issue
Block a user