Add command signatures and signature-driven call parsing.

Review fixes folded into this revision of the patch:
  * parse_call: store the resolved DeclId on the Call (it was left at the default 0)
  * parse_call: advance positional_idx after a plain positional argument
  * parse_call: record long-form switches (flags without a value) in call.named
  * signature.rs: correct the doc comments on `optional` and `get_short_flag`

NOTE(review): line breaks, indentation and generic arguments (e.g. Vec<u8>,
Option<ParseError>) were restored from a whitespace-collapsed copy of this patch;
confirm context lines against the pre-image before applying. The post-image blob
hashes on the `index` lines predate the review fixes.

diff --git a/src/lib.rs b/src/lib.rs
index b42c4c684b..de7b4e7c59 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -3,10 +3,13 @@ mod lite_parse;
 mod parse_error;
 mod parser;
 mod parser_state;
+mod signature;
 mod span;
 
 pub use lex::{lex, LexMode, Token, TokenContents};
 pub use lite_parse::{lite_parse, LiteBlock, LiteCommand, LiteStatement};
 pub use parse_error::ParseError;
-pub use parser_state::{ParserState, ParserWorkingSet};
+pub use parser::{Call, Expr, Expression, Import, Pipeline, Statement, SyntaxShape, VarDecl};
+pub use parser_state::{DeclId, ParserState, ParserWorkingSet, VarId};
+pub use signature::Signature;
 pub use span::Span;
diff --git a/src/main.rs b/src/main.rs
index 794df50f14..5e944ecd79 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,9 +1,12 @@
-use engine_q::{lex, lite_parse, LexMode, ParserWorkingSet};
+use engine_q::{lex, lite_parse, LexMode, ParserWorkingSet, Signature, SyntaxShape};
 
 fn main() -> std::io::Result<()> {
     if let Some(path) = std::env::args().nth(1) {
         let mut working_set = ParserWorkingSet::new(None);
 
+        let sig = Signature::build("foo").named("--jazz", SyntaxShape::Int, "jazz!!", Some('j'));
+        working_set.add_decl((b"foo").to_vec(), sig);
+
         //let file = std::fs::read(&path)?;
         //let (output, err) = working_set.parse_file(&path, &file);
         let (output, err) = working_set.parse_source(path.as_bytes());
diff --git a/src/parse_error.rs b/src/parse_error.rs
index c9bfb3f3e6..94c5525058 100644
--- a/src/parse_error.rs
+++ b/src/parse_error.rs
@@ -3,9 +3,14 @@ pub use crate::Span;
 #[derive(Debug)]
 pub enum ParseError {
     ExtraTokens(Span),
+    ExtraPositional(Span),
     UnexpectedEof(String, Span),
     UnknownStatement(Span),
     Mismatch(String, Span),
     VariableNotFound(Span),
     UnknownCommand(Span),
+    NonUtf8(Span),
+    UnknownFlag(Span),
+    MissingFlagParam(Span),
+    ShortFlagBatchCantTakeArg(Span),
 }
diff --git a/src/parser.rs b/src/parser.rs
index 3115c53a7c..b89539fa29 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -1,18 +1,11 @@
-use std::str::Utf8Error;
-
 use crate::{
     lex, lite_parse,
     parser_state::{Type, VarId},
-    LiteBlock, LiteCommand, LiteStatement, ParseError, ParserWorkingSet, Span,
+    DeclId, LiteBlock, ParseError, ParserWorkingSet, Span,
 };
 
-pub struct Signature {
-    pub name: String,
-    pub mandatory_positional: Vec<SyntaxShape>,
-}
-
 /// The syntactic shapes that values must match to be passed into a command. You can think of this as the type-checking that occurs when you call a function.
-#[derive(Debug, Clone)]
+#[derive(Debug, Clone, PartialEq, Eq)]
 pub enum SyntaxShape {
     /// A specific match to a word or symbol
     Word(Vec<u8>),
@@ -51,14 +44,39 @@ pub enum SyntaxShape {
     MathExpression,
 }
 
-#[derive(Debug)]
+#[derive(Debug, Clone)]
+pub struct Call {
+    /// identifier of the declaration to call
+    pub decl_id: DeclId,
+    pub positional: Vec<Expression>,
+    pub named: Vec<(String, Option<Expression>)>,
+}
+
+impl Default for Call {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl Call {
+    pub fn new() -> Call {
+        Self {
+            decl_id: 0,
+            positional: vec![],
+            named: vec![],
+        }
+    }
+}
+
+#[derive(Debug, Clone)]
 pub enum Expr {
     Int(i64),
     Var(VarId),
+    Call(Call),
     Garbage,
 }
 
-#[derive(Debug)]
+#[derive(Debug, Clone)]
 pub struct Expression {
     expr: Expr,
     ty: Type,
@@ -167,16 +185,151 @@ impl ParserWorkingSet {
     }
 
+    /// Parse a call to a declared command: `spans[0]` names the command and the remaining
+    /// spans are matched against the declaration's signature as long flags, batches of
+    /// short flags, or positional arguments. Names with no declaration are handed to
+    /// `parse_external_call`.
     pub fn parse_call(&mut self, spans: &[Span]) -> (Expression, Option<ParseError>) {
+        let mut error = None;
+
         // assume spans.len() > 0?
         let name = self.get_span_contents(spans[0]);
 
         if let Some(decl_id) = self.find_decl(name) {
-            let sig = self.get_decl(decl_id).expect("internal error: bad DeclId");
+            let mut call = Call::new();
+            call.decl_id = decl_id;
+
+            let sig = self
+                .get_decl(decl_id)
+                .expect("internal error: bad DeclId")
+                .clone();
 
             let mut positional_idx = 0;
             let mut arg_offset = 1;
 
-            (Expression::garbage(spans[0]), None)
+            while arg_offset < spans.len() {
+                let arg_span = spans[arg_offset];
+                let arg_contents = self.get_span_contents(arg_span);
+                if arg_contents.starts_with(&[b'-', b'-']) {
+                    // FIXME: only use the first you find
+                    let split: Vec<_> = arg_contents.split(|x| *x == b'=').collect();
+                    let long_name = String::from_utf8(split[0].into());
+                    if let Ok(long_name) = long_name {
+                        if let Some(flag) = sig.get_long_flag(&long_name) {
+                            if let Some(arg_shape) = &flag.arg {
+                                if split.len() > 1 {
+                                    // and we also have the argument
+                                    let mut span = arg_span;
+                                    span.start += long_name.len() + 1; //offset by long flag and '='
+                                    let (arg, err) = self.parse_arg(span, arg_shape.clone());
+                                    error = error.or(err);
+
+                                    call.named.push((long_name, Some(arg)));
+                                } else if let Some(arg) = spans.get(arg_offset + 1) {
+                                    let (arg, err) = self.parse_arg(*arg, arg_shape.clone());
+                                    error = error.or(err);
+
+                                    call.named.push((long_name, Some(arg)));
+                                    arg_offset += 1;
+                                } else {
+                                    error = error.or(Some(ParseError::MissingFlagParam(arg_span)))
+                                }
+                            } else {
+                                // A switch takes no value; still record that it was passed
+                                call.named.push((long_name, None));
+                            }
+                        } else {
+                            error = error.or(Some(ParseError::UnknownFlag(arg_span)))
+                        }
+                    } else {
+                        error = error.or(Some(ParseError::NonUtf8(arg_span)))
+                    }
+                } else if arg_contents.starts_with(&[b'-']) && arg_contents.len() > 1 {
+                    let short_flags = &arg_contents[1..];
+                    let mut found_short_flags = vec![];
+                    let mut unmatched_short_flags = vec![];
+                    for short_flag in short_flags.iter().enumerate() {
+                        let short_flag_char = char::from(*short_flag.1);
+                        let orig = arg_span;
+                        let short_flag_span = Span {
+                            start: orig.start + 1 + short_flag.0,
+                            end: orig.start + 1 + short_flag.0 + 1,
+                            file_id: orig.file_id,
+                        };
+                        if let Some(flag) = sig.get_short_flag(short_flag_char) {
+                            // If we require an arg and are in a batch of short flags, error
+                            if !found_short_flags.is_empty() && flag.arg.is_some() {
+                                error = error.or(Some(ParseError::ShortFlagBatchCantTakeArg(
+                                    short_flag_span,
+                                )))
+                            }
+                            found_short_flags.push(flag);
+                        } else {
+                            unmatched_short_flags.push(short_flag_span);
+                        }
+                    }
+
+                    if found_short_flags.is_empty() {
+                        // check to see if we have a negative number
+                        if let Some(positional) = sig.get_positional(positional_idx) {
+                            if positional.shape == SyntaxShape::Int
+                                || positional.shape == SyntaxShape::Number
+                            {
+                                let (arg, err) = self.parse_arg(arg_span, positional.shape);
+
+                                if err.is_some() {
+                                    if let Some(first) = unmatched_short_flags.first() {
+                                        error = error.or(Some(ParseError::UnknownFlag(*first)));
+                                    }
+                                } else {
+                                    // We have successfully found a positional argument, move on
+                                    call.positional.push(arg);
+                                    positional_idx += 1;
+                                }
+                            } else if let Some(first) = unmatched_short_flags.first() {
+                                error = error.or(Some(ParseError::UnknownFlag(*first)));
+                            }
+                        } else if let Some(first) = unmatched_short_flags.first() {
+                            error = error.or(Some(ParseError::UnknownFlag(*first)));
+                        }
+                    }
+
+                    for flag in found_short_flags {
+                        if let Some(arg_shape) = flag.arg {
+                            if let Some(arg) = spans.get(arg_offset + 1) {
+                                let (arg, err) = self.parse_arg(*arg, arg_shape.clone());
+                                error = error.or(err);
+
+                                call.named.push((flag.long.clone(), Some(arg)));
+                                arg_offset += 1;
+                            } else {
+                                error = error.or(Some(ParseError::MissingFlagParam(arg_span)))
+                            }
+                        } else {
+                            call.named.push((flag.long.clone(), None));
+                        }
+                    }
+                } else if let Some(positional) = sig.get_positional(positional_idx) {
+                    let (arg, err) = self.parse_arg(arg_span, positional.shape);
+                    error = error.or(err);
+
+                    call.positional.push(arg);
+                    // Advance so the next bare argument is matched against the next positional
+                    positional_idx += 1;
+                } else {
+                    error = error.or(Some(ParseError::ExtraPositional(arg_span)))
+                }
+                arg_offset += 1;
+            }
+
+            // FIXME: type unknown
+            (
+                Expression {
+                    expr: Expr::Call(call),
+                    ty: Type::Unknown,
+                    span: span(spans),
+                },
+                error,
+            )
         } else {
             self.parse_external_call(spans)
         }
@@ -294,6 +447,16 @@ impl ParserWorkingSet {
                     )
                 }
             }
+            SyntaxShape::Int => {
+                if let Ok(token) = String::from_utf8(bytes.into()) {
+                    self.parse_int(&token, span)
+                } else {
+                    (
+                        garbage(span),
+                        Some(ParseError::Mismatch("number".into(), span)),
+                    )
+                }
+            }
             _ => (
                 garbage(span),
                 Some(ParseError::Mismatch("number".into(), span)),
@@ -306,7 +469,13 @@ impl ParserWorkingSet {
     }
 
     pub fn parse_expression(&mut self, spans: &[Span]) -> (Expression, Option<ParseError>) {
-        self.parse_math_expression(spans)
+        let bytes = self.get_span_contents(spans[0]);
+
+        match bytes[0] {
+            b'0' | b'1' | b'2' | b'3' | b'4' | b'5' | b'6' | b'7' | b'8' | b'9' | b'(' | b'{'
+            | b'[' | b'$' => self.parse_math_expression(spans),
+            _ => self.parse_call(spans),
+        }
     }
 
     pub fn parse_variable(&mut self, span: Span) -> (Option<VarId>, Option<ParseError>) {
@@ -362,14 +531,9 @@ impl ParserWorkingSet {
     pub fn parse_statement(&mut self, spans: &[Span]) -> (Statement, Option<ParseError>) {
         if let (stmt, None) = self.parse_let(spans) {
             (stmt, None)
-        } else if let (expr, None) = self.parse_expression(spans) {
-            (Statement::Expression(expr), None)
         } else {
-            let span = span(spans);
-            (
-                Statement::Expression(garbage(span)),
-                Some(ParseError::Mismatch("statement".into(), span)),
-            )
+            let (expr, err) = self.parse_expression(spans);
+            (Statement::Expression(expr), err)
         }
     }
 
diff --git a/src/parser_state.rs b/src/parser_state.rs
index 8b7e692db5..79b806201a 100644
--- a/src/parser_state.rs
+++ b/src/parser_state.rs
@@ -1,4 +1,4 @@
-use crate::{parser::Signature, ParseError, Span};
+use crate::{ParseError, Signature, Span};
 use core::num;
 use std::{collections::HashMap, sync::Arc};
 
@@ -17,6 +17,7 @@ pub enum Type {
 pub type VarId = usize;
 pub type DeclId = usize;
 
+#[derive(Debug)]
 struct ScopeFrame {
     vars: HashMap<Vec<u8>, VarId>,
     decls: HashMap<Vec<u8>, DeclId>,
@@ -74,7 +75,7 @@ impl ParserState {
         self.vars.get(var_id)
     }
 
-    pub fn get_decl(&self, decl_id: VarId) -> Option<&Signature> {
+    pub fn get_decl(&self, decl_id: DeclId) -> Option<&Signature> {
         self.decls.get(decl_id)
     }
 
@@ -104,7 +105,7 @@ impl ParserWorkingSet {
             vars: vec![],
             decls: vec![],
             permanent_state,
-            scope: vec![],
+            scope: vec![ScopeFrame::new()],
         }
     }
 
@@ -118,6 +119,21 @@ impl ParserWorkingSet {
         self.files.len() + parent_len
     }
 
+    /// Store `sig` and bind `name` to it in the last scope frame; returns the new `DeclId`.
+    pub fn add_decl(&mut self, name: Vec<u8>, sig: Signature) -> DeclId {
+        let scope_frame = self
+            .scope
+            .last_mut()
+            .expect("internal error: missing required scope frame");
+
+        self.decls.push(sig);
+        let decl_id = self.decls.len() - 1;
+
+        scope_frame.decls.insert(name, decl_id);
+
+        decl_id
+    }
+
     pub fn add_file(&mut self, filename: String, contents: Vec<u8>) -> usize {
         self.files.push((filename, contents));
 
diff --git a/src/signature.rs b/src/signature.rs
new file mode 100644
index 0000000000..c003eba8ad
--- /dev/null
+++ b/src/signature.rs
@@ -0,0 +1,223 @@
+use crate::parser::SyntaxShape;
+
+/// A named flag declared by a command, with an optional single-character short form.
+#[derive(Debug, Clone)]
+pub struct Flag {
+    pub long: String,
+    pub short: Option<char>,
+    pub arg: Option<SyntaxShape>,
+    pub required: bool,
+    pub desc: String,
+}
+
+/// A positional parameter and the syntax shape its argument is parsed as.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct PositionalArg {
+    pub name: String,
+    pub desc: String,
+    pub shape: SyntaxShape,
+}
+
+/// The declared parameters of a command: positionals (required, optional, rest) and flags.
+#[derive(Clone, Debug)]
+pub struct Signature {
+    pub name: String,
+    pub usage: String,
+    pub extra_usage: String,
+    pub required_positional: Vec<PositionalArg>,
+    pub optional_positional: Vec<PositionalArg>,
+    pub rest_positional: Option<PositionalArg>,
+    pub named: Vec<Flag>,
+    pub is_filter: bool,
+}
+
+// NOTE(review): equality ignores `named` and `extra_usage` — confirm this is intended.
+impl PartialEq for Signature {
+    fn eq(&self, other: &Self) -> bool {
+        self.name == other.name
+            && self.usage == other.usage
+            && self.required_positional == other.required_positional
+            && self.optional_positional == other.optional_positional
+            && self.rest_positional == other.rest_positional
+            && self.is_filter == other.is_filter
+    }
+}
+
+impl Eq for Signature {}
+
+impl Signature {
+    /// Create a signature for `name` with no usage text, positionals, or flags.
+    pub fn new(name: impl Into<String>) -> Signature {
+        Signature {
+            name: name.into(),
+            usage: String::new(),
+            extra_usage: String::new(),
+            required_positional: vec![],
+            optional_positional: vec![],
+            rest_positional: None,
+            named: vec![],
+            is_filter: false,
+        }
+    }
+
+    /// Builder-style entry point; same as [`Signature::new`].
+    pub fn build(name: impl Into<String>) -> Signature {
+        Signature::new(name.into())
+    }
+
+    /// Add a description to the signature
+    pub fn desc(mut self, usage: impl Into<String>) -> Signature {
+        self.usage = usage.into();
+        self
+    }
+
+    /// Add a required positional argument to the signature
+    pub fn required(
+        mut self,
+        name: impl Into<String>,
+        shape: impl Into<SyntaxShape>,
+        desc: impl Into<String>,
+    ) -> Signature {
+        self.required_positional.push(PositionalArg {
+            name: name.into(),
+            desc: desc.into(),
+            shape: shape.into(),
+        });
+
+        self
+    }
+
+    /// Add an optional positional argument to the signature
+    pub fn optional(
+        mut self,
+        name: impl Into<String>,
+        shape: impl Into<SyntaxShape>,
+        desc: impl Into<String>,
+    ) -> Signature {
+        self.optional_positional.push(PositionalArg {
+            name: name.into(),
+            desc: desc.into(),
+            shape: shape.into(),
+        });
+
+        self
+    }
+
+    /// Add an optional named flag argument to the signature
+    pub fn named(
+        mut self,
+        name: impl Into<String>,
+        shape: impl Into<SyntaxShape>,
+        desc: impl Into<String>,
+        short: Option<char>,
+    ) -> Signature {
+        let s = short.map(|c| {
+            debug_assert!(!self.get_shorts().contains(&c));
+            c
+        });
+        self.named.push(Flag {
+            long: name.into(),
+            short: s,
+            arg: Some(shape.into()),
+            required: false,
+            desc: desc.into(),
+        });
+
+        self
+    }
+
+    /// Add a required named flag argument to the signature
+    pub fn required_named(
+        mut self,
+        name: impl Into<String>,
+        shape: impl Into<SyntaxShape>,
+        desc: impl Into<String>,
+        short: Option<char>,
+    ) -> Signature {
+        let s = short.map(|c| {
+            debug_assert!(!self.get_shorts().contains(&c));
+            c
+        });
+        self.named.push(Flag {
+            long: name.into(),
+            short: s,
+            arg: Some(shape.into()),
+            required: true,
+            desc: desc.into(),
+        });
+
+        self
+    }
+
+    /// Add a switch to the signature
+    pub fn switch(
+        mut self,
+        name: impl Into<String>,
+        desc: impl Into<String>,
+        short: Option<char>,
+    ) -> Signature {
+        let s = short.map(|c| {
+            debug_assert!(
+                !self.get_shorts().contains(&c),
+                "There may be duplicate short flags, such as -h"
+            );
+            c
+        });
+
+        self.named.push(Flag {
+            long: name.into(),
+            short: s,
+            arg: None,
+            required: false,
+            desc: desc.into(),
+        });
+        self
+    }
+
+    /// Get list of the short-hand flags
+    pub fn get_shorts(&self) -> Vec<char> {
+        let mut shorts = Vec::new();
+        for Flag { short, .. } in &self.named {
+            if let Some(c) = short {
+                shorts.push(*c);
+            }
+        }
+        shorts
+    }
+
+    /// Get the positional parameter at `position`: required ones come first, then
+    /// optional ones; any later index yields the rest positional, if one is declared.
+    pub fn get_positional(&self, position: usize) -> Option<PositionalArg> {
+        if position < self.required_positional.len() {
+            self.required_positional.get(position).cloned()
+        } else if position < (self.required_positional.len() + self.optional_positional.len()) {
+            self.optional_positional
+                .get(position - self.required_positional.len())
+                .cloned()
+        } else {
+            self.rest_positional.clone()
+        }
+    }
+
+    /// Find the matching long flag
+    pub fn get_long_flag(&self, name: &str) -> Option<Flag> {
+        for flag in &self.named {
+            if flag.long == name {
+                return Some(flag.clone());
+            }
+        }
+        None
+    }
+
+    /// Find the matching short flag
+    pub fn get_short_flag(&self, short: char) -> Option<Flag> {
+        for flag in &self.named {
+            if let Some(short_flag) = &flag.short {
+                if *short_flag == short {
+                    return Some(flag.clone());
+                }
+            }
+        }
+        None
+    }
+}