From 4291e31dc728d7f96757913eb2f09782f09c0bcd Mon Sep 17 00:00:00 2001 From: Yehuda Katz Date: Mon, 10 Jun 2019 22:53:04 -0700 Subject: [PATCH] Start rebuilding lite parser using nom --- Cargo.lock | 55 +++--------- Cargo.toml | 3 +- src/errors.rs | 4 +- src/parser.rs | 1 + src/parser/parse2.rs | 6 ++ src/parser/parse2/operator.rs | 50 +++++++++++ src/parser/parse2/parser.rs | 146 ++++++++++++++++++++++++++++++++ src/parser/parse2/span.rs | 114 +++++++++++++++++++++++++ src/parser/parse2/token_tree.rs | 6 ++ src/parser/parse2/tokens.rs | 11 +++ src/parser/parse2/util.rs | 1 + 11 files changed, 352 insertions(+), 45 deletions(-) create mode 100644 src/parser/parse2.rs create mode 100644 src/parser/parse2/operator.rs create mode 100644 src/parser/parse2/parser.rs create mode 100644 src/parser/parse2/span.rs create mode 100644 src/parser/parse2/token_tree.rs create mode 100644 src/parser/parse2/tokens.rs create mode 100644 src/parser/parse2/util.rs diff --git a/Cargo.lock b/Cargo.lock index 4415d569d5..2ed37ff34e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -166,6 +166,11 @@ dependencies = [ "regex 1.1.6 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "bytecount" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "byteorder" version = "1.3.1" @@ -1238,17 +1243,6 @@ name = "lazycell" version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -[[package]] -name = "lexical-core" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "cfg-if 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)", - "ryu 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", - "stackvector 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", - "static_assertions 0.2.5 (registry+https://github.com/rust-lang/crates.io-index)", -] - [[package]] name = "libc" version = "0.2.58" @@ -1530,13 +1524,13 @@ dependencies = [ ] [[package]] -name = "nom" -version = "5.0.0-beta1" +name = "nom_locate" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "lexical-core 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", + "bytecount 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)", "memchr 2.2.0 (registry+https://github.com/rust-lang/crates.io-index)", - "version_check 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", + "nom 4.2.3 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -1573,7 +1567,8 @@ dependencies = [ "log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)", "logos 0.10.0-rc2 (registry+https://github.com/rust-lang/crates.io-index)", "logos-derive 0.10.0-rc2 (registry+https://github.com/rust-lang/crates.io-index)", - "nom 5.0.0-beta1 (registry+https://github.com/rust-lang/crates.io-index)", + "nom 4.2.3 (registry+https://github.com/rust-lang/crates.io-index)", + "nom_locate 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", "ordered-float 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", "pancurses 0.16.1 (registry+https://github.com/rust-lang/crates.io-index)", "pretty 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)", @@ -2539,19 +2534,6 @@ name = "stable_deref_trait" version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -[[package]] -name = "stackvector" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "unreachable 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "static_assertions" -version = "0.2.5" -source = "registry+https://github.com/rust-lang/crates.io-index" - [[package]] name = "string" version = "0.2.0" @@ -2989,14 +2971,6 @@ name = "unicode-xid" version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -[[package]] -name = "unreachable" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "void 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", -] - [[package]] name = "url" version = "1.7.2" @@ -3195,6 +3169,7 @@ dependencies = [ "checksum block 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "0d8c1fef690941d3e7788d328517591fecc684c084084702d6ff1641e993699a" "checksum build_const 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "39092a32794787acd8525ee150305ff051b0aa6cc2abaf193924f5ab05425f39" "checksum byte-unit 2.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "6754bb4703aa167bed5381f0c6842f1cc31a9ecde3b9443f726dde3ad3afb841" +"checksum bytecount 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "f861d9ce359f56dbcb6e0c2a1cb84e52ad732cadb57b806adeb3c7668caccbd8" "checksum byteorder 1.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "a019b10a2a7cdeb292db131fc8113e57ea2a908f6e7894b0c3c671893b65dbeb" "checksum bytes 0.4.12 (registry+https://github.com/rust-lang/crates.io-index)" = "206fdffcfa2df7cbe15601ef46c813fce0965eb3286db6b56c583b814b51c81c" "checksum cc 1.0.37 (registry+https://github.com/rust-lang/crates.io-index)" = "39f75544d7bbaf57560d2168f28fd649ff9c76153874db88bdbdfd839b1a7e7d" @@ -3312,7 +3287,6 @@ dependencies = [ "checksum lazy_static 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)" = "76f033c7ad61445c5b347c7382dd1237847eb1bce590fe50365dcb33d546be73" "checksum lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "bc5729f27f159ddd61f4df6228e827e86643d4d3e7c32183cb30a1c08f604a14" "checksum lazycell 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "b294d6fa9ee409a054354afc4352b0b9ef7ca222c69b8812cbea9e7d2bf3783f" -"checksum lexical-core 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "3e82e023e062f1d25f807ad182008fba1b46538e999f908a08cc0c29e084462e" "checksum libc 0.2.58 (registry+https://github.com/rust-lang/crates.io-index)" = "6281b86796ba5e4366000be6e9e18bf35580adf9e63fbe2294aadb587613a319" "checksum libgit2-sys 0.7.11 (registry+https://github.com/rust-lang/crates.io-index)" = "48441cb35dc255da8ae72825689a95368bf510659ae1ad55dc4aa88cb1789bf1" "checksum libssh2-sys 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)" = "126a1f4078368b163bfdee65fbab072af08a1b374a5551b21e87ade27b1fbf9d" @@ -3343,7 +3317,7 @@ dependencies = [ "checksum nix 0.14.1 (registry+https://github.com/rust-lang/crates.io-index)" = "6c722bee1037d430d0f8e687bbdbf222f27cc6e4e68d5caf630857bb2b6dbdce" "checksum nodrop 0.1.13 (registry+https://github.com/rust-lang/crates.io-index)" = "2f9667ddcc6cc8a43afc9b7917599d7216aa09c463919ea32c59ed6cac8bc945" "checksum nom 4.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "2ad2a91a8e869eeb30b9cb3119ae87773a8f4ae617f41b1eb9c154b2905f7bd6" -"checksum nom 5.0.0-beta1 (registry+https://github.com/rust-lang/crates.io-index)" = "6527f311b2baba609e980e008460ab5ebff6d6da15213bb8eb193b7746eefa24" +"checksum nom_locate 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "6a47c112b3861d81f7fbf73892b9271af933af32bd5dee6889aa3c3fa9caed7e" "checksum num 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "cf4825417e1e1406b3782a8ce92f4d53f26ec055e3622e1881ca8e9f5f9e08db" "checksum num-complex 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "107b9be86cd2481930688277b675b0114578227f034674726605b8a482d8baf8" "checksum num-integer 0.1.40 (registry+https://github.com/rust-lang/crates.io-index)" = "8b8af8caa3184078cd419b430ff93684cb13937970fcb7639f728992f33ce674" @@ -3448,8 +3422,6 @@ dependencies = [ "checksum slab 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "c111b5bd5695e56cffe5129854aa230b39c93a305372fdbb2668ca2394eea9f8" "checksum smallvec 0.6.9 (registry+https://github.com/rust-lang/crates.io-index)" = "c4488ae950c49d403731982257768f48fada354a5203fe81f9bb6f43ca9002be" "checksum stable_deref_trait 1.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "dba1a27d3efae4351c8051072d619e3ade2820635c3958d826bfea39d59b54c8" -"checksum stackvector 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "c049c77bf85fbc036484c97b008276d539d9ebff9dfbde37b632ebcd5b8746b6" -"checksum static_assertions 0.2.5 (registry+https://github.com/rust-lang/crates.io-index)" = "c19be23126415861cb3a23e501d34a708f7f9b2183c5252d690941c2e69199d5" "checksum string 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d0bbfb8937e38e34c3444ff00afb28b0811d9554f15c5ad64d12b0308d1d1995" "checksum strsim 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "bb4f380125926a99e52bc279241539c018323fab05ad6368b56f93d9369ff550" "checksum strsim 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" @@ -3496,7 +3468,6 @@ dependencies = [ "checksum unicode-segmentation 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "1967f4cdfc355b37fd76d2a954fb2ed3871034eb4f26d60537d88795cfc332a9" "checksum unicode-width 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "882386231c45df4700b275c7ff55b6f3698780a650026380e72dabe76fa46526" "checksum unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc" -"checksum unreachable 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "382810877fe448991dfc7f0dd6e3ae5d58088fd0ea5e35189655f84e6814fa56" "checksum url 1.7.2 (registry+https://github.com/rust-lang/crates.io-index)" = "dd4e7c0d531266369519a4aa4f399d748bd37043b00bde1e4ff1f60a120b355a" "checksum utf8-ranges 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "796f7e48bef87609f7ade7e06495a87d5cd06c7866e6a5cbfceffc558a243737" "checksum utf8parse 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "8772a4ccbb4e89959023bc5b7cb8623a795caa7092d99f3aa9501b9484d4557d" diff --git a/Cargo.toml b/Cargo.toml index 4fb8287c06..0628246995 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,7 +19,7 @@ prettytable-rs = "0.8.0" itertools = "0.8.0" ansi_term = "0.11.0" conch-parser = "0.1.1" -nom = "5.0.0-beta1" +nom = "4.2.3" dunce = "1.0.0" indexmap = { version = "1.0.2", features = ["serde-1"] } chrono-humanize = "0.0.11" @@ -61,6 +61,7 @@ clipboard = "0.5" reqwest = "0.9" roxmltree = "0.6.0" pretty = "0.5.2" +nom_locate = "0.3.1" [dependencies.pancurses] version = "0.16" diff --git a/src/errors.rs b/src/errors.rs index 1023474d55..ddf45f4cd9 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -177,8 +177,8 @@ impl std::convert::From for ShellError { } } -impl std::convert::From> for ShellError { - fn from(input: nom::Err<(&str, nom::error::ErrorKind)>) -> ShellError { +impl std::convert::From> for ShellError { + fn from(input: nom::Err<(&str, nom::ErrorKind)>) -> ShellError { ShellError::String(StringError { title: format!("{:?}", input), error: Value::nothing(), diff --git a/src/parser.rs b/src/parser.rs index 6e54183eb4..b2eb9f8468 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -4,6 +4,7 @@ crate mod lexer; crate mod parser; crate mod registry; crate mod span; +crate mod parse2; crate use ast::Pipeline; crate use registry::{Args, CommandConfig}; diff --git a/src/parser/parse2.rs b/src/parser/parse2.rs new file mode 100644 index 0000000000..ab4ef485be --- /dev/null +++ b/src/parser/parse2.rs @@ -0,0 +1,6 @@ +crate mod operator; +crate mod parser; +crate mod span; +crate mod token_tree; +crate mod tokens; +crate mod util; diff --git a/src/parser/parse2/operator.rs b/src/parser/parse2/operator.rs new file mode 100644 index 0000000000..89275afd74 --- /dev/null +++ b/src/parser/parse2/operator.rs @@ -0,0 +1,50 @@ +use serde_derive::{Deserialize, Serialize}; +use std::str::FromStr; + +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Deserialize, Serialize)] +pub enum Operator { + Equal, + NotEqual, + LessThan, + GreaterThan, + LessThanOrEqual, + GreaterThanOrEqual, +} + +impl Operator { + pub fn print(&self) -> String { + self.as_str().to_string() + } + + pub fn as_str(&self) -> &str { + match *self { + Operator::Equal => "==", + Operator::NotEqual => "!=", + Operator::LessThan => "<", + Operator::GreaterThan => ">", + Operator::LessThanOrEqual => "<=", + Operator::GreaterThanOrEqual => ">=", + } + } +} + +impl From<&str> for Operator { + fn from(input: &str) -> Operator { + Operator::from_str(input).unwrap() + } +} + +impl FromStr for Operator { + type Err = (); + fn from_str(input: &str) -> Result::Err> { + match input { + "==" => Ok(Operator::Equal), + "!=" => Ok(Operator::NotEqual), + "<" => Ok(Operator::LessThan), + ">" => Ok(Operator::GreaterThan), + "<=" => Ok(Operator::LessThanOrEqual), + ">=" => Ok(Operator::GreaterThanOrEqual), + _ => Err(()), + } + } +} diff --git a/src/parser/parse2/parser.rs b/src/parser/parse2/parser.rs new file mode 100644 index 0000000000..36c2fea082 --- /dev/null +++ b/src/parser/parse2/parser.rs @@ -0,0 +1,146 @@ +use crate::parser::parse2::{operator::*, span::*, tokens::*}; +use nom::types::CompleteStr; +use nom::*; +use nom_locate::{position, LocatedSpan}; +use std::str::FromStr; + +type NomSpan<'a> = LocatedSpan>; + +macro_rules! operator { + ($name:tt : $token:tt ) => { + named!($name( NomSpan ) -> Token, + do_parse!( + l: position!() + >> t: tag!(stringify!($token)) + >> r: position!() + >> (Spanned::from_nom(RawToken::Operator(Operator::from_str(t.fragment.0).unwrap()), l, r)) + ) + ); + }; +} + +operator! { gt: > } +operator! { lt: < } +operator! { gte: >= } +operator! { lte: <= } +operator! { eq: == } +operator! { neq: != } + +named!(integer( NomSpan ) -> Token, + do_parse!( + l: position!() + >> neg: opt!(tag!("-")) + >> num: digit1 + >> r: position!() + >> (Spanned::from_nom(RawToken::Integer(int(num.fragment.0, neg)), l, r)) + ) +); + +named!(operator( NomSpan ) -> Token, + alt!( + gte | lte | neq | gt | lt | eq + ) +); + +named!(dq_string( NomSpan ) -> Token, + do_parse!( + l: position!() + >> char!('"') + >> l1: position!() + >> many0!(none_of!("\"")) + >> r1: position!() + >> char!('"') + >> r: position!() + >> (Spanned::from_nom(RawToken::String(Span::from((l1, r1))), l, r)) + ) +); + +named!(sq_string( NomSpan ) -> Token, + do_parse!( + l: position!() + >> char!('\'') + >> l1: position!() + >> many0!(none_of!("'")) + >> r1: position!() + >> char!('\'') + >> r: position!() + >> (Spanned::from_nom(RawToken::String(Span::from((l1, r1))), l, r)) + ) +); + +fn int(frag: &str, neg: Option) -> i64 { + let int = FromStr::from_str(frag).unwrap(); + + match neg { + None => int, + Some(_) => int * -1, + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_integer() { + assert_eq!( + integer(NomSpan::new(CompleteStr("123"))).unwrap().1, + Spanned::from_item(RawToken::Integer(123), (0, 3)) + ); + + assert_eq!( + integer(NomSpan::new(CompleteStr("-123"))).unwrap().1, + Spanned::from_item(RawToken::Integer(-123), (0, 4)) + ); + } + + #[test] + fn test_operator() { + assert_eq!( + operator(NomSpan::new(CompleteStr(">"))).unwrap().1, + Spanned::from_item(RawToken::Operator(Operator::GreaterThan), (0, 1)) + ); + + assert_eq!( + operator(NomSpan::new(CompleteStr(">="))).unwrap().1, + Spanned::from_item(RawToken::Operator(Operator::GreaterThanOrEqual), (0, 2)) + ); + + assert_eq!( + operator(NomSpan::new(CompleteStr("<"))).unwrap().1, + Spanned::from_item(RawToken::Operator(Operator::LessThan), (0, 1)) + ); + + assert_eq!( + operator(NomSpan::new(CompleteStr("<="))).unwrap().1, + Spanned::from_item(RawToken::Operator(Operator::LessThanOrEqual), (0, 2)) + ); + + assert_eq!( + operator(NomSpan::new(CompleteStr("=="))).unwrap().1, + Spanned::from_item(RawToken::Operator(Operator::Equal), (0, 2)) + ); + + assert_eq!( + operator(NomSpan::new(CompleteStr("!="))).unwrap().1, + Spanned::from_item(RawToken::Operator(Operator::NotEqual), (0, 2)) + ); + } + + #[test] + fn test_string() { + assert_eq!( + dq_string(NomSpan::new(CompleteStr(r#""hello world""#))) + .unwrap() + .1, + Spanned::from_item(RawToken::String(Span::from((1, 12))), (0, 13)) + ); + + assert_eq!( + sq_string(NomSpan::new(CompleteStr(r#"'hello world'"#))) + .unwrap() + .1, + Spanned::from_item(RawToken::String(Span::from((1, 12))), (0, 13)) + ); + } +} diff --git a/src/parser/parse2/span.rs b/src/parser/parse2/span.rs new file mode 100644 index 0000000000..2d3cbe7b5e --- /dev/null +++ b/src/parser/parse2/span.rs @@ -0,0 +1,114 @@ +use derive_new::new; +use std::ops::Range; + +#[derive(new, Debug, Clone, Copy, Eq, PartialEq, Ord, PartialOrd, Hash)] +pub struct Spanned { + crate span: Span, + crate item: T, +} + +impl std::ops::Deref for Spanned { + type Target = T; + + fn deref(&self) -> &T { + &self.item + } +} + +impl Spanned { + crate fn from_nom( + item: T, + start: nom_locate::LocatedSpan, + end: nom_locate::LocatedSpan, + ) -> Spanned { + let start = start.offset; + let end = end.offset; + + Spanned { + span: Span::from((start, end)), + item, + } + } + + crate fn from_item(item: T, span: impl Into) -> Spanned { + Spanned { + span: span.into(), + item, + } + } + + crate fn map(self, input: impl FnOnce(T) -> U) -> Spanned { + let Spanned { span, item } = self; + + let mapped = input(item); + Spanned { span, item: mapped } + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Ord, PartialOrd, Hash)] +pub struct Span { + crate start: usize, + crate end: usize, + // source: &'source str, +} + +impl From<(nom_locate::LocatedSpan, nom_locate::LocatedSpan)> for Span { + fn from(input: (nom_locate::LocatedSpan, nom_locate::LocatedSpan)) -> Span { + Span { + start: input.0.offset, + end: input.1.offset, + } + } +} + +impl From<(usize, usize)> for Span { + fn from(input: (usize, usize)) -> Span { + Span { + start: input.0, + end: input.1, + } + } +} + +impl From<&std::ops::Range> for Span { + fn from(input: &std::ops::Range) -> Span { + Span { + start: input.start, + end: input.end, + } + } +} + +impl Span { + fn new(range: &Range) -> Span { + Span { + start: range.start, + end: range.end, + // source, + } + } +} + +impl language_reporting::ReportingSpan for Span { + fn with_start(&self, start: usize) -> Self { + Span { + start, + end: self.end, + } + } + + fn with_end(&self, end: usize) -> Self { + Span { + start: self.start, + end, + } + } + + fn start(&self) -> usize { + self.start + } + + fn end(&self) -> usize { + self.end + } +} diff --git a/src/parser/parse2/token_tree.rs b/src/parser/parse2/token_tree.rs new file mode 100644 index 0000000000..3f9f45ded7 --- /dev/null +++ b/src/parser/parse2/token_tree.rs @@ -0,0 +1,6 @@ +use crate::parser::parse2::{span::*, tokens::*}; + +#[derive(Debug, Clone, Copy, Eq, PartialEq, Ord, PartialOrd)] +pub enum TokenNode { + Token(Token), +} diff --git a/src/parser/parse2/tokens.rs b/src/parser/parse2/tokens.rs new file mode 100644 index 0000000000..b12c361b91 --- /dev/null +++ b/src/parser/parse2/tokens.rs @@ -0,0 +1,11 @@ +use crate::parser::parse2::operator::*; +use crate::parser::parse2::span::*; + +#[derive(Debug, Clone, Copy, Eq, PartialEq, Ord, PartialOrd, Hash)] +pub enum RawToken { + Integer(i64), + Operator(Operator), + String(Span), +} + +pub type Token = Spanned; diff --git a/src/parser/parse2/util.rs b/src/parser/parse2/util.rs new file mode 100644 index 0000000000..8b13789179 --- /dev/null +++ b/src/parser/parse2/util.rs @@ -0,0 +1 @@ +