diff --git a/Cargo.lock b/Cargo.lock index 491a05b526..5ef60ef89b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2421,19 +2421,6 @@ dependencies = [ "smallvec", ] -[[package]] -name = "nipper" -version = "0.1.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "761382864693f4bb171abf9e8de181a320b00464a83a9a5071059057b1fe0116" -dependencies = [ - "cssparser", - "html5ever", - "markup5ever", - "selectors", - "tendril", -] - [[package]] name = "nix" version = "0.20.1" @@ -3037,7 +3024,6 @@ name = "nu_plugin_selector" version = "0.38.0" dependencies = [ "indexmap", - "nipper", "nu-errors", "nu-plugin", "nu-protocol", diff --git a/crates/nu_plugin_selector/Cargo.toml b/crates/nu_plugin_selector/Cargo.toml index 6acc9f950d..52e7dcd003 100644 --- a/crates/nu_plugin_selector/Cargo.toml +++ b/crates/nu_plugin_selector/Cargo.toml @@ -10,7 +10,6 @@ version = "0.38.0" doctest = false [dependencies] -nipper = "0.1.9" scraper = "0.12.0" nu-errors = { version = "0.38.0", path="../nu-errors" } nu-plugin = { version = "0.38.0", path="../nu-plugin" } diff --git a/crates/nu_plugin_selector/src/nu/mod.rs b/crates/nu_plugin_selector/src/nu/mod.rs index 5e0e444c04..01e4714bd2 100644 --- a/crates/nu_plugin_selector/src/nu/mod.rs +++ b/crates/nu_plugin_selector/src/nu/mod.rs @@ -1,10 +1,10 @@ +use crate::{selector::begin_selector_query, Selector}; use nu_errors::ShellError; use nu_plugin::Plugin; use nu_protocol::{ CallInfo, Primitive, ReturnSuccess, ReturnValue, Signature, SyntaxShape, UntaggedValue, Value, }; - -use crate::{selector::begin_selector_query, Selector}; +use scraper::Selector as ScraperSelector; impl Plugin for Selector { fn config(&mut self) -> Result<Signature, ShellError> { @@ -63,6 +63,13 @@ impl Plugin for Selector { } fn filter(&mut self, input: Value) -> Result<Vec<ReturnValue>, ShellError> { + if !self.query.is_empty() && ScraperSelector::parse(&self.query).is_err() { + return Err(ShellError::labeled_error( + "Can not parse this query as a valid css selector", + "Parse error", +
&self.tag, + )); + } match input { Value { value: UntaggedValue::Primitive(Primitive::String(s)), diff --git a/crates/nu_plugin_selector/src/selector.rs b/crates/nu_plugin_selector/src/selector.rs index 8f58d67fe8..9c94dde833 100644 --- a/crates/nu_plugin_selector/src/selector.rs +++ b/crates/nu_plugin_selector/src/selector.rs @@ -1,7 +1,7 @@ use crate::Table; -use nipper::Document; use nu_protocol::{value::StringExt, Primitive, TaggedDictBuilder, UntaggedValue, Value}; use nu_source::Tag; +use scraper::{Html, Selector as ScraperSelector}; pub struct Selector { pub query: String, @@ -166,13 +166,14 @@ fn execute_selector_query_with_attribute( query_string: &str, attribute: &str, ) -> Vec<Value> { - let doc = Document::from(input_string); + let doc = Html::parse_fragment(input_string); - doc.select(query_string) - .iter() + doc.select(&css(query_string)) .map(|selection| { selection - .attr_or(attribute, "") + .value() + .attr(attribute) + .unwrap_or("") .to_string() .to_string_value_create_tag() }) @@ -180,57 +181,51 @@ fn execute_selector_query_with_attribute( } fn execute_selector_query(input_string: &str, query_string: &str, as_html: bool) -> Vec<Value> { - let doc = Document::from(input_string); + let doc = Html::parse_fragment(input_string); match as_html { true => doc - .select(query_string) - .iter() - .map(|selection| selection.html().to_string().to_string_value_create_tag()) + .select(&css(query_string)) + .map(|selection| selection.html().to_string_value_create_tag()) .collect(), false => doc - .select(query_string) - .iter() - .map(|selection| selection.text().to_string().to_string_value_create_tag()) + .select(&css(query_string)) + .map(|selection| { + selection + .text() + .fold("".to_string(), |acc, x| format!("{}{}", acc, x)) + .to_string_value_create_tag() + }) .collect(), } } +pub fn css(selector: &str) -> ScraperSelector { + ScraperSelector::parse(selector).expect("this should never trigger") +} + #[cfg(test)] mod tests { - use nipper::Document; + use super::*;
+ + const SIMPLE_LIST: &'static str = r#"
+<ul>
+    <li>Coffee</li>
+    <li>Tea</li>
+    <li>Milk</li>
+</ul>
+"#; #[test] - fn create_document_from_string() { - let html = r#"<div name="foo" value="bar"></div>"#; - let document = Document::from(html); - let shouldbe = - r#"<html><head></head><body><div name="foo" value="bar"></div></body></html>"#; - - assert_eq!(shouldbe.to_string(), document.html().to_string()); + fn test_first_child_is_not_empty() { + assert!(!execute_selector_query(SIMPLE_LIST, "li:first-child", false).is_empty()) } #[test] - fn modify_html_document() { - let html = r#"<div name="foo" value="bar"></div>
"#; - let document = Document::from(html); - let mut input = document.select(r#"div[name="foo"]"#); - input.set_attr("id", "input"); - input.remove_attr("name"); - - let shouldbe = "bar".to_string(); - let actual = input.attr("value").unwrap().to_string(); - - assert_eq!(shouldbe, actual); + fn test_first_child() { + assert_eq!( + vec!["Coffee".to_string().to_string_value_create_tag()], + execute_selector_query(SIMPLE_LIST, "li:first-child", false) + ) } - - // #[test] - // fn test_hacker_news() -> Result<(), ShellError> { - // let html = reqwest::blocking::get("https://news.ycombinator.com")?.text()?; - // let document = Document::from(&html); - // let result = query(html, ".hnname a".to_string(), Tag::unknown()); - // let shouldbe = Ok(vec!["Hacker News".to_str_value_create_tag()]); - // assert_eq!(shouldbe, result); - // Ok(()) - // } } diff --git a/crates/nu_plugin_selector/src/tables.rs b/crates/nu_plugin_selector/src/tables.rs index 2b42c02ba1..c6aaf41b87 100644 --- a/crates/nu_plugin_selector/src/tables.rs +++ b/crates/nu_plugin_selector/src/tables.rs @@ -1,3 +1,4 @@ +use crate::selector::css; use scraper::{element_ref::ElementRef, Html, Selector as ScraperSelector}; use std::collections::HashMap; @@ -263,10 +264,6 @@ impl<'a> IntoIterator for Row<'a> { } } -fn css(selector: &'static str) -> ScraperSelector { - ScraperSelector::parse(selector).expect("Unable to parse selector with scraper") -} - fn select_cells( element: ElementRef, selector: &ScraperSelector,