diff --git a/crates/nu_plugin_query/src/nu/mod.rs b/crates/nu_plugin_query/src/nu/mod.rs index 11ef00d7c8..80c0bf8571 100644 --- a/crates/nu_plugin_query/src/nu/mod.rs +++ b/crates/nu_plugin_query/src/nu/mod.rs @@ -77,8 +77,8 @@ pub fn web_examples() -> Vec { description: "Retrieve all `
` elements from phoronix.com website".into(), result: None, }, PluginExample { - example: "http get https://en.wikipedia.org/wiki/List_of_cities_in_India_by_population - | query web --as-table [Rank City 'Population(2011)[3]' 'Population(2001)[3][a]' 'State or union territory']".into(), + example: "http get https://en.wikipedia.org/wiki/List_of_cities_in_India_by_population | + query web --as-table [City 'Population(2011)[3]' 'Population(2001)[3][a]' 'State or unionterritory' 'Ref']".into(), description: "Retrieve a html table from Wikipedia and parse it into a nushell table using table headers as guides".into(), result: None }, diff --git a/crates/nu_plugin_query/src/query_web.rs b/crates/nu_plugin_query/src/query_web.rs index d87ef38466..9318ea9dd7 100644 --- a/crates/nu_plugin_query/src/query_web.rs +++ b/crates/nu_plugin_query/src/query_web.rs @@ -107,7 +107,6 @@ pub fn retrieve_tables( let mut cols: Vec = Vec::new(); if let Value::List { vals, .. } = &columns { for x in vals { - // TODO Find a way to get the Config object here if let Value::String { val, .. 
} = x { cols.push(val.to_string()) } @@ -115,10 +114,11 @@ pub fn retrieve_tables( } if inspect_mode { - eprintln!("Passed in Column Headers = {:#?}", &cols,); + eprintln!("Passed in Column Headers = {:?}\n", &cols); + eprintln!("First 2048 HTML chars = {}\n", html.chars().take(2048).collect::<String>()); } - let tables = match WebTable::find_by_headers(html, &cols) { + let tables = match WebTable::find_by_headers(html, &cols, inspect_mode) { Some(t) => { if inspect_mode { eprintln!("Table Found = {:#?}", &t); @@ -161,6 +161,18 @@ fn retrieve_table(mut table: WebTable, columns: &Value, span: Span) -> Value { } } + // Columns given but table has no headers: return a record with an error message per column + if !cols.is_empty() && table.headers().is_empty() { + let mut record = Record::new(); + for col in &cols { + record.push( + col.clone(), + Value::string("error: no data found (column name may be incorrect)", span), + ); + } + return Value::record(record, span); + } + let mut table_out = Vec::new(); // sometimes there are tables where the first column is the headers, kind of like // a table has ben rotated ccw 90 degrees, in these cases all columns will be missing diff --git a/crates/nu_plugin_query/src/web_tables.rs b/crates/nu_plugin_query/src/web_tables.rs index db175c6b8c..1f69342a3e 100644 --- a/crates/nu_plugin_query/src/web_tables.rs +++ b/crates/nu_plugin_query/src/web_tables.rs @@ -46,7 +46,11 @@ impl WebTable { /// /// If `headers` is empty, this is the same as /// [`find_first`](#method.find_first). 
- pub fn find_by_headers(html: &str, headers: &[T]) -> Option> + pub fn find_by_headers( + html: &str, + headers: &[T], + inspect_mode: bool, + ) -> Option> where T: AsRef, { @@ -64,6 +68,9 @@ impl WebTable { .filter(|table| { table.select(&sel_tr).next().map_or(false, |tr| { let cells = select_cells(tr, &sel_th, true); + if inspect_mode { + eprintln!("Potential HTML Headers = {:?}\n", &cells); + } headers.iter().all(|h| contains_str(&cells, h.as_ref())) }) }) @@ -696,12 +703,15 @@ mod tests { fn test_find_by_headers_empty() { let headers: [&str; 0] = []; - assert_eq!(None, WebTable::find_by_headers("", &headers)); - assert_eq!(None, WebTable::find_by_headers("foo", &headers)); - assert_eq!(None, WebTable::find_by_headers(HTML_NO_TABLE, &headers)); + assert_eq!(None, WebTable::find_by_headers("", &headers, false)); + assert_eq!(None, WebTable::find_by_headers("foo", &headers, false)); + assert_eq!( + None, + WebTable::find_by_headers(HTML_NO_TABLE, &headers, false) + ); - assert!(WebTable::find_by_headers(TABLE_EMPTY, &headers).is_some()); - assert!(WebTable::find_by_headers(HTML_TWO_TABLES, &headers).is_some()); + assert!(WebTable::find_by_headers(TABLE_EMPTY, &headers, false).is_some()); + assert!(WebTable::find_by_headers(HTML_TWO_TABLES, &headers, false).is_some()); } #[test] @@ -709,33 +719,45 @@ mod tests { let headers = ["Name", "Age"]; let bad_headers = ["Name", "BAD"]; - assert_eq!(None, WebTable::find_by_headers("", &headers)); - assert_eq!(None, WebTable::find_by_headers("foo", &headers)); - assert_eq!(None, WebTable::find_by_headers(HTML_NO_TABLE, &headers)); + assert_eq!(None, WebTable::find_by_headers("", &headers, false)); + assert_eq!(None, WebTable::find_by_headers("foo", &headers, false)); + assert_eq!( + None, + WebTable::find_by_headers(HTML_NO_TABLE, &headers, false) + ); - assert_eq!(None, WebTable::find_by_headers(TABLE_EMPTY, &bad_headers)); - assert_eq!(None, WebTable::find_by_headers(TABLE_TH, &bad_headers)); + assert_eq!( + None, + 
WebTable::find_by_headers(TABLE_EMPTY, &bad_headers, false) + ); + assert_eq!( + None, + WebTable::find_by_headers(TABLE_TH, &bad_headers, false) + ); - assert_eq!(None, WebTable::find_by_headers(TABLE_TD, &headers)); - assert_eq!(None, WebTable::find_by_headers(TABLE_TD, &bad_headers)); + assert_eq!(None, WebTable::find_by_headers(TABLE_TD, &headers, false)); + assert_eq!( + None, + WebTable::find_by_headers(TABLE_TD, &bad_headers, false) + ); } #[test] fn test_find_by_headers_some() { let headers: [&str; 0] = []; - assert!(WebTable::find_by_headers(TABLE_TH, &headers).is_some()); - assert!(WebTable::find_by_headers(TABLE_TH_TD, &headers).is_some()); - assert!(WebTable::find_by_headers(HTML_TWO_TABLES, &headers).is_some()); + assert!(WebTable::find_by_headers(TABLE_TH, &headers, false).is_some()); + assert!(WebTable::find_by_headers(TABLE_TH_TD, &headers, false).is_some()); + assert!(WebTable::find_by_headers(HTML_TWO_TABLES, &headers, false).is_some()); let headers = ["Name"]; - assert!(WebTable::find_by_headers(TABLE_TH, &headers).is_some()); - assert!(WebTable::find_by_headers(TABLE_TH_TD, &headers).is_some()); - assert!(WebTable::find_by_headers(HTML_TWO_TABLES, &headers).is_some()); + assert!(WebTable::find_by_headers(TABLE_TH, &headers, false).is_some()); + assert!(WebTable::find_by_headers(TABLE_TH_TD, &headers, false).is_some()); + assert!(WebTable::find_by_headers(HTML_TWO_TABLES, &headers, false).is_some()); let headers = ["Age", "Name"]; - assert!(WebTable::find_by_headers(TABLE_TH, &headers).is_some()); - assert!(WebTable::find_by_headers(TABLE_TH_TD, &headers).is_some()); - assert!(WebTable::find_by_headers(HTML_TWO_TABLES, &headers).is_some()); + assert!(WebTable::find_by_headers(TABLE_TH, &headers, false).is_some()); + assert!(WebTable::find_by_headers(TABLE_TH_TD, &headers, false).is_some()); + assert!(WebTable::find_by_headers(HTML_TWO_TABLES, &headers, false).is_some()); } #[test]