From c87bac04c0b0d42f2467106339b927ab7a3d01f2 Mon Sep 17 00:00:00 2001 From: Stefan Holderbach Date: Sat, 28 Oct 2023 15:18:41 +0200 Subject: [PATCH] Add common map-like API to `nu_protocol::Record` (#10841) # Description > Our `Record` looks like a map, quacks like a map, so let's treat it with the API for a map Implement common methods found on e.g. `std::collections::HashMap` or the insertion-ordered [indexmap](https://docs.rs/indexmap). This allows contributors to not have to worry about how to get to the relevant items and not mess up the assumptions of a Nushell record. ## Record assumptions - `cols` and `vals` are of equal length - for all practical purposes, keys/columns should be unique ## End goal The end goal of the upcoming series of PR's is to allow us to make `cols` and `vals` private. Then it would be possible to exchange the backing datastructure to best fit the expected workload. This could be statically (by finding the best balance) or dynamically by using an `enum` of potential representations. ## Parts - Add validating explicit part constructor `Record::from_raw_cols_vals()` - Add `Record.columns()` iterator - Add `Record.values()` iterator - Add consuming `Record.into_values()` iterator - Add `Record.contains()` helper - Add `Record.insert()` that respects existing keys - Add key-based `.get()`/`.get_mut()` to `Record` - Add `Record.get_index()` for index-based access - Implement `Extend` for `Record` naively - Use checked constructor in `record!` macro - Add `Record.index_of()` to get index by key # User-Facing Changes None directly # Developer facing changes You don't have to roll your own record handling and can use a familiar API # Tests + Formatting No explicit unit tests yet. Wouldn't be too tricky to validate core properties directly. Will be exercised by the following PRs using the new methods/traits/iterators. --- crates/nu-protocol/src/value/record.rs | 173 ++++++++++++++++++++++++- 1 file changed, 169 insertions(+), 4 deletions(-) diff --git a/crates/nu-protocol/src/value/record.rs b/crates/nu-protocol/src/value/record.rs index d28f3df2cf..b8592a3a5a 100644 --- a/crates/nu-protocol/src/value/record.rs +++ b/crates/nu-protocol/src/value/record.rs @@ -20,6 +20,16 @@ impl Record { } } + // Constructor that checks that `cols` and `vals` are of the same length. + // + // For perf reasons does not validate the rest of the record assumptions. + // - unique keys + pub fn from_raw_cols_vals(cols: Vec, vals: Vec) -> Self { + assert_eq!(cols.len(), vals.len()); + + Self { cols, vals } + } + pub fn iter(&self) -> Iter { self.into_iter() } @@ -36,10 +46,71 @@ impl Record { usize::min(self.cols.len(), self.vals.len()) } + /// Naive push to the end of the datastructure. + /// + /// May duplicate data! + /// + /// Consider to use [`Record::insert`] instead pub fn push(&mut self, col: impl Into, val: Value) { self.cols.push(col.into()); self.vals.push(val); } + + /// Insert into the record, replacing preexisting value if found. + /// + /// Returns `Some(previous_value)` if found. Else `None` + pub fn insert(&mut self, col: K, val: Value) -> Option + where + K: AsRef + Into, + { + if let Some(idx) = self.index_of(&col) { + // Can panic if vals.len() < cols.len() + let curr_val = &mut self.vals[idx]; + Some(std::mem::replace(curr_val, val)) + } else { + self.cols.push(col.into()); + self.vals.push(val); + None + } + } + + pub fn contains(&self, col: impl AsRef) -> bool { + self.cols.iter().any(|k| k == col.as_ref()) + } + + pub fn index_of(&self, col: impl AsRef) -> Option { + self.columns().position(|k| k == col.as_ref()) + } + + pub fn get(&self, col: impl AsRef) -> Option<&Value> { + self.index_of(col).and_then(|idx| self.vals.get(idx)) + } + + pub fn get_mut(&mut self, col: impl AsRef) -> Option<&mut Value> { + self.index_of(col).and_then(|idx| self.vals.get_mut(idx)) + } + + pub fn get_index(&self, idx: usize) -> Option<(&String, &Value)> { + Some((self.cols.get(idx)?, self.vals.get(idx)?)) + } + + pub fn columns(&self) -> Columns { + Columns { + iter: self.cols.iter(), + } + } + + pub fn values(&self) -> Values { + Values { + iter: self.vals.iter(), + } + } + + pub fn into_values(self) -> IntoValues { + IntoValues { + iter: self.vals.into_iter(), + } + } } impl FromIterator<(String, Value)> for Record { @@ -49,6 +120,16 @@ impl FromIterator<(String, Value)> for Record { } } +impl Extend<(String, Value)> for Record { + fn extend>(&mut self, iter: T) { + for (k, v) in iter { + // TODO: should this .insert with a check? + self.cols.push(k); + self.vals.push(v); + } + } +} + pub type IntoIter = std::iter::Zip, std::vec::IntoIter>; impl IntoIterator for Record { @@ -85,13 +166,97 @@ impl<'a> IntoIterator for &'a mut Record { } } +pub struct Columns<'a> { + iter: std::slice::Iter<'a, String>, +} + +impl<'a> Iterator for Columns<'a> { + type Item = &'a String; + + fn next(&mut self) -> Option { + self.iter.next() + } + + fn size_hint(&self) -> (usize, Option) { + self.iter.size_hint() + } +} + +impl<'a> DoubleEndedIterator for Columns<'a> { + fn next_back(&mut self) -> Option { + self.iter.next_back() + } +} + +impl<'a> ExactSizeIterator for Columns<'a> { + fn len(&self) -> usize { + self.iter.len() + } +} + +pub struct Values<'a> { + iter: std::slice::Iter<'a, Value>, +} + +impl<'a> Iterator for Values<'a> { + type Item = &'a Value; + + fn next(&mut self) -> Option { + self.iter.next() + } + + fn size_hint(&self) -> (usize, Option) { + self.iter.size_hint() + } +} + +impl<'a> DoubleEndedIterator for Values<'a> { + fn next_back(&mut self) -> Option { + self.iter.next_back() + } +} + +impl<'a> ExactSizeIterator for Values<'a> { + fn len(&self) -> usize { + self.iter.len() + } +} + +pub struct IntoValues { + iter: std::vec::IntoIter, +} + +impl Iterator for IntoValues { + type Item = Value; + + fn next(&mut self) -> Option { + self.iter.next() + } + + fn size_hint(&self) -> (usize, Option) { + self.iter.size_hint() + } +} + +impl DoubleEndedIterator for IntoValues { + fn next_back(&mut self) -> Option { + self.iter.next_back() + } +} + +impl ExactSizeIterator for IntoValues { + fn len(&self) -> usize { + self.iter.len() + } +} + #[macro_export] macro_rules! record { {$($col:expr => $val:expr),+ $(,)?} => { - $crate::Record { - cols: vec![$($col.into(),)+], - vals: vec![$($val,)+] - } + $crate::Record::from_raw_cols_vals ( + vec![$($col.into(),)+], + vec![$($val,)+] + ) }; {} => { $crate::Record::new()