Refactor window

This commit is contained in:
Ian Manske 2024-07-14 22:55:33 -07:00
parent 12838e6286
commit 5778a64ddf
2 changed files with 171 additions and 109 deletions

View File

@ -1,5 +1,6 @@
use nu_engine::command_prelude::*; use nu_engine::command_prelude::*;
use nu_protocol::ListStream; use nu_protocol::ListStream;
use std::num::NonZeroUsize;
#[derive(Clone)] #[derive(Clone)]
pub struct Chunks; pub struct Chunks;
@ -89,26 +90,33 @@ impl Command for Chunks {
span: chunk_size.span(), span: chunk_size.span(),
})?; })?;
if size == 0 { let size = NonZeroUsize::try_from(size).map_err(|_| ShellError::IncorrectValue {
return Err(ShellError::IncorrectValue {
msg: "`chunk_size` cannot be zero".into(), msg: "`chunk_size` cannot be zero".into(),
val_span: chunk_size.span(), val_span: chunk_size.span(),
call_span: head, call_span: head,
}); })?;
chunks(engine_state, input, size, head)
}
} }
pub fn chunks(
engine_state: &EngineState,
input: PipelineData,
chunk_size: NonZeroUsize,
span: Span,
) -> Result<PipelineData, ShellError> {
match input { match input {
PipelineData::Value(Value::List { vals, .. }, metadata) => { PipelineData::Value(Value::List { vals, .. }, metadata) => {
let chunks = ChunksIter::new(vals, size, head); let chunks = ChunksIter::new(vals, chunk_size, span);
let stream = ListStream::new(chunks, head, engine_state.signals().clone()); let stream = ListStream::new(chunks, span, engine_state.signals().clone());
Ok(PipelineData::ListStream(stream, metadata)) Ok(PipelineData::ListStream(stream, metadata))
} }
PipelineData::ListStream(stream, metadata) => { PipelineData::ListStream(stream, metadata) => {
let stream = stream.modify(|iter| ChunksIter::new(iter, size, head)); let stream = stream.modify(|iter| ChunksIter::new(iter, chunk_size, span));
Ok(PipelineData::ListStream(stream, metadata)) Ok(PipelineData::ListStream(stream, metadata))
} }
input => Err(input.unsupported_input_error("list", head)), input => Err(input.unsupported_input_error("list", span)),
}
} }
} }
@ -119,10 +127,10 @@ struct ChunksIter<I: Iterator<Item = Value>> {
} }
impl<I: Iterator<Item = Value>> ChunksIter<I> { impl<I: Iterator<Item = Value>> ChunksIter<I> {
fn new(iter: impl IntoIterator<IntoIter = I>, size: usize, span: Span) -> Self { fn new(iter: impl IntoIterator<IntoIter = I>, size: NonZeroUsize, span: Span) -> Self {
Self { Self {
iter: iter.into_iter(), iter: iter.into_iter(),
size, size: size.into(),
span, span,
} }
} }

View File

@ -1,5 +1,6 @@
use nu_engine::command_prelude::*; use nu_engine::command_prelude::*;
use nu_protocol::ValueIterator; use nu_protocol::ListStream;
use std::num::NonZeroUsize;
#[derive(Clone)] #[derive(Clone)]
pub struct Window; pub struct Window;
@ -12,8 +13,8 @@ impl Command for Window {
fn signature(&self) -> Signature { fn signature(&self) -> Signature {
Signature::build("window") Signature::build("window")
.input_output_types(vec![( .input_output_types(vec![(
Type::List(Box::new(Type::Any)), Type::list(Type::Any),
Type::List(Box::new(Type::List(Box::new(Type::Any)))), Type::list(Type::list(Type::Any)),
)]) )])
.required("window_size", SyntaxShape::Int, "The size of each window.") .required("window_size", SyntaxShape::Int, "The size of each window.")
.named( .named(
@ -77,118 +78,171 @@ impl Command for Window {
input: PipelineData, input: PipelineData,
) -> Result<PipelineData, ShellError> { ) -> Result<PipelineData, ShellError> {
let head = call.head; let head = call.head;
let group_size: Spanned<usize> = call.req(engine_state, stack, 0)?; let window_size: Value = call.req(engine_state, stack, 0)?;
let metadata = input.metadata(); let stride: Option<Value> = call.get_flag(engine_state, stack, "stride")?;
let stride: Option<usize> = call.get_flag(engine_state, stack, "stride")?;
let remainder = call.has_flag(engine_state, stack, "remainder")?; let remainder = call.has_flag(engine_state, stack, "remainder")?;
let stride = stride.unwrap_or(1); let size =
usize::try_from(window_size.as_int()?).map_err(|_| ShellError::NeedsPositiveValue {
span: window_size.span(),
})?;
//FIXME: add in support for external redirection when engine-q supports it generally let size = NonZeroUsize::try_from(size).map_err(|_| ShellError::IncorrectValue {
msg: "`window_size` cannot be zero".into(),
val_span: window_size.span(),
call_span: head,
})?;
let each_group_iterator = EachWindowIterator { let stride = if let Some(stride_val) = stride {
group_size: group_size.item, let stride = usize::try_from(stride_val.as_int()?).map_err(|_| {
input: Box::new(input.into_iter()), ShellError::NeedsPositiveValue {
span: head, span: stride_val.span(),
previous: None, }
stride, })?;
remainder,
NonZeroUsize::try_from(stride).map_err(|_| ShellError::IncorrectValue {
msg: "`stride` cannot be zero".into(),
val_span: stride_val.span(),
call_span: head,
})?
} else {
NonZeroUsize::MIN
}; };
Ok(each_group_iterator.into_pipeline_data_with_metadata( if remainder && size == stride {
head, super::chunks::chunks(engine_state, input, size, head)
engine_state.signals().clone(), } else if stride >= size {
metadata, match input {
)) PipelineData::Value(Value::List { vals, .. }, metadata) => {
let chunks = WindowGapIter::new(vals, size, stride, remainder, head);
let stream = ListStream::new(chunks, head, engine_state.signals().clone());
Ok(PipelineData::ListStream(stream, metadata))
}
PipelineData::ListStream(stream, metadata) => {
let stream = stream
.modify(|iter| WindowGapIter::new(iter, size, stride, remainder, head));
Ok(PipelineData::ListStream(stream, metadata))
}
input => Err(input.unsupported_input_error("list", head)),
}
} else {
match input {
PipelineData::Value(Value::List { vals, .. }, metadata) => {
let chunks = WindowOverlapIter::new(vals, size, stride, remainder, head);
let stream = ListStream::new(chunks, head, engine_state.signals().clone());
Ok(PipelineData::ListStream(stream, metadata))
}
PipelineData::ListStream(stream, metadata) => {
let stream = stream
.modify(|iter| WindowOverlapIter::new(iter, size, stride, remainder, head));
Ok(PipelineData::ListStream(stream, metadata))
}
input => Err(input.unsupported_input_error("list", head)),
}
}
} }
} }
struct EachWindowIterator { struct WindowOverlapIter<I: Iterator<Item = Value>> {
group_size: usize, iter: I,
input: ValueIterator, window: Vec<Value>,
span: Span,
previous: Option<Vec<Value>>,
stride: usize, stride: usize,
remainder: bool, remainder: bool,
span: Span,
} }
impl Iterator for EachWindowIterator { impl<I: Iterator<Item = Value>> WindowOverlapIter<I> {
fn new(
iter: impl IntoIterator<IntoIter = I>,
size: NonZeroUsize,
stride: NonZeroUsize,
remainder: bool,
span: Span,
) -> Self {
Self {
iter: iter.into_iter(),
window: Vec::with_capacity(size.into()),
stride: stride.into(),
remainder,
span,
}
}
}
impl<I: Iterator<Item = Value>> Iterator for WindowOverlapIter<I> {
type Item = Value; type Item = Value;
fn next(&mut self) -> Option<Self::Item> { fn next(&mut self) -> Option<Self::Item> {
let mut group = self.previous.take().unwrap_or_else(|| { let len = if self.window.is_empty() {
let mut vec = Vec::new(); self.window.capacity()
// We default to a Vec of capacity size + stride as striding pushes n extra elements to the end
vec.try_reserve(self.group_size + self.stride).ok();
vec
});
let mut current_count = 0;
if group.is_empty() {
loop {
let item = self.input.next();
match item {
Some(v) => {
group.push(v);
current_count += 1;
if current_count >= self.group_size {
break;
}
}
None => {
if self.remainder {
break;
} else { } else {
return None; self.stride
} };
}
} self.window.extend((&mut self.iter).take(len));
}
if self.window.len() == self.window.capacity()
|| (self.remainder && !self.window.is_empty())
{
let mut next = Vec::with_capacity(self.window.len());
next.extend(self.window.iter().skip(self.stride).cloned());
let window = std::mem::replace(&mut self.window, next);
Some(Value::list(window, self.span))
} else { } else {
// our historic buffer is already full, so stride instead None
loop {
let item = self.input.next();
match item {
Some(v) => {
group.push(v);
current_count += 1;
if current_count >= self.stride {
break;
} }
} }
None => { }
if self.remainder {
break; struct WindowGapIter<I: Iterator<Item = Value>> {
iter: I,
size: usize,
skip: usize,
first: bool,
remainder: bool,
span: Span,
}
impl<I: Iterator<Item = Value>> WindowGapIter<I> {
fn new(
iter: impl IntoIterator<IntoIter = I>,
size: NonZeroUsize,
stride: NonZeroUsize,
remainder: bool,
span: Span,
) -> Self {
let size = size.into();
Self {
iter: iter.into_iter(),
size,
skip: stride.get() - size,
first: true,
remainder,
span,
}
}
}
impl<I: Iterator<Item = Value>> Iterator for WindowGapIter<I> {
type Item = Value;
fn next(&mut self) -> Option<Self::Item> {
let mut window = Vec::with_capacity(self.size);
window.extend(
(&mut self.iter)
.skip(if self.first { 0 } else { self.skip })
.take(self.size),
);
self.first = false;
if window.len() == self.size || (self.remainder && !window.is_empty()) {
Some(Value::list(window, self.span))
} else { } else {
return None; None
} }
} }
} }
}
// We now have elements + stride in our group, and need to
// drop the skipped elements. Drain to preserve allocation and capacity
// Dropping this iterator consumes it.
group.drain(..self.stride.min(group.len()));
}
if group.is_empty() {
return None;
}
let return_group = group.clone();
self.previous = Some(group);
Some(Value::list(return_group, self.span))
}
}
#[cfg(test)] #[cfg(test)]
mod test { mod test {