Use a default value of 100 for `infer_schema`, aligning with the default polars behavior.
This commit is contained in:
parent
e417bc7035
commit
e5ba96cd9d
|
@ -32,6 +32,8 @@ use polars_io::{
|
||||||
avro::AvroReader, csv::read::CsvReadOptions, prelude::ParallelStrategy, HiveOptions,
|
avro::AvroReader, csv::read::CsvReadOptions, prelude::ParallelStrategy, HiveOptions,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/// Fallback schema-inference length used when the `infer-schema` flag is not supplied.
/// Set to 100 to align with the default polars behavior.
const DEFAULT_INFER_SCHEMA: usize = 100;
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub struct OpenDataFrame;
|
pub struct OpenDataFrame;
|
||||||
|
|
||||||
|
@ -374,7 +376,9 @@ fn from_jsonl(
|
||||||
file_path: &Path,
|
file_path: &Path,
|
||||||
file_span: Span,
|
file_span: Span,
|
||||||
) -> Result<Value, ShellError> {
|
) -> Result<Value, ShellError> {
|
||||||
let infer_schema: Option<usize> = call.get_flag("infer-schema")?;
|
let infer_schema: usize = call
|
||||||
|
.get_flag("infer-schema")?
|
||||||
|
.unwrap_or(DEFAULT_INFER_SCHEMA);
|
||||||
let maybe_schema = call
|
let maybe_schema = call
|
||||||
.get_flag("schema")?
|
.get_flag("schema")?
|
||||||
.map(|schema| NuSchema::try_from(&schema))
|
.map(|schema| NuSchema::try_from(&schema))
|
||||||
|
@ -384,7 +388,7 @@ fn from_jsonl(
|
||||||
let start_time = std::time::Instant::now();
|
let start_time = std::time::Instant::now();
|
||||||
|
|
||||||
let df = LazyJsonLineReader::new(file_path)
|
let df = LazyJsonLineReader::new(file_path)
|
||||||
.with_infer_schema_length(infer_schema)
|
.with_infer_schema_length(Some(infer_schema))
|
||||||
.with_schema(maybe_schema.map(|s| s.into()))
|
.with_schema(maybe_schema.map(|s| s.into()))
|
||||||
.finish()
|
.finish()
|
||||||
.map_err(|e| ShellError::GenericError {
|
.map_err(|e| ShellError::GenericError {
|
||||||
|
@ -417,7 +421,7 @@ fn from_jsonl(
|
||||||
let buf_reader = BufReader::new(file);
|
let buf_reader = BufReader::new(file);
|
||||||
let reader = JsonReader::new(buf_reader)
|
let reader = JsonReader::new(buf_reader)
|
||||||
.with_json_format(JsonFormat::JsonLines)
|
.with_json_format(JsonFormat::JsonLines)
|
||||||
.infer_schema_len(infer_schema);
|
.infer_schema_len(Some(infer_schema));
|
||||||
|
|
||||||
let reader = match maybe_schema {
|
let reader = match maybe_schema {
|
||||||
Some(schema) => reader.with_schema(schema.into()),
|
Some(schema) => reader.with_schema(schema.into()),
|
||||||
|
@ -459,7 +463,9 @@ fn from_csv(
|
||||||
) -> Result<Value, ShellError> {
|
) -> Result<Value, ShellError> {
|
||||||
let delimiter: Option<Spanned<String>> = call.get_flag("delimiter")?;
|
let delimiter: Option<Spanned<String>> = call.get_flag("delimiter")?;
|
||||||
let no_header: bool = call.has_flag("no-header")?;
|
let no_header: bool = call.has_flag("no-header")?;
|
||||||
let infer_schema: Option<usize> = call.get_flag("infer-schema")?;
|
let infer_schema: usize = call
|
||||||
|
.get_flag("infer-schema")?
|
||||||
|
.unwrap_or(DEFAULT_INFER_SCHEMA);
|
||||||
let skip_rows: Option<usize> = call.get_flag("skip-rows")?;
|
let skip_rows: Option<usize> = call.get_flag("skip-rows")?;
|
||||||
let columns: Option<Vec<String>> = call.get_flag("columns")?;
|
let columns: Option<Vec<String>> = call.get_flag("columns")?;
|
||||||
|
|
||||||
|
@ -499,10 +505,7 @@ fn from_csv(
|
||||||
None => csv_reader,
|
None => csv_reader,
|
||||||
};
|
};
|
||||||
|
|
||||||
let csv_reader = match infer_schema {
|
let csv_reader = csv_reader.with_infer_schema_length(Some(infer_schema));
|
||||||
None => csv_reader,
|
|
||||||
Some(r) => csv_reader.with_infer_schema_length(Some(r)),
|
|
||||||
};
|
|
||||||
|
|
||||||
let csv_reader = match skip_rows {
|
let csv_reader = match skip_rows {
|
||||||
None => csv_reader,
|
None => csv_reader,
|
||||||
|
@ -535,7 +538,7 @@ fn from_csv(
|
||||||
let start_time = std::time::Instant::now();
|
let start_time = std::time::Instant::now();
|
||||||
let df = CsvReadOptions::default()
|
let df = CsvReadOptions::default()
|
||||||
.with_has_header(!no_header)
|
.with_has_header(!no_header)
|
||||||
.with_infer_schema_length(infer_schema)
|
.with_infer_schema_length(Some(infer_schema))
|
||||||
.with_skip_rows(skip_rows.unwrap_or_default())
|
.with_skip_rows(skip_rows.unwrap_or_default())
|
||||||
.with_schema(maybe_schema.map(|s| s.into()))
|
.with_schema(maybe_schema.map(|s| s.into()))
|
||||||
.with_columns(columns.map(Arc::new))
|
.with_columns(columns.map(Arc::new))
|
||||||
|
|
Loading…
Reference in New Issue
Block a user