Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 7 additions & 16 deletions datafusion/functions/src/string/common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

use std::sync::Arc;

use crate::strings::make_and_append_view;
use crate::strings::append_view;
use arrow::array::{
Array, ArrayRef, GenericStringArray, GenericStringBuilder, NullBufferBuilder,
OffsetSizeTrait, StringViewArray, StringViewBuilder, new_null_array,
Expand Down Expand Up @@ -152,13 +152,8 @@ fn string_view_trim<Tr: Trimmer>(args: &[ArrayRef]) -> Result<ArrayRef> {
{
if let Some(src_str) = src_str_opt {
let (trimmed, offset) = Tr::trim_ascii_char(src_str, b' ');
make_and_append_view(
&mut views_buf,
&mut null_builder,
raw_view,
trimmed,
offset,
);
append_view(&mut views_buf, raw_view, trimmed, offset);
null_builder.append_non_null();
} else {
null_builder.append_null();
views_buf.push(0);
Expand Down Expand Up @@ -204,13 +199,8 @@ fn string_view_trim<Tr: Trimmer>(args: &[ArrayRef]) -> Result<ArrayRef> {
pattern.clear();
pattern.extend(characters.chars());
let (trimmed, offset) = Tr::trim(src_str, &pattern);
make_and_append_view(
&mut views_buf,
&mut null_builder,
raw_view,
trimmed,
offset,
);
append_view(&mut views_buf, raw_view, trimmed, offset);
null_builder.append_non_null();
} else {
null_builder.append_null();
views_buf.push(0);
Expand Down Expand Up @@ -261,7 +251,8 @@ fn trim_and_append_view<Tr: Trimmer>(
) {
if let Some(src_str) = src_str_opt {
let (trimmed, offset) = Tr::trim(src_str, pattern);
make_and_append_view(views_buf, null_builder, original_view, trimmed, offset);
append_view(views_buf, original_view, trimmed, offset);
null_builder.append_non_null();
} else {
null_builder.append_null();
views_buf.push(0);
Expand Down
16 changes: 9 additions & 7 deletions datafusion/functions/src/strings.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ use datafusion_common::{Result, exec_datafusion_err, internal_err};

use arrow::array::{
Array, ArrayAccessor, ArrayDataBuilder, BinaryArray, ByteView, LargeStringArray,
NullBufferBuilder, StringArray, StringViewArray, StringViewBuilder, make_view,
StringArray, StringViewArray, StringViewBuilder, make_view,
};
use arrow::buffer::{MutableBuffer, NullBuffer};
use arrow::datatypes::DataType;
Expand Down Expand Up @@ -372,7 +372,9 @@ impl LargeStringArrayBuilder {
}
}

/// Append a new view to the views buffer with the given substr
/// Append a new view to the views buffer with the given substr.
///
/// Callers are responsible for their own null tracking.
///
/// # Safety
///
Expand All @@ -381,13 +383,15 @@ impl LargeStringArrayBuilder {
///
/// # Arguments
/// - views_buffer: The buffer to append the new view to
/// - null_builder: The buffer to append the null value to
/// - original_view: The original view value
/// - substr: The substring to append. Must be a valid substring of the original view
/// - start_offset: The start offset of the substring in the view
pub fn make_and_append_view(
///
/// LLVM is apparently overly eager to inline this function into some hot loops,
/// which bloats them and regresses performance, so we disable inlining for now.
#[inline(never)]
pub fn append_view(
views_buffer: &mut Vec<u128>,
null_builder: &mut NullBufferBuilder,
original_view: &u128,
substr: &str,
start_offset: u32,
Expand All @@ -401,11 +405,9 @@ pub fn make_and_append_view(
view.offset + start_offset,
)
} else {
// inline value does not need block id or offset
make_view(substr.as_bytes(), 0, 0)
};
views_buffer.push(sub_view);
null_builder.append_non_null();
}

#[derive(Debug)]
Expand Down
48 changes: 22 additions & 26 deletions datafusion/functions/src/unicode/substr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,13 @@

use std::sync::Arc;

use crate::strings::make_and_append_view;
use crate::strings::append_view;
use crate::utils::make_scalar_function;
use arrow::array::{
Array, ArrayRef, AsArray, Int64Array, NullBufferBuilder, StringArrayType,
StringViewArray, StringViewBuilder,
Array, ArrayRef, AsArray, Int64Array, StringArrayType, StringViewArray,
StringViewBuilder,
};
use arrow::buffer::ScalarBuffer;
use arrow::buffer::{NullBuffer, ScalarBuffer};
use arrow::datatypes::DataType;
use datafusion_common::cast::as_int64_array;
use datafusion_common::types::{
Expand Down Expand Up @@ -278,39 +278,32 @@ fn string_view_substr(
let enable_ascii_fast_path =
enable_ascii_fast_path(&string_view_array, start_array, count_array_opt);

// Combine null bitmaps from all inputs in bulk.
let nulls = NullBuffer::union(
NullBuffer::union(string_view_array.nulls(), start_array.nulls()).as_ref(),
count_array_opt.and_then(|a| a.nulls()),
);

let mut views_buf = Vec::with_capacity(string_view_array.len());
let mut null_builder = NullBufferBuilder::new(string_view_array.len());

for i in 0..string_view_array.len() {
if string_view_array.is_null(i)
|| start_array.is_null(i)
|| count_array_opt.map(|a| a.is_null(i)).unwrap_or(false)
{
null_builder.append_null();

for (i, raw_view) in string_view_array.views().iter().enumerate() {
if nulls.as_ref().is_some_and(|n| n.is_null(i)) {
views_buf.push(0);
continue;
}

let string = string_view_array.value(i);
let start = start_array.value(i);
let count = count_array_opt.map(|a| a.value(i));
let raw_view = string_view_array.views()[i];

let (start, end) =
get_true_start_end(string, start, count, enable_ascii_fast_path)?;
let substr = &string[start..end];

make_and_append_view(
&mut views_buf,
&mut null_builder,
&raw_view,
substr,
start as u32,
);
append_view(&mut views_buf, raw_view, substr, start as u32);
}

let views_buf = ScalarBuffer::from(views_buf);
let nulls_buf = null_builder.finish();

// Safety:
// (1) The blocks of the given views are all provided
Expand All @@ -320,7 +313,7 @@ fn string_view_substr(
let array = StringViewArray::new_unchecked(
views_buf,
string_view_array.data_buffers().to_vec(),
nulls_buf,
nulls,
);
Ok(Arc::new(array) as ArrayRef)
}
Expand All @@ -336,13 +329,16 @@ where
let enable_ascii_fast_path =
enable_ascii_fast_path(&string_array, start_array, count_array_opt);

// Combine null bitmaps from all inputs in bulk.
let nulls = NullBuffer::union(
NullBuffer::union(string_array.nulls(), start_array.nulls()).as_ref(),
count_array_opt.and_then(|a| a.nulls()),
);

let mut result_builder = StringViewBuilder::new();

for i in 0..string_array.len() {
if string_array.is_null(i)
|| start_array.is_null(i)
|| count_array_opt.map(|a| a.is_null(i)).unwrap_or(false)
{
if nulls.as_ref().is_some_and(|n| n.is_null(i)) {
result_builder.append_null();
continue;
}
Expand Down
Loading