diff --git a/arrow_util/src/bitset.rs b/arrow_util/src/bitset.rs index 71714ccbc9..24d7df404d 100644 --- a/arrow_util/src/bitset.rs +++ b/arrow_util/src/bitset.rs @@ -22,7 +22,7 @@ impl BitSet { } /// Creates a new BitSet with `count` unset bits. - pub fn with_capacity(count: usize) -> Self { + pub fn with_size(count: usize) -> Self { let mut bitset = Self::default(); bitset.append_unset(count); bitset diff --git a/read_buffer/src/column/string.rs b/read_buffer/src/column/string.rs index 1559b064a9..66d4ac884b 100644 --- a/read_buffer/src/column/string.rs +++ b/read_buffer/src/column/string.rs @@ -194,7 +194,8 @@ impl StringEncoding { } // create new ordinal offsets - the encoded values need to be shifted - // into a new domain [0, keys.len()). + // into a new domain `[0, ordinal_mapping.len())` which is the length + // of the new dictionary. let mut ordinal_mapping_keys = ordinal_mapping .keys() .into_iter() @@ -202,26 +203,26 @@ impl StringEncoding { .collect::>(); ordinal_mapping_keys.sort_unstable(); - for (i, key) in ordinal_mapping_keys.into_iter().enumerate() { + for (i, key) in ordinal_mapping_keys.iter().enumerate() { // now we can insert the new ordinal position of the encoded in key // in the final values vector. - ordinal_mapping.insert(key, i as u32); + ordinal_mapping.insert(*key, i as u32); } // Rewrite all the encoded values into the new domain. for key in keys.iter_mut() { - *key = *ordinal_mapping.get(id).unwrap(); + *key = *ordinal_mapping.get(key).unwrap(); } // now generate the values vector, which will contain the sorted set of // string values let mut values = match &self { - Self::RleDictionary(c) => ordinal_mapping - .keys() + Self::RleDictionary(c) => ordinal_mapping_keys + .iter() .map(|id| c.decode_id(*id)) .collect::>(), - Self::Dictionary(c) => ordinal_mapping - .keys() + Self::Dictionary(c) => ordinal_mapping_keys + .iter() .map(|id| c.decode_id(*id)) .collect::>(), }; @@ -680,6 +681,8 @@ mod test { fn _values_as_dictionary(enc: &StringEncoding) { // column is: [apple, apple, pear, NULL, NULL, orange, beta] + // Since the Read Buffer only accepts row IDs in order we only need to + // cover ascending rows in these tests. let cases = vec![ ( &[0, 3, 4][..], // apple NULL, NULL diff --git a/read_buffer/src/value.rs b/read_buffer/src/value.rs index 8929822b8b..9535ab7570 100644 --- a/read_buffer/src/value.rs +++ b/read_buffer/src/value.rs @@ -1511,7 +1511,7 @@ impl From> for arrow::array::ArrayRef { // on the null bitmap if there is at least one NULL // value. let null_bitmap = if matches!(values.first(), Some(None)) { - let mut bitset = BitSet::with_capacity(keys.len()); + let mut bitset = BitSet::with_size(keys.len()); for (i, v) in keys.iter_mut().enumerate() { if *v as usize != 0 { bitset.set(i); // valid value