feat: arrow_util truncate and bitset append (#2879)
* feat: arrow_util truncate and bitset append
* chore: check still mutable after truncate
* chore: review feedback

Co-authored-by: kodiakhq[bot] <49736102+kodiakhq[bot]@users.noreply.github.com>
pull/24376/head
parent
f5a84122e3
commit
1518f30da3
|
@ -35,8 +35,41 @@ impl BitSet {
|
|||
self.buffer.resize(new_buf_len, 0);
|
||||
}
|
||||
|
||||
/// Appends `count` set bits
|
||||
pub fn append_set(&mut self, count: usize) {
|
||||
let new_len = self.len + count;
|
||||
let new_buf_len = (new_len + 7) >> 3;
|
||||
|
||||
let skew = self.len & 7;
|
||||
if skew != 0 {
|
||||
*self.buffer.last_mut().unwrap() |= 0xFF << skew;
|
||||
}
|
||||
|
||||
self.buffer.resize(new_buf_len, 0xFF);
|
||||
|
||||
let rem = new_len & 7;
|
||||
if rem != 0 {
|
||||
*self.buffer.last_mut().unwrap() &= (1 << rem) - 1;
|
||||
}
|
||||
|
||||
self.len = new_len;
|
||||
}
|
||||
|
||||
/// Truncates the bitset to the provided length
|
||||
pub fn truncate(&mut self, len: usize) {
|
||||
let new_buf_len = (len + 7) >> 3;
|
||||
self.buffer.truncate(new_buf_len);
|
||||
let overrun = len & 7;
|
||||
if overrun > 0 {
|
||||
*self.buffer.last_mut().unwrap() &= (1 << overrun) - 1;
|
||||
}
|
||||
self.len = len;
|
||||
}
|
||||
|
||||
/// Appends `count` boolean values from the slice of packed bits
|
||||
pub fn append_bits(&mut self, count: usize, to_set: &[u8]) {
|
||||
assert_eq!((count + 7) >> 3, to_set.len());
|
||||
|
||||
let new_len = self.len + count;
|
||||
let new_buf_len = (new_len + 7) >> 3;
|
||||
self.buffer.reserve(new_buf_len - self.buffer.len());
|
||||
|
@ -252,6 +285,53 @@ mod tests {
|
|||
}
|
||||
}
|
||||
|
||||
/// Appends random-length runs of set or unset bits and checks after every
/// step that the packed buffer matches the same sequence of bools packed
/// by `compact_bools`
#[test]
fn test_append_fuzz() {
    let mut rng = rand::thread_rng();
    let mut bitset = BitSet::new();
    let mut expected = vec![];

    for _ in 0..100 {
        let run = (rng.next_u32() % 32) as usize;
        let value = rng.next_u32() & 1 == 0;

        if value {
            bitset.append_set(run);
        } else {
            bitset.append_unset(run);
        }

        // Mirror the append in the reference vector of bools
        expected.extend(std::iter::repeat(value).take(run));

        assert_eq!(bitset.buffer, compact_bools(&expected));
    }
}
|
||||
|
||||
/// Alternates random appends and random truncations, checking after every
/// round that the packed buffer matches a reference vector of bools
#[test]
fn test_truncate_fuzz() {
    let mut mask = BitSet::new();
    let mut all_bools = vec![];
    let mut rng = rand::thread_rng();

    for _ in 0..100 {
        let mask_length = (rng.next_u32() % 32) as usize;
        let bools: Vec<_> = std::iter::from_fn(|| Some(rng.next_u32() & 1 == 0))
            .take(mask_length)
            .collect();

        let collected = compact_bools(&bools);
        mask.append_bits(mask_length, &collected);
        all_bools.extend_from_slice(&bools);

        // `all_bools` can be empty here (a zero-length append onto an empty
        // set), and `% 0` panics, so only pick a truncation point when
        // there is something to truncate
        if !all_bools.is_empty() {
            let truncate = rng.next_u32() as usize % all_bools.len();
            mask.truncate(truncate);
            all_bools.truncate(truncate);
        }

        let collected = compact_bools(&all_bools);
        assert_eq!(mask.buffer, collected);
    }
}
|
||||
|
||||
#[test]
|
||||
fn test_arrow_compat() {
|
||||
let bools = &[
|
||||
|
|
|
@ -112,6 +112,19 @@ impl<K: AsPrimitive<usize> + FromPrimitive + Zero> StringDictionary<K> {
|
|||
pub fn into_inner(self) -> PackedStringArray<K> {
|
||||
self.storage
|
||||
}
|
||||
|
||||
/// Truncates this dictionary removing all keys larger than `id`
|
||||
pub fn truncate(&mut self, id: K) {
|
||||
let id = id.as_();
|
||||
self.dedup.retain(|k, _| k.as_() <= id);
|
||||
self.storage.truncate(id + 1)
|
||||
}
|
||||
|
||||
/// Clears this dictionary removing all elements
|
||||
pub fn clear(&mut self) {
|
||||
self.storage.clear();
|
||||
self.dedup.clear()
|
||||
}
|
||||
}
|
||||
|
||||
fn hash_str(hasher: &ahash::RandomState, value: &str) -> u64 {
|
||||
|
@ -255,4 +268,30 @@ mod test {
|
|||
let err = TryInto::<StringDictionary<_>>::try_into(data).expect_err("expected failure");
|
||||
assert!(matches!(err, Error::DuplicateKeyFound { key } if &key == "cupcakes"))
|
||||
}
|
||||
|
||||
/// Truncating at a key keeps that key and everything before it, removes
/// everything after it, and leaves the dictionary usable for new inserts
#[test]
fn test_truncate() {
    let mut dict = StringDictionary::<i32>::new();

    // Repeated values must not allocate new keys
    dict.lookup_value_or_insert("cupcake");
    dict.lookup_value_or_insert("cupcake");
    dict.lookup_value_or_insert("bingo");
    let bingo_id = dict.lookup_value_or_insert("bingo");
    let bongo_id = dict.lookup_value_or_insert("bongo");
    dict.lookup_value_or_insert("bingo");
    dict.lookup_value_or_insert("cupcake");

    dict.truncate(bingo_id);

    // Only "cupcake" and "bingo" survive
    assert_eq!(dict.values().len(), 2);
    assert_eq!(dict.dedup.len(), 2);

    assert_eq!(dict.lookup_value("cupcake"), Some(0));
    assert_eq!(dict.lookup_value("bingo"), Some(1));

    // "bongo" is gone, both by value and by id
    assert!(dict.lookup_value("bongo").is_none());
    assert!(dict.lookup_id(bongo_id).is_none());

    // Re-inserting after the truncate hands out the next key
    dict.lookup_value_or_insert("bongo");
    assert_eq!(dict.lookup_value("bongo"), Some(2));
}
|
||||
}
|
||||
|
|
|
@ -76,6 +76,19 @@ impl<K: AsPrimitive<usize> + FromPrimitive + Zero> PackedStringArray<K> {
|
|||
self.offsets.resize(self.offsets.len() + len, offset);
|
||||
}
|
||||
|
||||
/// Truncates the array to the given length
|
||||
pub fn truncate(&mut self, len: usize) {
|
||||
self.offsets.truncate(len + 1);
|
||||
let last_idx = self.offsets.last().expect("offsets empty");
|
||||
self.storage.truncate(last_idx.as_());
|
||||
}
|
||||
|
||||
/// Removes all elements from this array
|
||||
pub fn clear(&mut self) {
|
||||
self.offsets.truncate(1);
|
||||
self.storage.clear();
|
||||
}
|
||||
|
||||
pub fn iter(&self) -> PackedStringIterator<'_, K> {
|
||||
PackedStringIterator {
|
||||
array: self,
|
||||
|
@ -170,4 +183,22 @@ mod tests {
|
|||
assert_eq!(array.get(9).unwrap(), "");
|
||||
assert_eq!(array.get(3).unwrap(), "");
|
||||
}
|
||||
|
||||
/// Truncating keeps the leading strings and the array can still be
/// appended to afterwards
#[test]
fn test_truncate() {
    let mut packed = PackedStringArray::<i32>::new();

    packed.append("hello");
    packed.append("world");
    packed.append("cupcake");

    // Keep only the first string
    packed.truncate(1);
    assert_eq!(packed.len(), 1);
    assert_eq!(packed.get(0).unwrap(), "hello");

    // The array must still be mutable after the truncate
    packed.append("world");
    assert_eq!(packed.len(), 2);
    assert_eq!(packed.get(0).unwrap(), "hello");
    assert_eq!(packed.get(1).unwrap(), "world");
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue