--- a PPN by Garber Painting Akron. With Image Size Reduction included!URL: http://github.com/apache/iceberg-cpp/pull/595.patch
_t RoaringPositionBitmap::Cardinality() const {
+ size_t total = 0;
+ for (const auto& bitmap : impl_->bitmaps) {
+ total += bitmap.cardinality();
+ }
+ return total;
+}
+
+void RoaringPositionBitmap::Or(const RoaringPositionBitmap& other) {
+ impl_->AllocateBitmapsIfNeeded(static_cast(other.impl_->bitmaps.size()));
+ for (size_t key = 0; key < other.impl_->bitmaps.size(); ++key) {
+ impl_->bitmaps[key] |= other.impl_->bitmaps[key];
+ }
+}
+
+bool RoaringPositionBitmap::Optimize() {
+ bool changed = false;
+ for (auto& bitmap : impl_->bitmaps) {
+ changed |= bitmap.runOptimize();
+ }
+ return changed;
+}
+
+void RoaringPositionBitmap::ForEach(const std::function& fn) const {
+ for (size_t key = 0; key < impl_->bitmaps.size(); ++key) {
+ for (uint32_t pos32 : impl_->bitmaps[key]) {
+ fn(ToPosition(static_cast(key), pos32));
+ }
+ }
+}
+
+size_t RoaringPositionBitmap::SerializedSizeInBytes() const {
+ size_t size = kBitmapCountSizeBytes;
+ for (const auto& bitmap : impl_->bitmaps) {
+ size += kBitmapKeySizeBytes + bitmap.getSizeInBytes(/*portable=*/true);
+ }
+ return size;
+}
+
+Result RoaringPositionBitmap::Serialize() const {
+ size_t size = SerializedSizeInBytes();
+ std::string result(size, '\0');
+ char* buf = result.data();
+
+ // Write bitmap count (array length including empties)
+ WriteLE64(buf, static_cast(impl_->bitmaps.size()));
+ buf += kBitmapCountSizeBytes;
+
+ // Write each bitmap with its key
+ for (int32_t key = 0; key < static_cast(impl_->bitmaps.size()); ++key) {
+ WriteLE32(buf, key);
+ buf += kBitmapKeySizeBytes;
+ size_t written = impl_->bitmaps[key].write(buf, /*portable=*/true);
+ buf += written;
+ }
+
+ return result;
+}
+
+Result RoaringPositionBitmap::Deserialize(std::string_view bytes) {
+ const char* buf = bytes.data();
+ size_t remaining = bytes.size();
+
+ ICEBERG_PRECHECK(remaining >= kBitmapCountSizeBytes,
+ "Buffer too small for bitmap count: {} bytes", remaining);
+
+ int64_t bitmap_count = ReadLE64(buf);
+ buf += kBitmapCountSizeBytes;
+ remaining -= kBitmapCountSizeBytes;
+
+ ICEBERG_PRECHECK(
+ bitmap_count >= 0 && bitmap_count <= std::numeric_limits::max(),
+ "Invalid bitmap count: {}", bitmap_count);
+
+ auto impl = std::make_unique();
+ int32_t last_key = -1;
+ auto remaining_count = static_cast(bitmap_count);
+
+ while (remaining_count > 0) {
+ ICEBERG_PRECHECK(remaining >= kBitmapKeySizeBytes,
+ "Buffer too small for bitmap key: {} bytes", remaining);
+
+ int32_t key = ReadLE32(buf);
+ buf += kBitmapKeySizeBytes;
+ remaining -= kBitmapKeySizeBytes;
+
+ ICEBERG_PRECHECK(key >= 0, "Invalid unsigned key: {}", key);
+ ICEBERG_PRECHECK(key < std::numeric_limits::max(), "Key is too large: {}",
+ key);
+ ICEBERG_PRECHECK(key > last_key,
+ "Keys must be sorted in ascending order, got key {} after {}", key,
+ last_key);
+
+ // Fill gaps with empty bitmaps
+ while (last_key < key - 1) {
+ impl->bitmaps.emplace_back();
+ ++last_key;
+ }
+
+ // Read bitmap using portable safe deserialization.
+ // CRoaring's readSafe may throw on corrupted data.
+ roaring::Roaring bitmap;
+ try {
+ bitmap = roaring::Roaring::readSafe(buf, remaining);
+ } catch (const std::exception& e) {
+ return InvalidArgument("Failed to deserialize bitmap at key {}: {}", key, e.what());
+ }
+ size_t bitmap_size = bitmap.getSizeInBytes(/*portable=*/true);
+ ICEBERG_PRECHECK(
+ bitmap_size <= remaining,
+ "Buffer too small for bitmap key {}: {} bytes needed, {} bytes available", key,
+ bitmap_size, remaining);
+ buf += bitmap_size;
+ remaining -= bitmap_size;
+
+ impl->bitmaps.push_back(std::move(bitmap));
+ last_key = key;
+ --remaining_count;
+ }
+
+ return RoaringPositionBitmap(std::move(impl));
+}
+
+} // namespace iceberg
diff --git a/src/iceberg/deletes/roaring_position_bitmap.h b/src/iceberg/deletes/roaring_position_bitmap.h
new file mode 100644
index 000000000..df197740b
--- /dev/null
+++ b/src/iceberg/deletes/roaring_position_bitmap.h
@@ -0,0 +1,109 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+//github.com/ \file iceberg/deletes/roaring_position_bitmap.h
+//github.com/ A 64-bit position bitmap using an array of 32-bit Roaring bitmaps.
+
+#include
+#include
+#include
+#include
+#include
+
+#include "iceberg/iceberg_export.h"
+#include "iceberg/result.h"
+
+namespace iceberg {
+
+//github.com/ \brief A bitmap that supports positive 64-bit positions, optimized
+//github.com/ for cases where most positions fit in 32 bits.
+//github.com/
+//github.com/ Incoming 64-bit positions are divided into a 32-bit "key" using the
+//github.com/ most significant 4 bytes and a 32-bit position using the least
+//github.com/ significant 4 bytes. For each key, a 32-bit Roaring bitmap is
+//github.com/ maintained to store positions for that key.
+//github.com/
+//github.com/ \note The Puffin deletion-vector-v1 wrapping (length prefix, magic
+//github.com/ bytes, CRC-32) is handled by the Puffin writer/reader layer, not
+//github.com/ this class.
+class ICEBERG_EXPORT RoaringPositionBitmap {
+ public:
+ //github.com/ \brief Maximum supported position.
+ static constexpr int64_t kMaxPosition = 0x7FFFFFFE80000000LL;
+
+ RoaringPositionBitmap();
+ ~RoaringPositionBitmap();
+
+ RoaringPositionBitmap(RoaringPositionBitmap&& other) noexcept;
+ RoaringPositionBitmap& operator=(RoaringPositionBitmap&& other) noexcept;
+
+ RoaringPositionBitmap(const RoaringPositionBitmap&) = delete;
+ RoaringPositionBitmap& operator=(const RoaringPositionBitmap&) = delete;
+
+ //github.com/ \brief Sets a position in the bitmap.
+ //github.com/ \param pos the position (must be >= 0 and <= kMaxPosition)
+ //github.com/ \return Status indicating success or InvalidArgument error
+ [[nodiscard]] Status Add(int64_t pos);
+
+ //github.com/ \brief Sets a range of positions [pos_start, pos_end).
+ //github.com/ \return Status indicating success or InvalidArgument error
+ [[nodiscard]] Status AddRange(int64_t pos_start, int64_t pos_end);
+
+ //github.com/ \brief Checks if a position is set in the bitmap.
+ //github.com/ \return Result or InvalidArgument error
+ [[nodiscard]] Result Contains(int64_t pos) const;
+
+ //github.com/ \brief Returns true if the bitmap has no positions set.
+ bool IsEmpty() const;
+
+ //github.com/ \brief Returns the number of set positions in the bitmap.
+ size_t Cardinality() const;
+
+ //github.com/ \brief Merges all positions from the other bitmap into this one
+ //github.com/ (in-place union).
+ void Or(const RoaringPositionBitmap& other);
+
+ //github.com/ \brief Optimizes the bitmap by applying run-length encoding to
+ //github.com/ containers where it is more space efficient than array or bitset
+ //github.com/ representations.
+ //github.com/ \return true if any container was changed
+ bool Optimize();
+
+ //github.com/ \brief Iterates over all set positions in ascending order.
+ void ForEach(const std::function& fn) const;
+
+ //github.com/ \brief Returns the serialized size in bytes.
+ size_t SerializedSizeInBytes() const;
+
+ //github.com/ \brief Serializes using the portable format (little-endian).
+ Result Serialize() const;
+
+ //github.com/ \brief Deserializes a bitmap from bytes.
+ static Result Deserialize(std::string_view bytes);
+
+ private:
+ struct Impl;
+ std::unique_ptr impl_;
+
+ explicit RoaringPositionBitmap(std::unique_ptr impl);
+};
+
+} // namespace iceberg
diff --git a/src/iceberg/meson.build b/src/iceberg/meson.build
index 2cf1065b0..42f46d5a9 100644
--- a/src/iceberg/meson.build
+++ b/src/iceberg/meson.build
@@ -43,6 +43,7 @@ iceberg_sources = files(
'arrow_c_data_guard_internal.cc',
'catalog/memory/in_memory_catalog.cc',
'delete_file_index.cc',
+ 'deletes/roaring_position_bitmap.cc',
'expression/aggregate.cc',
'expression/binder.cc',
'expression/evaluator.cc',
@@ -221,6 +222,7 @@ install_headers(
)
subdir('catalog')
+subdir('deletes')
subdir('expression')
subdir('manifest')
subdir('puffin')
diff --git a/src/iceberg/test/CMakeLists.txt b/src/iceberg/test/CMakeLists.txt
index 768e0507e..995579b20 100644
--- a/src/iceberg/test/CMakeLists.txt
+++ b/src/iceberg/test/CMakeLists.txt
@@ -124,6 +124,8 @@ add_iceberg_test(util_test
add_iceberg_test(roaring_test SOURCES roaring_test.cc)
+add_iceberg_test(roaring_position_bitmap_test SOURCES roaring_position_bitmap_test.cc)
+
if(ICEBERG_BUILD_BUNDLE)
add_iceberg_test(avro_test
USE_BUNDLE
diff --git a/src/iceberg/test/resources/64map32bitvals.bin b/src/iceberg/test/resources/64map32bitvals.bin
new file mode 100644
index 0000000000000000000000000000000000000000..475b894417e44cff61d8810057fc1530cef05718
GIT binary patch
literal 48
ocmZQ%KmaQP1_nkjmy9
literal 0
HcmV?d00001
diff --git a/src/iceberg/test/resources/64maphighvals.bin b/src/iceberg/test/resources/64maphighvals.bin
new file mode 100644
index 0000000000000000000000000000000000000000..d4312b8d22713991026a36d5d1293cf1960d89ed
GIT binary patch
literal 1086
zcmd;PfPnY=_rj5t0RsagP#7Y>#UKD@!S*SERgUMnOxf@r{zi~~v
PF5QrBO1Grj(uH&%!J7vn
literal 0
HcmV?d00001
diff --git a/src/iceberg/test/roaring_position_bitmap_test.cc b/src/iceberg/test/roaring_position_bitmap_test.cc
new file mode 100644
index 000000000..e3428784e
--- /dev/null
+++ b/src/iceberg/test/roaring_position_bitmap_test.cc
@@ -0,0 +1,490 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "iceberg/deletes/roaring_position_bitmap.h"
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+#include "iceberg/test/matchers.h"
+#include "iceberg/test/test_config.h"
+
+namespace iceberg {
+
+namespace {
+
+constexpr int64_t kBitmapSize = 0xFFFFFFFFL;
+constexpr int64_t kBitmapOffset = kBitmapSize + 1L;
+constexpr int64_t kContainerSize = 0xFFFF; // Character.MAX_VALUE
+constexpr int64_t kContainerOffset = kContainerSize + 1L;
+
+// Helper to construct a position from bitmap index, container index, and value
+int64_t Position(int32_t bitmap_index, int32_t container_index, int64_t value) {
+ return bitmap_index * kBitmapOffset + container_index * kContainerOffset + value;
+}
+
+std::string ReadTestResource(const std::string& filename) {
+ std::filesystem::path path = std::filesystem::path(ICEBERG_TEST_RESOURCES) / filename;
+ std::ifstream file(path, std::ios::binary);
+ EXPECT_TRUE(file.good()) << "Cannot open: " << path;
+ return {std::istreambuf_iterator(file), std::istreambuf_iterator()};
+}
+
+RoaringPositionBitmap RoundTripSerialize(const RoaringPositionBitmap& bitmap) {
+ auto serialized = bitmap.Serialize();
+ EXPECT_THAT(serialized, IsOk());
+ auto deserialized = RoaringPositionBitmap::Deserialize(serialized.value());
+ EXPECT_THAT(deserialized, IsOk());
+ return std::move(deserialized.value());
+}
+
+void AssertEqualContent(const RoaringPositionBitmap& bitmap,
+ const std::set& positions) {
+ ASSERT_EQ(bitmap.Cardinality(), positions.size());
+ for (int64_t pos : positions) {
+ auto result = bitmap.Contains(pos);
+ ASSERT_THAT(result, IsOk()) << "Error for pos: " << pos;
+ ASSERT_TRUE(result.value()) << "Missing position: " << pos;
+ }
+ bitmap.ForEach([&](int64_t pos) {
+ ASSERT_TRUE(positions.count(pos) > 0) << "Unexpected position: " << pos;
+ });
+}
+
+void AssertEqual(RoaringPositionBitmap& bitmap, const std::set& positions) {
+ AssertEqualContent(bitmap, positions);
+
+ auto copy1 = RoundTripSerialize(bitmap);
+ AssertEqualContent(copy1, positions);
+
+ bitmap.Optimize();
+ auto copy2 = RoundTripSerialize(bitmap);
+ AssertEqualContent(copy2, positions);
+}
+
+} // namespace
+
+TEST(RoaringPositionBitmapTest, TestAdd) {
+ RoaringPositionBitmap bitmap;
+
+ EXPECT_THAT(bitmap.Add(10L), IsOk());
+ EXPECT_TRUE(bitmap.Contains(10L).value());
+
+ EXPECT_THAT(bitmap.Add(0L), IsOk());
+ EXPECT_TRUE(bitmap.Contains(0L).value());
+
+ EXPECT_THAT(bitmap.Add(10L), IsOk()); // duplicate
+ EXPECT_TRUE(bitmap.Contains(10L).value());
+}
+
+TEST(RoaringPositionBitmapTest, TestAddPositionsRequiringMultipleBitmaps) {
+ RoaringPositionBitmap bitmap;
+
+ int64_t pos1 = (static_cast(0) << 32) | 10L;
+ int64_t pos2 = (static_cast(1) << 32) | 20L;
+ int64_t pos3 = (static_cast(2) << 32) | 30L;
+ int64_t pos4 = (static_cast(100) << 32) | 40L;
+
+ EXPECT_THAT(bitmap.Add(pos1), IsOk());
+ EXPECT_THAT(bitmap.Add(pos2), IsOk());
+ EXPECT_THAT(bitmap.Add(pos3), IsOk());
+ EXPECT_THAT(bitmap.Add(pos4), IsOk());
+
+ EXPECT_TRUE(bitmap.Contains(pos1).value());
+ EXPECT_TRUE(bitmap.Contains(pos2).value());
+ EXPECT_TRUE(bitmap.Contains(pos3).value());
+ EXPECT_TRUE(bitmap.Contains(pos4).value());
+ EXPECT_EQ(bitmap.Cardinality(), 4);
+ EXPECT_GT(bitmap.SerializedSizeInBytes(), 4);
+}
+
+TEST(RoaringPositionBitmapTest, TestAddRange) {
+ RoaringPositionBitmap bitmap;
+
+ int64_t start = 10;
+ int64_t end = 20;
+ EXPECT_THAT(bitmap.AddRange(start, end), IsOk());
+
+ for (int64_t pos = start; pos < end; ++pos) {
+ EXPECT_TRUE(bitmap.Contains(pos).value());
+ }
+ EXPECT_FALSE(bitmap.Contains(9).value());
+ EXPECT_FALSE(bitmap.Contains(20).value());
+ EXPECT_EQ(bitmap.Cardinality(), 10);
+}
+
+TEST(RoaringPositionBitmapTest, TestAddRangeAcrossKeys) {
+ RoaringPositionBitmap bitmap;
+
+ int64_t start = (static_cast(1) << 32) - 5;
+ int64_t end = (static_cast(1) << 32) + 5;
+ EXPECT_THAT(bitmap.AddRange(start, end), IsOk());
+
+ for (int64_t pos = start; pos < end; ++pos) {
+ EXPECT_TRUE(bitmap.Contains(pos).value());
+ }
+ EXPECT_FALSE(bitmap.Contains(0).value());
+ EXPECT_FALSE(bitmap.Contains(end).value());
+ EXPECT_EQ(bitmap.Cardinality(), 10);
+}
+
+TEST(RoaringPositionBitmapTest, TestAddEmptyRange) {
+ RoaringPositionBitmap bitmap;
+ EXPECT_THAT(bitmap.AddRange(10, 10), IsOk());
+ EXPECT_TRUE(bitmap.IsEmpty());
+}
+
+TEST(RoaringPositionBitmapTest, TestOr) {
+ RoaringPositionBitmap bitmap1;
+ EXPECT_THAT(bitmap1.Add(10L), IsOk());
+ EXPECT_THAT(bitmap1.Add(20L), IsOk());
+
+ RoaringPositionBitmap bitmap2;
+ EXPECT_THAT(bitmap2.Add(30L), IsOk());
+ EXPECT_THAT(bitmap2.Add(40L), IsOk());
+ EXPECT_THAT(bitmap2.Add(static_cast(2) << 32), IsOk());
+
+ bitmap1.Or(bitmap2);
+
+ EXPECT_TRUE(bitmap1.Contains(10L).value());
+ EXPECT_TRUE(bitmap1.Contains(20L).value());
+ EXPECT_TRUE(bitmap1.Contains(30L).value());
+ EXPECT_TRUE(bitmap1.Contains(40L).value());
+ EXPECT_TRUE(bitmap1.Contains(static_cast(2) << 32).value());
+ EXPECT_EQ(bitmap1.Cardinality(), 5);
+
+ // bitmap2 should be unchanged
+ EXPECT_FALSE(bitmap2.Contains(10L).value());
+ EXPECT_FALSE(bitmap2.Contains(20L).value());
+ EXPECT_EQ(bitmap2.Cardinality(), 3);
+}
+
+TEST(RoaringPositionBitmapTest, TestOrWithEmptyBitmap) {
+ RoaringPositionBitmap bitmap1;
+ EXPECT_THAT(bitmap1.Add(10L), IsOk());
+ EXPECT_THAT(bitmap1.Add(20L), IsOk());
+
+ RoaringPositionBitmap empty_bitmap;
+ bitmap1.Or(empty_bitmap);
+
+ EXPECT_TRUE(bitmap1.Contains(10L).value());
+ EXPECT_TRUE(bitmap1.Contains(20L).value());
+ EXPECT_EQ(bitmap1.Cardinality(), 2);
+
+ EXPECT_FALSE(empty_bitmap.Contains(10L).value());
+ EXPECT_FALSE(empty_bitmap.Contains(20L).value());
+ EXPECT_EQ(empty_bitmap.Cardinality(), 0);
+ EXPECT_TRUE(empty_bitmap.IsEmpty());
+}
+
+TEST(RoaringPositionBitmapTest, TestOrWithOverlapping) {
+ RoaringPositionBitmap bitmap1;
+ EXPECT_THAT(bitmap1.Add(10L), IsOk());
+ EXPECT_THAT(bitmap1.Add(20L), IsOk());
+ EXPECT_THAT(bitmap1.Add(30L), IsOk());
+
+ RoaringPositionBitmap bitmap2;
+ EXPECT_THAT(bitmap2.Add(20L), IsOk());
+ EXPECT_THAT(bitmap2.Add(40L), IsOk());
+
+ bitmap1.Or(bitmap2);
+
+ EXPECT_TRUE(bitmap1.Contains(10L).value());
+ EXPECT_TRUE(bitmap1.Contains(20L).value());
+ EXPECT_TRUE(bitmap1.Contains(30L).value());
+ EXPECT_TRUE(bitmap1.Contains(40L).value());
+ EXPECT_EQ(bitmap1.Cardinality(), 4);
+
+ EXPECT_FALSE(bitmap2.Contains(10L).value());
+ EXPECT_TRUE(bitmap2.Contains(20L).value());
+ EXPECT_FALSE(bitmap2.Contains(30L).value());
+ EXPECT_TRUE(bitmap2.Contains(40L).value());
+ EXPECT_EQ(bitmap2.Cardinality(), 2);
+}
+
+TEST(RoaringPositionBitmapTest, TestOrSparseBitmaps) {
+ RoaringPositionBitmap bitmap1;
+ EXPECT_THAT(bitmap1.Add((static_cast(0) << 32) | 100L), IsOk());
+ EXPECT_THAT(bitmap1.Add((static_cast(1) << 32) | 200L), IsOk());
+
+ RoaringPositionBitmap bitmap2;
+ EXPECT_THAT(bitmap2.Add((static_cast(2) << 32) | 300L), IsOk());
+ EXPECT_THAT(bitmap2.Add((static_cast(3) << 32) | 400L), IsOk());
+
+ bitmap1.Or(bitmap2);
+
+ EXPECT_TRUE(bitmap1.Contains((static_cast(0) << 32) | 100L).value());
+ EXPECT_TRUE(bitmap1.Contains((static_cast(1) << 32) | 200L).value());
+ EXPECT_TRUE(bitmap1.Contains((static_cast(2) << 32) | 300L).value());
+ EXPECT_TRUE(bitmap1.Contains((static_cast(3) << 32) | 400L).value());
+ EXPECT_EQ(bitmap1.Cardinality(), 4);
+}
+
+TEST(RoaringPositionBitmapTest, TestCardinality) {
+ RoaringPositionBitmap bitmap;
+
+ EXPECT_EQ(bitmap.Cardinality(), 0);
+
+ EXPECT_THAT(bitmap.Add(10L), IsOk());
+ EXPECT_THAT(bitmap.Add(20L), IsOk());
+ EXPECT_THAT(bitmap.Add(30L), IsOk());
+ EXPECT_EQ(bitmap.Cardinality(), 3);
+
+ EXPECT_THAT(bitmap.Add(10L), IsOk()); // already exists
+ EXPECT_EQ(bitmap.Cardinality(), 3);
+}
+
+TEST(RoaringPositionBitmapTest, TestCardinalitySparseBitmaps) {
+ RoaringPositionBitmap bitmap;
+
+ EXPECT_THAT(bitmap.Add((static_cast(0) << 32) | 100L), IsOk());
+ EXPECT_THAT(bitmap.Add((static_cast(0) << 32) | 101L), IsOk());
+ EXPECT_THAT(bitmap.Add((static_cast(0) << 32) | 105L), IsOk());
+ EXPECT_THAT(bitmap.Add((static_cast(1) << 32) | 200L), IsOk());
+ EXPECT_THAT(bitmap.Add((static_cast(100) << 32) | 300L), IsOk());
+
+ EXPECT_EQ(bitmap.Cardinality(), 5);
+}
+
+TEST(RoaringPositionBitmapTest, TestSerializeDeserializeRoundTrip) {
+ RoaringPositionBitmap bitmap;
+ EXPECT_THAT(bitmap.Add(10L), IsOk());
+ EXPECT_THAT(bitmap.Add(20L), IsOk());
+ EXPECT_THAT(bitmap.Add((static_cast(1) << 32) | 30L), IsOk());
+
+ auto copy = RoundTripSerialize(bitmap);
+
+ EXPECT_EQ(copy.Cardinality(), bitmap.Cardinality());
+ EXPECT_TRUE(copy.Contains(10L).value());
+ EXPECT_TRUE(copy.Contains(20L).value());
+ EXPECT_TRUE(copy.Contains((static_cast(1) << 32) | 30L).value());
+}
+
+TEST(RoaringPositionBitmapTest, TestSerializeDeserializeEmpty) {
+ RoaringPositionBitmap bitmap;
+ auto copy = RoundTripSerialize(bitmap);
+ EXPECT_TRUE(copy.IsEmpty());
+ EXPECT_EQ(copy.Cardinality(), 0);
+}
+
+TEST(RoaringPositionBitmapTest, TestSerializeDeserializeAllContainerBitmap) {
+ RoaringPositionBitmap bitmap;
+
+ // bitmap 0, container 0 (array - few elements)
+ EXPECT_THAT(bitmap.Add(Position(0, 0, 5)), IsOk());
+ EXPECT_THAT(bitmap.Add(Position(0, 0, 7)), IsOk());
+
+ // bitmap 0, container 1 (array that can be compressed)
+ EXPECT_THAT(bitmap.AddRange(Position(0, 1, 1), Position(0, 1, 1000)), IsOk());
+
+ // bitmap 0, container 2 (bitset - nearly full container)
+ EXPECT_THAT(bitmap.AddRange(Position(0, 2, 1), Position(0, 2, kContainerOffset - 1)),
+ IsOk());
+
+ // bitmap 1, container 0 (array)
+ EXPECT_THAT(bitmap.Add(Position(1, 0, 10)), IsOk());
+ EXPECT_THAT(bitmap.Add(Position(1, 0, 20)), IsOk());
+
+ // bitmap 1, container 1 (array that can be compressed)
+ EXPECT_THAT(bitmap.AddRange(Position(1, 1, 10), Position(1, 1, 500)), IsOk());
+
+ // bitmap 1, container 2 (bitset)
+ EXPECT_THAT(bitmap.AddRange(Position(1, 2, 1), Position(1, 2, kContainerOffset - 1)),
+ IsOk());
+
+ EXPECT_TRUE(bitmap.Optimize());
+
+ auto copy = RoundTripSerialize(bitmap);
+
+ EXPECT_EQ(copy.Cardinality(), bitmap.Cardinality());
+ copy.ForEach([&](int64_t pos) { EXPECT_TRUE(bitmap.Contains(pos).value()); });
+ bitmap.ForEach([&](int64_t pos) { EXPECT_TRUE(copy.Contains(pos).value()); });
+}
+
+TEST(RoaringPositionBitmapTest, TestForEach) {
+ RoaringPositionBitmap bitmap;
+ EXPECT_THAT(bitmap.Add(30L), IsOk());
+ EXPECT_THAT(bitmap.Add(10L), IsOk());
+ EXPECT_THAT(bitmap.Add(20L), IsOk());
+ EXPECT_THAT(bitmap.Add((static_cast(1) << 32) | 5L), IsOk());
+
+ std::vector positions;
+ bitmap.ForEach([&](int64_t pos) { positions.push_back(pos); });
+
+ ASSERT_EQ(positions.size(), 4u);
+ EXPECT_EQ(positions[0], 10L);
+ EXPECT_EQ(positions[1], 20L);
+ EXPECT_EQ(positions[2], 30L);
+ EXPECT_EQ(positions[3], (static_cast(1) << 32) | 5L);
+}
+
+TEST(RoaringPositionBitmapTest, TestIsEmpty) {
+ RoaringPositionBitmap bitmap;
+ EXPECT_TRUE(bitmap.IsEmpty());
+
+ EXPECT_THAT(bitmap.Add(10L), IsOk());
+ EXPECT_FALSE(bitmap.IsEmpty());
+}
+
+TEST(RoaringPositionBitmapTest, TestOptimize) {
+ RoaringPositionBitmap bitmap;
+ EXPECT_THAT(bitmap.AddRange(0, 10000), IsOk());
+
+ bool changed = bitmap.Optimize();
+ EXPECT_TRUE(changed);
+
+ // Content should be unchanged after RLE optimization
+ EXPECT_EQ(bitmap.Cardinality(), 10000);
+ for (int64_t i = 0; i < 10000; ++i) {
+ EXPECT_TRUE(bitmap.Contains(i).value());
+ }
+
+ // Round-trip should preserve content after RLE
+ auto copy = RoundTripSerialize(bitmap);
+ EXPECT_EQ(copy.Cardinality(), 10000);
+}
+
+TEST(RoaringPositionBitmapTest, TestUnsupportedPositions) {
+ RoaringPositionBitmap bitmap;
+
+ // Negative position
+ EXPECT_THAT(bitmap.Add(-1L), IsError(ErrorKind::kInvalidArgument));
+
+ // Contains with negative position
+ EXPECT_THAT(bitmap.Contains(-1L), IsError(ErrorKind::kInvalidArgument));
+
+ // Position exceeding MAX_POSITION
+ EXPECT_THAT(bitmap.Add(RoaringPositionBitmap::kMaxPosition + 1L),
+ IsError(ErrorKind::kInvalidArgument));
+
+ // Contains with position exceeding MAX_POSITION
+ EXPECT_THAT(bitmap.Contains(RoaringPositionBitmap::kMaxPosition + 1L),
+ IsError(ErrorKind::kInvalidArgument));
+}
+
+TEST(RoaringPositionBitmapTest, TestDeserializeSupportedRoaringExamples) {
+ for (const auto& file :
+ {"64map32bitvals.bin", "64mapempty.bin", "64mapspreadvals.bin"}) {
+ std::string data = ReadTestResource(file);
+ auto result = RoaringPositionBitmap::Deserialize(data);
+ EXPECT_THAT(RoaringPositionBitmap::Deserialize(data), IsOk());
+ }
+}
+
+TEST(RoaringPositionBitmapTest, TestDeserializeUnsupportedRoaringExample) {
+ // This file contains a value with key larger than max supported
+ std::string data = ReadTestResource("64maphighvals.bin");
+ auto result = RoaringPositionBitmap::Deserialize(data);
+ EXPECT_THAT(result, IsError(ErrorKind::kInvalidArgument));
+ EXPECT_THAT(result, HasErrorMessage("Invalid unsigned key"));
+}
+
+TEST(RoaringPositionBitmapTest, TestRandomSparseBitmap) {
+ std::mt19937_64 rng(42);
+ RoaringPositionBitmap bitmap;
+ std::set positions;
+
+ std::uniform_int_distribution dist(0, static_cast(5) << 32);
+
+ for (int i = 0; i < 100000; ++i) {
+ int64_t pos = dist(rng);
+ positions.insert(pos);
+ ASSERT_THAT(bitmap.Add(pos), IsOk());
+ }
+
+ AssertEqual(bitmap, positions);
+
+ // Random lookups
+ std::mt19937_64 rng2(123);
+ std::uniform_int_distribution lookup_dist(0,
+ RoaringPositionBitmap::kMaxPosition);
+ for (int i = 0; i < 20000; ++i) {
+ int64_t pos = lookup_dist(rng2);
+ auto result = bitmap.Contains(pos);
+ ASSERT_THAT(result, IsOk());
+ EXPECT_EQ(result.value(), positions.count(pos) > 0);
+ }
+}
+
+TEST(RoaringPositionBitmapTest, TestRandomDenseBitmap) {
+ RoaringPositionBitmap bitmap;
+ std::set positions;
+
+ // Create dense ranges across multiple bitmap keys
+ for (int64_t offset : {static_cast(0), static_cast(2) << 32,
+ static_cast(5) << 32}) {
+ for (int64_t i = 0; i < 10000; ++i) {
+ ASSERT_THAT(bitmap.Add(offset + i), IsOk());
+ positions.insert(offset + i);
+ }
+ }
+
+ AssertEqual(bitmap, positions);
+}
+
+TEST(RoaringPositionBitmapTest, TestRandomMixedBitmap) {
+ std::mt19937_64 rng(42);
+ RoaringPositionBitmap bitmap;
+ std::set positions;
+
+ // Sparse positions in [3<<32, 5<<32)
+ std::uniform_int_distribution dist1(static_cast(3) << 32,
+ static_cast(5) << 32);
+ for (int i = 0; i < 50000; ++i) {
+ int64_t pos = dist1(rng);
+ positions.insert(pos);
+ ASSERT_THAT(bitmap.Add(pos), IsOk());
+ }
+
+ // Dense range in [0, 10000)
+ for (int64_t i = 0; i < 10000; ++i) {
+ ASSERT_THAT(bitmap.Add(i), IsOk());
+ positions.insert(i);
+ }
+
+ // More sparse positions in [0, 1<<32)
+ std::uniform_int_distribution dist2(0, static_cast(1) << 32);
+ for (int i = 0; i < 5000; ++i) {
+ int64_t pos = dist2(rng);
+ positions.insert(pos);
+ ASSERT_THAT(bitmap.Add(pos), IsOk());
+ }
+
+ AssertEqual(bitmap, positions);
+}
+
+TEST(RoaringPositionBitmapTest, TestDeserializeInvalidData) {
+ // Buffer too small
+ auto result = RoaringPositionBitmap::Deserialize("");
+ EXPECT_THAT(result, IsError(ErrorKind::kInvalidArgument));
+
+ // Invalid bitmap count (very large)
+ std::string buf(8, '\xFF');
+ result = RoaringPositionBitmap::Deserialize(buf);
+ EXPECT_THAT(result, IsError(ErrorKind::kInvalidArgument));
+}
+
+} // namespace iceberg
From c3d16d8ae7bcc6b48e9701123fef13f52db31066 Mon Sep 17 00:00:00 2001
From: Gang Wu
Date: Tue, 24 Mar 2026 10:14:26 +0800
Subject: [PATCH 2/5] address comments and polish test
---
.../deletes/roaring_position_bitmap.cc | 22 +-
src/iceberg/deletes/roaring_position_bitmap.h | 6 +-
src/iceberg/test/CMakeLists.txt | 3 +-
src/iceberg/test/meson.build | 1 +
.../test/roaring_position_bitmap_test.cc | 505 ++++++++++--------
5 files changed, 314 insertions(+), 223 deletions(-)
diff --git a/src/iceberg/deletes/roaring_position_bitmap.cc b/src/iceberg/deletes/roaring_position_bitmap.cc
index 2d812ef21..e47c2551f 100644
--- a/src/iceberg/deletes/roaring_position_bitmap.cc
+++ b/src/iceberg/deletes/roaring_position_bitmap.cc
@@ -41,12 +41,12 @@ constexpr size_t kBitmapKeySizeBytes = 4;
int32_t Key(int64_t pos) { return static_cast(pos >> 32); }
// Extracts low 32 bits from a 64-bit position.
-uint32_t Pos32Bits(int64_t pos) { return static_cast(pos); }
+uint32_t Pos32Bits(int64_t pos) { return static_cast(0xFFFFFFFF & pos); }
// Combines key (high 32 bits) and pos32 (low 32 bits) into a 64-bit
// position. The low 32 bits are zero-extended to avoid sign extension.
int64_t ToPosition(int32_t key, uint32_t pos32) {
- return (static_cast(key) << 32) | static_cast(pos32);
+ return (int64_t{key} << 32) | int64_t{pos32};
}
void WriteLE64(char* buf, int64_t value) {
@@ -95,6 +95,20 @@ RoaringPositionBitmap::RoaringPositionBitmap() : impl_(std::make_unique())
RoaringPositionBitmap::~RoaringPositionBitmap() = default;
+RoaringPositionBitmap::RoaringPositionBitmap(const RoaringPositionBitmap& other)
+ : impl_(other.impl_ != nullptr ? std::make_unique(*other.impl_)
+ : std::make_unique()) {}
+
+RoaringPositionBitmap& RoaringPositionBitmap::operator=(
+ const RoaringPositionBitmap& other) {
+ if (this == &other) {
+ return *this;
+ }
+ impl_ = other.impl_ != nullptr ? std::make_unique(*other.impl_)
+ : std::make_unique();
+ return *this;
+}
+
RoaringPositionBitmap::RoaringPositionBitmap(RoaringPositionBitmap&&) noexcept = default;
RoaringPositionBitmap& RoaringPositionBitmap::operator=(
@@ -167,6 +181,8 @@ size_t RoaringPositionBitmap::SerializedSizeInBytes() const {
return size;
}
+// Serializes using the portable format (little-endian).
+// See https://iceberg.apache.org/puffin-spec/#deletion-vector-v1-blob-type
Result RoaringPositionBitmap::Serialize() const {
size_t size = SerializedSizeInBytes();
std::string result(size, '\0');
@@ -243,7 +259,7 @@ Result RoaringPositionBitmap::Deserialize(std::string_vie
buf += bitmap_size;
remaining -= bitmap_size;
- impl->bitmaps.push_back(std::move(bitmap));
+ impl->bitmaps.emplace_back(std::move(bitmap));
last_key = key;
--remaining_count;
}
diff --git a/src/iceberg/deletes/roaring_position_bitmap.h b/src/iceberg/deletes/roaring_position_bitmap.h
index df197740b..650160c1a 100644
--- a/src/iceberg/deletes/roaring_position_bitmap.h
+++ b/src/iceberg/deletes/roaring_position_bitmap.h
@@ -46,7 +46,7 @@ namespace iceberg {
//github.com/ this class.
class ICEBERG_EXPORT RoaringPositionBitmap {
public:
- //github.com/ \brief Maximum supported position.
+ //github.com/ \brief Maximum supported position (aligned with the Java implementation).
static constexpr int64_t kMaxPosition = 0x7FFFFFFE80000000LL;
RoaringPositionBitmap();
@@ -55,8 +55,8 @@ class ICEBERG_EXPORT RoaringPositionBitmap {
RoaringPositionBitmap(RoaringPositionBitmap&& other) noexcept;
RoaringPositionBitmap& operator=(RoaringPositionBitmap&& other) noexcept;
- RoaringPositionBitmap(const RoaringPositionBitmap&) = delete;
- RoaringPositionBitmap& operator=(const RoaringPositionBitmap&) = delete;
+ RoaringPositionBitmap(const RoaringPositionBitmap& other);
+ RoaringPositionBitmap& operator=(const RoaringPositionBitmap& other);
//github.com/ \brief Sets a position in the bitmap.
//github.com/ \param pos the position (must be >= 0 and <= kMaxPosition)
diff --git a/src/iceberg/test/CMakeLists.txt b/src/iceberg/test/CMakeLists.txt
index 995579b20..c802df8a5 100644
--- a/src/iceberg/test/CMakeLists.txt
+++ b/src/iceberg/test/CMakeLists.txt
@@ -115,6 +115,7 @@ add_iceberg_test(util_test
endian_test.cc
formatter_test.cc
location_util_test.cc
+ roaring_position_bitmap_test.cc
string_util_test.cc
transform_util_test.cc
truncate_util_test.cc
@@ -124,8 +125,6 @@ add_iceberg_test(util_test
add_iceberg_test(roaring_test SOURCES roaring_test.cc)
-add_iceberg_test(roaring_position_bitmap_test SOURCES roaring_position_bitmap_test.cc)
-
if(ICEBERG_BUILD_BUNDLE)
add_iceberg_test(avro_test
USE_BUNDLE
diff --git a/src/iceberg/test/meson.build b/src/iceberg/test/meson.build
index df2d5db8e..1eafc5fe1 100644
--- a/src/iceberg/test/meson.build
+++ b/src/iceberg/test/meson.build
@@ -90,6 +90,7 @@ iceberg_tests = {
'endian_test.cc',
'formatter_test.cc',
'location_util_test.cc',
+ 'roaring_position_bitmap_test.cc',
'string_util_test.cc',
'transform_util_test.cc',
'truncate_util_test.cc',
diff --git a/src/iceberg/test/roaring_position_bitmap_test.cc b/src/iceberg/test/roaring_position_bitmap_test.cc
index e3428784e..8d7020b09 100644
--- a/src/iceberg/test/roaring_position_bitmap_test.cc
+++ b/src/iceberg/test/roaring_position_bitmap_test.cc
@@ -19,6 +19,7 @@
#include "iceberg/deletes/roaring_position_bitmap.h"
+#include
#include
#include
#include
@@ -65,12 +66,11 @@ void AssertEqualContent(const RoaringPositionBitmap& bitmap,
const std::set& positions) {
ASSERT_EQ(bitmap.Cardinality(), positions.size());
for (int64_t pos : positions) {
- auto result = bitmap.Contains(pos);
- ASSERT_THAT(result, IsOk()) << "Error for pos: " << pos;
- ASSERT_TRUE(result.value()) << "Missing position: " << pos;
+ ASSERT_THAT(bitmap.Contains(pos), HasValue(::testing::Eq(true)))
+ << "Position not found: " << pos;
}
bitmap.ForEach([&](int64_t pos) {
- ASSERT_TRUE(positions.count(pos) > 0) << "Unexpected position: " << pos;
+ ASSERT_TRUE(positions.contains(pos)) << "Unexpected position: " << pos;
});
}
@@ -85,329 +85,368 @@ void AssertEqual(RoaringPositionBitmap& bitmap, const std::set& positio
AssertEqualContent(copy2, positions);
}
-} // namespace
-
-TEST(RoaringPositionBitmapTest, TestAdd) {
+RoaringPositionBitmap BuildBitmap(const std::set& positions) {
RoaringPositionBitmap bitmap;
-
- EXPECT_THAT(bitmap.Add(10L), IsOk());
- EXPECT_TRUE(bitmap.Contains(10L).value());
-
- EXPECT_THAT(bitmap.Add(0L), IsOk());
- EXPECT_TRUE(bitmap.Contains(0L).value());
-
- EXPECT_THAT(bitmap.Add(10L), IsOk()); // duplicate
- EXPECT_TRUE(bitmap.Contains(10L).value());
+ for (int64_t pos : positions) {
+ EXPECT_THAT(bitmap.Add(pos), IsOk());
+ }
+ return bitmap;
}
-TEST(RoaringPositionBitmapTest, TestAddPositionsRequiringMultipleBitmaps) {
- RoaringPositionBitmap bitmap;
+struct AddRangeParams {
+ const char* name;
+ int64_t start;
+ int64_t end;
+ std::vector absent_positions;
+};
- int64_t pos1 = (static_cast(0) << 32) | 10L;
- int64_t pos2 = (static_cast(1) << 32) | 20L;
- int64_t pos3 = (static_cast(2) << 32) | 30L;
- int64_t pos4 = (static_cast(100) << 32) | 40L;
-
- EXPECT_THAT(bitmap.Add(pos1), IsOk());
- EXPECT_THAT(bitmap.Add(pos2), IsOk());
- EXPECT_THAT(bitmap.Add(pos3), IsOk());
- EXPECT_THAT(bitmap.Add(pos4), IsOk());
-
- EXPECT_TRUE(bitmap.Contains(pos1).value());
- EXPECT_TRUE(bitmap.Contains(pos2).value());
- EXPECT_TRUE(bitmap.Contains(pos3).value());
- EXPECT_TRUE(bitmap.Contains(pos4).value());
- EXPECT_EQ(bitmap.Cardinality(), 4);
- EXPECT_GT(bitmap.SerializedSizeInBytes(), 4);
-}
+class RoaringPositionBitmapAddRangeTest
+ : public ::testing::TestWithParam {};
-TEST(RoaringPositionBitmapTest, TestAddRange) {
+TEST_P(RoaringPositionBitmapAddRangeTest, AddsExpectedPositions) {
+ const auto& param = GetParam();
RoaringPositionBitmap bitmap;
+ ASSERT_THAT(bitmap.AddRange(param.start, param.end), IsOk());
- int64_t start = 10;
- int64_t end = 20;
- EXPECT_THAT(bitmap.AddRange(start, end), IsOk());
-
- for (int64_t pos = start; pos < end; ++pos) {
- EXPECT_TRUE(bitmap.Contains(pos).value());
+ std::set expected_positions;
+ for (int64_t pos = param.start; pos < param.end; ++pos) {
+ expected_positions.insert(pos);
+ }
+ AssertEqualContent(bitmap, expected_positions);
+ for (int64_t pos : param.absent_positions) {
+ ASSERT_FALSE(bitmap.Contains(pos).value());
}
- EXPECT_FALSE(bitmap.Contains(9).value());
- EXPECT_FALSE(bitmap.Contains(20).value());
- EXPECT_EQ(bitmap.Cardinality(), 10);
}
-TEST(RoaringPositionBitmapTest, TestAddRangeAcrossKeys) {
- RoaringPositionBitmap bitmap;
+INSTANTIATE_TEST_SUITE_P(
+ AddRangeScenarios, RoaringPositionBitmapAddRangeTest,
+ ::testing::Values(AddRangeParams{.name = "single_key",
+ .start = 10,
+ .end = 20,
+ .absent_positions = {9, 20}},
+ AddRangeParams{
+ .name = "across_keys",
+ .start = (int64_t{1} << 32) - 5,
+ .end = (int64_t{1} << 32) + 5,
+ .absent_positions = {0, (int64_t{1} << 32) + 5},
+ }),
+ [](const ::testing::TestParamInfo& info) { return info.param.name; });
+
+struct OrParams {
+ const char* name;
+ std::set lhs_input;
+ std::set rhs_input;
+ std::set lhs_expected;
+ std::set rhs_expected;
+};
+
+class RoaringPositionBitmapOrTest : public ::testing::TestWithParam {};
+
+TEST_P(RoaringPositionBitmapOrTest, ProducesExpectedUnionAndKeepsRhsUnchanged) {
+ const auto& param = GetParam();
+ auto lhs = BuildBitmap(param.lhs_input);
+ auto rhs = BuildBitmap(param.rhs_input);
+
+ lhs.Or(rhs);
+
+ AssertEqualContent(lhs, param.lhs_expected);
+ AssertEqualContent(rhs, param.rhs_expected);
+}
- int64_t start = (static_cast(1) << 32) - 5;
- int64_t end = (static_cast(1) << 32) + 5;
- EXPECT_THAT(bitmap.AddRange(start, end), IsOk());
+INSTANTIATE_TEST_SUITE_P(
+ OrScenarios, RoaringPositionBitmapOrTest,
+ ::testing::Values(
+ OrParams{
+ .name = "disjoint",
+ .lhs_input = {10L, 20L},
+ .rhs_input = {30L, 40L, int64_t{2} << 32},
+ .lhs_expected = {10L, 20L, 30L, 40L, int64_t{2} << 32},
+ .rhs_expected = {30L, 40L, int64_t{2} << 32},
+ },
+ OrParams{
+ .name = "rhs_empty",
+ .lhs_input = {10L, 20L},
+ .rhs_input = {},
+ .lhs_expected = {10L, 20L},
+ .rhs_expected = {},
+ },
+ OrParams{
+ .name = "overlapping",
+ .lhs_input = {10L, 20L, 30L},
+ .rhs_input = {20L, 40L},
+ .lhs_expected = {10L, 20L, 30L, 40L},
+ .rhs_expected = {20L, 40L},
+ },
+ OrParams{
+ .name = "sparse_multi_key",
+ .lhs_input = {100L, (int64_t{1} << 32) | 200L},
+ .rhs_input = {(int64_t{2} << 32) | 300L, (int64_t{3} << 32) | 400L},
+ .lhs_expected = {100L, (int64_t{1} << 32) | 200L, (int64_t{2} << 32) | 300L,
+ (int64_t{3} << 32) | 400L},
+ .rhs_expected = {(int64_t{2} << 32) | 300L, (int64_t{3} << 32) | 400L},
+ }),
+ [](const ::testing::TestParamInfo& info) { return info.param.name; });
+
+enum class InteropBitmapShape {
+ kEmpty,
+ kOnly32BitPositions,
+ kSpreadAcrossKeys,
+};
+
+struct InteropCase {
+ const char* file_name;
+ InteropBitmapShape expected_shape;
+};
+
+void AssertInteropBitmapShape(const RoaringPositionBitmap& bitmap,
+ InteropBitmapShape expected_shape) {
+ bool saw_pos_lt_32_bit = false;
+ bool saw_pos_ge_32_bit = false;
- for (int64_t pos = start; pos < end; ++pos) {
- EXPECT_TRUE(bitmap.Contains(pos).value());
- }
- EXPECT_FALSE(bitmap.Contains(0).value());
- EXPECT_FALSE(bitmap.Contains(end).value());
- EXPECT_EQ(bitmap.Cardinality(), 10);
-}
+ bitmap.ForEach([&](int64_t pos) {
+ if (pos < (int64_t{1} << 32)) {
+ saw_pos_lt_32_bit = true;
+ } else {
+ saw_pos_ge_32_bit = true;
+ }
+ });
-TEST(RoaringPositionBitmapTest, TestAddEmptyRange) {
- RoaringPositionBitmap bitmap;
- EXPECT_THAT(bitmap.AddRange(10, 10), IsOk());
- EXPECT_TRUE(bitmap.IsEmpty());
+ switch (expected_shape) {
+ case InteropBitmapShape::kEmpty:
+ ASSERT_TRUE(bitmap.IsEmpty());
+ ASSERT_EQ(bitmap.Cardinality(), 0u);
+ break;
+ case InteropBitmapShape::kOnly32BitPositions:
+ ASSERT_GT(bitmap.Cardinality(), 0u);
+ ASSERT_TRUE(saw_pos_lt_32_bit);
+ ASSERT_FALSE(saw_pos_ge_32_bit);
+ break;
+ case InteropBitmapShape::kSpreadAcrossKeys:
+ ASSERT_GT(bitmap.Cardinality(), 0u);
+ ASSERT_TRUE(saw_pos_lt_32_bit);
+ ASSERT_TRUE(saw_pos_ge_32_bit);
+ break;
+ }
}
-TEST(RoaringPositionBitmapTest, TestOr) {
- RoaringPositionBitmap bitmap1;
- EXPECT_THAT(bitmap1.Add(10L), IsOk());
- EXPECT_THAT(bitmap1.Add(20L), IsOk());
-
- RoaringPositionBitmap bitmap2;
- EXPECT_THAT(bitmap2.Add(30L), IsOk());
- EXPECT_THAT(bitmap2.Add(40L), IsOk());
- EXPECT_THAT(bitmap2.Add(static_cast(2) << 32), IsOk());
-
- bitmap1.Or(bitmap2);
-
- EXPECT_TRUE(bitmap1.Contains(10L).value());
- EXPECT_TRUE(bitmap1.Contains(20L).value());
- EXPECT_TRUE(bitmap1.Contains(30L).value());
- EXPECT_TRUE(bitmap1.Contains(40L).value());
- EXPECT_TRUE(bitmap1.Contains(static_cast(2) << 32).value());
- EXPECT_EQ(bitmap1.Cardinality(), 5);
-
- // bitmap2 should be unchanged
- EXPECT_FALSE(bitmap2.Contains(10L).value());
- EXPECT_FALSE(bitmap2.Contains(20L).value());
- EXPECT_EQ(bitmap2.Cardinality(), 3);
-}
+} // namespace
-TEST(RoaringPositionBitmapTest, TestOrWithEmptyBitmap) {
- RoaringPositionBitmap bitmap1;
- EXPECT_THAT(bitmap1.Add(10L), IsOk());
- EXPECT_THAT(bitmap1.Add(20L), IsOk());
+TEST(RoaringPositionBitmapTest, TestAdd) {
+ RoaringPositionBitmap bitmap;
- RoaringPositionBitmap empty_bitmap;
- bitmap1.Or(empty_bitmap);
+ ASSERT_THAT(bitmap.Add(10L), IsOk());
+ ASSERT_TRUE(bitmap.Contains(10L).value());
- EXPECT_TRUE(bitmap1.Contains(10L).value());
- EXPECT_TRUE(bitmap1.Contains(20L).value());
- EXPECT_EQ(bitmap1.Cardinality(), 2);
+ ASSERT_THAT(bitmap.Add(0L), IsOk());
+ ASSERT_TRUE(bitmap.Contains(0L).value());
- EXPECT_FALSE(empty_bitmap.Contains(10L).value());
- EXPECT_FALSE(empty_bitmap.Contains(20L).value());
- EXPECT_EQ(empty_bitmap.Cardinality(), 0);
- EXPECT_TRUE(empty_bitmap.IsEmpty());
+ ASSERT_THAT(bitmap.Add(10L), IsOk()); // duplicate
+ ASSERT_TRUE(bitmap.Contains(10L).value());
}
-TEST(RoaringPositionBitmapTest, TestOrWithOverlapping) {
- RoaringPositionBitmap bitmap1;
- EXPECT_THAT(bitmap1.Add(10L), IsOk());
- EXPECT_THAT(bitmap1.Add(20L), IsOk());
- EXPECT_THAT(bitmap1.Add(30L), IsOk());
-
- RoaringPositionBitmap bitmap2;
- EXPECT_THAT(bitmap2.Add(20L), IsOk());
- EXPECT_THAT(bitmap2.Add(40L), IsOk());
-
- bitmap1.Or(bitmap2);
-
- EXPECT_TRUE(bitmap1.Contains(10L).value());
- EXPECT_TRUE(bitmap1.Contains(20L).value());
- EXPECT_TRUE(bitmap1.Contains(30L).value());
- EXPECT_TRUE(bitmap1.Contains(40L).value());
- EXPECT_EQ(bitmap1.Cardinality(), 4);
-
- EXPECT_FALSE(bitmap2.Contains(10L).value());
- EXPECT_TRUE(bitmap2.Contains(20L).value());
- EXPECT_FALSE(bitmap2.Contains(30L).value());
- EXPECT_TRUE(bitmap2.Contains(40L).value());
- EXPECT_EQ(bitmap2.Cardinality(), 2);
-}
+TEST(RoaringPositionBitmapTest, TestAddPositionsRequiringMultipleBitmaps) {
+ RoaringPositionBitmap bitmap;
-TEST(RoaringPositionBitmapTest, TestOrSparseBitmaps) {
- RoaringPositionBitmap bitmap1;
- EXPECT_THAT(bitmap1.Add((static_cast(0) << 32) | 100L), IsOk());
- EXPECT_THAT(bitmap1.Add((static_cast(1) << 32) | 200L), IsOk());
+ int64_t pos1 = 10L;
+ int64_t pos2 = (int64_t{1} << 32) | 20L;
+ int64_t pos3 = (int64_t{2} << 32) | 30L;
+ int64_t pos4 = (int64_t{100} << 32) | 40L;
- RoaringPositionBitmap bitmap2;
- EXPECT_THAT(bitmap2.Add((static_cast(2) << 32) | 300L), IsOk());
- EXPECT_THAT(bitmap2.Add((static_cast(3) << 32) | 400L), IsOk());
+ ASSERT_THAT(bitmap.Add(pos1), IsOk());
+ ASSERT_THAT(bitmap.Add(pos2), IsOk());
+ ASSERT_THAT(bitmap.Add(pos3), IsOk());
+ ASSERT_THAT(bitmap.Add(pos4), IsOk());
- bitmap1.Or(bitmap2);
+ AssertEqualContent(bitmap, {pos1, pos2, pos3, pos4});
+ ASSERT_EQ(bitmap.SerializedSizeInBytes(), 1260);
+}
- EXPECT_TRUE(bitmap1.Contains((static_cast(0) << 32) | 100L).value());
- EXPECT_TRUE(bitmap1.Contains((static_cast(1) << 32) | 200L).value());
- EXPECT_TRUE(bitmap1.Contains((static_cast(2) << 32) | 300L).value());
- EXPECT_TRUE(bitmap1.Contains((static_cast(3) << 32) | 400L).value());
- EXPECT_EQ(bitmap1.Cardinality(), 4);
+TEST(RoaringPositionBitmapTest, TestAddEmptyRange) {
+ RoaringPositionBitmap bitmap;
+ ASSERT_THAT(bitmap.AddRange(10, 10), IsOk());
+ ASSERT_TRUE(bitmap.IsEmpty());
}
TEST(RoaringPositionBitmapTest, TestCardinality) {
RoaringPositionBitmap bitmap;
- EXPECT_EQ(bitmap.Cardinality(), 0);
+ ASSERT_EQ(bitmap.Cardinality(), 0);
- EXPECT_THAT(bitmap.Add(10L), IsOk());
- EXPECT_THAT(bitmap.Add(20L), IsOk());
- EXPECT_THAT(bitmap.Add(30L), IsOk());
- EXPECT_EQ(bitmap.Cardinality(), 3);
+ ASSERT_THAT(bitmap.Add(10L), IsOk());
+ ASSERT_THAT(bitmap.Add(20L), IsOk());
+ ASSERT_THAT(bitmap.Add(30L), IsOk());
+ ASSERT_EQ(bitmap.Cardinality(), 3);
- EXPECT_THAT(bitmap.Add(10L), IsOk()); // already exists
- EXPECT_EQ(bitmap.Cardinality(), 3);
+ ASSERT_THAT(bitmap.Add(10L), IsOk()); // already exists
+ ASSERT_EQ(bitmap.Cardinality(), 3);
}
TEST(RoaringPositionBitmapTest, TestCardinalitySparseBitmaps) {
RoaringPositionBitmap bitmap;
- EXPECT_THAT(bitmap.Add((static_cast(0) << 32) | 100L), IsOk());
- EXPECT_THAT(bitmap.Add((static_cast(0) << 32) | 101L), IsOk());
- EXPECT_THAT(bitmap.Add((static_cast(0) << 32) | 105L), IsOk());
- EXPECT_THAT(bitmap.Add((static_cast(1) << 32) | 200L), IsOk());
- EXPECT_THAT(bitmap.Add((static_cast(100) << 32) | 300L), IsOk());
+ ASSERT_THAT(bitmap.Add(100L), IsOk());
+ ASSERT_THAT(bitmap.Add(101L), IsOk());
+ ASSERT_THAT(bitmap.Add(105L), IsOk());
+ ASSERT_THAT(bitmap.Add((int64_t{1} << 32) | 200L), IsOk());
+ ASSERT_THAT(bitmap.Add((int64_t{100} << 32) | 300L), IsOk());
- EXPECT_EQ(bitmap.Cardinality(), 5);
+ ASSERT_EQ(bitmap.Cardinality(), 5);
}
TEST(RoaringPositionBitmapTest, TestSerializeDeserializeRoundTrip) {
RoaringPositionBitmap bitmap;
- EXPECT_THAT(bitmap.Add(10L), IsOk());
- EXPECT_THAT(bitmap.Add(20L), IsOk());
- EXPECT_THAT(bitmap.Add((static_cast(1) << 32) | 30L), IsOk());
+ ASSERT_THAT(bitmap.Add(10L), IsOk());
+ ASSERT_THAT(bitmap.Add(20L), IsOk());
+ ASSERT_THAT(bitmap.Add((int64_t{1} << 32) | 30L), IsOk());
auto copy = RoundTripSerialize(bitmap);
+ AssertEqualContent(copy, {10L, 20L, (int64_t{1} << 32) | 30L});
+}
+
+TEST(RoaringPositionBitmapTest, TestCopyConstructor) {
+ RoaringPositionBitmap bitmap;
+ ASSERT_THAT(bitmap.Add(10L), IsOk());
+ ASSERT_THAT(bitmap.Add((int64_t{2} << 32) | 30L), IsOk());
+
+ RoaringPositionBitmap copy(bitmap);
- EXPECT_EQ(copy.Cardinality(), bitmap.Cardinality());
- EXPECT_TRUE(copy.Contains(10L).value());
- EXPECT_TRUE(copy.Contains(20L).value());
- EXPECT_TRUE(copy.Contains((static_cast(1) << 32) | 30L).value());
+ AssertEqualContent(copy, {10L, (int64_t{2} << 32) | 30L});
+
+ // Copy is independent from source.
+ ASSERT_THAT(copy.Add(99L), IsOk());
+ ASSERT_THAT(bitmap.Contains(99L), HasValue(::testing::Eq(false)));
+}
+
+TEST(RoaringPositionBitmapTest, TestCopyAssignment) {
+ RoaringPositionBitmap bitmap;
+ ASSERT_THAT(bitmap.Add(10L), IsOk());
+ ASSERT_THAT(bitmap.Add((int64_t{3} << 32) | 40L), IsOk());
+
+ RoaringPositionBitmap assigned;
+ ASSERT_THAT(assigned.Add(1L), IsOk());
+
+ assigned = bitmap;
+ AssertEqualContent(assigned, {10L, (int64_t{3} << 32) | 40L});
+
+ // Assignment result is independent from source.
+ ASSERT_THAT(bitmap.Add(200L), IsOk());
+ ASSERT_THAT(assigned.Contains(200L), HasValue(::testing::Eq(false)));
}
TEST(RoaringPositionBitmapTest, TestSerializeDeserializeEmpty) {
RoaringPositionBitmap bitmap;
auto copy = RoundTripSerialize(bitmap);
- EXPECT_TRUE(copy.IsEmpty());
- EXPECT_EQ(copy.Cardinality(), 0);
+ ASSERT_TRUE(copy.IsEmpty());
+ ASSERT_EQ(copy.Cardinality(), 0);
}
TEST(RoaringPositionBitmapTest, TestSerializeDeserializeAllContainerBitmap) {
RoaringPositionBitmap bitmap;
// bitmap 0, container 0 (array - few elements)
- EXPECT_THAT(bitmap.Add(Position(0, 0, 5)), IsOk());
- EXPECT_THAT(bitmap.Add(Position(0, 0, 7)), IsOk());
+ ASSERT_THAT(bitmap.Add(Position(0, 0, 5)), IsOk());
+ ASSERT_THAT(bitmap.Add(Position(0, 0, 7)), IsOk());
// bitmap 0, container 1 (array that can be compressed)
- EXPECT_THAT(bitmap.AddRange(Position(0, 1, 1), Position(0, 1, 1000)), IsOk());
+ ASSERT_THAT(bitmap.AddRange(Position(0, 1, 1), Position(0, 1, 1000)), IsOk());
// bitmap 0, container 2 (bitset - nearly full container)
- EXPECT_THAT(bitmap.AddRange(Position(0, 2, 1), Position(0, 2, kContainerOffset - 1)),
+ ASSERT_THAT(bitmap.AddRange(Position(0, 2, 1), Position(0, 2, kContainerOffset - 1)),
IsOk());
// bitmap 1, container 0 (array)
- EXPECT_THAT(bitmap.Add(Position(1, 0, 10)), IsOk());
- EXPECT_THAT(bitmap.Add(Position(1, 0, 20)), IsOk());
+ ASSERT_THAT(bitmap.Add(Position(1, 0, 10)), IsOk());
+ ASSERT_THAT(bitmap.Add(Position(1, 0, 20)), IsOk());
// bitmap 1, container 1 (array that can be compressed)
- EXPECT_THAT(bitmap.AddRange(Position(1, 1, 10), Position(1, 1, 500)), IsOk());
+ ASSERT_THAT(bitmap.AddRange(Position(1, 1, 10), Position(1, 1, 500)), IsOk());
// bitmap 1, container 2 (bitset)
- EXPECT_THAT(bitmap.AddRange(Position(1, 2, 1), Position(1, 2, kContainerOffset - 1)),
+ ASSERT_THAT(bitmap.AddRange(Position(1, 2, 1), Position(1, 2, kContainerOffset - 1)),
IsOk());
- EXPECT_TRUE(bitmap.Optimize());
+ ASSERT_TRUE(bitmap.Optimize());
auto copy = RoundTripSerialize(bitmap);
+ std::set expected_positions;
+ bitmap.ForEach([&](int64_t pos) { expected_positions.insert(pos); });
- EXPECT_EQ(copy.Cardinality(), bitmap.Cardinality());
- copy.ForEach([&](int64_t pos) { EXPECT_TRUE(bitmap.Contains(pos).value()); });
- bitmap.ForEach([&](int64_t pos) { EXPECT_TRUE(copy.Contains(pos).value()); });
+ AssertEqualContent(copy, expected_positions);
}
TEST(RoaringPositionBitmapTest, TestForEach) {
RoaringPositionBitmap bitmap;
- EXPECT_THAT(bitmap.Add(30L), IsOk());
- EXPECT_THAT(bitmap.Add(10L), IsOk());
- EXPECT_THAT(bitmap.Add(20L), IsOk());
- EXPECT_THAT(bitmap.Add((static_cast(1) << 32) | 5L), IsOk());
+ ASSERT_THAT(bitmap.Add(30L), IsOk());
+ ASSERT_THAT(bitmap.Add(10L), IsOk());
+ ASSERT_THAT(bitmap.Add(20L), IsOk());
+ ASSERT_THAT(bitmap.Add((int64_t{1} << 32) | 5L), IsOk());
std::vector positions;
bitmap.ForEach([&](int64_t pos) { positions.push_back(pos); });
ASSERT_EQ(positions.size(), 4u);
- EXPECT_EQ(positions[0], 10L);
- EXPECT_EQ(positions[1], 20L);
- EXPECT_EQ(positions[2], 30L);
- EXPECT_EQ(positions[3], (static_cast(1) << 32) | 5L);
+ ASSERT_EQ(positions[0], 10L);
+ ASSERT_EQ(positions[1], 20L);
+ ASSERT_EQ(positions[2], 30L);
+ ASSERT_EQ(positions[3], (int64_t{1} << 32) | 5L);
}
TEST(RoaringPositionBitmapTest, TestIsEmpty) {
RoaringPositionBitmap bitmap;
- EXPECT_TRUE(bitmap.IsEmpty());
+ ASSERT_TRUE(bitmap.IsEmpty());
- EXPECT_THAT(bitmap.Add(10L), IsOk());
- EXPECT_FALSE(bitmap.IsEmpty());
+ ASSERT_THAT(bitmap.Add(10L), IsOk());
+ ASSERT_FALSE(bitmap.IsEmpty());
}
TEST(RoaringPositionBitmapTest, TestOptimize) {
RoaringPositionBitmap bitmap;
- EXPECT_THAT(bitmap.AddRange(0, 10000), IsOk());
+ ASSERT_THAT(bitmap.AddRange(0, 10000), IsOk());
+ size_t size_before = bitmap.SerializedSizeInBytes();
bool changed = bitmap.Optimize();
- EXPECT_TRUE(changed);
+ ASSERT_TRUE(changed);
+
+ // RLE optimization should reduce size for this dense range
+ size_t size_after = bitmap.SerializedSizeInBytes();
+ ASSERT_GT(size_before, size_after);
// Content should be unchanged after RLE optimization
- EXPECT_EQ(bitmap.Cardinality(), 10000);
+ std::set expected_positions;
for (int64_t i = 0; i < 10000; ++i) {
- EXPECT_TRUE(bitmap.Contains(i).value());
+ expected_positions.insert(i);
}
+ AssertEqualContent(bitmap, expected_positions);
// Round-trip should preserve content after RLE
auto copy = RoundTripSerialize(bitmap);
- EXPECT_EQ(copy.Cardinality(), 10000);
+ AssertEqualContent(copy, expected_positions);
}
TEST(RoaringPositionBitmapTest, TestUnsupportedPositions) {
RoaringPositionBitmap bitmap;
// Negative position
- EXPECT_THAT(bitmap.Add(-1L), IsError(ErrorKind::kInvalidArgument));
+ ASSERT_THAT(bitmap.Add(-1L), IsError(ErrorKind::kInvalidArgument));
// Contains with negative position
- EXPECT_THAT(bitmap.Contains(-1L), IsError(ErrorKind::kInvalidArgument));
+ ASSERT_THAT(bitmap.Contains(-1L), IsError(ErrorKind::kInvalidArgument));
// Position exceeding MAX_POSITION
- EXPECT_THAT(bitmap.Add(RoaringPositionBitmap::kMaxPosition + 1L),
+ ASSERT_THAT(bitmap.Add(RoaringPositionBitmap::kMaxPosition + 1L),
IsError(ErrorKind::kInvalidArgument));
// Contains with position exceeding MAX_POSITION
- EXPECT_THAT(bitmap.Contains(RoaringPositionBitmap::kMaxPosition + 1L),
+ ASSERT_THAT(bitmap.Contains(RoaringPositionBitmap::kMaxPosition + 1L),
IsError(ErrorKind::kInvalidArgument));
}
-TEST(RoaringPositionBitmapTest, TestDeserializeSupportedRoaringExamples) {
- for (const auto& file :
- {"64map32bitvals.bin", "64mapempty.bin", "64mapspreadvals.bin"}) {
- std::string data = ReadTestResource(file);
- auto result = RoaringPositionBitmap::Deserialize(data);
- EXPECT_THAT(RoaringPositionBitmap::Deserialize(data), IsOk());
- }
-}
-
-TEST(RoaringPositionBitmapTest, TestDeserializeUnsupportedRoaringExample) {
- // This file contains a value with key larger than max supported
- std::string data = ReadTestResource("64maphighvals.bin");
- auto result = RoaringPositionBitmap::Deserialize(data);
- EXPECT_THAT(result, IsError(ErrorKind::kInvalidArgument));
- EXPECT_THAT(result, HasErrorMessage("Invalid unsigned key"));
-}
-
TEST(RoaringPositionBitmapTest, TestRandomSparseBitmap) {
std::mt19937_64 rng(42);
RoaringPositionBitmap bitmap;
std::set positions;
- std::uniform_int_distribution dist(0, static_cast(5) << 32);
+ std::uniform_int_distribution dist(0, int64_t{5} << 32);
for (int i = 0; i < 100000; ++i) {
int64_t pos = dist(rng);
@@ -425,7 +464,7 @@ TEST(RoaringPositionBitmapTest, TestRandomSparseBitmap) {
int64_t pos = lookup_dist(rng2);
auto result = bitmap.Contains(pos);
ASSERT_THAT(result, IsOk());
- EXPECT_EQ(result.value(), positions.count(pos) > 0);
+ ASSERT_EQ(result.value(), positions.contains(pos));
}
}
@@ -434,8 +473,7 @@ TEST(RoaringPositionBitmapTest, TestRandomDenseBitmap) {
std::set positions;
// Create dense ranges across multiple bitmap keys
- for (int64_t offset : {static_cast(0), static_cast(2) << 32,
- static_cast(5) << 32}) {
+ for (int64_t offset : {int64_t{0}, int64_t{2} << 32, int64_t{5} << 32}) {
for (int64_t i = 0; i < 10000; ++i) {
ASSERT_THAT(bitmap.Add(offset + i), IsOk());
positions.insert(offset + i);
@@ -451,8 +489,7 @@ TEST(RoaringPositionBitmapTest, TestRandomMixedBitmap) {
std::set positions;
// Sparse positions in [3<<32, 5<<32)
- std::uniform_int_distribution dist1(static_cast(3) << 32,
- static_cast(5) << 32);
+ std::uniform_int_distribution dist1(int64_t{3} << 32, int64_t{5} << 32);
for (int i = 0; i < 50000; ++i) {
int64_t pos = dist1(rng);
positions.insert(pos);
@@ -466,7 +503,7 @@ TEST(RoaringPositionBitmapTest, TestRandomMixedBitmap) {
}
// More sparse positions in [0, 1<<32)
- std::uniform_int_distribution dist2(0, static_cast(1) << 32);
+ std::uniform_int_distribution dist2(0, int64_t{1} << 32);
for (int i = 0; i < 5000; ++i) {
int64_t pos = dist2(rng);
positions.insert(pos);
@@ -479,12 +516,50 @@ TEST(RoaringPositionBitmapTest, TestRandomMixedBitmap) {
TEST(RoaringPositionBitmapTest, TestDeserializeInvalidData) {
// Buffer too small
auto result = RoaringPositionBitmap::Deserialize("");
- EXPECT_THAT(result, IsError(ErrorKind::kInvalidArgument));
+ ASSERT_THAT(result, IsError(ErrorKind::kInvalidArgument));
// Invalid bitmap count (very large)
std::string buf(8, '\xFF');
result = RoaringPositionBitmap::Deserialize(buf);
- EXPECT_THAT(result, IsError(ErrorKind::kInvalidArgument));
+ ASSERT_THAT(result, IsError(ErrorKind::kInvalidArgument));
+}
+
+TEST(RoaringPositionBitmapInteropTest, TestDeserializeSupportedRoaringExamples) {
+ // These .bin fixtures are copied from the Apache Iceberg Java repository's
+ // roaring position bitmap interoperability test resources.
+ static const std::vector kCases = {
+ {.file_name = "64map32bitvals.bin",
+ .expected_shape = InteropBitmapShape::kOnly32BitPositions},
+ {.file_name = "64mapempty.bin", .expected_shape = InteropBitmapShape::kEmpty},
+ {.file_name = "64mapspreadvals.bin",
+ .expected_shape = InteropBitmapShape::kSpreadAcrossKeys},
+ };
+
+ for (const auto& test_case : kCases) {
+ SCOPED_TRACE(test_case.file_name);
+ std::string data = ReadTestResource(test_case.file_name);
+ auto result = RoaringPositionBitmap::Deserialize(data);
+ ASSERT_THAT(result, IsOk());
+
+ const auto& bitmap = result.value();
+ AssertInteropBitmapShape(bitmap, test_case.expected_shape);
+
+ std::set positions;
+ bitmap.ForEach([&](int64_t pos) { positions.insert(pos); });
+ AssertEqualContent(bitmap, positions);
+
+ auto copy = RoundTripSerialize(bitmap);
+ AssertEqualContent(copy, positions);
+ }
+}
+
+TEST(RoaringPositionBitmapInteropTest, TestDeserializeUnsupportedRoaringExample) {
+ // This file is copied from the Apache Iceberg Java repository and it contains
+ // a value with key larger than max supported
+ std::string data = ReadTestResource("64maphighvals.bin");
+ auto result = RoaringPositionBitmap::Deserialize(data);
+ ASSERT_THAT(result, IsError(ErrorKind::kInvalidArgument));
+ ASSERT_THAT(result, HasErrorMessage("Invalid unsigned key"));
}
} // namespace iceberg
From 40e01c029cff3d7d294ebe50bf603b0aada7d16c Mon Sep 17 00:00:00 2001
From: Gang Wu
Date: Tue, 24 Mar 2026 11:03:58 +0800
Subject: [PATCH 3/5] fix linter
src/iceberg/deletes/roaring_position_bitmap.cc:196:25 [modernize-use-integer-sign-comparison]
Check warning: src/iceberg/deletes/roaring_position_bitmap.cc:196:25 [modernize-use-integer-sign-comparison]
comparison between 'signed' and 'unsigned' integers
---
src/iceberg/deletes/roaring_position_bitmap.cc | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/iceberg/deletes/roaring_position_bitmap.cc b/src/iceberg/deletes/roaring_position_bitmap.cc
index e47c2551f..fec037887 100644
--- a/src/iceberg/deletes/roaring_position_bitmap.cc
+++ b/src/iceberg/deletes/roaring_position_bitmap.cc
@@ -193,7 +193,7 @@ Result RoaringPositionBitmap::Serialize() const {
buf += kBitmapCountSizeBytes;
// Write each bitmap with its key
- for (int32_t key = 0; key < static_cast(impl_->bitmaps.size()); ++key) {
+ for (int32_t key = 0; std::cmp_less(key, impl_->bitmaps.size()); ++key) {
WriteLE32(buf, key);
buf += kBitmapKeySizeBytes;
size_t written = impl_->bitmaps[key].write(buf, /*portable=*/true);
From 91657d1ec1f6f9e8e861378a7d947799176c3ddf Mon Sep 17 00:00:00 2001
From: Gang Wu
Date: Wed, 25 Mar 2026 21:05:52 +0800
Subject: [PATCH 4/5] Update src/iceberg/deletes/roaring_position_bitmap.h
Co-authored-by: emkornfield
---
src/iceberg/deletes/roaring_position_bitmap.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/iceberg/deletes/roaring_position_bitmap.h b/src/iceberg/deletes/roaring_position_bitmap.h
index 650160c1a..83a2d64ce 100644
--- a/src/iceberg/deletes/roaring_position_bitmap.h
+++ b/src/iceberg/deletes/roaring_position_bitmap.h
@@ -41,7 +41,7 @@ namespace iceberg {
//github.com/ significant 4 bytes. For each key, a 32-bit Roaring bitmap is
//github.com/ maintained to store positions for that key.
//github.com/
-//github.com/ \note The Puffin deletion-vector-v1 wrapping (length prefix, magic
+//github.com/ \note This class is used to represent deletion vectors. The Puffin puffin reader/write handle adding the additional required framing (length prefix, magic, magic bytes, CRC-32) for `deletion-vector-v1` persistence.
//github.com/ bytes, CRC-32) is handled by the Puffin writer/reader layer, not
//github.com/ this class.
class ICEBERG_EXPORT RoaringPositionBitmap {
From 9509e077a3e88da7edecbd8a9049be4291e01412 Mon Sep 17 00:00:00 2001
From: Gang Wu
Date: Wed, 25 Mar 2026 21:07:36 +0800
Subject: [PATCH 5/5] fix precommit
---
src/iceberg/deletes/roaring_position_bitmap.h | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/src/iceberg/deletes/roaring_position_bitmap.h b/src/iceberg/deletes/roaring_position_bitmap.h
index 83a2d64ce..37a3fe14a 100644
--- a/src/iceberg/deletes/roaring_position_bitmap.h
+++ b/src/iceberg/deletes/roaring_position_bitmap.h
@@ -41,9 +41,10 @@ namespace iceberg {
//github.com/ significant 4 bytes. For each key, a 32-bit Roaring bitmap is
//github.com/ maintained to store positions for that key.
//github.com/
-//github.com/ \note This class is used to represent deletion vectors. The Puffin puffin reader/write handle adding the additional required framing (length prefix, magic, magic bytes, CRC-32) for `deletion-vector-v1` persistence.
-//github.com/ bytes, CRC-32) is handled by the Puffin writer/reader layer, not
-//github.com/ this class.
+
+//github.com/ \note This class is used to represent deletion vectors. The Puffin puffin
+//github.com/ reader/write handle adding the additional required framing (length prefix, magic,
+//github.com/ magic bytes, CRC-32) for `deletion-vector-v1` persistence.
class ICEBERG_EXPORT RoaringPositionBitmap {
public:
//github.com/ \brief Maximum supported position (aligned with the Java implementation).
pFad - Phonifier reborn
Pfad - The Proxy pFad © 2024 Your Company Name. All rights reserved.
Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.
Alternative Proxies:
Alternative Proxy
pFad Proxy
pFad v3 Proxy
pFad v4 Proxy