sparrow-ipc 0.3.0
Loading...
Searching...
No Matches
deserialize_variable_size_binary_view_array.hpp
Go to the documentation of this file.
1#pragma once
2
3#include <span>
4#include <unordered_set>
5
6#include <sparrow/arrow_interface/arrow_array_schema_proxy.hpp>
7#include <sparrow/variable_size_binary_view_array.hpp>
8
13
14namespace sparrow_ipc
15{
16 template <typename T>
19 const field_descriptor& field_desc,
20 const int64_t data_buffers_size
21 )
22 {
23 const std::string_view format = sparrow::data_type_to_format(sparrow::detail::get_data_type_from_array<T>::get());
24
25 ArrowSchema schema = make_non_owning_arrow_schema(
26 format,
27 field_desc.name,
28 field_desc.metadata,
29 field_desc.flags,
30 0,
31 nullptr,
32 nullptr
33 );
34
35 const auto compression = context.record_batch.compression();
36 std::vector<arrow_array_private_data::optionally_owned_buffer> buffers;
37 const auto nb_buffers = data_buffers_size + 3;
38 buffers.reserve(nb_buffers);
39
40 {
41 auto validity_buffer_span = utils::get_buffer(context.record_batch, context.body, context.buffer_index);
42 auto views_buffer_span = utils::get_buffer(context.record_batch, context.body, context.buffer_index);
43
44 if (compression)
45 {
46 buffers.push_back(utils::get_decompressed_buffer(validity_buffer_span, compression));
47 buffers.push_back(utils::get_decompressed_buffer(views_buffer_span, compression));
48 }
49 else
50 {
51 buffers.push_back(std::move(validity_buffer_span));
52 buffers.push_back(std::move(views_buffer_span));
53 }
54 }
55
56 std::vector<int64_t> variadic_buffer_sizes;
57 variadic_buffer_sizes.reserve(data_buffers_size);
58
59 auto push_buffer = [&](auto&& buffer)
60 {
61 variadic_buffer_sizes.push_back(static_cast<int64_t>(buffer.size()));
62 buffers.push_back(std::forward<decltype(buffer)>(buffer));
63 };
64
65 for (auto i = 0; i < data_buffers_size; ++i)
66 {
67 auto data_buffer_span = utils::get_buffer(context.record_batch, context.body, context.buffer_index);
68
69 if (compression)
70 {
71 auto decompressed = utils::get_decompressed_buffer(data_buffer_span, compression);
72 std::visit(
73 [&](auto&& buf) { push_buffer(buf); },
74 std::move(decompressed));
75 }
76 else
77 {
78 push_buffer(data_buffer_span);
79 }
80 }
81
82 buffers.push_back(
83 sparrow::buffer<uint8_t>(
84 std::vector<uint8_t>(
85 reinterpret_cast<const uint8_t*>(variadic_buffer_sizes.data()),
86 reinterpret_cast<const uint8_t*>(variadic_buffer_sizes.data() + variadic_buffer_sizes.size())
87 ),
88 sparrow::buffer<uint8_t>::default_allocator{}
89 )
90 );
91
92 const auto null_count = std::visit(
93 [length = field_desc.length](const auto& arg) {
94 std::span<const uint8_t> span(arg.data(), arg.size());
95 return utils::get_bitmap_pointer_and_null_count(span, length).second;
96 },
97 buffers[0]
98 );
99
101 field_desc.length,
102 null_count,
103 0,
104 0,
105 nullptr,
106 nullptr,
107 std::move(buffers)
108 );
109
110 sparrow::arrow_proxy ap{std::move(array), std::move(schema)};
111 return T{std::move(ap)};
112 }
113}
std::span< const uint8_t > get_buffer(const org::apache::arrow::flatbuf::RecordBatch &record_batch, std::span< const uint8_t > body, size_t &buffer_index)
Extracts a buffer from a RecordBatch's body.
std::variant< sparrow::buffer< std::uint8_t >, std::span< const std::uint8_t > > get_decompressed_buffer(std::span< const uint8_t > buffer_span, const org::apache::arrow::flatbuf::BodyCompression *compression)
Retrieves a decompressed buffer or a view of the original buffer.
T deserialize_variable_size_binary_view_array(deserialization_context &context, const field_descriptor &field_desc, const int64_t data_buffers_size)
ArrowSchema make_non_owning_arrow_schema(std::string_view format, std::string_view name, std::optional< M > metadata, std::optional< std::unordered_set< sparrow::ArrowFlag > > flags, size_t children_count, ArrowSchema **children, ArrowSchema *dictionary)
ArrowArray make_arrow_array(int64_t length, int64_t null_count, int64_t offset, size_t children_count, ArrowArray **children, ArrowArray *dictionary, Arg &&private_data_arg)
Encapsulates the context required for deserialization.
const org::apache::arrow::flatbuf::RecordBatch & record_batch
Encapsulates the description of a field to be deserialized.
std::optional< std::vector< sparrow::metadata_pair > > metadata
std::optional< std::unordered_set< sparrow::ArrowFlag > > flags