sparrow-ipc 0.3.0
Loading...
Searching...
No Matches
deserialize_array_impl.hpp
Go to the documentation of this file.
1#pragma once
2
3#include <cstdint>
4#include <optional>
5#include <span>
6#include <string>
7#include <string_view>
8#include <unordered_set>
9#include <vector>
10
11#include <sparrow/arrow_interface/arrow_array_schema_proxy.hpp>
12
17
19{
/**
 * Builds an `ArrayType<T>` from two consecutive record-batch buffers
 * (validity bitmap first, then data).
 *
 * The function creates an ArrowSchema via `make_non_owning_arrow_schema`,
 * collects the two buffers (decompressing them when the record batch
 * declares a compression), computes the null count from the validity
 * buffer, and wraps the schema/array pair in a `sparrow::arrow_proxy`
 * that is handed to the `ArrayType<T>` constructor.
 *
 * @tparam ArrayType Array class template to instantiate.
 * @tparam T Element type passed to `ArrayType`.
 * @param field_desc Supplies the name, metadata, flags and length of the field.
 * @param format_override Format string to use instead of the one derived
 *        from `ArrayType<T>`; when empty, the derived format is used.
 * @return The array constructed from the resulting arrow proxy.
 *
 * NOTE(review): original line 37 is missing from this listing. `context`,
 * used throughout the body, is presumably the first parameter
 * (`deserialization_context& context`, as in the reference signature shown
 * after the code) -- confirm against the header.
 */
35 template <template<typename...> class ArrayType, typename T>
36 [[nodiscard]] ArrayType<T> deserialize_simple_array(
38 const field_descriptor& field_desc,
39 std::optional<std::string> format_override = std::nullopt
40 )
41 {
// Prefer the caller-supplied format; otherwise derive it from the data type
// associated with ArrayType<T>.
42 const std::string_view format = format_override.has_value()
43 ? *format_override
44 : sparrow::data_type_to_format(sparrow::detail::get_data_type_from_array<ArrayType<T>>::get());
45
// Schema with no children and no dictionary (children_count = 0, both pointers null).
46 ArrowSchema schema = make_non_owning_arrow_schema(
47 format,
48 field_desc.name,
49 field_desc.metadata,
50 field_desc.flags,
51 0,
52 nullptr,
53 nullptr
54 );
55
// Tested below as a boolean: a falsy value means the buffers are used as-is.
56 const auto compression = context.record_batch.compression();
57 std::vector<arrow_array_private_data::optionally_owned_buffer> buffers;
58 constexpr auto nb_buffers = 2;
59 buffers.reserve(nb_buffers);
60 {
// Two successive reads: validity first, then data. get_buffer receives
// context.buffer_index by non-const reference, so the call order presumably
// determines which buffer each read returns -- keep this order.
61 auto validity_buffer_span = utils::get_buffer(context.record_batch, context.body, context.buffer_index);
62 auto data_buffer_span = utils::get_buffer(context.record_batch, context.body, context.buffer_index);
63
64 if (compression)
65 {
// get_decompressed_buffer yields a variant: an owned buffer or a view of the input span.
66 buffers.push_back(utils::get_decompressed_buffer(validity_buffer_span, compression));
67 buffers.push_back(utils::get_decompressed_buffer(data_buffer_span, compression));
68 }
69 else
70 {
// No compression: store the spans over the message body directly.
71 buffers.push_back(std::move(validity_buffer_span));
72 buffers.push_back(std::move(data_buffer_span));
73 }
74 }
75
// Whichever alternative buffers[0] (the validity buffer) holds, view it as a
// byte span and take the second element of the returned pair as the null count.
76 const auto null_count = std::visit(
77 [length = field_desc.length](const auto& arg) {
78 std::span<const uint8_t> span(arg.data(), arg.size());
79 return utils::get_bitmap_pointer_and_null_count(span, length).second;
80 },
81 buffers[0]
82 );
83
// NOTE(review): original line 84 is missing from this listing. It presumably
// opens the declaration of `array` with a make_arrow_array(...) call whose
// arguments follow -- confirm against the header.
// Arguments, per the make_arrow_array signature: length, null_count, offset (0),
// children_count (0), children (none), dictionary (none), and the buffers,
// which are moved into the call.
85 field_desc.length,
86 null_count,
87 0,
88 0,
89 nullptr,
90 nullptr,
91 std::move(buffers)
92 );
93
// Hand both C structures to an arrow_proxy, then build the typed array from it.
94 sparrow::arrow_proxy ap{std::move(array), std::move(schema)};
95 return ArrayType<T>{std::move(ap)};
96 }
97}
ArrayType< T > deserialize_simple_array(deserialization_context &context, const field_descriptor &field_desc, std::optional< std::string > format_override=std::nullopt)
Generic implementation for deserializing non-owning arrays with simple layout.
std::span< const uint8_t > get_buffer(const org::apache::arrow::flatbuf::RecordBatch &record_batch, std::span< const uint8_t > body, size_t &buffer_index)
Extracts a buffer from a RecordBatch's body.
std::variant< sparrow::buffer< std::uint8_t >, std::span< const std::uint8_t > > get_decompressed_buffer(std::span< const uint8_t > buffer_span, const org::apache::arrow::flatbuf::BodyCompression *compression)
Retrieves a decompressed buffer or a view of the original buffer.
ArrowSchema make_non_owning_arrow_schema(std::string_view format, std::string_view name, std::optional< M > metadata, std::optional< std::unordered_set< sparrow::ArrowFlag > > flags, size_t children_count, ArrowSchema **children, ArrowSchema *dictionary)
ArrowArray make_arrow_array(int64_t length, int64_t null_count, int64_t offset, size_t children_count, ArrowArray **children, ArrowArray *dictionary, Arg &&private_data_arg)
Encapsulates the context required for deserialization.
const org::apache::arrow::flatbuf::RecordBatch & record_batch
Encapsulates the description of a field to be deserialized.
std::optional< std::vector< sparrow::metadata_pair > > metadata
std::optional< std::unordered_set< sparrow::ArrowFlag > > flags