sparrow-ipc 0.2.0
Loading...
Searching...
No Matches
deserialize_variable_size_binary_view_array.hpp
Go to the documentation of this file.
1#pragma once
2
3#include <span>
4#include <unordered_set>
5
6#include <sparrow/arrow_interface/arrow_array_schema_proxy.hpp>
7#include <sparrow/variable_size_binary_view_array.hpp>
8
9#include "Message_generated.h"
13
14namespace sparrow_ipc
15{
16 template <typename T>
18 const org::apache::arrow::flatbuf::RecordBatch& record_batch,
19 std::span<const uint8_t> body,
20 std::string_view name,
21 const std::optional<std::vector<sparrow::metadata_pair>>& metadata,
22 bool nullable,
23 size_t& buffer_index,
24 const int64_t data_buffers_size
25 )
26 {
27 // TODO Use the commented line below instead of the following snippet when this is handled/added in sparrow
28 // const std::string_view format = data_type_to_format(sparrow::detail::get_data_type_from_array<T>::get());
29 std::string format;
30 if (sparrow::detail::get_data_type_from_array<T>::get() == sparrow::data_type::STRING_VIEW)
31 {
32 format = "vu";
33 }
34 else if (sparrow::detail::get_data_type_from_array<T>::get() == sparrow::data_type::BINARY_VIEW)
35 {
36 format = "vz";
37 }
38 else
39 {
40 throw std::runtime_error("Unsupported view type");
41 }
42
43 // Set up flags based on nullable
44 std::optional<std::unordered_set<sparrow::ArrowFlag>> flags;
45 if (nullable)
46 {
47 flags = std::unordered_set<sparrow::ArrowFlag>{sparrow::ArrowFlag::NULLABLE};
48 }
49
50 ArrowSchema schema = make_non_owning_arrow_schema(
51 format,
52 name.data(),
53 metadata,
54 flags,
55 0,
56 nullptr,
57 nullptr
58 );
59
60 const auto compression = record_batch.compression();
61 std::vector<arrow_array_private_data::optionally_owned_buffer> buffers;
62
63 auto validity_buffer_span = utils::get_buffer(record_batch, body, buffer_index);
64 auto views_buffer_span = utils::get_buffer(record_batch, body, buffer_index);
65
66 if (compression)
67 {
68 buffers.push_back(utils::get_decompressed_buffer(validity_buffer_span, compression));
69 buffers.push_back(utils::get_decompressed_buffer(views_buffer_span, compression));
70 }
71 else
72 {
73 buffers.push_back(validity_buffer_span);
74 buffers.push_back(views_buffer_span);
75 }
76
77 // If no data buffers are present, we still need to push an empty data buffer to have things valid in sparrow
78 if (data_buffers_size == 0)
79 {
80 buffers.push_back(arrow_array_private_data::optionally_owned_buffer(std::span<const uint8_t>{}));
81 }
82
83 for (auto i = 0; i < data_buffers_size; ++i)
84 {
85 auto data_buffer_span =
86 utils::get_buffer(record_batch, body, buffer_index);
87
88 if (compression)
89 {
90 buffers.push_back(
91 utils::get_decompressed_buffer(data_buffer_span, compression)
92 );
93 }
94 else
95 {
96 buffers.push_back(data_buffer_span);
97 }
98 }
99
100 const auto [bitmap_ptr, null_count] = utils::get_bitmap_pointer_and_null_count(validity_buffer_span, record_batch.length());
101
103 record_batch.length(),
104 null_count,
105 0, // n_children
106 0, // n_dictionaries
107 nullptr, // children
108 nullptr, // dictionary
109 std::move(buffers)
110 );
111
112 sparrow::arrow_proxy ap{std::move(array), std::move(schema)};
113 return T{std::move(ap)};
114 }
115}
std::variant< sparrow::buffer< uint8_t >, std::span< const uint8_t > > optionally_owned_buffer
std::span< const uint8_t > get_buffer(const org::apache::arrow::flatbuf::RecordBatch &record_batch, std::span< const uint8_t > body, size_t &buffer_index)
Extracts a buffer from a RecordBatch's body.
std::variant< sparrow::buffer< std::uint8_t >, std::span< const std::uint8_t > > get_decompressed_buffer(std::span< const uint8_t > buffer_span, const org::apache::arrow::flatbuf::BodyCompression *compression)
Retrieves a decompressed buffer or a view of the original buffer.
std::pair< std::uint8_t *, int64_t > get_bitmap_pointer_and_null_count(std::span< const uint8_t > validity_buffer_span, const int64_t length)
Extracts bitmap pointer and null count from a validity buffer span.
ArrowSchema make_non_owning_arrow_schema(std::string_view format, const char *name, std::optional< M > metadata, std::optional< std::unordered_set< sparrow::ArrowFlag > > flags, size_t children_count, ArrowSchema **children, ArrowSchema *dictionary)
ArrowArray make_arrow_array(int64_t length, int64_t null_count, int64_t offset, size_t children_count, ArrowArray **children, ArrowArray *dictionary, Arg &&private_data_arg)
T deserialize_variable_size_binary_view_array(const org::apache::arrow::flatbuf::RecordBatch &record_batch, std::span< const uint8_t > body, std::string_view name, const std::optional< std::vector< sparrow::metadata_pair > > &metadata, bool nullable, size_t &buffer_index, const int64_t data_buffers_size)