sparrow-ipc 0.3.0
Loading...
Searching...
No Matches
deserialize_run_end_encoded_array.hpp
Go to the documentation of this file.
1#pragma once
2
3#include <cstddef>
4#include <optional>
5#include <string_view>
6#include <unordered_set>
7#include <vector>
8
9#include <sparrow/arrow_interface/arrow_array_schema_proxy.hpp>
10#include <sparrow/run_end_encoded_array.hpp>
11
16
// Forward declaration of the type used further down as array_deserializer::deserialize(...)
// to deserialize each child array.
// NOTE(review): in this listing the declaration sits before `namespace sparrow_ipc` opens,
// i.e. at global scope — confirm that is the intended namespace.
17class array_deserializer;
18
19namespace sparrow_ipc
20{
// Builds a sparrow::run_end_encoded_array from the record batch referenced by `context`,
// using the flatbuffer Field carried by `field_desc`.
//
// Steps, in order:
//   1. advance context.node_index past the parent FieldNode;
//   2. validate that the field has exactly two children and read the run-ends FieldNode;
//   3. deserialize the run-ends child, then the values child, via array_deserializer::deserialize;
//   4. assemble a parent ArrowSchema (format "+r") and ArrowArray owning both children;
//   5. wrap them in a sparrow::arrow_proxy and return the run-end encoded array.
//
// Throws std::runtime_error directly when the field does not have exactly two children,
// when either child Field is null, or when the run-ends FieldNode is missing or null.
//
// NOTE(review): this listing appears to have lost some source lines during extraction
// (numbered lines 38, 86 and 163 are absent). The reference entry for this function gives the
// signature as (deserialization_context& context, const field_descriptor& field_desc), so the
// first parameter is presumably the missing line 38 — confirm against the real header.
37 [[nodiscard]] sparrow::run_end_encoded_array deserialize_run_end_encoded_array(
39 const field_descriptor& field_desc
40 )
41 {
42 ++context.node_index; // Consume one FieldNode for this run-end encoded array (parent)
// Child 0 is the run ends and child 1 is the values (see the error message and the Get(0)/Get(1) calls below).
43 constexpr size_t n_children = 2;
44
45 if (!field_desc.field.children() || field_desc.field.children()->size() != n_children)
46 {
47 throw std::runtime_error(
48 "Run-end encoded array field must have exactly 2 children (run ends and values)"
49 );
50 }
51
// First child: the run ends.
52 const auto* run_ends_field = field_desc.field.children()->Get(0);
53 if (!run_ends_field)
54 {
55 throw std::runtime_error("Run-end encoded array field has null run ends child.");
56 }
57
// After the increment above, context.node_index is expected to address the run-ends FieldNode;
// make sure the record batch actually has a node at that index before reading it.
58 const auto* nodes = context.record_batch.nodes();
59 if (!nodes || context.node_index >= nodes->size())
60 {
61 throw std::runtime_error(
62 "Run-end encoded array: insufficient FieldNodes. Expected run_ends child node at index "
63 + std::to_string(context.node_index)
64 );
65 }
66
67 const auto* run_ends_node = nodes->Get(context.node_index);
68 if (!run_ends_node)
69 {
70 throw std::runtime_error("Run-end encoded array: null run_ends FieldNode.");
71 }
72
// The run-ends node's length is reused as the length of BOTH child field descriptors below.
73 const int64_t encoded_length = run_ends_node->length();
74
// Custom metadata is optional; it stays std::nullopt when the child Field carries none.
75 std::optional<std::vector<sparrow::metadata_pair>> run_ends_metadata;
76 if (run_ends_field->custom_metadata())
77 {
78 run_ends_metadata = to_sparrow_metadata(*run_ends_field->custom_metadata());
79 }
80
// NOTE(review): the argument list below ends with a trailing comma, while values_context
// further down passes context.variadic_counts_idx as a fifth argument — one argument line
// (numbered 86) seems to be missing from this listing.
81 deserialization_context run_ends_context(
82 context.record_batch,
83 context.body,
84 context.buffer_index,
85 context.node_index,
87 );
88
// NOTE(review): the meaning of the literal `true` argument is not visible here — check
// field_descriptor's constructor.
89 field_descriptor run_ends_desc(
90 encoded_length,
91 utils::get_fb_name(run_ends_field),
92 std::move(run_ends_metadata),
93 utils::get_sparrow_flags(*run_ends_field),
94 true,
95 *run_ends_field,
96 field_desc.dictionaries
97 );
98
99 sparrow::array run_ends_array = array_deserializer::deserialize(
100 run_ends_context,
101 run_ends_desc
102 );
103
104 // Deserialize the second child: encoded values
105 const auto* values_field = field_desc.field.children()->Get(1);
106 if (!values_field)
107 {
108 throw std::runtime_error("Run-end encoded array field has null values child.");
109 }
110
111 std::optional<std::vector<sparrow::metadata_pair>> values_metadata;
112 if (values_field->custom_metadata())
113 {
114 values_metadata = to_sparrow_metadata(*values_field->custom_metadata());
115 }
116
// Built from the same members of `context` as run_ends_context.
// NOTE(review): whether deserializing the run ends has already advanced buffer_index /
// node_index as seen here depends on whether deserialization_context stores them by
// reference — not visible in this file; confirm.
117 deserialization_context values_context(
118 context.record_batch,
119 context.body,
120 context.buffer_index,
121 context.node_index,
122 context.variadic_counts_idx
123 );
124
125 field_descriptor values_desc(
126 encoded_length, // Same encoded length as run ends
127 utils::get_fb_name(values_field),
128 std::move(values_metadata),
129 utils::get_sparrow_flags(*values_field),
130 true,
131 *values_field,
132 field_desc.dictionaries
133 );
134
135 sparrow::array values_array = array_deserializer::deserialize(
136 values_context,
137 values_desc
138 );
139
// Take the Arrow C structures (array + schema) out of both deserialized children.
140 auto [run_ends_arrow_array, run_ends_arrow_schema] = sparrow::extract_arrow_structures(std::move(run_ends_array));
141 auto [values_arrow_array, values_arrow_schema] = sparrow::extract_arrow_structures(std::move(values_array));
142
// The child pointer table and the child schemas are allocated with raw `new`.
// NOTE(review): presumably the schema produced by make_non_owning_arrow_schema releases
// them — confirm. Nothing in this function frees the earlier allocations if a later one throws.
143 auto** schema_children = new ArrowSchema*[n_children];
144 schema_children[0] = new ArrowSchema(std::move(run_ends_arrow_schema));
145 schema_children[1] = new ArrowSchema(std::move(values_arrow_schema));
146
147 const std::string_view format = "+r"; // TODO: Use sparrow::data_type_to_format(sparrow::data_type::RUN_ENCODED); when it will be available on sparrow
// The parent schema reuses the name, metadata and flags of field_desc and has no dictionary (nullptr).
148 ArrowSchema schema = make_non_owning_arrow_schema(
149 format,
150 field_desc.name,
151 field_desc.metadata,
152 field_desc.flags,
153 n_children,
154 schema_children,
155 nullptr
156 );
157
// Same raw-`new` pattern for the child ArrowArray structures.
158 auto** array_children = new ArrowArray*[n_children];
159 array_children[0] = new ArrowArray(std::move(run_ends_arrow_array));
160 array_children[1] = new ArrowArray(std::move(values_arrow_array));
161
162
// NOTE(review): numbered line 163 is absent from this listing. The arguments below line up
// with the make_arrow_array(length, null_count, offset, children_count, children, dictionary,
// private_data_arg) reference entry, and the result is used as `array` further down — so the
// missing line presumably opens that call; confirm against the real header.
// Per that parameter order: length = field_desc.length, null_count = 0, offset = 0.
164 field_desc.length,
165 0,
166 0,
167 n_children,
168 array_children,
169 nullptr,
170 std::vector<arrow_array_private_data::optionally_owned_buffer>{} // No buffers at parent level
171 );
172
// Hand both C structures to an arrow_proxy and wrap it as the typed run-end encoded array.
173 sparrow::arrow_proxy ap{std::move(array), std::move(schema)};
174 return sparrow::run_end_encoded_array{std::move(ap)};
175 }
176}
std::string get_fb_name(const T *obj, const std::string &default_val="")
Safely extracts a string from a FlatBuffers object that has a name() method.
std::optional< std::unordered_set< sparrow::ArrowFlag > > get_sparrow_flags(const org::apache::arrow::flatbuf::Field &field)
Extract sparrow flags from FlatBuffers Field.
std::vector< sparrow::metadata_pair > to_sparrow_metadata(const ::flatbuffers::Vector<::flatbuffers::Offset< org::apache::arrow::flatbuf::KeyValue > > &metadata)
Converts FlatBuffers metadata to Sparrow metadata format.
sparrow::run_end_encoded_array deserialize_run_end_encoded_array(deserialization_context &context, const field_descriptor &field_desc)
Deserialize a run-end encoded array from IPC format.
ArrowSchema make_non_owning_arrow_schema(std::string_view format, std::string_view name, std::optional< M > metadata, std::optional< std::unordered_set< sparrow::ArrowFlag > > flags, size_t children_count, ArrowSchema **children, ArrowSchema *dictionary)
ArrowArray make_arrow_array(int64_t length, int64_t null_count, int64_t offset, size_t children_count, ArrowArray **children, ArrowArray *dictionary, Arg &&private_data_arg)
Encapsulates the context required for deserialization.
const org::apache::arrow::flatbuf::RecordBatch & record_batch
Encapsulates the description of a field to be deserialized.
std::optional< std::vector< sparrow::metadata_pair > > metadata
const org::apache::arrow::flatbuf::Field & field
std::optional< std::unordered_set< sparrow::ArrowFlag > > flags