sparrow-ipc 0.2.0
Loading...
Searching...
No Matches
flatbuffer_utils.hpp
Go to the documentation of this file.
1#pragma once
2
3#include <ranges>
4
5#include <flatbuffers/flatbuffers.h>
6#include <Message_generated.h>
7
8#include <sparrow/c_interface.hpp>
9#include <sparrow/record_batch.hpp>
10
11#include "File_generated.h"
13#include "sparrow_ipc/utils.hpp"
14
15namespace sparrow_ipc
16{
17 // Creates a Flatbuffers Decimal type from a format string.
18 // The format string is expected to be in the format "d:precision,scale".
// Returns a pair holding the flatbuffer Type enumerator and an untyped offset
// (presumably to the Decimal table written through `builder` - confirm against
// the definition). `bitWidth` is passed through by value; its accepted values
// are not visible from this declaration.
19 [[nodiscard]] std::pair<org::apache::arrow::flatbuf::Type, flatbuffers::Offset<void>>
20 get_flatbuffer_decimal_type(
21 flatbuffers::FlatBufferBuilder& builder,
22 std::string_view format_str,
23 const int32_t bitWidth
24 );
25
26 // Creates a Flatbuffers type from a format string.
27 // This function maps a sparrow data type to the corresponding Flatbuffers type.
// Returns a pair holding the flatbuffer Type enumerator and an untyped offset;
// `builder` is taken by non-const reference.
28 [[nodiscard]] std::pair<org::apache::arrow::flatbuf::Type, flatbuffers::Offset<void>>
29 get_flatbuffer_type(flatbuffers::FlatBufferBuilder& builder, std::string_view format_str);
30
// Creates a FlatBuffers vector of KeyValue pairs from ArrowSchema metadata.
// Takes the builder by non-const reference and the schema by const reference;
// returns the offset of the KeyValue vector.
47 [[nodiscard]] flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<org::apache::arrow::flatbuf::KeyValue>>>
48 create_metadata(flatbuffers::FlatBufferBuilder& builder, const ArrowSchema& arrow_schema);
49
// Creates a FlatBuffer Field object from an ArrowSchema.
// `name_override` defaults to std::nullopt; presumably, when set, it replaces
// the name taken from `arrow_schema` - confirm against the definition.
71 [[nodiscard]] ::flatbuffers::Offset<org::apache::arrow::flatbuf::Field> create_field(
72 flatbuffers::FlatBufferBuilder& builder,
73 const ArrowSchema& arrow_schema,
74 std::optional<std::string_view> name_override = std::nullopt
75 );
76
// Creates a FlatBuffers vector of Field objects from a record batch.
// Returns the offset of the Field vector.
97 [[nodiscard]] ::flatbuffers::Offset<
98 ::flatbuffers::Vector<::flatbuffers::Offset<org::apache::arrow::flatbuf::Field>>>
99 create_children(flatbuffers::FlatBufferBuilder& builder, const sparrow::record_batch& record_batch);
100
101
// Overload of create_children taking an ArrowSchema instead of a record batch.
// Returns the offset of a vector of Field objects.
// NOTE(review): presumably one Field per child of `arrow_schema` - confirm
// against the definition, which is not visible here.
119 [[nodiscard]] ::flatbuffers::Offset<
120 ::flatbuffers::Vector<::flatbuffers::Offset<org::apache::arrow::flatbuf::Field>>>
121 create_children(flatbuffers::FlatBufferBuilder& builder, const ArrowSchema& arrow_schema);
122
// Creates a FlatBuffer builder containing a serialized Arrow schema message.
// The builder is returned by value.
139 [[nodiscard]] flatbuffers::FlatBufferBuilder
140 get_schema_message_builder(const sparrow::record_batch& record_batch);
141
// Recursively fills a vector of FieldNode objects from an arrow_proxy and its children.
// `nodes` is an in/out parameter: entries are added to the caller's vector.
158 void fill_fieldnodes(
159 const sparrow::arrow_proxy& arrow_proxy,
160 std::vector<org::apache::arrow::flatbuf::FieldNode>& nodes
161 );
162
// Creates a vector of Apache Arrow FieldNode objects from a record batch.
// The vector is returned by value.
174 [[nodiscard]] std::vector<org::apache::arrow::flatbuf::FieldNode>
175 create_fieldnodes(const sparrow::record_batch& record_batch);
176
177 namespace details
178 {
// Returns how many of an array's buffers should be processed, given its Arrow
// format string and its original buffer count. fill_buffers_impl uses the
// result as the count passed to std::views::take over the proxy's buffers.
// NOTE(review): the rule that maps a format to a count lives in the
// definition, which is not visible here.
179 std::size_t get_nb_buffers_to_process(const std::string_view& format, const std::size_t orig_buffers_size);
180
// Appends one flatbuf Buffer entry (offset, size) per processed buffer of
// `arrow_proxy`, then recurses into each of its children.
//
// - flatbuf_buffers: in/out vector receiving the entries.
// - offset: in/out running offset; advanced by the 8-byte-aligned size of
//   every buffer recorded, so it is shared across the whole recursion.
// - get_buffer_size: callable invoked with each buffer; its result is stored
//   as the entry's size.
//
// NOTE(review): std::ranges::for_each is declared in <algorithm>, which is
// not among the includes visible in this listing - confirm it is reachable.
181 template <typename Func>
182 void fill_buffers_impl(
183 const sparrow::arrow_proxy& arrow_proxy,
184 std::vector<org::apache::arrow::flatbuf::Buffer>& flatbuf_buffers,
185 int64_t& offset,
186 Func&& get_buffer_size
187 )
188 {
189 const auto& buffers = arrow_proxy.buffers();
// Only the first nb_buffers buffers are recorded; the count depends on the
// schema's format string.
190 auto nb_buffers = get_nb_buffers_to_process(arrow_proxy.schema().format, buffers.size());
191 std::ranges::for_each(buffers | std::views::take(nb_buffers),
192 [&](const auto& buffer)
193 {
194 int64_t size = get_buffer_size(buffer);
// The entry keeps the unaligned size; only the running offset is padded.
195 flatbuf_buffers.emplace_back(offset, size);
196 offset += utils::align_to_8(size);
197 });
198
// Children are visited after the parent's own buffers, reusing the same
// vector, offset and callable.
199 for (const auto& child : arrow_proxy.children())
200 {
201 fill_buffers_impl(child, flatbuf_buffers, offset, get_buffer_size);
202 }
203 }
204
// Builds the list of flatbuf Buffer entries for every column of a record batch.
// `fill_buffers_func` is called once per column as
// fill_buffers_func(arrow_proxy, buffers, offset) and is expected to append
// to `buffers` and advance `offset`.
205 template <typename Func>
206 std::vector<org::apache::arrow::flatbuf::Buffer>
207 get_buffers_impl(const sparrow::record_batch& record_batch, Func&& fill_buffers_func)
208 {
209 std::vector<org::apache::arrow::flatbuf::Buffer> buffers;
// One running offset, starting at 0, is shared by all columns.
210 int64_t offset = 0;
211 for (const auto& column : record_batch.columns())
212 {
213 const auto& arrow_proxy = sparrow::detail::array_access::get_arrow_proxy(column);
214 fill_buffers_func(arrow_proxy, buffers, offset);
215 }
216 return buffers;
217 }
218 } // namespace details
219
// Recursively fills a vector of FlatBuffer Buffer objects with buffer information from an Arrow proxy.
// `flatbuf_buffers` and `offset` are in/out parameters taken by non-const
// reference.
237 void fill_buffers(
238 const sparrow::arrow_proxy& arrow_proxy,
239 std::vector<org::apache::arrow::flatbuf::Buffer>& flatbuf_buffers,
240 int64_t& offset
241 );
242
// Extracts buffer information from a record batch for serialization.
// The Buffer entries are returned by value.
257 [[nodiscard]] std::vector<org::apache::arrow::flatbuf::Buffer>
258 get_buffers(const sparrow::record_batch& record_batch);
259
// Recursively populates a vector with compressed buffer metadata from an Arrow proxy.
// `flatbuf_compressed_buffers`, `offset` and `cache` are taken by non-const
// reference; how `cache` is used is not visible from this declaration.
273 void fill_compressed_buffers(
274 const sparrow::arrow_proxy& arrow_proxy,
275 std::vector<org::apache::arrow::flatbuf::Buffer>& flatbuf_compressed_buffers,
276 int64_t& offset,
277 const CompressionType compression_type,
278 CompressionCache& cache
279 );
280
// Retrieves metadata describing the layout of compressed buffers within a record batch.
// `cache` is taken by non-const reference; the Buffer entries are returned by
// value.
295 [[nodiscard]] std::vector<org::apache::arrow::flatbuf::Buffer> get_compressed_buffers(
296 const sparrow::record_batch& record_batch,
297 const CompressionType compression_type,
298 CompressionCache& cache
299 );
300
// Calculates the total aligned size in bytes of all buffers in an Arrow array structure.
// `compression` and `cache` both default to std::nullopt.
// NOTE(review): presumably compressed sizes are used when `compression` is
// set - confirm against the definition.
315 [[nodiscard]] int64_t calculate_body_size(
316 const sparrow::arrow_proxy& arrow_proxy,
317 std::optional<CompressionType> compression = std::nullopt,
318 std::optional<std::reference_wrapper<CompressionCache>> cache = std::nullopt
319 );
320
// Overload of calculate_body_size taking a whole record batch.
// `compression` and `cache` both default to std::nullopt.
// NOTE(review): presumably the total over all columns of `record_batch` -
// confirm against the definition, which is not visible here.
335 [[nodiscard]] int64_t calculate_body_size(
336 const sparrow::record_batch& record_batch,
337 std::optional<CompressionType> compression = std::nullopt,
338 std::optional<std::reference_wrapper<CompressionCache>> cache = std::nullopt
339 );
340
// Creates a FlatBuffer message containing a serialized Apache Arrow RecordBatch.
// `compression` and `cache` both default to std::nullopt; the builder is
// returned by value.
360 [[nodiscard]] flatbuffers::FlatBufferBuilder get_record_batch_message_builder(
361 const sparrow::record_batch& record_batch,
362 std::optional<CompressionType> compression = std::nullopt,
363 std::optional<std::reference_wrapper<CompressionCache>> cache = std::nullopt
364 );
365
366 // Helper function to extract and parse the footer from Arrow IPC file data.
// Takes a read-only byte span and returns a pointer to a const Footer.
// NOTE(review): the returned pointer presumably refers into `file_data`, so
// that memory would have to outlive it - confirm against the definition.
367 [[nodiscard]] SPARROW_IPC_API const org::apache::arrow::flatbuf::Footer* get_footer_from_file_data(std::span<const uint8_t> file_data);
368}
#define SPARROW_IPC_API
Definition config.hpp:12
std::vector< org::apache::arrow::flatbuf::Buffer > get_buffers_impl(const sparrow::record_batch &record_batch, Func &&fill_buffers_func)
void fill_buffers_impl(const sparrow::arrow_proxy &arrow_proxy, std::vector< org::apache::arrow::flatbuf::Buffer > &flatbuf_buffers, int64_t &offset, Func &&get_buffer_size)
std::size_t get_nb_buffers_to_process(const std::string_view &format, const std::size_t orig_buffers_size)
constexpr size_t align_to_8(const size_t n)
Definition utils.hpp:14
flatbuffers::Offset< flatbuffers::Vector< flatbuffers::Offset< org::apache::arrow::flatbuf::KeyValue > > > create_metadata(flatbuffers::FlatBufferBuilder &builder, const ArrowSchema &arrow_schema)
Creates a FlatBuffers vector of KeyValue pairs from ArrowSchema metadata.
::flatbuffers::Offset< org::apache::arrow::flatbuf::Field > create_field(flatbuffers::FlatBufferBuilder &builder, const ArrowSchema &arrow_schema, std::optional< std::string_view > name_override=std::nullopt)
Creates a FlatBuffer Field object from an ArrowSchema.
std::vector< org::apache::arrow::flatbuf::Buffer > get_buffers(const sparrow::record_batch &record_batch)
Extracts buffer information from a record batch for serialization.
void fill_fieldnodes(const sparrow::arrow_proxy &arrow_proxy, std::vector< org::apache::arrow::flatbuf::FieldNode > &nodes)
Recursively fills a vector of FieldNode objects from an arrow_proxy and its children.
flatbuffers::FlatBufferBuilder get_record_batch_message_builder(const sparrow::record_batch &record_batch, std::optional< CompressionType > compression=std::nullopt, std::optional< std::reference_wrapper< CompressionCache > > cache=std::nullopt)
Creates a FlatBuffer message containing a serialized Apache Arrow RecordBatch.
int64_t calculate_body_size(const sparrow::arrow_proxy &arrow_proxy, std::optional< CompressionType > compression=std::nullopt, std::optional< std::reference_wrapper< CompressionCache > > cache=std::nullopt)
Calculates the total aligned size in bytes of all buffers in an Arrow array structure.
void fill_compressed_buffers(const sparrow::arrow_proxy &arrow_proxy, std::vector< org::apache::arrow::flatbuf::Buffer > &flatbuf_compressed_buffers, int64_t &offset, const CompressionType compression_type, CompressionCache &cache)
Recursively populates a vector with compressed buffer metadata from an Arrow proxy.
SPARROW_IPC_API const org::apache::arrow::flatbuf::Footer * get_footer_from_file_data(std::span< const uint8_t > file_data)
std::vector< org::apache::arrow::flatbuf::Buffer > get_compressed_buffers(const sparrow::record_batch &record_batch, const CompressionType compression_type, CompressionCache &cache)
Retrieves metadata describing the layout of compressed buffers within a record batch.
std::pair< org::apache::arrow::flatbuf::Type, flatbuffers::Offset< void > > get_flatbuffer_decimal_type(flatbuffers::FlatBufferBuilder &builder, std::string_view format_str, const int32_t bitWidth)
flatbuffers::FlatBufferBuilder get_schema_message_builder(const sparrow::record_batch &record_batch)
Creates a FlatBuffer builder containing a serialized Arrow schema message.
void fill_buffers(const sparrow::arrow_proxy &arrow_proxy, std::vector< org::apache::arrow::flatbuf::Buffer > &flatbuf_buffers, int64_t &offset)
Recursively fills a vector of FlatBuffer Buffer objects with buffer information from an Arrow proxy.
std::pair< org::apache::arrow::flatbuf::Type, flatbuffers::Offset< void > > get_flatbuffer_type(flatbuffers::FlatBufferBuilder &builder, std::string_view format_str)
std::vector< org::apache::arrow::flatbuf::FieldNode > create_fieldnodes(const sparrow::record_batch &record_batch)
Creates a vector of Apache Arrow FieldNode objects from a record batch.
::flatbuffers::Offset< ::flatbuffers::Vector<::flatbuffers::Offset< org::apache::arrow::flatbuf::Field > > > create_children(flatbuffers::FlatBufferBuilder &builder, const sparrow::record_batch &record_batch)
Creates a FlatBuffers vector of Field objects from a record batch.