5#include <flatbuffers/flatbuffers.h>
7#include <sparrow/c_interface.hpp>
8#include <sparrow/record_batch.hpp>
10#include "File_generated.h"
11#include "Message_generated.h"
20 [[nodiscard]] std::pair<org::apache::arrow::flatbuf::Type, flatbuffers::Offset<void>>
22 flatbuffers::FlatBufferBuilder& builder,
23 std::string_view format_str,
24 const int32_t bitWidth
29 [[nodiscard]] std::pair<org::apache::arrow::flatbuf::Type, flatbuffers::Offset<void>>
31 flatbuffers::FlatBufferBuilder& builder,
32 std::string_view format_str,
33 const std::optional<std::unordered_set<sparrow::ArrowFlag>>& flags = std::nullopt
/**
 * @brief Creates a FlatBuffers vector of KeyValue pairs from ArrowSchema metadata.
 *
 * @param builder FlatBufferBuilder, taken by mutable reference. Presumably the
 *        vector is written into this builder and the returned offset is only
 *        meaningful relative to it -- TODO confirm against the definition.
 * @param arrow_schema Schema whose metadata is the source of the key/value
 *        pairs; taken by const reference.
 * @return Offset of a FlatBuffers vector of KeyValue offsets. The declaration is
 *         [[nodiscard]], so callers are expected to use the result.
 *
 * NOTE(review): the leading numerals on the lines below look like residue from a
 * rendered source listing rather than C++ tokens -- confirm before compiling.
 */
52 [[nodiscard]] flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<org::apache::arrow::flatbuf::KeyValue>>>
53 create_metadata(flatbuffers::FlatBufferBuilder& builder,
const ArrowSchema& arrow_schema);
76 [[nodiscard]] ::flatbuffers::Offset<org::apache::arrow::flatbuf::Field>
create_field(
77 flatbuffers::FlatBufferBuilder& builder,
78 const ArrowSchema& arrow_schema,
79 std::optional<std::string_view> name_override = std::nullopt,
80 std::optional<int64_t> dictionary_id_override = std::nullopt
/**
 * @brief Creates a FlatBuffers vector of Field objects from a record batch.
 *
 * @param builder FlatBufferBuilder, taken by mutable reference. Presumably the
 *        Field objects are written into this builder -- TODO confirm against the
 *        definition.
 * @param record_batch Record batch the Field objects are derived from; taken by
 *        const reference. Presumably one Field per column -- verify.
 * @return Offset of a FlatBuffers vector of Field offsets. The declaration is
 *         [[nodiscard]], so callers are expected to use the result.
 *
 * NOTE(review): the leading numerals on the lines below look like residue from a
 * rendered source listing rather than C++ tokens -- confirm before compiling.
 */
103 [[nodiscard]] ::flatbuffers::Offset<
104 ::flatbuffers::Vector<::flatbuffers::Offset<org::apache::arrow::flatbuf::Field>>>
105 create_children(flatbuffers::FlatBufferBuilder& builder,
const sparrow::record_batch& record_batch);
/**
 * @brief Overload of create_children that takes an ArrowSchema instead of a
 *        record batch.
 *
 * Presumably builds the Field vector from the children of @p arrow_schema --
 * TODO confirm against the definition; only the declaration is visible here.
 *
 * @param builder FlatBufferBuilder, taken by mutable reference.
 * @param arrow_schema Schema the Field objects are derived from; taken by const
 *        reference.
 * @return Offset of a FlatBuffers vector of Field offsets. The declaration is
 *         [[nodiscard]], so callers are expected to use the result.
 *
 * NOTE(review): the leading numerals on the lines below look like residue from a
 * rendered source listing rather than C++ tokens -- confirm before compiling.
 */
125 [[nodiscard]] ::flatbuffers::Offset<
126 ::flatbuffers::Vector<::flatbuffers::Offset<org::apache::arrow::flatbuf::Field>>>
127 create_children(flatbuffers::FlatBufferBuilder& builder,
const ArrowSchema& arrow_schema);
145 [[nodiscard]] flatbuffers::FlatBufferBuilder
165 const sparrow::arrow_proxy& arrow_proxy,
166 std::vector<org::apache::arrow::flatbuf::FieldNode>& nodes
180 [[nodiscard]] std::vector<org::apache::arrow::flatbuf::FieldNode>
187 template <
typename Func>
189 const sparrow::arrow_proxy& arrow_proxy,
190 std::vector<org::apache::arrow::flatbuf::Buffer>& flatbuf_buffers,
192 Func&& get_buffer_size
195 const auto& buffers = arrow_proxy.buffers();
197 std::ranges::for_each(buffers | std::views::take(nb_buffers),
198 [&](
const auto& buffer)
200 int64_t size = get_buffer_size(buffer);
201 flatbuf_buffers.emplace_back(offset, size);
205 for (
const auto& child : arrow_proxy.children())
211 template <
typename Func>
212 std::vector<org::apache::arrow::flatbuf::Buffer>
215 std::vector<org::apache::arrow::flatbuf::Buffer> buffers;
217 for (
const auto& column : record_batch.columns())
219 const auto& arrow_proxy = sparrow::detail::array_access::get_arrow_proxy(column);
220 fill_buffers_func(arrow_proxy, buffers, offset);
244 const sparrow::arrow_proxy& arrow_proxy,
245 std::vector<org::apache::arrow::flatbuf::Buffer>& flatbuf_buffers,
263 [[nodiscard]] std::vector<org::apache::arrow::flatbuf::Buffer>
280 const sparrow::arrow_proxy& arrow_proxy,
281 std::vector<org::apache::arrow::flatbuf::Buffer>& flatbuf_compressed_buffers,
302 const sparrow::record_batch& record_batch,
322 const sparrow::arrow_proxy& arrow_proxy,
323 std::optional<CompressionType> compression = std::nullopt,
324 std::optional<std::reference_wrapper<CompressionCache>> cache = std::nullopt
342 const sparrow::record_batch& record_batch,
343 std::optional<CompressionType> compression = std::nullopt,
344 std::optional<std::reference_wrapper<CompressionCache>> cache = std::nullopt
367 const sparrow::record_batch& record_batch,
368 std::optional<CompressionType> compression = std::nullopt,
369 std::optional<std::reference_wrapper<CompressionCache>> cache = std::nullopt
398 int64_t dictionary_id,
399 const sparrow::record_batch& record_batch,
400 bool is_delta =
false,
401 std::optional<CompressionType> compression = std::nullopt,
402 std::optional<std::reference_wrapper<CompressionCache>> cache = std::nullopt
std::vector< org::apache::arrow::flatbuf::Buffer > get_buffers_impl(const sparrow::record_batch &record_batch, Func &&fill_buffers_func)
void fill_buffers_impl(const sparrow::arrow_proxy &arrow_proxy, std::vector< org::apache::arrow::flatbuf::Buffer > &flatbuf_buffers, int64_t &offset, Func &&get_buffer_size)
std::size_t get_nb_buffers_to_process(const std::string_view &format, const std::size_t orig_buffers_size)
constexpr size_t align_to_8(const size_t n)
flatbuffers::Offset< flatbuffers::Vector< flatbuffers::Offset< org::apache::arrow::flatbuf::KeyValue > > > create_metadata(flatbuffers::FlatBufferBuilder &builder, const ArrowSchema &arrow_schema)
Creates a FlatBuffers vector of KeyValue pairs from ArrowSchema metadata.
std::pair< org::apache::arrow::flatbuf::Type, flatbuffers::Offset< void > > get_flatbuffer_type(flatbuffers::FlatBufferBuilder &builder, std::string_view format_str, const std::optional< std::unordered_set< sparrow::ArrowFlag > > &flags=std::nullopt)
flatbuffers::FlatBufferBuilder get_dictionary_batch_message_builder(int64_t dictionary_id, const sparrow::record_batch &record_batch, bool is_delta=false, std::optional< CompressionType > compression=std::nullopt, std::optional< std::reference_wrapper< CompressionCache > > cache=std::nullopt)
Creates a FlatBuffer message for a dictionary batch.
std::vector< org::apache::arrow::flatbuf::Buffer > get_buffers(const sparrow::record_batch &record_batch)
Extracts buffer information from a record batch for serialization.
void fill_fieldnodes(const sparrow::arrow_proxy &arrow_proxy, std::vector< org::apache::arrow::flatbuf::FieldNode > &nodes)
Recursively fills a vector of FieldNode objects from an arrow_proxy and its children.
flatbuffers::FlatBufferBuilder get_record_batch_message_builder(const sparrow::record_batch &record_batch, std::optional< CompressionType > compression=std::nullopt, std::optional< std::reference_wrapper< CompressionCache > > cache=std::nullopt)
Creates a FlatBuffer message containing a serialized Apache Arrow RecordBatch.
int64_t calculate_body_size(const sparrow::arrow_proxy &arrow_proxy, std::optional< CompressionType > compression=std::nullopt, std::optional< std::reference_wrapper< CompressionCache > > cache=std::nullopt)
Calculates the total aligned size in bytes of all buffers in an Arrow array structure.
void fill_compressed_buffers(const sparrow::arrow_proxy &arrow_proxy, std::vector< org::apache::arrow::flatbuf::Buffer > &flatbuf_compressed_buffers, int64_t &offset, const CompressionType compression_type, CompressionCache &cache)
Recursively populates a vector with compressed buffer metadata from an Arrow proxy.
SPARROW_IPC_API const org::apache::arrow::flatbuf::Footer * get_footer_from_file_data(std::span< const uint8_t > file_data)
std::vector< org::apache::arrow::flatbuf::Buffer > get_compressed_buffers(const sparrow::record_batch &record_batch, const CompressionType compression_type, CompressionCache &cache)
Retrieves metadata describing the layout of compressed buffers within a record batch.
std::pair< org::apache::arrow::flatbuf::Type, flatbuffers::Offset< void > > get_flatbuffer_decimal_type(flatbuffers::FlatBufferBuilder &builder, std::string_view format_str, const int32_t bitWidth)
flatbuffers::FlatBufferBuilder get_schema_message_builder(const sparrow::record_batch &record_batch)
Creates a FlatBuffer builder containing a serialized Arrow schema message.
void fill_buffers(const sparrow::arrow_proxy &arrow_proxy, std::vector< org::apache::arrow::flatbuf::Buffer > &flatbuf_buffers, int64_t &offset)
Recursively fills a vector of FlatBuffer Buffer objects with buffer information from an Arrow proxy.
std::vector< org::apache::arrow::flatbuf::FieldNode > create_fieldnodes(const sparrow::record_batch &record_batch)
Creates a vector of Apache Arrow FieldNode objects from a record batch.
::flatbuffers::Offset< ::flatbuffers::Vector<::flatbuffers::Offset< org::apache::arrow::flatbuf::Field > > > create_children(flatbuffers::FlatBufferBuilder &builder, const sparrow::record_batch &record_batch)
Creates a FlatBuffers vector of Field objects from a record batch.
::flatbuffers::Offset< org::apache::arrow::flatbuf::Field > create_field(flatbuffers::FlatBufferBuilder &builder, const ArrowSchema &arrow_schema, std::optional< std::string_view > name_override=std::nullopt, std::optional< int64_t > dictionary_id_override=std::nullopt)
Creates a FlatBuffer Field object from an ArrowSchema.