sparrow-ipc 0.3.0
Loading...
Searching...
No Matches
serialize_utils.hpp
Go to the documentation of this file.
#pragma once

#include <concepts>
#include <cstddef>
#include <cstdint>
#include <functional>
#include <optional>
#include <ranges>
#include <stdexcept>
#include <vector>

#include <sparrow/record_batch.hpp>

// NOTE(review): the headers declaring any_output_stream, CompressionType, CompressionCache
// and SPARROW_IPC_API are not visible in this listing - confirm they are included
// (directly or through utils.hpp).
#include "sparrow_ipc/utils.hpp"

13namespace sparrow_ipc
14{
26 serialize_schema_message(const sparrow::record_batch& record_batch, any_output_stream& stream);
27
41 [[nodiscard]] SPARROW_IPC_API std::size_t
42 calculate_schema_message_size(const sparrow::record_batch& record_batch);
43
61 [[nodiscard]] SPARROW_IPC_API std::size_t
62 calculate_record_batch_message_size(const sparrow::record_batch& record_batch,
63 std::optional<CompressionType> compression = std::nullopt,
64 std::optional<std::reference_wrapper<CompressionCache>> cache = std::nullopt);
65
85 [[nodiscard]] SPARROW_IPC_API std::size_t
87 int64_t dictionary_id,
88 const sparrow::record_batch& record_batch,
89 bool is_delta,
90 std::optional<CompressionType> compression = std::nullopt,
91 std::optional<std::reference_wrapper<CompressionCache>> cache = std::nullopt
92 );
93
108 template <std::ranges::input_range R>
109 requires std::same_as<std::ranges::range_value_t<R>, sparrow::record_batch>
110 [[nodiscard]] std::size_t calculate_total_serialized_size(const R& record_batches,
111 std::optional<CompressionType> compression = std::nullopt,
112 std::optional<std::reference_wrapper<CompressionCache>> cache = std::nullopt)
113 {
114 if (record_batches.empty())
115 {
116 return 0;
117 }
118
119 if (!utils::check_record_batches_consistency(record_batches))
120 {
121 throw std::invalid_argument("Record batches have inconsistent schemas");
122 }
123
124 // Calculate schema message size (only once)
125 auto it = std::ranges::begin(record_batches);
126 std::size_t total_size = calculate_schema_message_size(*it);
127
128 // Calculate record batch message sizes
129 for (const auto& record_batch : record_batches)
130 {
131 total_size += calculate_record_batch_message_size(record_batch, compression, cache);
132 }
133
134 return total_size;
135 }
136
156 SPARROW_IPC_API void fill_body(const sparrow::arrow_proxy& arrow_proxy, any_output_stream& stream,
157 std::optional<CompressionType> compression = std::nullopt,
158 std::optional<std::reference_wrapper<CompressionCache>> cache = std::nullopt);
159
173 SPARROW_IPC_API void generate_body(const sparrow::record_batch& record_batch, any_output_stream& stream,
174 std::optional<CompressionType> compression = std::nullopt,
175 std::optional<std::reference_wrapper<CompressionCache>> cache = std::nullopt);
176
177 SPARROW_IPC_API std::vector<sparrow::data_type> get_column_dtypes(const sparrow::record_batch& rb);
178}
sparrow_ipc::any_output_stream
Type-erased wrapper for any stream-like object.
#define SPARROW_IPC_API
Definition config.hpp:12
bool check_record_batches_consistency(const R &record_batches)
Checks if all record batches in a collection have consistent structure.
Definition utils.hpp:116
SPARROW_IPC_API void generate_body(const sparrow::record_batch &record_batch, any_output_stream &stream, std::optional< CompressionType > compression=std::nullopt, std::optional< std::reference_wrapper< CompressionCache > > cache=std::nullopt)
Generates a serialized body from a record batch.
SPARROW_IPC_API void fill_body(const sparrow::arrow_proxy &arrow_proxy, any_output_stream &stream, std::optional< CompressionType > compression=std::nullopt, std::optional< std::reference_wrapper< CompressionCache > > cache=std::nullopt)
Fills the body vector with serialized data from an arrow proxy and its children.
SPARROW_IPC_API void serialize_schema_message(const sparrow::record_batch &record_batch, any_output_stream &stream)
Serializes a schema message for a record batch into a byte buffer.
SPARROW_IPC_API std::size_t calculate_dictionary_batch_message_size(int64_t dictionary_id, const sparrow::record_batch &record_batch, bool is_delta, std::optional< CompressionType > compression=std::nullopt, std::optional< std::reference_wrapper< CompressionCache > > cache=std::nullopt)
Calculates the total serialized size of a dictionary batch message.
SPARROW_IPC_API std::size_t calculate_record_batch_message_size(const sparrow::record_batch &record_batch, std::optional< CompressionType > compression=std::nullopt, std::optional< std::reference_wrapper< CompressionCache > > cache=std::nullopt)
Calculates the total serialized size of a record batch message.
std::size_t calculate_total_serialized_size(const R &record_batches, std::optional< CompressionType > compression=std::nullopt, std::optional< std::reference_wrapper< CompressionCache > > cache=std::nullopt)
Calculates the total serialized size for a collection of record batches.
SPARROW_IPC_API std::size_t calculate_schema_message_size(const sparrow::record_batch &record_batch)
Calculates the total serialized size of a schema message.
SPARROW_IPC_API std::vector< sparrow::data_type > get_column_dtypes(const sparrow::record_batch &rb)