sparrow-ipc 0.3.0
Loading...
Searching...
No Matches
dictionary_tracker.hpp
Go to the documentation of this file.
1#pragma once
2
3#include <cstdint>
4#include <set>
5#include <string>
6#include <unordered_map>
7#include <vector>
8
9#include <sparrow/record_batch.hpp>
10
12
13namespace sparrow_ipc
14{
18 struct dictionary_info
19 {
20 int64_t id;
21 sparrow::record_batch data;
22 bool is_ordered;
23 bool is_delta;
24 };
25
37 class SPARROW_IPC_API dictionary_tracker
38 {
39 public:
50 [[nodiscard]] std::vector<dictionary_info> extract_dictionaries_from_batch(const sparrow::record_batch& batch);
51
61 void mark_emitted(int64_t id) noexcept;
62
69 [[nodiscard]] bool is_emitted(int64_t id) const noexcept;
70
76 void reset() noexcept;
77
78 private:
79 std::set<int64_t> m_emitted_dict_ids;
80 std::unordered_map<int64_t, std::string> m_dictionary_id_origins;
81 std::unordered_map<int64_t, std::size_t> m_emitted_dict_sizes;
82 std::unordered_map<int64_t, std::size_t> m_pending_dict_sizes;
83 };
84}
Tracks dictionaries during serialization.
void mark_emitted(int64_t id) noexcept
Mark a dictionary as emitted.
std::vector< dictionary_info > extract_dictionaries_from_batch(const sparrow::record_batch &batch)
Extract dictionaries from a record batch.
bool is_emitted(int64_t id) const noexcept
Check if a dictionary has been emitted.
void reset() noexcept
Reset tracking state.
#define SPARROW_IPC_API
Definition config.hpp:12
Information about a dictionary used for encoding.
bool is_delta
Whether this is a delta update.
sparrow::record_batch data
Dictionary values as a single-column record batch.
int64_t id
Dictionary identifier.
bool is_ordered
Whether dictionary values are ordered.