sparrow 2.4.0
C++20 idiomatic APIs for the Apache Arrow Columnar Format
Loading...
Searching...
No Matches
list_array.hpp
Go to the documentation of this file.
1// Copyright 2024 Man Group Operations Limited
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#pragma once
16
17#include <limits>
18#include <ranges>
19#include <stdexcept>
20#include <string> // for std::stoull
21#include <type_traits>
22#include <utility>
23#include <vector>
24
25#include "sparrow/array_api.hpp"
38
39namespace sparrow
40{
41 template <class DERIVED>
43
44 template <bool BIG>
45 class list_array_impl;
46
47 template <bool BIG>
49
50 namespace copy_tracker
51 {
52 template <typename T>
53 requires std::same_as<T, list_array_impl<false>> || std::same_as<T, list_array_impl<true>>
54 std::string key()
55 {
56 return "list_array";
57 }
58
59 template <typename T>
60 requires std::same_as<T, list_view_array_impl<false>> || std::same_as<T, list_view_array_impl<true>>
61 std::string key()
62 {
63 return "list_view_array";
64 }
65 }
66
89
104
106
107 namespace copy_tracker
108 {
109 template <>
110 inline std::string key<fixed_sized_list_array>()
111 {
112 return "fixed_sized_list_array";
113 }
114 }
115
116
120 template <class T>
121 constexpr bool is_list_array_v = std::same_as<T, list_array>;
122
126 template <class T>
127 constexpr bool is_big_list_array_v = std::same_as<T, big_list_array>;
128
132 template <class T>
133 constexpr bool is_list_view_array_v = std::same_as<T, list_view_array>;
134
138 template <class T>
139 constexpr bool is_big_list_view_array_v = std::same_as<T, big_list_view_array>;
140
144 template <class T>
145 constexpr bool is_fixed_sized_list_array_v = std::same_as<T, fixed_sized_list_array>;
146
147 namespace detail
148 {
149 template <bool BIG>
151 {
152 [[nodiscard]] static constexpr sparrow::data_type get()
153 {
155 }
156 };
157
158 template <bool BIG>
160 {
161 [[nodiscard]] static constexpr sparrow::data_type get()
162 {
164 }
165 };
166
167 template <>
169 {
170 [[nodiscard]] static constexpr sparrow::data_type get()
171 {
173 }
174 };
175
176 // Helper to build arrow schema for list arrays
177 template <input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
179 std::string format,
180 ArrowSchema&& flat_schema,
181 std::optional<std::string_view> name,
182 std::optional<METADATA_RANGE> metadata,
183 bool nullable
184 )
185 {
187 std::optional<std::unordered_set<ArrowFlag>>
188 flags = nullable ? std::optional<std::unordered_set<ArrowFlag>>{{ArrowFlag::NULLABLE}}
189 : std::nullopt;
190
191 ArrowSchema** child_schemas = new ArrowSchema*[1];
192 child_schemas[0] = new ArrowSchema(std::move(flat_schema));
193
194 return make_arrow_schema(
195 std::move(format),
196 name,
197 metadata,
198 flags,
199 child_schemas,
201 nullptr, // dictionary
202 true // dictionary ownership
203 );
204 }
205
206 // Helper to build arrow array for list arrays
208 std::int64_t size,
209 std::int64_t null_count,
210 std::vector<buffer<std::uint8_t>>&& arr_buffs,
211 ArrowArray&& flat_arr
212 )
213 {
215
216 ArrowArray** child_arrays = new ArrowArray*[1];
217 child_arrays[0] = new ArrowArray(std::move(flat_arr));
218 return make_arrow_array(
219 size,
220 null_count,
221 0, // offset
222 std::move(arr_buffs),
223 child_arrays,
225 nullptr, // dictionary
226 true // dictionary ownership
227 );
228 }
229 }
230
231 template <bool BIG>
244
245 template <bool BIG>
258
259 template <>
272
290 template <class DERIVED>
292 {
293 public:
294
298 using value_iterator = typename inner_types::value_iterator;
299 using const_value_iterator = typename inner_types::const_value_iterator;
301
303 using bitmap_reference = typename base_type::bitmap_reference;
305
307
309 using inner_reference = typename inner_types::inner_reference;
310 using inner_const_reference = typename inner_types::inner_const_reference;
311
316
324 [[nodiscard]] constexpr const array* raw_flat_array() const;
325
333 [[nodiscard]] constexpr array* raw_flat_array();
334
335 protected:
336
346
357
370
371 constexpr list_array_crtp_base(self_type&&) noexcept = default;
372 constexpr list_array_crtp_base& operator=(self_type&&) noexcept = default;
373
374 protected:
375
388 constexpr void throw_if_sliced_for_mutation(const char* operation) const;
389
390 [[nodiscard]] constexpr value_iterator value_begin();
391 [[nodiscard]] constexpr value_iterator value_end();
392 [[nodiscard]] constexpr const_value_iterator value_cbegin() const;
393 [[nodiscard]] constexpr const_value_iterator value_cend() const;
394
395 // Inserts `count` copies of value's flat elements into the flat array at flat_pos.
396 // No-op if value.size() == 0.
397 constexpr void insert_flat_elements(size_type flat_pos, const list_value& value, size_type count);
398
399 // Erases flat_count consecutive flat elements starting at flat_begin.
400 // No-op if flat_count == 0.
401 constexpr void erase_flat_elements(size_type flat_begin, size_type flat_count);
402
403 private:
404
405 using list_size_type = inner_types::list_size_type;
406
407 constexpr void resize_values(size_type new_length, const list_value& value);
408
409 template <std::input_iterator InputIt>
410 requires std::convertible_to<typename std::iterator_traits<InputIt>::value_type, list_value>
411 constexpr value_iterator insert_values(const_value_iterator pos, InputIt first, InputIt last);
412
413 [[nodiscard]] constexpr inner_reference value(size_type i);
414 [[nodiscard]] constexpr inner_const_reference value(size_type i) const;
415
416 [[nodiscard]] array make_flat_array();
417
418 // data members
419 array p_flat_array;
420
421 // friend classes
422 friend class array_crtp_base<DERIVED>;
423 friend class mutable_array_base<DERIVED>;
424 friend class list_reference<DERIVED>;
425
426 // needs access to this->value(i)
427 friend class detail::layout_value_functor<DERIVED, inner_reference>;
428 friend class detail::layout_value_functor<const DERIVED, inner_const_reference>;
429 };
430
431 template <bool BIG>
433 {
434 public:
435
439 using list_size_type = inner_types::list_size_type;
443 using offset_type = std::conditional_t<BIG, const std::int64_t, const std::int32_t>;
445
458
468 constexpr list_array_impl(const self_type&);
469
482
483 constexpr list_array_impl(self_type&&) noexcept = default;
484 constexpr list_array_impl& operator=(self_type&&) noexcept = default;
485
486 constexpr void slice_inplace(size_type start, size_type end);
487
499 template <class... ARGS>
500 requires(mpl::excludes_copy_and_move_ctor_v<list_array_impl<BIG>, ARGS...>)
501 explicit list_array_impl(ARGS&&... args)
502 : self_type(create_proxy(std::forward<ARGS>(args)...))
503 {
504 }
505
523 template <std::ranges::range SIZES_RANGE>
524 [[nodiscard]] static constexpr auto offset_from_sizes(SIZES_RANGE&& sizes) -> offset_buffer_type;
525
526 private:
527
547 template <
549 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
550 [[nodiscard]] static arrow_proxy create_proxy(
551 array&& flat_values,
552 offset_buffer_type&& list_offsets,
553 VB&& validity_input,
554 std::optional<std::string_view> name = std::nullopt,
555 std::optional<METADATA_RANGE> metadata = std::nullopt
556 );
557
576 template <
578 std::ranges::input_range OFFSET_BUFFER_RANGE,
579 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
580 requires std::convertible_to<std::ranges::range_value_t<OFFSET_BUFFER_RANGE>, offset_type>
581 [[nodiscard]] static arrow_proxy create_proxy(
582 array&& flat_values,
583 OFFSET_BUFFER_RANGE&& list_offsets_range,
584 VB&& validity_input,
585 std::optional<std::string_view> name = std::nullopt,
586 std::optional<METADATA_RANGE> metadata = std::nullopt
587 )
588 {
589 offset_buffer_type list_offsets{std::forward<OFFSET_BUFFER_RANGE>(list_offsets_range)};
590 return list_array_impl<BIG>::create_proxy(
591 std::move(flat_values),
592 std::move(list_offsets),
593 std::forward<VB>(validity_input),
594 std::forward<std::optional<std::string_view>>(name),
595 std::forward<std::optional<METADATA_RANGE>>(metadata)
596 );
597 }
598
599 template <
600 validity_bitmap_input VB = validity_bitmap,
601 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
602 [[nodiscard]] static arrow_proxy create_proxy(
603 array&& flat_values,
604 offset_buffer_type&& list_offsets,
605 bool nullable = true,
606 std::optional<std::string_view> name = std::nullopt,
607 std::optional<METADATA_RANGE> metadata = std::nullopt
608 );
609
610 template <
611 validity_bitmap_input VB = validity_bitmap,
612 std::ranges::input_range OFFSET_BUFFER_RANGE,
613 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
614 requires std::convertible_to<std::ranges::range_value_t<OFFSET_BUFFER_RANGE>, offset_type>
615 [[nodiscard]] static arrow_proxy create_proxy(
616 array&& flat_values,
617 OFFSET_BUFFER_RANGE&& list_offsets_range,
618 bool nullable = true,
619 std::optional<std::string_view> name = std::nullopt,
620 std::optional<METADATA_RANGE> metadata = std::nullopt
621 )
622 {
623 offset_buffer_type list_offsets{std::forward<OFFSET_BUFFER_RANGE>(list_offsets_range)};
624 return list_array_impl<BIG>::create_proxy(
625 std::move(flat_values),
626 std::move(list_offsets),
627 nullable,
628 std::forward<std::optional<std::string_view>>(name),
629 std::forward<std::optional<METADATA_RANGE>>(metadata)
630 );
631 }
632
633 static constexpr std::size_t OFFSET_BUFFER_INDEX = 1;
634 [[nodiscard]] constexpr std::pair<offset_type, offset_type> offset_range(size_type i) const;
635
636 [[nodiscard]] constexpr offset_type* make_list_offsets();
637
638 void replace_value(size_type index, const list_value& value);
639
640 constexpr value_iterator
641 insert_value(const_value_iterator pos, const list_value& value, size_type count);
642
643 constexpr value_iterator erase_values(const_value_iterator pos, size_type count);
644
645 offset_type* p_list_offsets;
646
647 // friend classes
648 friend class array_crtp_base<self_type>;
649 friend class mutable_array_base<self_type>;
650 friend class list_array_crtp_base<self_type>;
651 template <class L>
652 friend class list_reference;
653 };
654
655 template <bool BIG>
656 class list_view_array_impl final : public list_array_crtp_base<list_view_array_impl<BIG>>
657 {
658 public:
659
663 using list_size_type = inner_types::list_size_type;
667 using offset_type = std::conditional_t<BIG, const std::int64_t, const std::int32_t>;
670
683
694
707
708 constexpr list_view_array_impl(self_type&&) = default;
709 constexpr list_view_array_impl& operator=(self_type&&) = default;
710
711 constexpr void slice_inplace(size_type start, size_type end);
712
725 template <class... ARGS>
727 list_view_array_impl(ARGS&&... args)
728 : self_type(create_proxy(std::forward<ARGS>(args)...))
729 {
730 }
731
732 private:
733
757 template <
758 std::ranges::input_range OFFSET_BUFFER_RANGE,
759 std::ranges::input_range SIZE_RANGE,
761 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
762 requires(
763 std::convertible_to<std::ranges::range_value_t<OFFSET_BUFFER_RANGE>, offset_type>
764 && std::convertible_to<std::ranges::range_value_t<SIZE_RANGE>, list_size_type>
765 )
766 [[nodiscard]] static arrow_proxy create_proxy(
767 array&& flat_values,
768 OFFSET_BUFFER_RANGE&& list_offsets,
769 SIZE_RANGE&& list_sizes,
770 VB&& validity_input,
771 std::optional<std::string_view> name = std::nullopt,
772 std::optional<METADATA_RANGE> metadata = std::nullopt
773 )
774 {
775 return list_view_array_impl<BIG>::create_proxy(
776 std::move(flat_values),
777 offset_buffer_type(std::forward<OFFSET_BUFFER_RANGE>(list_offsets)),
778 size_buffer_type(std::forward<SIZE_RANGE>(list_sizes)),
779 std::forward<VB>(validity_input),
780 name,
781 metadata
782 );
783 }
784
785 template <
786 validity_bitmap_input VB = validity_bitmap,
787 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
788 [[nodiscard]] static arrow_proxy create_proxy(
789 array&& flat_values,
790 offset_buffer_type&& list_offsets,
791 size_buffer_type&& list_sizes,
792 VB&& validity_input,
793 std::optional<std::string_view> name = std::nullopt,
794 std::optional<METADATA_RANGE> metadata = std::nullopt
795 );
796
797 template <
798 std::ranges::input_range OFFSET_BUFFER_RANGE,
799 std::ranges::input_range SIZE_RANGE,
800 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
801 requires(
802 std::convertible_to<std::ranges::range_value_t<OFFSET_BUFFER_RANGE>, offset_type>
803 && std::convertible_to<std::ranges::range_value_t<SIZE_RANGE>, list_size_type>
804 )
805 [[nodiscard]] static arrow_proxy create_proxy(
806 array&& flat_values,
807 OFFSET_BUFFER_RANGE&& list_offsets,
808 SIZE_RANGE&& list_sizes,
809 bool nullable = true,
810 std::optional<std::string_view> name = std::nullopt,
811 std::optional<METADATA_RANGE> metadata = std::nullopt
812 )
813 {
814 return list_view_array_impl<BIG>::create_proxy(
815 std::move(flat_values),
816 offset_buffer_type(std::forward<OFFSET_BUFFER_RANGE>(list_offsets)),
817 size_buffer_type(std::forward<SIZE_RANGE>(list_sizes)),
818 nullable,
819 name,
820 metadata
821 );
822 }
823
824 template <input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
825 [[nodiscard]] static arrow_proxy create_proxy(
826 array&& flat_values,
827 offset_buffer_type&& list_offsets,
828 size_buffer_type&& list_sizes,
829 bool nullable = true,
830 std::optional<std::string_view> name = std::nullopt,
831 std::optional<METADATA_RANGE> metadata = std::nullopt
832 );
833
834 static constexpr std::size_t OFFSET_BUFFER_INDEX = 1;
835 static constexpr std::size_t SIZES_BUFFER_INDEX = 2;
836 [[nodiscard]] constexpr std::pair<offset_type, offset_type> offset_range(size_type i) const;
837
838 [[nodiscard]] constexpr offset_type* make_list_offsets() const;
839 [[nodiscard]] constexpr const list_size_type* make_list_sizes() const;
840
841 void replace_value(size_type index, const list_value& value);
842
843 constexpr value_iterator
844 insert_value(const_value_iterator pos, const list_value& value, size_type count);
845
846 template <std::input_iterator InputIt>
847 requires std::convertible_to<typename std::iterator_traits<InputIt>::value_type, list_value>
848 constexpr value_iterator insert_values(const_value_iterator pos, InputIt first, InputIt last);
849
850 constexpr value_iterator erase_values(const_value_iterator pos, size_type count);
851
852 offset_type* p_list_offsets;
853 const list_size_type* p_list_sizes;
854
855 // friend classes
856 friend class array_crtp_base<self_type>;
857 friend class mutable_array_base<self_type>;
858 friend class list_array_crtp_base<self_type>;
859 template <class L>
860 friend class list_reference;
861 };
862
863 class fixed_sized_list_array final : public list_array_crtp_base<fixed_sized_list_array>
864 {
865 public:
866
870 using list_size_type = inner_types::list_size_type;
874 using offset_type = std::uint64_t;
875
887 explicit fixed_sized_list_array(arrow_proxy proxy);
888
891
894
906 template <class... ARGS>
909 : self_type(create_proxy(std::forward<ARGS>(args)...))
910 {
911 }
912
913 private:
914
933 template <
935 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
936 [[nodiscard]] static arrow_proxy create_proxy(
937 std::uint64_t list_size,
938 array&& flat_values,
939 R&& validity_input,
940 std::optional<std::string_view> name = std::nullopt,
941 std::optional<METADATA_RANGE> metadata = std::nullopt
942 );
943
962 template <
964 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
965 [[nodiscard]] static arrow_proxy create_proxy(
966 std::uint64_t list_size,
967 array&& flat_values,
968 bool nullable = true,
969 std::optional<std::string_view> name = std::nullopt,
970 std::optional<METADATA_RANGE> metadata = std::nullopt
971 );
972
985 [[nodiscard]] static uint64_t list_size_from_format(const std::string_view format);
986
997 [[nodiscard]] constexpr std::pair<offset_type, offset_type> offset_range(size_type i) const;
998
999 [[nodiscard]] constexpr size_type flat_element_count(size_type list_count) const;
1000
1001 void replace_value(size_type index, const list_value& value);
1002
1003 value_iterator insert_value(const_value_iterator pos, const list_value& value, size_type count);
1004
1005 value_iterator erase_values(const_value_iterator pos, size_type count);
1006
1007 uint64_t m_list_size;
1008
1009 // friend classes
1010 friend class array_crtp_base<self_type>;
1011 friend class mutable_array_base<self_type>;
1012 friend class list_array_crtp_base<self_type>;
1013 template <class L>
1014 friend class list_reference;
1015 };
1016
1017 /***************************************
1018 * list_array_crtp_base implementation *
1019 ***************************************/
1020
1021 template <class DERIVED>
1023 : base_type(std::move(proxy))
1024 , p_flat_array(make_flat_array())
1025 {
1026 }
1027
1028 template <class DERIVED>
1030 : base_type(rhs)
1031 , p_flat_array(make_flat_array())
1032 {
1033 }
1034
1035 template <class DERIVED>
1037 {
1039 p_flat_array = make_flat_array();
1040 return *this;
1041 }
1042
1043 template <class DERIVED>
1045 {
1046 return &p_flat_array;
1047 }
1048
1049 template <class DERIVED>
1051 {
1052 return &p_flat_array;
1053 }
1054
1055 template <class DERIVED>
1056 constexpr void list_array_crtp_base<DERIVED>::throw_if_sliced_for_mutation(const char* operation) const
1057 {
1058 if (this->get_arrow_proxy().offset() != 0)
1059 {
1060 throw std::logic_error(std::string(operation) + " does not support sliced arrays (non-zero offset)");
1061 }
1062 }
1063
1064 template <class DERIVED>
1069
1070 template <class DERIVED>
1072 {
1073 return value_iterator(
1075 this->size()
1076 );
1077 }
1078
1079 template <class DERIVED>
1087
1088 template <class DERIVED>
1090 {
1091 return const_value_iterator(
1093 this->size()
1094 );
1095 }
1096
1097 template <class DERIVED>
1098 constexpr auto list_array_crtp_base<DERIVED>::value(size_type i) -> inner_reference
1099 {
1100 return inner_reference(&this->derived_cast(), i);
1101 }
1102
1103 template <class DERIVED>
1104 constexpr auto list_array_crtp_base<DERIVED>::value(size_type i) const -> inner_const_reference
1105 {
1106 const auto r = this->derived_cast().offset_range(i);
1107 using st = typename list_value::size_type;
1108 return list_value{&p_flat_array, static_cast<st>(r.first), static_cast<st>(r.second)};
1109 }
1110
1111 template <class DERIVED>
1112 array list_array_crtp_base<DERIVED>::make_flat_array()
1113 {
1114 auto& child_proxy = this->get_arrow_proxy().children()[0];
1115 return array{&child_proxy.array(), &child_proxy.schema()};
1116 }
1117
1118 template <class DERIVED>
1119 constexpr void
1121 {
1122 if (value.size() == 0)
1123 {
1124 return;
1125 }
1126 SPARROW_ASSERT_TRUE(value.flat_array() != nullptr);
1127 array& flat_array = *raw_flat_array();
1128 flat_array.insert(
1129 flat_array.cbegin() + static_cast<std::ptrdiff_t>(flat_pos),
1130 value.flat_array()->cbegin() + static_cast<std::ptrdiff_t>(value.begin_index()),
1131 value.flat_array()->cbegin() + static_cast<std::ptrdiff_t>(value.end_index()),
1132 count
1133 );
1134 }
1135
1136 template <class DERIVED>
1137 constexpr void
1139 {
1140 if (flat_count == 0)
1141 {
1142 return;
1143 }
1144 array& flat_array = *raw_flat_array();
1145 flat_array.erase(
1146 flat_array.cbegin() + static_cast<std::ptrdiff_t>(flat_begin),
1147 flat_array.cbegin() + static_cast<std::ptrdiff_t>(flat_begin + flat_count)
1148 );
1149 }
1150
1151 template <class DERIVED>
1152 constexpr void list_array_crtp_base<DERIVED>::resize_values(size_type new_length, const list_value& value)
1153 {
1154 DERIVED& derived = static_cast<DERIVED&>(*this);
1155 const size_type n = this->size();
1156 if (new_length < n)
1157 {
1158 derived.erase_values(
1159 sparrow::next(this->value_cbegin(), static_cast<std::ptrdiff_t>(new_length)),
1160 n - new_length
1161 );
1162 }
1163 else if (new_length > n)
1164 {
1165 derived.insert_value(this->value_cend(), value, new_length - n);
1166 }
1167 }
1168
1169 template <class DERIVED>
1170 template <std::input_iterator InputIt>
1171 requires std::convertible_to<typename std::iterator_traits<InputIt>::value_type, list_value>
1172 constexpr auto
1173 list_array_crtp_base<DERIVED>::insert_values(const_value_iterator pos, InputIt first, InputIt last)
1175 {
1176 DERIVED& derived = static_cast<DERIVED&>(*this);
1177 const auto idx = static_cast<size_type>(std::distance(this->value_cbegin(), pos));
1178 size_type count = 0;
1179 for (auto it = first; it != last; ++it, ++count)
1180 {
1181 auto cur_pos = sparrow::next(this->value_cbegin(), static_cast<std::ptrdiff_t>(idx + count));
1182 derived.insert_value(cur_pos, *it, 1);
1183 }
1184 return sparrow::next(this->value_begin(), static_cast<std::ptrdiff_t>(idx));
1185 }
1186
1187 /**********************************
1188 * list_array_impl implementation *
1189 **********************************/
1190
1191#ifdef __GNUC__
1192# pragma GCC diagnostic push
1193# pragma GCC diagnostic ignored "-Wcast-align"
1194#endif
1195
1196 template <bool BIG>
1198 : base_type(std::move(proxy))
1199 , p_list_offsets(make_list_offsets())
1200 {
1201 }
1202
1203 template <bool BIG>
1204 template <std::ranges::range SIZES_RANGE>
1206 {
1208 std::forward<SIZES_RANGE>(sizes)
1209 );
1210 }
1211
1212 template <bool BIG>
1213 template <validity_bitmap_input VB, input_metadata_container METADATA_RANGE>
1214 arrow_proxy list_array_impl<BIG>::create_proxy(
1215 array&& flat_values,
1216 offset_buffer_type&& list_offsets,
1217 VB&& validity_input,
1218 std::optional<std::string_view> name,
1219 std::optional<METADATA_RANGE> metadata
1220 )
1221 {
1222 const auto size = list_offsets.size() - 1;
1223 validity_bitmap vbitmap = ensure_validity_bitmap(size, std::forward<VB>(validity_input));
1224 const auto null_count = vbitmap.null_count();
1225
1226 auto [flat_arr, flat_schema] = extract_arrow_structures(std::move(flat_values));
1227
1229 BIG ? std::string("+L") : std::string("+l"),
1230 std::move(flat_schema),
1231 name,
1232 metadata,
1233 true // nullable
1234 );
1235
1236 std::vector<buffer<std::uint8_t>> arr_buffs;
1237 arr_buffs.reserve(2);
1238 arr_buffs.emplace_back(std::move(vbitmap).extract_storage());
1239 arr_buffs.emplace_back(std::move(list_offsets).extract_storage());
1240
1242 static_cast<std::int64_t>(size),
1243 static_cast<std::int64_t>(null_count),
1244 std::move(arr_buffs),
1245 std::move(flat_arr)
1246 );
1247
1248 return arrow_proxy{std::move(arr), std::move(schema)};
1249 }
1250
1251 template <bool BIG>
1252 template <validity_bitmap_input VB, input_metadata_container METADATA_RANGE>
1253 arrow_proxy list_array_impl<BIG>::create_proxy(
1254 array&& flat_values,
1255 offset_buffer_type&& list_offsets,
1256 bool nullable,
1257 std::optional<std::string_view> name,
1258 std::optional<METADATA_RANGE> metadata
1259 )
1260 {
1261 if (nullable)
1262 {
1263 return list_array_impl<BIG>::create_proxy(
1264 std::move(flat_values),
1265 std::move(list_offsets),
1267 name,
1268 metadata
1269 );
1270 }
1271
1272 const auto size = list_offsets.size() - 1;
1273 auto [flat_arr, flat_schema] = extract_arrow_structures(std::move(flat_values));
1274
1275 ArrowSchema schema = detail::make_list_arrow_schema(
1276 BIG ? std::string("+L") : std::string("+l"),
1277 std::move(flat_schema),
1278 name,
1279 metadata,
1280 false // not nullable
1281 );
1282
1283 std::vector<buffer<std::uint8_t>> arr_buffs;
1284 arr_buffs.reserve(2);
1285 arr_buffs.emplace_back(nullptr, 0, buffer<std::uint8_t>::default_allocator()); // no validity bitmap
1286 arr_buffs.emplace_back(std::move(list_offsets).extract_storage());
1287
1288 ArrowArray arr = detail::make_list_arrow_array(
1289 static_cast<std::int64_t>(size),
1290 0, // null_count
1291 std::move(arr_buffs),
1292 std::move(flat_arr)
1293 );
1294
1295 return arrow_proxy{std::move(arr), std::move(schema)};
1296 }
1297
1298 template <bool BIG>
1300 : base_type(rhs)
1301 , p_list_offsets(make_list_offsets())
1302 {
1304 }
1305
1306 template <bool BIG>
1308 {
1310 if (this != &rhs)
1311 {
1313 p_list_offsets = make_list_offsets();
1314 }
1315 return *this;
1316 }
1317
1318 template <bool BIG>
1320 {
1321 base_type::slice_inplace(start, end);
1322 p_list_offsets = make_list_offsets();
1323 }
1324
1325 template <bool BIG>
1326 constexpr auto list_array_impl<BIG>::offset_range(size_type i) const -> std::pair<offset_type, offset_type>
1327 {
1328 return std::make_pair(p_list_offsets[i], p_list_offsets[i + 1]);
1329 }
1330
1331 template <bool BIG>
1332 constexpr auto list_array_impl<BIG>::make_list_offsets() -> offset_type*
1333 {
1334 return this->get_arrow_proxy().buffers()[OFFSET_BUFFER_INDEX].template data<offset_type>()
1335 + static_cast<size_type>(this->get_arrow_proxy().offset());
1336 }
1337
1338 template <bool BIG>
1339 constexpr auto
1340 list_array_impl<BIG>::insert_value(const_value_iterator pos, const list_value& value, size_type count)
1341 -> value_iterator
1342 {
1343 using mutable_offset_type = std::remove_const_t<offset_type>;
1344 const auto idx = static_cast<size_type>(std::distance(this->value_cbegin(), pos));
1345 auto& proxy = this->get_arrow_proxy();
1346
1347 this->throw_if_sliced_for_mutation("list_array_impl::insert_value");
1348
1349 auto& offset_buffer = proxy.get_array_private_data()->buffers()[OFFSET_BUFFER_INDEX];
1350 auto offset_adaptor = make_buffer_adaptor<mutable_offset_type>(offset_buffer);
1351 const size_type n = offset_adaptor.size() - 1;
1352
1353 // Insert flat elements: count copies of value's slice
1354 const auto flat_insert_pos = static_cast<size_type>(p_list_offsets[idx]);
1355 this->insert_flat_elements(flat_insert_pos, value, count);
1356
1357 // Update offset buffer
1358 const auto value_size = value.size();
1359 const auto count_size = count;
1360 SPARROW_ASSERT_TRUE(std::in_range<mutable_offset_type>(value_size));
1361 SPARROW_ASSERT_TRUE(std::in_range<mutable_offset_type>(count_size));
1362 const auto val_sz = static_cast<mutable_offset_type>(value_size);
1363 const auto count_mt = static_cast<mutable_offset_type>(count_size);
1364 const auto max_offset = std::numeric_limits<mutable_offset_type>::max();
1365 // Check multiplication overflow for total_delta = count * val_sz
1366 if (val_sz != mutable_offset_type{})
1367 {
1368 SPARROW_ASSERT_TRUE(count_mt <= max_offset / val_sz);
1369 }
1370 const mutable_offset_type total_delta = count_mt * val_sz;
1371 // Existing offsets should fit in the representable range once delta is applied
1372 const mutable_offset_type existing_max = offset_adaptor[n];
1373 SPARROW_ASSERT_TRUE(existing_max <= max_offset - total_delta);
1374
1375 offset_adaptor.resize(n + 1 + count, mutable_offset_type{});
1376
1377 // Shift existing entries [idx+1..n] to [idx+count+1..n+count], adding delta
1378 for (size_type i = n; i > idx; --i)
1379 {
1380 offset_adaptor[i + count] = offset_adaptor[i] + total_delta;
1381 }
1382
1383 const auto flat_insert_offset = static_cast<mutable_offset_type>(flat_insert_pos);
1384 SPARROW_ASSERT_TRUE(flat_insert_offset <= max_offset - total_delta);
1385 for (size_type k = 1; k <= count; ++k)
1386 {
1387 offset_adaptor[idx + k] = flat_insert_offset + static_cast<mutable_offset_type>(k) * val_sz;
1388 }
1389
1390 proxy.update_buffers();
1391 p_list_offsets = make_list_offsets();
1392 return sparrow::next(this->value_begin(), static_cast<std::ptrdiff_t>(idx));
1393 }
1394
1395 template <bool BIG>
1396 constexpr auto list_array_impl<BIG>::erase_values(const_value_iterator pos, size_type count)
1397 -> value_iterator
1398 {
1399 using mutable_offset_type = std::remove_const_t<offset_type>;
1400 const size_type idx = static_cast<size_type>(std::distance(this->value_cbegin(), pos));
1401 const size_type n = this->size();
1402 auto& proxy = this->get_arrow_proxy();
1403
1404 if (count == 0)
1405 {
1406 return sparrow::next(this->value_begin(), static_cast<std::ptrdiff_t>(idx));
1407 }
1408
1409 this->throw_if_sliced_for_mutation("list_array_impl::erase_values");
1410
1411 // Erase flat elements
1412 const mutable_offset_type flat_erase_begin = p_list_offsets[idx];
1413 const mutable_offset_type flat_erase_end = p_list_offsets[idx + count];
1414 const mutable_offset_type flat_erase_count_val = flat_erase_end - flat_erase_begin;
1415
1416 this->erase_flat_elements(
1417 static_cast<size_type>(flat_erase_begin),
1418 static_cast<size_type>(flat_erase_count_val)
1419 );
1420
1421 // Update offset buffer: remove count entries, shift remaining down
1422 auto& offset_buffer = proxy.get_array_private_data()->buffers()[OFFSET_BUFFER_INDEX];
1423 auto offset_adaptor = make_buffer_adaptor<mutable_offset_type>(offset_buffer);
1424 std::copy(
1425 offset_adaptor.begin() + static_cast<std::ptrdiff_t>(idx + count + 1),
1426 offset_adaptor.begin() + static_cast<std::ptrdiff_t>(n + 1),
1427 offset_adaptor.begin() + static_cast<std::ptrdiff_t>(idx + 1)
1428 );
1429 auto delta_begin = offset_adaptor.begin() + static_cast<std::ptrdiff_t>(idx + 1);
1430 auto delta_end = offset_adaptor.begin() + static_cast<std::ptrdiff_t>(n + 1 - count);
1431 std::transform(
1432 delta_begin,
1433 delta_end,
1434 delta_begin,
1435 [flat_erase_count_val](mutable_offset_type v)
1436 {
1437 return v - flat_erase_count_val;
1438 }
1439 );
1440 offset_adaptor.resize(n + 1 - count);
1441
1442 proxy.update_buffers();
1443 p_list_offsets = make_list_offsets();
1444 return sparrow::next(this->value_begin(), static_cast<std::ptrdiff_t>(idx));
1445 }
1446
1447 template <bool BIG>
1448 void list_array_impl<BIG>::replace_value(size_type index, const list_value& value)
1449 {
1450 using mutable_offset_type = std::remove_const_t<offset_type>;
1451
1452 this->throw_if_sliced_for_mutation("list_array_impl::replace_value");
1453 const size_type new_size = value.size();
1454 SPARROW_ASSERT_TRUE(std::in_range<mutable_offset_type>(new_size));
1455
1456 const size_type n = this->size();
1457 const auto [flat_begin, flat_end] = offset_range(index);
1458 const auto old_size = static_cast<size_type>(flat_end - flat_begin);
1459 const auto old_size_mt = static_cast<mutable_offset_type>(old_size);
1460 const auto new_size_mt = static_cast<mutable_offset_type>(new_size);
1461
1462 array source_owner;
1463 const array* source = value.flat_array();
1464 if (source != nullptr && this->raw_flat_array() == source)
1465 {
1466 source_owner = *source;
1467 source = &source_owner;
1468 }
1469
1470 if (old_size == new_size)
1471 {
1472 if (new_size == 0)
1473 {
1474 return;
1475 }
1476 // Same element count: overwrite flat data in-place, no offset adjustment needed.
1477 SPARROW_ASSERT_TRUE(source != nullptr);
1478 const auto flat_begin_index = static_cast<size_type>(flat_begin);
1479 this->erase_flat_elements(flat_begin_index, old_size);
1480 this->insert_flat_elements(
1481 flat_begin_index,
1482 list_value{source, value.begin_index(), value.end_index()},
1483 1
1484 );
1485 auto& proxy_eq = this->get_arrow_proxy();
1486 proxy_eq.update_buffers();
1487 p_list_offsets = make_list_offsets();
1488 return;
1489 }
1490
1491 this->erase_flat_elements(static_cast<size_type>(flat_begin), old_size);
1492 if (new_size > 0)
1493 {
1494 SPARROW_ASSERT_TRUE(source != nullptr);
1495 this->insert_flat_elements(
1496 static_cast<size_type>(flat_begin),
1497 list_value{source, value.begin_index(), value.end_index()},
1498 1
1499 );
1500 }
1501
1502 auto& proxy = this->get_arrow_proxy();
1503 auto& offset_buffer = proxy.get_array_private_data()->buffers()[OFFSET_BUFFER_INDEX];
1504 auto offset_adaptor = make_buffer_adaptor<mutable_offset_type>(offset_buffer);
1505
1506 // delta != 0 here (equal-size case already returned above)
1507 auto tail_begin = offset_adaptor.begin() + static_cast<std::ptrdiff_t>(index + 1);
1508 auto tail_end = offset_adaptor.begin() + static_cast<std::ptrdiff_t>(n + 1);
1509 if (new_size_mt > old_size_mt)
1510 {
1511 const mutable_offset_type delta = new_size_mt - old_size_mt;
1512 const auto max_offset = std::numeric_limits<mutable_offset_type>::max();
1513 SPARROW_ASSERT_TRUE(offset_adaptor[n] <= max_offset - delta);
1514 std::transform(
1515 tail_begin,
1516 tail_end,
1517 tail_begin,
1518 [delta](mutable_offset_type v)
1519 {
1520 return v + delta;
1521 }
1522 );
1523 }
1524 else
1525 {
1526 const mutable_offset_type delta = old_size_mt - new_size_mt;
1527 std::transform(
1528 tail_begin,
1529 tail_end,
1530 tail_begin,
1531 [delta](mutable_offset_type v)
1532 {
1533 return v - delta;
1534 }
1535 );
1536 }
1537
1538 proxy.update_buffers();
1539 p_list_offsets = make_list_offsets();
1540 }
1541
1542 /***************************************
1543 * list_view_array_impl implementation *
1544 ***************************************/
1545
1546 template <bool BIG>
1548 : base_type(std::move(proxy))
1549 , p_list_offsets(make_list_offsets())
1550 , p_list_sizes(make_list_sizes())
1551 {
1552 }
1553
1554 template <bool BIG>
1555 template <validity_bitmap_input VB, input_metadata_container METADATA_RANGE>
1556 arrow_proxy list_view_array_impl<BIG>::create_proxy(
1557 array&& flat_values,
1558 offset_buffer_type&& list_offsets,
1559 size_buffer_type&& list_sizes,
1560 VB&& validity_input,
1561 std::optional<std::string_view> name,
1562 std::optional<METADATA_RANGE> metadata
1563 )
1564 {
1565 SPARROW_ASSERT(list_offsets.size() == list_sizes.size(), "sizes and offset must have the same size");
1566 const auto size = list_sizes.size();
1567 validity_bitmap vbitmap = ensure_validity_bitmap(size, std::forward<VB>(validity_input));
1568 const auto null_count = vbitmap.null_count();
1569
1570 auto [flat_arr, flat_schema] = extract_arrow_structures(std::move(flat_values));
1571
1573 BIG ? std::string("+vL") : std::string("+vl"),
1574 std::move(flat_schema),
1575 name,
1576 metadata,
1577 true // nullable
1578 );
1579
1580 std::vector<buffer<std::uint8_t>> arr_buffs;
1581 arr_buffs.reserve(3);
1582 arr_buffs.emplace_back(std::move(vbitmap).extract_storage());
1583 arr_buffs.emplace_back(std::move(list_offsets).extract_storage());
1584 arr_buffs.emplace_back(std::move(list_sizes).extract_storage());
1585
1587 static_cast<std::int64_t>(size),
1588 static_cast<std::int64_t>(null_count),
1589 std::move(arr_buffs),
1590 std::move(flat_arr)
1591 );
1592
1593 return arrow_proxy{std::move(arr), std::move(schema)};
1594 }
1595
1596 template <bool BIG>
1597 template <input_metadata_container METADATA_RANGE>
1598 arrow_proxy list_view_array_impl<BIG>::create_proxy(
1599 array&& flat_values,
1600 offset_buffer_type&& list_offsets,
1601 size_buffer_type&& list_sizes,
1602 bool nullable,
1603 std::optional<std::string_view> name,
1604 std::optional<METADATA_RANGE> metadata
1605 )
1606 {
1607 if (nullable)
1608 {
1609 return list_view_array_impl<BIG>::create_proxy(
1610 std::move(flat_values),
1611 std::move(list_offsets),
1612 std::move(list_sizes),
1614 name,
1615 metadata
1616 );
1617 }
1618
1619 SPARROW_ASSERT(list_offsets.size() == list_sizes.size(), "sizes and offset must have the same size");
1620 const auto size = list_sizes.size();
1621 auto [flat_arr, flat_schema] = extract_arrow_structures(std::move(flat_values));
1622
1623 ArrowSchema schema = detail::make_list_arrow_schema(
1624 BIG ? std::string("+vL") : std::string("+vl"),
1625 std::move(flat_schema),
1626 name,
1627 metadata,
1628 false // not nullable
1629 );
1630
1631 std::vector<buffer<std::uint8_t>> arr_buffs;
1632 arr_buffs.reserve(3);
1633 arr_buffs.emplace_back(nullptr, 0, buffer<std::uint8_t>::default_allocator()); // no validity bitmap
1634 arr_buffs.emplace_back(std::move(list_offsets).extract_storage());
1635 arr_buffs.emplace_back(std::move(list_sizes).extract_storage());
1636
1637 ArrowArray arr = detail::make_list_arrow_array(
1638 static_cast<std::int64_t>(size),
1639 0, // null_count
1640 std::move(arr_buffs),
1641 std::move(flat_arr)
1642 );
1643
1644 return arrow_proxy{std::move(arr), std::move(schema)};
1645 }
1646
1647 template <bool BIG>
1649 : base_type(rhs)
1650 , p_list_offsets(make_list_offsets())
1651 , p_list_sizes(make_list_sizes())
1652 {
1654 }
1655
1656 template <bool BIG>
1658 {
1660 if (this != &rhs)
1661 {
1663 p_list_offsets = make_list_offsets();
1664 p_list_sizes = make_list_sizes();
1665 }
1666 return *this;
1667 }
1668
1669 template <bool BIG>
1671 {
1672 base_type::slice_inplace(start, end);
1673 p_list_offsets = make_list_offsets();
1674 p_list_sizes = make_list_sizes();
1675 }
1676
1677 template <bool BIG>
1678 inline constexpr auto list_view_array_impl<BIG>::offset_range(size_type i) const
1679 -> std::pair<offset_type, offset_type>
1680 {
1681 const auto offset = p_list_offsets[i];
1682 SPARROW_ASSERT_TRUE(std::in_range<std::remove_const_t<offset_type>>(p_list_sizes[i]));
1683 return std::make_pair(offset, offset + static_cast<offset_type>(p_list_sizes[i]));
1684 }
1685
1686 template <bool BIG>
1687 constexpr auto list_view_array_impl<BIG>::make_list_offsets() const -> offset_type*
1688 {
1689 return this->get_arrow_proxy().buffers()[OFFSET_BUFFER_INDEX].template data<offset_type>()
1690 + static_cast<size_type>(this->get_arrow_proxy().offset());
1691 }
1692
1693 template <bool BIG>
1694 constexpr auto list_view_array_impl<BIG>::make_list_sizes() const -> const list_size_type*
1695 {
1696 return this->get_arrow_proxy().buffers()[SIZES_BUFFER_INDEX].template data<list_size_type>()
1697 + static_cast<size_type>(this->get_arrow_proxy().offset());
1698 }
1699
1700 template <bool BIG>
1701 constexpr auto
1702 list_view_array_impl<BIG>::insert_value(const_value_iterator pos, const list_value& value, size_type count)
1703 -> value_iterator
1704 {
1705 using mutable_offset_type = std::remove_const_t<offset_type>;
1706 using mutable_size_type = std::remove_const_t<list_size_type>;
1707 const size_type idx = static_cast<size_type>(std::distance(this->value_cbegin(), pos));
1708
1709 this->throw_if_sliced_for_mutation("list_view_array_impl::insert_value");
1710
1711 // Append new element(s) at the end of the flat array
1712 SPARROW_ASSERT_TRUE(std::in_range<mutable_offset_type>(value.size()));
1713 SPARROW_ASSERT_TRUE(std::in_range<mutable_size_type>(value.size()));
1714 const mutable_offset_type val_sz = static_cast<mutable_offset_type>(value.size());
1715 const size_type flat_append_pos = this->raw_flat_array()->size();
1716
1717 this->insert_flat_elements(flat_append_pos, value, count);
1718
1719 auto& proxy = this->get_arrow_proxy();
1720
1721 // Insert count new (offset, size) entries in the offset and sizes buffers at position idx
1722 const size_type n = this->size();
1723
1724 auto& offset_buffer = proxy.get_array_private_data()->buffers()[OFFSET_BUFFER_INDEX];
1725 auto offset_adaptor = make_buffer_adaptor<mutable_offset_type>(offset_buffer);
1726 offset_adaptor.resize(n + count);
1727
1728 auto& sizes_buffer = proxy.get_array_private_data()->buffers()[SIZES_BUFFER_INDEX];
1729 auto sizes_adaptor = make_buffer_adaptor<mutable_size_type>(sizes_buffer);
1730 sizes_adaptor.resize(n + count);
1731
1732 // Shift existing entries [idx..n) → [idx+count..n+count): pure backward copy, no delta.
1733 // std::copy_backward lowers to memmove for trivially-copyable offset/size types.
1734 std::copy_backward(
1735 offset_adaptor.begin() + static_cast<std::ptrdiff_t>(idx),
1736 offset_adaptor.begin() + static_cast<std::ptrdiff_t>(n),
1737 offset_adaptor.begin() + static_cast<std::ptrdiff_t>(n + count)
1738 );
1739 std::copy_backward(
1740 sizes_adaptor.begin() + static_cast<std::ptrdiff_t>(idx),
1741 sizes_adaptor.begin() + static_cast<std::ptrdiff_t>(n),
1742 sizes_adaptor.begin() + static_cast<std::ptrdiff_t>(n + count)
1743 );
1744
1745 // Write new entries for the count inserted lists
1746 // Before writing, assert that the computed offsets are representable
1747 if (count > 0 && val_sz > 0)
1748 {
1749 const auto base_offset_ull = static_cast<unsigned long long>(flat_append_pos);
1750 const auto step_ull = static_cast<unsigned long long>(val_sz);
1751 const auto max_k_ull = static_cast<unsigned long long>(count - 1);
1752 const auto max_offset_ull = static_cast<unsigned long long>(
1753 std::numeric_limits<mutable_offset_type>::max()
1754 );
1755 const auto last_offset_ull = base_offset_ull + max_k_ull * step_ull;
1756 SPARROW_ASSERT_TRUE(base_offset_ull <= max_offset_ull);
1757 SPARROW_ASSERT_TRUE(last_offset_ull <= max_offset_ull);
1758 }
1759 for (size_type k = 0; k < count; ++k)
1760 {
1761 offset_adaptor[idx + k] = static_cast<mutable_offset_type>(flat_append_pos)
1762 + static_cast<mutable_offset_type>(k) * val_sz;
1763 sizes_adaptor[idx + k] = static_cast<mutable_size_type>(val_sz);
1764 }
1765
1766 proxy.update_buffers();
1767 p_list_offsets = make_list_offsets();
1768 p_list_sizes = make_list_sizes();
1769 return sparrow::next(this->value_begin(), static_cast<std::ptrdiff_t>(idx));
1770 }
1771
1772 template <bool BIG>
1773 template <std::input_iterator InputIt>
1774 requires std::convertible_to<typename std::iterator_traits<InputIt>::value_type, list_value>
1775 constexpr auto
1776 list_view_array_impl<BIG>::insert_values(const_value_iterator pos, InputIt first, InputIt last)
1778 {
1779 const size_type idx = static_cast<size_type>(std::distance(this->value_cbegin(), pos));
1780 auto& proxy = this->get_arrow_proxy();
1781 const size_type original_size = this->size();
1782 if constexpr (std::forward_iterator<InputIt>)
1783 {
1784 const auto count = static_cast<size_type>(std::distance(first, last));
1785 if (count == 0)
1786 {
1787 return sparrow::next(this->value_begin(), static_cast<std::ptrdiff_t>(idx));
1788 }
1789 size_type inserted_count = 0;
1790 // Temporarily extend logical length once for the whole bulk insertion.
1791 proxy.set_length(original_size + count);
1792 for (auto it = first; it != last; ++it)
1793 {
1794 auto cur_pos = sparrow::next(
1795 this->value_cbegin(),
1796 static_cast<std::ptrdiff_t>(idx + inserted_count)
1797 );
1798 insert_value(cur_pos, *it, 1);
1799 ++inserted_count;
1800 }
1801 proxy.set_length(original_size);
1802 }
1803 else
1804 {
1805 size_type inserted_count = 0;
1806 for (auto it = first; it != last; ++it)
1807 {
1808 auto cur_pos = sparrow::next(
1809 this->value_cbegin(),
1810 static_cast<std::ptrdiff_t>(idx + inserted_count)
1811 );
1812 insert_value(cur_pos, *it, 1);
1813 ++inserted_count;
1814 proxy.set_length(original_size + inserted_count);
1815 }
1816 proxy.set_length(original_size);
1817 }
1818 return sparrow::next(this->value_begin(), static_cast<std::ptrdiff_t>(idx));
1819 }
1820
1821 template <bool BIG>
1822 constexpr auto list_view_array_impl<BIG>::erase_values(const_value_iterator pos, size_type count)
1823 -> value_iterator
1824 {
1825 using mutable_offset_type = std::remove_const_t<offset_type>;
1826 using mutable_size_type = std::remove_const_t<list_size_type>;
1827 const size_type idx = static_cast<size_type>(std::distance(this->value_cbegin(), pos));
1828 const size_type n = this->size();
1829
1830 if (count == 0)
1831 {
1832 return sparrow::next(this->value_begin(), static_cast<std::ptrdiff_t>(idx));
1833 }
1834
1835 this->throw_if_sliced_for_mutation("list_view_array_impl::erase_values");
1836
1837 auto& proxy = this->get_arrow_proxy();
1838 auto& offset_buffer = proxy.get_array_private_data()->buffers()[OFFSET_BUFFER_INDEX];
1839 auto offset_adaptor = make_buffer_adaptor<mutable_offset_type>(offset_buffer);
1840
1841 auto& sizes_buffer = proxy.get_array_private_data()->buffers()[SIZES_BUFFER_INDEX];
1842 auto sizes_adaptor = make_buffer_adaptor<mutable_size_type>(sizes_buffer);
1843
1844 // Shift entries [idx+count..n) to [idx..n-count)
1845 std::copy(
1846 offset_adaptor.begin() + static_cast<std::ptrdiff_t>(idx + count),
1847 offset_adaptor.end(),
1848 offset_adaptor.begin() + static_cast<std::ptrdiff_t>(idx)
1849 );
1850 std::copy(
1851 sizes_adaptor.begin() + static_cast<std::ptrdiff_t>(idx + count),
1852 sizes_adaptor.end(),
1853 sizes_adaptor.begin() + static_cast<std::ptrdiff_t>(idx)
1854 );
1855 offset_adaptor.resize(n - count);
1856 sizes_adaptor.resize(n - count);
1857 proxy.update_buffers();
1858 p_list_offsets = make_list_offsets();
1859 p_list_sizes = make_list_sizes();
1860 return sparrow::next(this->value_begin(), static_cast<std::ptrdiff_t>(idx));
1861 }
1862
1863 template <bool BIG>
1864 void list_view_array_impl<BIG>::replace_value(size_type index, const list_value& value)
1865 {
1866 using mutable_offset_type = std::remove_const_t<offset_type>;
1867 using mutable_size_type = std::remove_const_t<list_size_type>;
1868
1869 this->throw_if_sliced_for_mutation("list_view_array_impl::replace_value");
1870
1871 SPARROW_ASSERT_TRUE(std::in_range<mutable_offset_type>(value.size()));
1872 SPARROW_ASSERT_TRUE(std::in_range<mutable_size_type>(value.size()));
1873
1874 const auto value_size = static_cast<mutable_offset_type>(value.size());
1875 const size_type flat_append_pos = this->raw_flat_array()->size();
1876 SPARROW_ASSERT_TRUE(std::in_range<mutable_offset_type>(flat_append_pos));
1877
1878 this->insert_flat_elements(flat_append_pos, value, 1);
1879
1880 auto& proxy = this->get_arrow_proxy();
1881 auto& offset_buffer = proxy.get_array_private_data()->buffers()[OFFSET_BUFFER_INDEX];
1882 auto offset_adaptor = make_buffer_adaptor<mutable_offset_type>(offset_buffer);
1883 auto& sizes_buffer = proxy.get_array_private_data()->buffers()[SIZES_BUFFER_INDEX];
1884 auto sizes_adaptor = make_buffer_adaptor<mutable_size_type>(sizes_buffer);
1885 offset_adaptor[index] = static_cast<mutable_offset_type>(flat_append_pos);
1886 sizes_adaptor[index] = static_cast<mutable_size_type>(value_size);
1887 proxy.update_buffers();
1888 p_list_offsets = make_list_offsets();
1889 p_list_sizes = make_list_sizes();
1890 }
1891
1892#ifdef __GNUC__
1893# pragma GCC diagnostic pop
1894#endif
1895
1896 /*****************************************
1897 * fixed_sized_list_array implementation *
1898 *****************************************/
1899
1900 inline auto fixed_sized_list_array::list_size_from_format(const std::string_view format) -> uint64_t
1901 {
1902 SPARROW_ASSERT(format.size() >= 3, "Invalid format string");
1903 const auto n_digits = format.size() - 3;
1904 const auto list_size_str = format.substr(3, n_digits);
1905 return std::stoull(std::string(list_size_str));
1906 }
1907
1909 : base_type(std::move(proxy))
1910 , m_list_size(fixed_sized_list_array::list_size_from_format(this->get_arrow_proxy().format()))
1911 {
1912 }
1913
1915 : base_type(rhs)
1916 , m_list_size(rhs.m_list_size)
1917 {
1919 }
1920
1922 {
1924 if (this != &rhs)
1925 {
1927 m_list_size = rhs.m_list_size;
1928 }
1929 return *this;
1930 }
1931
1932 constexpr auto fixed_sized_list_array::offset_range(size_type i) const
1933 -> std::pair<offset_type, offset_type>
1934 {
1935 const auto offset = (i + this->offset()) * m_list_size;
1936 return std::make_pair(offset, offset + m_list_size);
1937 }
1938
1939 constexpr auto fixed_sized_list_array::flat_element_count(size_type list_count) const -> size_type
1940 {
1941 const offset_type max_flat_count = static_cast<offset_type>(std::numeric_limits<size_type>::max());
1943 m_list_size == 0 || static_cast<offset_type>(list_count) <= max_flat_count / m_list_size
1944 );
1945 return static_cast<size_type>(static_cast<offset_type>(list_count) * m_list_size);
1946 }
1947
1948 inline auto
1949 fixed_sized_list_array::insert_value(const_value_iterator pos, const list_value& value, size_type count)
1950 -> value_iterator
1951 {
1952 SPARROW_ASSERT_TRUE(value.size() == m_list_size);
1953 const auto idx = static_cast<size_type>(std::distance(this->value_cbegin(), pos));
1954
1955 this->throw_if_sliced_for_mutation("fixed_sized_list_array::insert_value");
1956
1957 const size_type flat_insert_pos = flat_element_count(idx);
1958
1959 this->insert_flat_elements(flat_insert_pos, value, count);
1960
1961 return sparrow::next(this->value_begin(), static_cast<std::ptrdiff_t>(idx));
1962 }
1963
1964 inline auto fixed_sized_list_array::erase_values(const_value_iterator pos, size_type count) -> value_iterator
1965 {
1966 const auto idx = static_cast<size_type>(std::distance(this->value_cbegin(), pos));
1967 if (count == 0)
1968 {
1969 return sparrow::next(this->value_begin(), static_cast<std::ptrdiff_t>(idx));
1970 }
1971
1972 this->throw_if_sliced_for_mutation("fixed_sized_list_array::erase_values");
1973
1974 this->erase_flat_elements(flat_element_count(idx), flat_element_count(count));
1975 return sparrow::next(this->value_begin(), static_cast<std::ptrdiff_t>(idx));
1976 }
1977
1978 inline void fixed_sized_list_array::replace_value(size_type index, const list_value& value)
1979 {
1980 SPARROW_ASSERT_TRUE(value.size() == m_list_size);
1981
1982 this->throw_if_sliced_for_mutation("fixed_sized_list_array::replace_value");
1983
1984 if (m_list_size == 0)
1985 {
1986 return;
1987 }
1988
1989 array source_owner;
1990 const array* source = value.flat_array();
1991 if (source != nullptr && this->raw_flat_array() == source)
1992 {
1993 source_owner = *source;
1994 source = &source_owner;
1995 }
1996
1997 SPARROW_ASSERT_TRUE(source != nullptr);
1998 const size_type flat_index = flat_element_count(index);
1999 this->erase_flat_elements(flat_index, static_cast<size_type>(m_list_size));
2000 this->insert_flat_elements(flat_index, list_value{source, value.begin_index(), value.end_index()}, 1);
2001 }
2002
2003 template <validity_bitmap_input R, input_metadata_container METADATA_RANGE>
2004 inline arrow_proxy fixed_sized_list_array::create_proxy(
2005 std::uint64_t list_size,
2006 array&& flat_values,
2007 R&& validity_input,
2008 std::optional<std::string_view> name,
2009 std::optional<METADATA_RANGE> metadata
2010 )
2011 {
2012 const auto size = flat_values.size() / static_cast<std::size_t>(list_size);
2013 validity_bitmap vbitmap = ensure_validity_bitmap(size, std::forward<R>(validity_input));
2014 const auto null_count = vbitmap.null_count();
2015
2016 auto [flat_arr, flat_schema] = extract_arrow_structures(std::move(flat_values));
2017
2018 std::string format = "+w:" + std::to_string(list_size);
2019 ArrowSchema schema = detail::make_list_arrow_schema(
2020 std::move(format),
2021 std::move(flat_schema),
2022 name,
2023 metadata,
2024 true // nullable
2025 );
2026
2027 std::vector<buffer<std::uint8_t>> arr_buffs;
2028 arr_buffs.reserve(1);
2029 arr_buffs.emplace_back(vbitmap.extract_storage());
2030
2031 ArrowArray arr = detail::make_list_arrow_array(
2032 static_cast<std::int64_t>(size),
2033 static_cast<std::int64_t>(null_count),
2034 std::move(arr_buffs),
2035 std::move(flat_arr)
2036 );
2037
2038 return arrow_proxy{std::move(arr), std::move(schema)};
2039 }
2040
2041 template <validity_bitmap_input R, input_metadata_container METADATA_RANGE>
2042 inline arrow_proxy fixed_sized_list_array::create_proxy(
2043 std::uint64_t list_size,
2044 array&& flat_values,
2045 bool nullable,
2046 std::optional<std::string_view> name,
2047 std::optional<METADATA_RANGE> metadata
2048 )
2049 {
2050 if (nullable)
2051 {
2052 return fixed_sized_list_array::create_proxy(
2053 list_size,
2054 std::move(flat_values),
2056 name,
2057 metadata
2058 );
2059 }
2060
2061 const auto size = flat_values.size() / static_cast<std::size_t>(list_size);
2062 auto [flat_arr, flat_schema] = extract_arrow_structures(std::move(flat_values));
2063
2064 std::string format = "+w:" + std::to_string(list_size);
2065 ArrowSchema schema = detail::make_list_arrow_schema(
2066 std::move(format),
2067 std::move(flat_schema),
2068 name,
2069 metadata,
2070 false // not nullable
2071 );
2072
2073 std::vector<buffer<std::uint8_t>> arr_buffs;
2074 arr_buffs.reserve(1);
2075 arr_buffs.emplace_back(nullptr, 0, buffer<std::uint8_t>::default_allocator()); // no validity bitmap
2076
2077 ArrowArray arr = detail::make_list_arrow_array(
2078 static_cast<std::int64_t>(size),
2079 0, // null_count
2080 std::move(arr_buffs),
2081 std::move(flat_arr)
2082 );
2083
2084 return arrow_proxy{std::move(arr), std::move(schema)};
2085 }
2086}
typename base_type::const_bitmap_range const_bitmap_range
typename base_type::iterator_tag iterator_tag
constexpr array_bitmap_base_impl & operator=(const array_bitmap_base_impl &)
typename base_type::bitmap_const_reference bitmap_const_reference
typename base_type::bitmap_type bitmap_type
Dynamically typed array encapsulating an Arrow layout.
Definition array_api.hpp:50
SPARROW_API iterator erase(const_iterator pos)
Inserts a copy of value before pos in the array, repeating the insertion count times.
auto children() const
SPARROW_API iterator insert(const_iterator pos, const_iterator first, const_iterator last)
Inserts elements from the range [first, last) before the element at the specified position.
SPARROW_API const_iterator cbegin() const
Returns a constant iterator to the beginning of the array.
Object that owns a piece of contiguous memory.
Definition buffer.hpp:131
xsimd::aligned_allocator< T > default_allocator
Definition buffer.hpp:144
constexpr size_type null_count() const noexcept
Returns the number of bits set to false (null/invalid).
typename storage_type::default_allocator default_allocator
typename base_type::value_iterator value_iterator
inner_types::list_size_type list_size_type
array_inner_types< self_type > inner_types
fixed_sized_list_array(arrow_proxy proxy)
Constructs fixed size list array from Arrow proxy.
fixed_sized_list_array(ARGS &&... args)
Generic constructor for creating fixed size list array.
list_array_crtp_base< self_type > base_type
fixed_sized_list_array self_type
fixed_sized_list_array & operator=(self_type &&)=default
typename base_type::const_value_iterator const_value_iterator
fixed_sized_list_array(self_type &&)=default
fixed_sized_list_array & operator=(const self_type &)
typename base_type::size_type size_type
CRTP base class for all list array implementations.
constexpr void throw_if_sliced_for_mutation(const char *operation) const
typename base_type::const_bitmap_range const_bitmap_range
constexpr list_array_crtp_base & operator=(const self_type &)
Copy assignment operator.
constexpr array * raw_flat_array()
Gets mutable access to the underlying flat array.
nullable< inner_const_reference, bitmap_const_reference > const_reference
typename inner_types::const_value_iterator const_value_iterator
typename base_type::bitmap_const_reference bitmap_const_reference
constexpr void insert_flat_elements(size_type flat_pos, const list_value &value, size_type count)
typename inner_types::inner_const_reference inner_const_reference
typename base_type::iterator_tag iterator_tag
mutable_array_bitmap_base< DERIVED > base_type
typename base_type::bitmap_reference bitmap_reference
list_array_crtp_base(arrow_proxy proxy)
Constructs list array base from Arrow proxy.
constexpr list_array_crtp_base(const self_type &)
Copy constructor.
constexpr const_value_iterator value_cbegin() const
constexpr void erase_flat_elements(size_type flat_begin, size_type flat_count)
typename inner_types::value_iterator value_iterator
constexpr const_value_iterator value_cend() const
typename base_type::bitmap_type bitmap_type
list_array_crtp_base< DERIVED > self_type
typename base_type::size_type size_type
array_inner_types< DERIVED > inner_types
nullable< inner_reference, bitmap_reference > reference
nullable< inner_value_type > value_type
typename inner_types::inner_reference inner_reference
constexpr const array * raw_flat_array() const
Gets read-only access to the underlying flat array.
constexpr list_array_crtp_base(self_type &&) noexcept=default
list_array_impl< BIG > self_type
constexpr void slice_inplace(size_type start, size_type end)
constexpr list_array_impl(const self_type &)
Copy constructor.
std::conditional_t< BIG, const std::int64_t, const std::int32_t > offset_type
typename base_type::size_type size_type
constexpr list_array_impl & operator=(const self_type &)
Copy assignment operator.
typename base_type::value_iterator value_iterator
array_inner_types< self_type > inner_types
constexpr list_array_impl(self_type &&) noexcept=default
static constexpr auto offset_from_sizes(SIZES_RANGE &&sizes) -> offset_buffer_type
Creates offset buffer from list sizes.
typename base_type::const_value_iterator const_value_iterator
inner_types::list_size_type list_size_type
list_array_crtp_base< list_array_impl< BIG > > base_type
u8_buffer< std::remove_const_t< offset_type > > offset_buffer_type
list_array_impl(arrow_proxy proxy)
Constructs list array from Arrow proxy.
size_type size() const
Gets the number of elements in the list.
std::size_t size_type
constexpr list_view_array_impl & operator=(self_type &&)=default
typename base_type::size_type size_type
constexpr void slice_inplace(size_type start, size_type end)
constexpr list_view_array_impl(self_type &&)=default
u8_buffer< std::remove_const_t< offset_type > > offset_buffer_type
list_view_array_impl(arrow_proxy proxy)
Constructs list view array from Arrow proxy.
std::conditional_t< BIG, const std::int64_t, const std::int32_t > offset_type
array_inner_types< self_type > inner_types
list_array_crtp_base< list_view_array_impl< BIG > > base_type
typename base_type::const_value_iterator const_value_iterator
typename base_type::value_iterator value_iterator
list_view_array_impl(ARGS &&... args)
Generic constructor for creating list view array from various inputs.
list_view_array_impl< BIG > self_type
constexpr list_view_array_impl(const self_type &)
Copy constructor.
inner_types::list_size_type list_size_type
u8_buffer< std::remove_const_t< list_size_type > > size_buffer_type
constexpr list_view_array_impl & operator=(const self_type &)
Copy assignment operator.
Base class definining common interface for arrays with a bitmap.
A view that repeats a value a given number of times.
This buffer class is used as storage buffer for all sparrow arrays.
Concept for input containers that can provide metadata pairs.
Definition metadata.hpp:332
Concept defining valid input types for validity bitmap creation.
#define SPARROW_ASSERT_TRUE(expr__)
#define SPARROW_ASSERT(expr__, message__)
SPARROW_API void increase(const std::string &key)
std::string key< fixed_sized_list_array >()
SPARROW_API int count(const std::string &key, int disabled_value=0)
std::string key()
Definition buffer.hpp:49
ArrowArray make_list_arrow_array(std::int64_t size, std::int64_t null_count, std::vector< buffer< std::uint8_t > > &&arr_buffs, ArrowArray &&flat_arr)
ArrowSchema make_list_arrow_schema(std::string format, ArrowSchema &&flat_schema, std::optional< std::string_view > name, std::optional< METADATA_RANGE > metadata, bool nullable)
constexpr sparrow::u8_buffer< OFFSET_TYPE > offset_buffer_from_sizes(SIZES_RANGE &&sizes)
constexpr std::size_t size(typelist< T... >={})
Gets the count of types contained in a typelist.
Definition mp_utils.hpp:216
constexpr bool excludes_copy_and_move_ctor_v
Convenience variable template for excludes_copy_and_move_ctor.
array_bitmap_base_impl< D, true > mutable_array_bitmap_base
Convenient alias for arrays with mutable validity bitmaps.
ArrowSchema make_arrow_schema(F format, N name, std::optional< M > metadata, std::optional< std::unordered_set< ArrowFlag > > flags, ArrowSchema **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowSchema *dictionary, bool dictionary_ownership)
Creates an ArrowSchema owned by a unique_ptr and holding the provided data.
constexpr bool is_list_view_array_v
Checks whether T is a list_view_array type.
list_array_impl< false > list_array
A list array implementation.
constexpr InputIt next(InputIt it, Distance n)
Definition iterator.hpp:605
constexpr bool is_fixed_sized_list_array_v
Checks whether T is a fixed_sized_list_array type.
list_view_array_impl< true > big_list_view_array
std::pair< ArrowArray, ArrowSchema > extract_arrow_structures(A &&a)
Extracts the internal ArrowArray and ArrowSchema structures from the given array or typed layout.
Definition array.hpp:110
constexpr bool is_big_list_array_v
Checks whether T is a big_list_array type.
ArrowArray make_arrow_array(int64_t length, int64_t null_count, int64_t offset, B buffers, ArrowArray **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowArray *dictionary, bool dictionary_ownership)
Creates an ArrowArray.
list_view_array_impl< false > list_view_array
A list view array implementation.
dynamic_bitset< std::uint8_t > validity_bitmap
Type alias for a validity bitmap using 8-bit storage blocks.
constexpr bool is_list_array_v
Checks whether T is a list_array type.
list_array_impl< true > big_list_array
A big list array implementation.
auto make_buffer_adaptor(FromBufferRef &buf)
validity_bitmap ensure_validity_bitmap(std::size_t size, R &&validity_input)
Ensures a validity bitmap of the specified size from various input types.
constexpr bool is_big_list_view_array_v
Checks whether T is a big_list_view_array type.
data_type
Runtime identifier of arrow data types, usually associated with raw bytes with the associated value.
Extensions to the C++ standard library.
functor_index_iterator< detail::layout_value_functor< const array_type, inner_const_reference > > const_value_iterator
functor_index_iterator< detail::layout_value_functor< array_type, inner_reference > > value_iterator
std::conditional_t< BIG, std::uint64_t, std::uint32_t > list_size_type
functor_index_iterator< detail::layout_value_functor< const array_type, inner_const_reference > > const_value_iterator
functor_index_iterator< detail::layout_value_functor< array_type, inner_reference > > value_iterator
functor_index_iterator< detail::layout_value_functor< array_type, inner_reference > > value_iterator
functor_index_iterator< detail::layout_value_functor< const array_type, inner_const_reference > > const_value_iterator
std::conditional_t< BIG, std::uint64_t, std::uint32_t > list_size_type
Base class for array_inner_types specializations.
Traits class that must be specialized by array implementations.
Metafunction for retrieving the data_type of a typed array.