1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
|
// Copyright 2020 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// A Cord is a sequence of characters with some unusual access properties.
// A Cord supports efficient insertions and deletions at the start and end of
// the byte sequence, but random access reads are slower, and random access
// modifications are not supported by the API. Cord also provides cheap copies
// (using a copy-on-write strategy) and cheap substring operations.
//
// Thread safety
// -------------
// Cord has the same thread-safety properties as many other types like
// std::string, std::vector<>, int, etc -- it is thread-compatible. In
// particular, if no thread may call a non-const method, then it is safe to
// concurrently call const methods. Copying a Cord produces a new instance that
// can be used concurrently with the original in arbitrary ways.
//
// Implementation is similar to the "Ropes" described in:
// Ropes: An alternative to strings
// Hans J. Boehm, Russ Atkinson, Michael Plass
// Software Practice and Experience, December 1995
#ifndef ABSL_STRINGS_CORD_H_
#define ABSL_STRINGS_CORD_H_
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <iostream>
#include <iterator>
#include <string>
#include <type_traits>
#include "absl/base/internal/endian.h"
#include "absl/base/internal/invoke.h"
#include "absl/base/internal/per_thread_tls.h"
#include "absl/base/macros.h"
#include "absl/base/port.h"
#include "absl/functional/function_ref.h"
#include "absl/meta/type_traits.h"
#include "absl/strings/internal/cord_internal.h"
#include "absl/strings/internal/resize_uninitialized.h"
#include "absl/strings/string_view.h"
#include "absl/types/optional.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
class Cord;
class CordTestPeer;
template <typename Releaser>
Cord MakeCordFromExternal(absl::string_view, Releaser&&);
void CopyCordToString(const Cord& src, std::string* dst);
namespace hash_internal {
template <typename H>
H HashFragmentedCord(H, const Cord&);
}
namespace cord_internal {
// It's expensive to keep a tree perfectly balanced, so instead we keep trees
// approximately balanced. A tree node N of depth D(N) that contains a string
// of L(N) characters is considered balanced if L >= Fibonacci(D + 2).
// The "+ 2" is used to ensure that every balanced leaf node contains at least
// one character. Here we presume that
// Fibonacci(0) = 0
// Fibonacci(1) = 1
// Fibonacci(2) = 1
// Fibonacci(3) = 2
// ...
// The algorithm is based on paper by Hans Boehm et al:
// https://www.cs.rit.edu/usr/local/pub/jeh/courses/QUARTERS/FP/Labs/CedarRope/rope-paper.pdf
// In this paper the authors show that rebalancing based on a cord forest of
// already balanced subtrees can be proven to never produce a tree deeper than
// the index of the largest Fibonacci number representable in the same integral
// type as the cord size. For 64 bit integers this is the 93rd Fibonacci number.
// For 32 bit integers this is the 47th Fibonacci number.
// Returns the maximum cord tree depth: 93 when `size_t` is 8 bytes wide and 47
// for any other width.
constexpr size_t MaxCordDepth() { return (sizeof(size_t) != 8) ? 47 : 93; }
// A fixed-capacity stack of up to `N` `CordRepPtr` values stored inline in the
// object. Elements are pushed to and popped from the back.
//
// Exceeding the capacity, or reading/popping an empty path, is a programming
// error. These conditions are checked with `assert`, so they are only caught
// in builds where assertions are enabled.
template <typename CordRepPtr, size_t N>
class CordTreePath {
 public:
  // Creates an empty path.
  CordTreePath() {}
  // Creates a path whose only element is `root`.
  explicit CordTreePath(CordRepPtr root) { push_back(root); }

  bool empty() const { return size_ == 0; }
  size_t size() const { return size_; }
  // Forgets all elements; the backing array itself is left untouched.
  void clear() { size_ = 0; }

  // Returns the most recently pushed element.
  // REQUIRES: !empty()
  CordRepPtr back() {
    assert(size_ > 0);
    return data_[size_ - 1];
  }

  // Removes the most recently pushed element.
  // REQUIRES: !empty()
  void pop_back() {
    --size_;
    // If the path was empty the unsigned decrement wraps around to a value
    // far above `N`, so this single check also detects underflow.
    assert(size_ < N);
  }

  // Appends `elem` to the back of the path.
  // REQUIRES: size() < N
  void push_back(CordRepPtr elem) {
    assert(size_ < N);
    data_[size_++] = elem;
  }

 private:
  CordRepPtr data_[N];
  size_t size_ = 0;
};
// A `CordTreePath` of mutable `CordRep*` entries whose capacity is
// `MaxCordDepth()`.
using CordTreeMutablePath = CordTreePath<CordRep*, MaxCordDepth()>;
} // namespace cord_internal
// A Cord is a sequence of characters.
class Cord {
private:
// Constraint helper for the `std::string` overloads below: the alias is formed
// from `absl::enable_if_t` with the condition "`T` is exactly `std::string`"
// and result type `int`, so it is used as a defaulted non-type template
// parameter (`EnableIfString<T> = 0`).
template <typename T>
using EnableIfString =
absl::enable_if_t<std::is_same<T, std::string>::value, int>;
public:
// --------------------------------------------------------------------
// Constructors, destructors and helper factories
// Create an empty cord
constexpr Cord() noexcept;
// Cord is copyable and efficiently movable.
// The moved-from state is valid but unspecified.
Cord(const Cord& src);
Cord(Cord&& src) noexcept;
Cord& operator=(const Cord& x);
Cord& operator=(Cord&& x) noexcept;
// Create a cord out of "src". This constructor is explicit on
// purpose so that people do not get automatic type conversions.
explicit Cord(absl::string_view src);
Cord& operator=(absl::string_view src);
// These are templated to avoid ambiguities for types that are convertible to
// both `absl::string_view` and `std::string`, such as `const char*`.
//
// Note that these functions reserve the right to reuse the `string&&`'s
// memory and that they will do so in the future.
// Delegates to the `absl::string_view` constructor with a view of `src`; `src`
// itself is neither moved from nor forwarded here.
template <typename T, EnableIfString<T> = 0>
explicit Cord(T&& src) : Cord(absl::string_view(src)) {}
template <typename T, EnableIfString<T> = 0>
Cord& operator=(T&& src);
// Destroys the cord. Only a tree-backed representation is handed to the
// out-of-line `DestroyCordSlow()`; nothing is done here for inline data.
~Cord() {
  if (!contents_.is_tree()) return;
  DestroyCordSlow();
}
// Creates a Cord that takes ownership of external memory. The contents of
// `data` are not copied.
//
// This function takes a callable that is invoked when all Cords are
// finished with `data`. The data must remain live and unchanging until the
// releaser is called. The requirements for the releaser are that it:
// * is move constructible,
// * supports `void operator()(absl::string_view) const` or
// `void operator()() const`,
// * does not have alignment requirement greater than what is guaranteed by
// ::operator new. This is dictated by alignof(std::max_align_t) before
// C++17 and __STDCPP_DEFAULT_NEW_ALIGNMENT__ if compiling with C++17 or
// it is supported by the implementation.
//
// Example:
//
// Cord MakeCord(BlockPool* pool) {
// Block* block = pool->NewBlock();
// FillBlock(block);
// return absl::MakeCordFromExternal(
// block->ToStringView(),
// [pool, block](absl::string_view v) {
// pool->FreeBlock(block, v);
// });
// }
//
// WARNING: It's likely a bug if your releaser doesn't do anything.
// For example, consider the following:
//
// void Foo(const char* buffer, int len) {
// auto c = absl::MakeCordFromExternal(absl::string_view(buffer, len),
// [](absl::string_view) {});
//
// // BUG: If Bar() copies its cord for any reason, including keeping a
// // substring of it, the lifetime of buffer might be extended beyond
// // when Foo() returns.
// Bar(c);
// }
template <typename Releaser>
friend Cord MakeCordFromExternal(absl::string_view data, Releaser&& releaser);
// --------------------------------------------------------------------
// Mutations
void Clear();
void Append(const Cord& src);
void Append(Cord&& src);
void Append(absl::string_view src);
template <typename T, EnableIfString<T> = 0>
void Append(T&& src);
void Prepend(const Cord& src);
void Prepend(absl::string_view src);
template <typename T, EnableIfString<T> = 0>
void Prepend(T&& src);
void RemovePrefix(size_t n);
void RemoveSuffix(size_t n);
// Returns a new cord representing the subrange [pos, pos + new_size) of
// *this. If pos >= size(), the result is empty(). If
// (pos + new_size) >= size(), the result is the subrange [pos, size()).
Cord Subcord(size_t pos, size_t new_size) const;
friend void swap(Cord& x, Cord& y) noexcept;
// --------------------------------------------------------------------
// Accessors
size_t size() const;
bool empty() const;
// Returns the approximate number of bytes pinned by this Cord. Note that
// Cords that share memory could each be "charged" independently for the same
// shared memory.
size_t EstimatedMemoryUsage() const;
// --------------------------------------------------------------------
// Comparators
// Compares 'this' Cord with rhs. This function and its relatives
// treat Cords as sequences of unsigned bytes. The comparison is a
// straightforward lexicographic comparison. Return value:
// -1 'this' Cord is smaller
// 0 two Cords are equal
// 1 'this' Cord is larger
int Compare(absl::string_view rhs) const;
int Compare(const Cord& rhs) const;
// Does 'this' cord start/end with rhs
bool StartsWith(const Cord& rhs) const;
bool StartsWith(absl::string_view rhs) const;
bool EndsWith(absl::string_view rhs) const;
bool EndsWith(const Cord& rhs) const;
// --------------------------------------------------------------------
// Conversion to other types
explicit operator std::string() const;
// Copies the contents from `src` to `*dst`.
//
// This function optimizes the case of reusing the destination string since it
// can reuse previously allocated capacity. However, this function does not
// guarantee that pointers previously returned by `dst->data()` remain valid
// even if `*dst` had enough capacity to hold `src`. If `*dst` is a new
// object, prefer to simply use the conversion operator to `std::string`.
friend void CopyCordToString(const Cord& src, std::string* dst);
// --------------------------------------------------------------------
// Iteration
class CharIterator;
// Type for iterating over the chunks of a `Cord`. See comments for
// `Cord::chunk_begin()`, `Cord::chunk_end()` and `Cord::Chunks()` below for
// preferred usage.
//
// Additional notes:
// * The `string_view` returned by dereferencing a valid, non-`end()`
// iterator is guaranteed to be non-empty.
// * A `ChunkIterator` object is invalidated after any non-const
// operation on the `Cord` object over which it iterates.
// * Two `ChunkIterator` objects can be equality compared if and only if
// they remain valid and iterate over the same `Cord`.
// * This is a proxy iterator. This means the `string_view` returned by the
// iterator does not live inside the Cord, and its lifetime is limited to
// the lifetime of the iterator itself. To help prevent issues,
// `ChunkIterator::reference` is not a true reference type and is
// equivalent to `value_type`.
// * The iterator keeps state that can grow for `Cord`s that contain many
// nodes and are imbalanced due to sharing. Prefer to pass this type by
// const reference instead of by value.
class ChunkIterator {
public:
// Standard iterator member types. The category is input iterator and
// `reference` is the value type itself (an `absl::string_view`), not a true
// reference.
using iterator_category = std::input_iterator_tag;
using value_type = absl::string_view;
using difference_type = ptrdiff_t;
using pointer = const value_type*;
using reference = value_type;
// A default-constructed iterator has an empty `current_chunk_`, a null
// `current_leaf_` and zero `bytes_remaining_` (see the member initializers).
ChunkIterator() = default;
ChunkIterator& operator++();
ChunkIterator operator++(int);
bool operator==(const ChunkIterator& other) const;
bool operator!=(const ChunkIterator& other) const;
reference operator*() const;
pointer operator->() const;
friend class Cord;
friend class CharIterator;
private:
// Constructs a `begin()` iterator from `cord`.
explicit ChunkIterator(const Cord* cord);
// Removes `n` bytes from `current_chunk_`. Expects `n` to be smaller than
// `current_chunk_.size()`.
void RemoveChunkPrefix(size_t n);
// NOTE(review): no definition is visible in this header. Judging by name and
// signature this presumably advances by `n` bytes and returns the bytes
// passed over as a `Cord` -- confirm against the implementation.
Cord AdvanceAndReadBytes(size_t n);
// NOTE(review): presumably advances by `n` bytes without materializing them;
// confirm against the implementation.
void AdvanceBytes(size_t n);
// Iterates `n` bytes, where `n` is expected to be greater than or equal to
// `current_chunk_.size()`.
void AdvanceBytesSlowPath(size_t n);
// A view into bytes of the current `CordRep`. It may only be a view to a
// suffix of bytes if this is being used by `CharIterator`.
absl::string_view current_chunk_;
// The current leaf, or `nullptr` if the iterator points to short data.
// If the current chunk is a substring node, current_leaf_ points to the
// underlying flat or external node.
absl::cord_internal::CordRep* current_leaf_ = nullptr;
// The number of bytes left in the `Cord` over which we are iterating.
size_t bytes_remaining_ = 0;
// Fixed-capacity stack of `CordRep*` (capacity `MaxCordDepth()`).
// NOTE(review): per its name, presumably the right-hand subtrees still to be
// visited during traversal -- confirm in the implementation.
absl::cord_internal::CordTreeMutablePath stack_of_right_children_;
};
// Returns an iterator to the first chunk of the `Cord`.
//
// This is useful for getting a `ChunkIterator` outside the context of a
// range-based for-loop (in which case see `Cord::Chunks()` below).
//
// Example:
//
// absl::Cord::ChunkIterator FindAsChunk(const absl::Cord& c,
// absl::string_view s) {
// return std::find(c.chunk_begin(), c.chunk_end(), s);
// }
ChunkIterator chunk_begin() const;
// Returns an iterator one increment past the last chunk of the `Cord`.
ChunkIterator chunk_end() const;
// Convenience wrapper over `Cord::chunk_begin()` and `Cord::chunk_end()` to
// enable range-based for-loop iteration over `Cord` chunks.
//
// Prefer to use `Cord::Chunks()` below instead of constructing this directly.
class ChunkRange {
public:
// Stores the `cord` pointer as-is; the cord is not copied.
explicit ChunkRange(const Cord* cord) : cord_(cord) {}
// Iterator pair consumed by a range-based for-loop.
ChunkIterator begin() const;
ChunkIterator end() const;
private:
// Pointer to the cord being iterated. This class declares no destructor, so
// the pointee is not released by the range.
const Cord* cord_;
};
// Returns a range for iterating over the chunks of a `Cord` with a
// range-based for-loop.
//
// Example:
//
// void ProcessChunks(const Cord& cord) {
// for (absl::string_view chunk : cord.Chunks()) { ... }
// }
//
// Note that the ordinary caveats of temporary lifetime extension apply:
//
// void Process() {
// for (absl::string_view chunk : CordFactory().Chunks()) {
// // The temporary Cord returned by CordFactory has been destroyed!
// }
// }
ChunkRange Chunks() const;
// Type for iterating over the characters of a `Cord`. See comments for
// `Cord::char_begin()`, `Cord::char_end()` and `Cord::Chars()` below for
// preferred usage.
//
// Additional notes:
// * A `CharIterator` object is invalidated after any non-const
// operation on the `Cord` object over which it iterates.
// * Two `CharIterator` objects can be equality compared if and only if
// they remain valid and iterate over the same `Cord`.
// * The iterator keeps state that can grow for `Cord`s that contain many
// nodes and are imbalanced due to sharing. Prefer to pass this type by
// const reference instead of by value.
// * This type cannot be a forward iterator because a `Cord` can reuse
// sections of memory. This violates the requirement that if dereferencing
// two iterators returns the same object, the iterators must compare
// equal.
class CharIterator {
public:
// Standard iterator member types. The category is input iterator and
// dereferencing yields a `const char&`.
using iterator_category = std::input_iterator_tag;
using value_type = char;
using difference_type = ptrdiff_t;
using pointer = const char*;
using reference = const char&;
CharIterator() = default;
CharIterator& operator++();
CharIterator operator++(int);
bool operator==(const CharIterator& other) const;
bool operator!=(const CharIterator& other) const;
reference operator*() const;
pointer operator->() const;
friend Cord;
private:
// Builds the iterator by constructing the wrapped `ChunkIterator` from
// `cord`.
explicit CharIterator(const Cord* cord) : chunk_iterator_(cord) {}
// The chunk-level iterator this character iterator is layered on.
ChunkIterator chunk_iterator_;
};
// Advances `*it` by `n_bytes` and returns the bytes passed as a `Cord`.
//
// `n_bytes` must be less than or equal to the number of bytes remaining for
// iteration. Otherwise the behavior is undefined. It is valid to pass
// `char_end()` and 0.
static Cord AdvanceAndRead(CharIterator* it, size_t n_bytes);
// Advances `*it` by `n_bytes`.
//
// `n_bytes` must be less than or equal to the number of bytes remaining for
// iteration. Otherwise the behavior is undefined. It is valid to pass
// `char_end()` and 0.
static void Advance(CharIterator* it, size_t n_bytes);
// Returns the longest contiguous view starting at the iterator's position.
//
// `it` must be dereferenceable.
static absl::string_view ChunkRemaining(const CharIterator& it);
// Returns an iterator to the first character of the `Cord`.
CharIterator char_begin() const;
// Returns an iterator to one past the last character of the `Cord`.
CharIterator char_end() const;
// Convenience wrapper over `Cord::char_begin()` and `Cord::char_end()` to
// enable range-based for-loop iteration over the characters of a `Cord`.
//
// Prefer to use `Cord::Chars()` below instead of constructing this directly.
class CharRange {
public:
// Stores the `cord` pointer as-is; the cord is not copied.
explicit CharRange(const Cord* cord) : cord_(cord) {}
// Iterator pair consumed by a range-based for-loop.
CharIterator begin() const;
CharIterator end() const;
private:
// Pointer to the cord being iterated. This class declares no destructor, so
// the pointee is not released by the range.
const Cord* cord_;
};
// Returns a range for iterating over the characters of a `Cord` with a
// range-based for-loop.
//
// Example:
//
// void ProcessCord(const Cord& cord) {
// for (char c : cord.Chars()) { ... }
// }
//
// Note that the ordinary caveats of temporary lifetime extension apply:
//
// void Process() {
// for (char c : CordFactory().Chars()) {
// // The temporary Cord returned by CordFactory has been destroyed!
// }
// }
CharRange Chars() const;
// --------------------------------------------------------------------
// Miscellaneous
// Get the "i"th character of 'this' and return it.
// NOTE: This routine is reasonably efficient. It is roughly
// logarithmic in the number of nodes that make up the cord. Still,
// if you need to iterate over the contents of a cord, you should
// use a CharIterator/CordIterator rather than call operator[] or Get()
// repeatedly in a loop.
//
// REQUIRES: 0 <= i < size()
char operator[](size_t i) const;
// If this cord's representation is a single flat array, return a
// string_view referencing that array. Otherwise return nullopt.
absl::optional<absl::string_view> TryFlat() const;
// Flattens the cord into a single array and returns a view of the data.
//
// If the cord was already flat, the contents are not modified.
absl::string_view Flatten();
private:
friend class CordTestPeer;
template <typename H>
friend H absl::hash_internal::HashFragmentedCord(H, const Cord&);
friend bool operator==(const Cord& lhs, const Cord& rhs);
friend bool operator==(const Cord& lhs, absl::string_view rhs);
// Call the provided function once for each cord chunk, in order. Unlike
// Chunks(), this API will not allocate memory.
void ForEachChunk(absl::FunctionRef<void(absl::string_view)>) const;
// Allocates new contiguous storage for the contents of the cord. This is
// called by Flatten() when the cord was not already flat.
absl::string_view FlattenSlowPath();
// Actual cord contents are hidden inside the following simple
// class so that we can isolate the bulk of cord.cc from changes
// to the representation.
//
// InlineRep holds either a tree pointer, or an array of kMaxInline
// bytes.
class InlineRep {
public:
static const unsigned char kMaxInline = 15;
static_assert(kMaxInline >= sizeof(absl::cord_internal::CordRep*), "");
// Tag byte & kTreeFlag means we are storing a pointer.
static const unsigned char kTreeFlag = 1 << 4;
// Tag byte & kProfiledFlag means we are profiling the Cord.
static const unsigned char kProfiledFlag = 1 << 5;
constexpr InlineRep() : data_{} {}
InlineRep(const InlineRep& src);
InlineRep(InlineRep&& src);
InlineRep& operator=(const InlineRep& src);
InlineRep& operator=(InlineRep&& src) noexcept;
void Swap(InlineRep* rhs);
bool empty() const;
size_t size() const;
const char* data() const; // Returns nullptr if holding pointer
void set_data(const char* data, size_t n,
bool nullify_tail); // Discards pointer, if any
char* set_data(size_t n); // Write data to the result
// Returns nullptr if holding bytes
absl::cord_internal::CordRep* tree() const;
// Discards old pointer, if any
void set_tree(absl::cord_internal::CordRep* rep);
// Replaces a tree with a new root. This is faster than set_tree, but it
// should only be used when it's clear that the old rep was a tree.
void replace_tree(absl::cord_internal::CordRep* rep);
// Returns non-null iff was holding a pointer
absl::cord_internal::CordRep* clear();
// Convert to pointer if necessary
absl::cord_internal::CordRep* force_tree(size_t extra_hint);
void reduce_size(size_t n); // REQUIRES: holding data
void remove_prefix(size_t n); // REQUIRES: holding data
void AppendArray(const char* src_data, size_t src_size);
absl::string_view FindFlatStartPiece() const;
void AppendTree(absl::cord_internal::CordRep* tree);
void PrependTree(absl::cord_internal::CordRep* tree);
void GetAppendRegion(char** region, size_t* size, size_t max_length);
void GetAppendRegion(char** region, size_t* size);
// Returns true iff this representation and `other` are byte-for-byte
// identical over the whole `data_` buffer (tag byte included).
bool IsSame(const InlineRep& other) const {
  const int diff = memcmp(data_, other.data_, sizeof(data_));
  return diff == 0;
}
// Three-way comparison of the raw buffers, taken as two 8-byte words each.
// The first pair of words that differs decides the result after conversion
// with `absl::big_endian::FromHost64`. Returns 0 when both pairs are equal,
// -1 when this buffer orders first and 1 otherwise.
int BitwiseCompare(const InlineRep& other) const {
  uint64_t lhs_word;
  uint64_t rhs_word;
  // Load through memcpy so the char buffer is never accessed through a
  // uint64_t lvalue (avoids aliasing problems).
  memcpy(&lhs_word, data_, sizeof(lhs_word));
  memcpy(&rhs_word, other.data_, sizeof(rhs_word));
  if (lhs_word == rhs_word) {
    // Leading words match; fall through to the trailing 8 bytes.
    memcpy(&lhs_word, data_ + 8, sizeof(lhs_word));
    memcpy(&rhs_word, other.data_ + 8, sizeof(rhs_word));
    if (lhs_word == rhs_word) return 0;
  }
  const bool lhs_is_smaller = absl::big_endian::FromHost64(lhs_word) <
                              absl::big_endian::FromHost64(rhs_word);
  return lhs_is_smaller ? -1 : 1;
}
// Copies the inline bytes into `*dst`, replacing its previous contents.
void CopyTo(std::string* dst) const {
  // Always copy the full inline capacity (everything except the tag byte):
  // memcpy with a fixed size is much cheaper than a variable-length copy, and
  // on most supported platforms growing a string to 15 bytes stays within the
  // small string optimization, so no allocation happens.
  const size_t inline_capacity = sizeof(data_) - 1;
  absl::strings_internal::STLStringResizeUninitialized(dst, inline_capacity);
  memcpy(&(*dst)[0], data_, inline_capacity);
  // Trim down to the length held in the tag byte. erase() is used rather
  // than resize() because it never needs the memory allocation logic.
  dst->erase(data_[kMaxInline]);
}
// Copies the inline contents into `dst`. Assumes the cord is not empty.
void CopyToArray(char* dst) const;
// True when the tag byte (last byte of `data_`) is greater than `kMaxInline`.
bool is_tree() const {
  const char tag = data_[kMaxInline];
  return tag > kMaxInline;
}
private:
friend class Cord;
void AssignSlow(const InlineRep& src);
// Unrefs the tree, stops profiling, and zeroes the contents
void ClearSlow();
// If the data has length <= kMaxInline, we store it in data_[0..len-1],
// and store the length in data_[kMaxInline]. Else we store it in a tree
// and store a pointer to that tree in data_[0..sizeof(CordRep*)-1].
alignas(absl::cord_internal::CordRep*) char data_[kMaxInline + 1];
};
InlineRep contents_;
// Helper for EstimatedMemoryUsage()
static size_t MemoryUsageAux(const absl::cord_internal::CordRep* rep);
// Helper for GetFlat() and TryFlat()
static bool GetFlatAux(absl::cord_internal::CordRep* rep,
absl::string_view* fragment);
// Helper for ForEachChunk()
static void ForEachChunkAux(
absl::cord_internal::CordRep* rep,
absl::FunctionRef<void(absl::string_view)> callback);
// The destructor for non-empty Cords.
void DestroyCordSlow();
// Out-of-line implementation of slower parts of logic.
void CopyToArraySlowPath(char* dst) const;
int CompareSlowPath(absl::string_view rhs, size_t compared_size,
size_t size_to_compare) const;
int CompareSlowPath(const Cord& rhs, size_t compared_size,
size_t size_to_compare) const;
bool EqualsImpl(absl::string_view rhs, size_t size_to_compare) const;
bool EqualsImpl(const Cord& rhs, size_t size_to_compare) const;
int CompareImpl(const Cord& rhs) const;
template <typename ResultType, typename RHS>
friend ResultType GenericCompare(const Cord& lhs, const RHS& rhs,
size_t size_to_compare);
static absl::string_view GetFirstChunk(const Cord& c);
static absl::string_view GetFirstChunk(absl::string_view sv);
// Returns a new reference to contents_.tree(), or steals an existing
// reference if called on an rvalue.
absl::cord_internal::CordRep* TakeRep() const&;
absl::cord_internal::CordRep* TakeRep() &&;
// Helper for Append()
template <typename C>
void AppendImpl(C&& src);
};
ABSL_NAMESPACE_END
} // namespace absl
namespace absl {
ABSL_NAMESPACE_BEGIN
// allow a Cord to be logged
extern std::ostream& operator<<(std::ostream& out, const Cord& cord);
// ------------------------------------------------------------------
// Internal details follow. Clients should ignore.
namespace cord_internal {
// Fast implementation of memmove for small sizes (`n` must not exceed 16).
//
// This implementation is safe for overlapping regions in either direction:
// in every branch all source bytes are loaded before any destination byte is
// stored. If nullify_tail is true, the destination is padded with '\0' up to
// 16 bytes, so `dst` must then refer to at least 16 writable bytes.
inline void SmallMemmove(char* dst, const char* src, size_t n,
                         bool nullify_tail = false) {
  if (n >= 8) {
    assert(n <= 16);
    // Two (possibly overlapping) 8-byte words cover [0, n).
    uint64_t buf1;
    uint64_t buf2;
    memcpy(&buf1, src, 8);
    memcpy(&buf2, src + n - 8, 8);
    if (nullify_tail) {
      memset(dst + 8, 0, 8);
    }
    memcpy(dst, &buf1, 8);
    memcpy(dst + n - 8, &buf2, 8);
  } else if (n >= 4) {
    // Two (possibly overlapping) 4-byte words cover [0, n).
    uint32_t buf1;
    uint32_t buf2;
    memcpy(&buf1, src, 4);
    memcpy(&buf2, src + n - 4, 4);
    if (nullify_tail) {
      memset(dst + 4, 0, 4);
      memset(dst + 8, 0, 8);
    }
    memcpy(dst, &buf1, 4);
    memcpy(dst + n - 4, &buf2, 4);
  } else {
    if (n != 0) {
      // For n in {1, 2, 3} the indices 0, n / 2 and n - 1 touch every byte.
      // Load all three before storing: when `dst` overlaps `src` from above
      // (e.g. dst == src + 1), storing dst[0] first would clobber a source
      // byte that has not been read yet.
      const char first = src[0];
      const char middle = src[n / 2];
      const char last = src[n - 1];
      dst[0] = first;
      dst[n / 2] = middle;
      dst[n - 1] = last;
    }
    if (nullify_tail) {
      // Together these zero [n, 16).
      memset(dst + 8, 0, 8);
      memset(dst + n, 0, 8);
    }
  }
}
// Result of `NewExternalWithUninitializedReleaser()`: the newly allocated
// external rep together with the location where its releaser is to be
// constructed.
struct ExternalRepReleaserPair {
// The new external `CordRep`.
CordRep* rep;
// Storage in which the caller constructs the releaser object (see the
// placement-new in `NewExternalRep()`).
void* releaser_address;
};
// Allocates a new external `CordRep` and returns a pointer to it and a pointer
// to `releaser_size` bytes where the desired releaser can be constructed.
// Expects `data` to be non-empty.
ExternalRepReleaserPair NewExternalWithUninitializedReleaser(
absl::string_view data, ExternalReleaserInvoker invoker,
size_t releaser_size);
// Overload-ranking tags. `Rank0` derives from `Rank1`, so an argument of
// `Rank0{}` matches a `Rank0` parameter exactly and a `Rank1` parameter only
// through a derived-to-base conversion. When both overloads below are viable,
// the `Rank0` one is therefore preferred.
struct Rank1 {};
struct Rank0 : Rank1 {};
// Preferred overload: invokes `releaser` with `data`. The defaulted template
// argument names `InvokeT<Releaser, absl::string_view>`, which is presumably
// only well-formed when the releaser accepts an `absl::string_view`.
template <typename Releaser, typename = ::absl::base_internal::InvokeT<
Releaser, absl::string_view>>
void InvokeReleaser(Rank0, Releaser&& releaser, absl::string_view data) {
::absl::base_internal::Invoke(std::forward<Releaser>(releaser), data);
}
// Fallback overload: invokes `releaser` with no arguments; the data view is
// accepted but ignored.
template <typename Releaser,
typename = ::absl::base_internal::InvokeT<Releaser>>
void InvokeReleaser(Rank1, Releaser&& releaser, absl::string_view) {
::absl::base_internal::Invoke(std::forward<Releaser>(releaser));
}
// Creates a new `CordRep` that owns `data` and `releaser` and returns a pointer
// to it, or `nullptr` if `data` was empty.
//
// When `data` is empty no node is allocated: a decayed copy of `releaser` is
// invoked immediately with the (empty) view. Otherwise the releaser is
// constructed in the storage returned by
// `NewExternalWithUninitializedReleaser()`, and a type-erased invoker is
// registered that calls the releaser and then runs its destructor.
template <typename Releaser>
// NOLINTNEXTLINE - suppress clang-tidy raw pointer return.
CordRep* NewExternalRep(absl::string_view data, Releaser&& releaser) {
  // The object that is actually stored is of the decayed type, so every
  // size/alignment query below is made on that type.
  using ReleaserType = absl::decay_t<Releaser>;
  static_assert(
#if defined(__STDCPP_DEFAULT_NEW_ALIGNMENT__)
      alignof(ReleaserType) <= __STDCPP_DEFAULT_NEW_ALIGNMENT__,
#else
      // `<cstddef>` only guarantees the `std::`-qualified name.
      alignof(ReleaserType) <= alignof(std::max_align_t),
#endif
      "Releasers with alignment requirement greater than what is returned by "
      "default `::operator new()` are not supported.");

  if (data.empty()) {
    // Never create empty external nodes.
    InvokeReleaser(Rank0{}, ReleaserType(std::forward<Releaser>(releaser)),
                   data);
    return nullptr;
  }

  // Captureless lambda used as the type-erased invoker: it recovers the
  // concrete releaser, invokes it as an rvalue, destroys it, and reports the
  // releaser's size.
  auto releaser_invoker = [](void* type_erased_releaser, absl::string_view d) {
    auto* my_releaser = static_cast<ReleaserType*>(type_erased_releaser);
    InvokeReleaser(Rank0{}, std::move(*my_releaser), d);
    my_releaser->~ReleaserType();
    return sizeof(ReleaserType);
  };

  ExternalRepReleaserPair external = NewExternalWithUninitializedReleaser(
      data, releaser_invoker, sizeof(ReleaserType));
  // Construct the releaser in place in the storage handed back above.
  ::new (external.releaser_address)
      ReleaserType(std::forward<Releaser>(releaser));
  return external.rep;
}
// Overload for function reference types that dispatches using a function
// pointer, because `alignof()` and `sizeof()` cannot be applied to a function
// type. Taking the address here makes the template overload see a pointer to
// the function instead.
// NOLINTNEXTLINE - suppress clang-tidy raw pointer return.
inline CordRep* NewExternalRep(absl::string_view data,
void (&releaser)(absl::string_view)) {
return NewExternalRep(data, &releaser);
}
} // namespace cord_internal
// Creates a `Cord` whose tree is the external rep built from `data` and
// `releaser`. `NewExternalRep` yields `nullptr` for empty `data`, in which
// case `set_tree` zeroes the representation.
template <typename Releaser>
Cord MakeCordFromExternal(absl::string_view data, Releaser&& releaser) {
  Cord result;
  auto* external_rep = ::absl::cord_internal::NewExternalRep(
      data, std::forward<Releaser>(releaser));
  result.contents_.set_tree(external_rep);
  return result;
}
// Copy constructor: copies the whole inline buffer of `src` (all
// `sizeof(data_)` bytes) into this representation via `SmallMemmove`.
inline Cord::InlineRep::InlineRep(const Cord::InlineRep& src) {
  cord_internal::SmallMemmove(data_, src.data_, sizeof(src.data_));
}
// Move constructor: takes over the raw bytes of `src` and then zeroes the
// source buffer.
inline Cord::InlineRep::InlineRep(Cord::InlineRep&& src) {
  memcpy(data_, src.data_, sizeof(src.data_));
  memset(src.data_, 0, sizeof(src.data_));
}
// Copy assignment. Self-assignment is a no-op. When neither side holds a tree
// the buffer is copied directly; every other combination is delegated to
// `AssignSlow`.
inline Cord::InlineRep& Cord::InlineRep::operator=(const Cord::InlineRep& src) {
  if (this != &src) {
    const bool both_inline = !(is_tree() || src.is_tree());
    if (both_inline) {
      cord_internal::SmallMemmove(data_, src.data_, sizeof(data_));
    } else {
      AssignSlow(src);
    }
  }
  return *this;
}
// Move assignment: if this representation currently holds a tree it is
// cleared through `ClearSlow()` first; then the bytes of `src` are taken over
// and the source buffer is zeroed.
inline Cord::InlineRep& Cord::InlineRep::operator=(
    Cord::InlineRep&& src) noexcept {
  const bool holds_tree = is_tree();
  if (holds_tree) {
    ClearSlow();
  }
  memcpy(data_, src.data_, sizeof(data_));
  memset(src.data_, 0, sizeof(data_));
  return *this;
}
// Exchanges the raw buffers of `*this` and `*rhs` through a scratch
// representation. Swapping with itself does nothing.
inline void Cord::InlineRep::Swap(Cord::InlineRep* rhs) {
  if (rhs != this) {
    Cord::InlineRep scratch;
    cord_internal::SmallMemmove(scratch.data_, data_, sizeof(data_));
    cord_internal::SmallMemmove(data_, rhs->data_, sizeof(data_));
    cord_internal::SmallMemmove(rhs->data_, scratch.data_, sizeof(data_));
  }
}
// Returns a pointer to the inline buffer, or `nullptr` when the
// representation holds a tree.
inline const char* Cord::InlineRep::data() const {
  if (is_tree()) {
    return nullptr;
  }
  return data_;
}
// Returns the tree pointer stored at the start of the buffer, or `nullptr`
// when the representation does not hold a tree.
inline absl::cord_internal::CordRep* Cord::InlineRep::tree() const {
  if (!is_tree()) {
    return nullptr;
  }
  // The pointer is read with memcpy rather than through a cast of the
  // character buffer.
  absl::cord_internal::CordRep* stored;
  memcpy(&stored, data_, sizeof(stored));
  return stored;
}
// True when the tag byte at index `kMaxInline` is zero.
inline bool Cord::InlineRep::empty() const {
  const char tag = data_[kMaxInline];
  return tag == 0;
}
// Returns the stored length. A tag byte no greater than `kMaxInline` is itself
// the length; a larger tag means the length is read from the tree node.
inline size_t Cord::InlineRep::size() const {
  const char tag = data_[kMaxInline];
  if (tag > kMaxInline) {
    return static_cast<size_t>(tree()->length);
  }
  return tag;
}
// Stores `rep` as the tree of this representation. A null `rep` zeroes the
// whole buffer. Otherwise the pointer is written at the start of the buffer,
// the bytes after it (except the final byte) are zeroed, and the tag byte is
// set to `kTreeFlag` if the representation was not already a tree.
inline void Cord::InlineRep::set_tree(absl::cord_internal::CordRep* rep) {
  if (rep == nullptr) {
    memset(data_, 0, sizeof(data_));
    return;
  }
  // Must be sampled before the buffer is overwritten below.
  const bool had_tree = is_tree();
  memcpy(data_, &rep, sizeof(rep));
  memset(data_ + sizeof(rep), 0, sizeof(data_) - sizeof(rep) - 1);
  if (!had_tree) {
    data_[kMaxInline] = kTreeFlag;
  }
}
// Replaces the stored tree pointer with `rep`; asserts that a tree is
// currently held. A null `rep` is routed through `set_tree`. Otherwise the
// pointer is overwritten and the bytes after it (except the final byte) are
// zeroed; the final byte is left as it was.
inline void Cord::InlineRep::replace_tree(absl::cord_internal::CordRep* rep) {
  ABSL_ASSERT(is_tree());
  const bool clearing = (rep == nullptr);
  if (ABSL_PREDICT_FALSE(clearing)) {
    set_tree(rep);
  } else {
    memcpy(data_, &rep, sizeof(rep));
    memset(data_ + sizeof(rep), 0, sizeof(data_) - sizeof(rep) - 1);
  }
}
// Zeroes the representation and returns the tree pointer it held, or
// `nullptr` if the tag byte indicated no tree (tag <= `kMaxInline`).
inline absl::cord_internal::CordRep* Cord::InlineRep::clear() {
  absl::cord_internal::CordRep* detached = nullptr;
  const bool holds_tree = data_[kMaxInline] > kMaxInline;
  if (holds_tree) {
    memcpy(&detached, data_, sizeof(detached));
  }
  // Clear the cord.
  memset(data_, 0, sizeof(data_));
  return detached;
}
// Copies the inline bytes into `dst`. The byte count is read from the tag
// byte; the asserts require a non-tree, non-empty representation.
inline void Cord::InlineRep::CopyToArray(char* dst) const {
  assert(!is_tree());
  const size_t n = data_[kMaxInline];
  assert(n != 0);
  cord_internal::SmallMemmove(dst, data_, n);
}
// Default constructor. The body is empty, so the initial state comes entirely
// from the members' own initialization.
constexpr inline Cord::Cord() noexcept {
}
// Copy assignment: delegates to the copy assignment of the inline
// representation.
inline Cord& Cord::operator=(const Cord& x) {
  this->contents_ = x.contents_;
  return *this;
}
// Move constructor: move-constructs the inline representation from that of
// `src`.
inline Cord::Cord(Cord&& src) noexcept
    : contents_(std::move(src.contents_)) {
}
// Move assignment: delegates to the move assignment of the inline
// representation.
inline Cord& Cord::operator=(Cord&& x) noexcept {
  this->contents_ = std::move(x.contents_);
  return *this;
}
// Assignment from a string-like `T` (as constrained by `EnableIfString`): the
// argument is viewed as an `absl::string_view` and assigned through that
// overload.
template <typename T, Cord::EnableIfString<T>>
inline Cord& Cord::operator=(T&& src) {
  return (*this = absl::string_view(src));
}
// Returns the length reported by the inline representation, which covers both
// inline and tree storage.
inline size_t Cord::size() const {
  const size_t length = contents_.size();
  return length;
}
// True when the inline representation reports itself as empty.
inline bool Cord::empty() const {
  const bool is_empty = contents_.empty();
  return is_empty;
}
inline size_t Cord::EstimatedMemoryUsage() const {
size_t result = sizeof(Cord);
if (const absl::cord_internal::CordRep* rep = contents_.tree()) {
result += MemoryUsageAux(rep);
}
return result;
}
// Returns a view of the contents when they are available as one flat range:
// always for inline data, and for a tree only when `GetFlatAux` succeeds.
// Otherwise returns `absl::nullopt`.
inline absl::optional<absl::string_view> Cord::TryFlat() const {
  absl::cord_internal::CordRep* const root = contents_.tree();
  if (root != nullptr) {
    absl::string_view flat;
    if (GetFlatAux(root, &flat)) {
      return flat;
    }
    return absl::nullopt;
  }
  return absl::string_view(contents_.data(), contents_.size());
}
// Returns a view of the whole contents. Inline data and trees for which
// `GetFlatAux` succeeds are returned directly; anything else goes through
// `FlattenSlowPath()`.
inline absl::string_view Cord::Flatten() {
  absl::cord_internal::CordRep* const root = contents_.tree();
  if (root == nullptr) {
    return absl::string_view(contents_.data(), contents_.size());
  }
  absl::string_view flat;
  if (GetFlatAux(root, &flat)) {
    return flat;
  }
  return FlattenSlowPath();
}
// Appends the bytes viewed by `src` by handing its pointer and length to the
// representation's `AppendArray`.
inline void Cord::Append(absl::string_view src) {
  const char* const bytes = src.data();
  const size_t length = src.size();
  contents_.AppendArray(bytes, length);
}
template <typename T, Cord::EnableIfString<T>>
inline void Cord::Append(T&& src) {
// Note that this function reserves the right to reuse the `string&&`'s
// memory and that it will do so in the future.
Append(absl::string_view(src));
}
template <typename T, Cord::EnableIfString<T>>
inline void Cord::Prepend(T&& src) {
// Note that this function reserves the right to reuse the `string&&`'s
// memory and that it will do so in the future.
Prepend(absl::string_view(src));
}
// Three-way comparison against `rhs`. When neither cord holds a tree the
// inline representations are compared with `BitwiseCompare`; otherwise the
// work is delegated to `CompareImpl`.
inline int Cord::Compare(const Cord& rhs) const {
  const bool any_tree = contents_.is_tree() || rhs.contents_.is_tree();
  if (any_tree) {
    return CompareImpl(rhs);
  }
  return contents_.BitwiseCompare(rhs.contents_);
}
// Returns true if this cord begins with `rhs`. Identical representations
// match immediately; a cord shorter than `rhs` cannot match; otherwise the
// first `rhs.size()` bytes are compared via `EqualsImpl`.
inline bool Cord::StartsWith(const Cord& rhs) const {
  if (contents_.IsSame(rhs.contents_)) {
    return true;
  }
  const size_t prefix_size = rhs.size();
  return size() >= prefix_size && EqualsImpl(rhs, prefix_size);
}
// Returns true if this cord begins with `rhs`. A cord shorter than `rhs`
// cannot match; otherwise the first `rhs.size()` bytes are compared via
// `EqualsImpl`.
inline bool Cord::StartsWith(absl::string_view rhs) const {
  const size_t prefix_size = rhs.size();
  return size() >= prefix_size && EqualsImpl(rhs, prefix_size);
}
// Positions a new iterator on the first chunk of `*cord`. An empty cord needs
// no further setup. Inline contents form the single current chunk. A tree is
// pushed onto the stack and the iterator is advanced once to reach a chunk.
inline Cord::ChunkIterator::ChunkIterator(const Cord* cord)
    : bytes_remaining_(cord->size()) {
  if (cord->empty()) {
    return;
  }
  if (!cord->contents_.is_tree()) {
    current_chunk_ = absl::string_view(cord->contents_.data(), cord->size());
    return;
  }
  stack_of_right_children_.push_back(cord->contents_.tree());
  operator++();
}
// Post-increment: advances the iterator and returns a copy of its previous
// state.
inline Cord::ChunkIterator Cord::ChunkIterator::operator++(int) {
  ChunkIterator previous(*this);
  ++*this;
  return previous;
}
// Two chunk iterators compare equal when they have the same number of bytes
// remaining.
inline bool Cord::ChunkIterator::operator==(const ChunkIterator& other) const {
  const bool same_position = (bytes_remaining_ == other.bytes_remaining_);
  return same_position;
}
// Negation of `operator==`.
inline bool Cord::ChunkIterator::operator!=(const ChunkIterator& other) const {
  if (*this == other) {
    return false;
  }
  return true;
}
// Returns the current chunk. Asserts that bytes remain, so the iterator must
// not be dereferenced once exhausted.
inline Cord::ChunkIterator::reference Cord::ChunkIterator::operator*() const {
  assert(bytes_remaining_ != 0);
  return this->current_chunk_;
}
// Returns a pointer to the current chunk. Asserts that bytes remain, so the
// iterator must not be dereferenced once exhausted.
inline Cord::ChunkIterator::pointer Cord::ChunkIterator::operator->() const {
  assert(bytes_remaining_ != 0);
  // Address of the member view; the text had been corrupted by an HTML-entity
  // decode that turned `&curren` into the currency sign.
  return &current_chunk_;
}
// Drops the first `n` bytes of the current chunk and reduces the remaining
// byte count by the same amount. Asserts that `n` is strictly smaller than the
// chunk, so the chunk is never emptied here.
inline void Cord::ChunkIterator::RemoveChunkPrefix(size_t n) {
  assert(n < current_chunk_.size());
  bytes_remaining_ -= n;
  current_chunk_.remove_prefix(n);
}
// Advances the iterator by `n` bytes. Staying inside the current chunk is the
// expected case and is handled by trimming the chunk; any other non-zero
// advance goes through `AdvanceBytesSlowPath`. Advancing by zero on an empty
// chunk does nothing.
inline void Cord::ChunkIterator::AdvanceBytes(size_t n) {
  if (ABSL_PREDICT_TRUE(n < current_chunk_.size())) {
    RemoveChunkPrefix(n);
    return;
  }
  if (n != 0) {
    AdvanceBytesSlowPath(n);
  }
}
// Returns a chunk iterator constructed over this cord.
inline Cord::ChunkIterator Cord::chunk_begin() const {
  const Cord* const self = this;
  return ChunkIterator(self);
}
// Returns the end sentinel: a default-constructed chunk iterator.
inline Cord::ChunkIterator Cord::chunk_end() const {
  return ChunkIterator();
}
// Start of the range: forwards to `chunk_begin()` of the referenced cord.
inline Cord::ChunkIterator Cord::ChunkRange::begin() const {
  return (*cord_).chunk_begin();
}
// End of the range: forwards to `chunk_end()` of the referenced cord.
inline Cord::ChunkIterator Cord::ChunkRange::end() const {
  return (*cord_).chunk_end();
}
// Returns a `ChunkRange` constructed from this cord.
inline Cord::ChunkRange Cord::Chunks() const {
  return ChunkRange(this);
}
// Pre-increment: moves to the next character. While more than one byte is
// left in the current chunk (the expected case) the chunk is trimmed by one
// byte; otherwise the underlying chunk iterator is advanced.
inline Cord::CharIterator& Cord::CharIterator::operator++() {
  if (ABSL_PREDICT_TRUE(chunk_iterator_->size() > 1)) {
    chunk_iterator_.RemoveChunkPrefix(1);
    return *this;
  }
  ++chunk_iterator_;
  return *this;
}
// Post-increment: advances the iterator and returns a copy of its previous
// state.
inline Cord::CharIterator Cord::CharIterator::operator++(int) {
  CharIterator previous(*this);
  ++*this;
  return previous;
}
// Character iterators are equal when their underlying chunk iterators are.
inline bool Cord::CharIterator::operator==(const CharIterator& other) const {
  const bool same_position = (chunk_iterator_ == other.chunk_iterator_);
  return same_position;
}
// Negation of `operator==`.
inline bool Cord::CharIterator::operator!=(const CharIterator& other) const {
  if (*this == other) {
    return false;
  }
  return true;
}
// Returns the first character of the chunk the underlying iterator is on.
inline Cord::CharIterator::reference Cord::CharIterator::operator*() const {
  const char* const head = chunk_iterator_->data();
  return *head;
}
// Returns a pointer to the first character of the chunk the underlying
// iterator is on.
inline Cord::CharIterator::pointer Cord::CharIterator::operator->() const {
  const char* const head = chunk_iterator_->data();
  return head;
}
// Advances `*it` by `n_bytes` and returns the bytes passed over as a `Cord`,
// by delegating to the chunk iterator's `AdvanceAndReadBytes`. `it` must not
// be null.
inline Cord Cord::AdvanceAndRead(CharIterator* it, size_t n_bytes) {
  assert(it != nullptr);
  auto& chunks = it->chunk_iterator_;
  return chunks.AdvanceAndReadBytes(n_bytes);
}
// Advances `*it` by `n_bytes` by delegating to the chunk iterator's
// `AdvanceBytes`. `it` must not be null.
inline void Cord::Advance(CharIterator* it, size_t n_bytes) {
  assert(it != nullptr);
  auto& chunks = it->chunk_iterator_;
  chunks.AdvanceBytes(n_bytes);
}
// Returns what is left of the chunk `it` currently points into, obtained by
// dereferencing its underlying chunk iterator.
inline absl::string_view Cord::ChunkRemaining(const CharIterator& it) {
  const absl::string_view remaining = *it.chunk_iterator_;
  return remaining;
}
// Returns a character iterator constructed over this cord.
inline Cord::CharIterator Cord::char_begin() const {
  const Cord* const self = this;
  return CharIterator(self);
}
// Returns the end sentinel: a default-constructed character iterator.
inline Cord::CharIterator Cord::char_end() const {
  return CharIterator();
}
// Start of the range: forwards to `char_begin()` of the referenced cord.
inline Cord::CharIterator Cord::CharRange::begin() const {
  return (*cord_).char_begin();
}
// End of the range: forwards to `char_end()` of the referenced cord.
inline Cord::CharIterator Cord::CharRange::end() const {
  return (*cord_).char_end();
}
// Returns a `CharRange` constructed from this cord.
inline Cord::CharRange Cord::Chars() const {
  return CharRange(this);
}
// Invokes `callback` on the contents of this cord. Tree-backed contents are
// walked by `ForEachChunkAux`; inline contents are passed to `callback` in a
// single call (this includes the empty cord, which yields an empty view).
inline void Cord::ForEachChunk(
    absl::FunctionRef<void(absl::string_view)> callback) const {
  absl::cord_internal::CordRep* const root = contents_.tree();
  if (root != nullptr) {
    ForEachChunkAux(root, callback);
    return;
  }
  callback(absl::string_view(contents_.data(), contents_.size()));
}
// Nonmember Cord-to-Cord relational operators.
// Equality of two cords. Identical representations are equal immediately;
// differing sizes are unequal; otherwise the contents are compared via
// `EqualsImpl`.
inline bool operator==(const Cord& lhs, const Cord& rhs) {
  if (lhs.contents_.IsSame(rhs.contents_)) {
    return true;
  }
  const size_t rhs_size = rhs.size();
  return lhs.size() == rhs_size && lhs.EqualsImpl(rhs, rhs_size);
}
// Negation of Cord-to-Cord `operator==`.
inline bool operator!=(const Cord& x, const Cord& y) {
  const bool equal = (x == y);
  return !equal;
}
// True when `x.Compare(y)` is negative.
inline bool operator<(const Cord& x, const Cord& y) {
  const int order = x.Compare(y);
  return order < 0;
}
// True when `x.Compare(y)` is positive.
inline bool operator>(const Cord& x, const Cord& y) {
  const int order = x.Compare(y);
  return order > 0;
}
// True when `x.Compare(y)` is zero or negative.
inline bool operator<=(const Cord& x, const Cord& y) {
  const int order = x.Compare(y);
  return order <= 0;
}
// True when `x.Compare(y)` is zero or positive.
inline bool operator>=(const Cord& x, const Cord& y) {
  const int order = x.Compare(y);
  return order >= 0;
}
// Nonmember Cord-to-absl::string_view relational operators.
//
// Due to implicit conversions, these also enable comparisons of Cord with
// std::string, ::string, and const char*.
// Equality of a cord and a string view: differing sizes are unequal;
// otherwise the contents are compared via `EqualsImpl`.
inline bool operator==(const Cord& lhs, absl::string_view rhs) {
  const size_t rhs_size = rhs.size();
  const size_t lhs_size = lhs.size();
  return lhs_size == rhs_size && lhs.EqualsImpl(rhs, rhs_size);
}
// Equality with the operands swapped; forwards to the (Cord, string_view)
// overload.
inline bool operator==(absl::string_view x, const Cord& y) {
  const bool equal = (y == x);
  return equal;
}
// Negation of `x == y`.
inline bool operator!=(const Cord& x, absl::string_view y) {
  const bool equal = (x == y);
  return !equal;
}
// Negation of `x == y`.
inline bool operator!=(absl::string_view x, const Cord& y) {
  const bool equal = (x == y);
  return !equal;
}
// True when `x.Compare(y)` is negative.
inline bool operator<(const Cord& x, absl::string_view y) {
  const auto order = x.Compare(y);
  return order < 0;
}
// `x < y` expressed from the cord's side: true when `y.Compare(x)` is
// positive.
inline bool operator<(absl::string_view x, const Cord& y) {
  const auto order = y.Compare(x);
  return order > 0;
}
// `x > y` is evaluated as `y < x`.
inline bool operator>(const Cord& x, absl::string_view y) {
  const bool rhs_is_less = (y < x);
  return rhs_is_less;
}
// `x > y` is evaluated as `y < x`.
inline bool operator>(absl::string_view x, const Cord& y) {
  const bool rhs_is_less = (y < x);
  return rhs_is_less;
}
// `x <= y` is evaluated as the negation of `y < x`.
inline bool operator<=(const Cord& x, absl::string_view y) {
  const bool rhs_is_less = (y < x);
  return !rhs_is_less;
}
// `x <= y` is evaluated as the negation of `y < x`.
inline bool operator<=(absl::string_view x, const Cord& y) {
  const bool rhs_is_less = (y < x);
  return !rhs_is_less;
}
// `x >= y` is evaluated as the negation of `x < y`.
inline bool operator>=(const Cord& x, absl::string_view y) {
  const bool lhs_is_less = (x < y);
  return !lhs_is_less;
}
// `x >= y` is evaluated as the negation of `x < y`.
inline bool operator>=(absl::string_view x, const Cord& y) {
  const bool lhs_is_less = (x < y);
  return !lhs_is_less;
}
// Overload of swap for Cord; exchanges the inline representations of `x` and
// `y`. The use of non-const references is required. :(
inline void swap(Cord& x, Cord& y) noexcept {
  auto* const x_rep = &x.contents_;
  y.contents_.Swap(x_rep);
}
// Some internals exposed to test code.
//
// Only declarations are visible here; the definitions live elsewhere, so the
// per-function notes below are inferred from the names and should be confirmed
// against the implementation.
namespace strings_internal {
// Collection of static accessors; the class carries no data members.
class CordTestAccess {
public:
// NOTE(review): presumably the per-node overhead of a flat rep - confirm.
static size_t FlatOverhead();
// NOTE(review): presumably the largest payload a flat rep can hold - confirm.
static size_t MaxFlatLength();
// NOTE(review): the three `Sizeof*` accessors presumably report `sizeof` of
// the named internal rep types - confirm.
static size_t SizeofCordRepConcat();
static size_t SizeofCordRepExternal();
static size_t SizeofCordRepSubstring();
// NOTE(review): presumably maps a flat-rep tag to an allocation length, with
// `LengthToTag` going the other way - confirm.
static size_t FlatTagToLength(uint8_t tag);
static uint8_t LengthToTag(size_t s);
};
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_CORD_H_
|