about summary refs log tree commit diff
path: root/absl/strings/str_split.h
diff options
context:
space:
mode:
Diffstat (limited to 'absl/strings/str_split.h')
-rw-r--r--absl/strings/str_split.h511
1 files changed, 511 insertions, 0 deletions
diff --git a/absl/strings/str_split.h b/absl/strings/str_split.h
new file mode 100644
index 000000000000..a7b48b18916b
--- /dev/null
+++ b/absl/strings/str_split.h
@@ -0,0 +1,511 @@
+//
+// Copyright 2017 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// -----------------------------------------------------------------------------
+// File: str_split.h
+// -----------------------------------------------------------------------------
+//
+// This file contains functions for splitting strings. It defines the main
+// `StrSplit()` function, several delimiters for determining the boundaries on
+// which to split the string, and predicates for filtering delimited results.
+// `StrSplit()` adapts the returned collection to the type specified by the
+// caller.
+//
+// Example:
+//
+//   // Splits the given string on commas. Returns the results in a
+//   // vector of strings.
+//   std::vector<std::string> v = absl::StrSplit("a,b,c", ',');
+//   // Can also use ","
+//   // v[0] == "a", v[1] == "b", v[2] == "c"
+//
+// See StrSplit() below for more information.
+#ifndef ABSL_STRINGS_STR_SPLIT_H_
+#define ABSL_STRINGS_STR_SPLIT_H_
+
+#include <algorithm>
+#include <cstddef>
+#include <map>
+#include <set>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "absl/base/internal/raw_logging.h"
+#include "absl/strings/internal/str_split_internal.h"
+#include "absl/strings/string_view.h"
+#include "absl/strings/strip.h"
+
+namespace absl {
+
+//------------------------------------------------------------------------------
+// Delimiters
+//------------------------------------------------------------------------------
+//
+// `StrSplit()` uses delimiters to define the boundaries between elements in the
+// provided input. Several `Delimiter` types are defined below. If a string
+// (`const char*`, `std::string`, or `absl::string_view`) is passed in place of
+// an explicit `Delimiter` object, `StrSplit()` treats it the same way as if it
+// were passed a `ByString` delimiter.
+//
+// A `Delimiter` is an object with a `Find()` function that knows how to find
+// the first occurrence of itself in a given `absl::string_view`.
+//
+// The following `Delimiter` types are available for use within `StrSplit()`:
+//
+//   - `ByString` (default for string arguments)
+//   - `ByChar` (default for a char argument)
+//   - `ByAnyChar`
+//   - `ByLength`
+//   - `MaxSplits`
+//
+//
+// A Delimiter's Find() member function will be passed the input text that is to
+// be split and the position to begin searching for the next delimiter in the
+// input text. The returned absl::string_view should refer to the next
+// occurrence (after pos) of the represented delimiter; this returned
+// absl::string_view represents the next location where the input string should
+// be broken. The returned absl::string_view may be zero-length if the Delimiter
+// does not represent a part of the string (e.g., a fixed-length delimiter). If
+// no delimiter is found in the given text, a zero-length absl::string_view
+// referring to text.end() should be returned (e.g.,
+// absl::string_view(text.end(), 0)). It is important that the returned
+// absl::string_view always be within the bounds of input text given as an
+// argument--it must not refer to a string that is physically located outside of
+// the given string.
+//
+// The following example is a simple Delimiter object that is created with a
+// single char and will look for that char in the text passed to the Find()
+// function:
+//
+//   struct SimpleDelimiter {
+//     const char c_;
+//     explicit SimpleDelimiter(char c) : c_(c) {}
+//     absl::string_view Find(absl::string_view text, size_t pos) {
+//       auto found = text.find(c_, pos);
+//       if (found == absl::string_view::npos)
+//         return absl::string_view(text.end(), 0);
+//
+//       return text.substr(found, 1);
+//     }
+//   };
+
+// ByString
+//
+// A substring delimiter. If `StrSplit()` is passed a string in place of a
+// `Delimiter` object, the string will be implicitly converted into a
+// `ByString` delimiter.
+//
+// Example:
+//
+//   // Because a string literal is converted to an `absl::ByString`,
+//   // the following two splits are equivalent.
+//
+//   std::vector<std::string> v1 = absl::StrSplit("a, b, c", ", ");
+//
+//   using absl::ByString;
+//   std::vector<std::string> v2 = absl::StrSplit("a, b, c",
+//                                                ByString(", "));
+//   // v1[0] == "a", v1[1] == "b", v1[2] == "c" (and likewise for v2)
+class ByString {
+ public:
+  explicit ByString(absl::string_view sp);
+  absl::string_view Find(absl::string_view text, size_t pos) const;
+
+ private:
+  const std::string delimiter_;  // Owned; set by the out-of-line constructor.
+};
+
+// ByChar
+//
+// A single character delimiter. `ByChar` is functionally equivalent to a
+// 1-char string within a `ByString` delimiter, but slightly more
+// efficient.
+//
+// Example:
+//
+//   // Because a char literal is converted to an `absl::ByChar`,
+//   // the following two splits are equivalent.
+//   std::vector<std::string> v1 = absl::StrSplit("a,b,c", ',');
+//   using absl::ByChar;
+//   std::vector<std::string> v2 = absl::StrSplit("a,b,c", ByChar(','));
+//   // v1[0] == "a", v1[1] == "b", v1[2] == "c" (and likewise for v2)
+//
+// `ByChar` is also the default delimiter if a single character is given
+// as the delimiter to `StrSplit()`. For example, the following calls are
+// equivalent:
+//
+//   std::vector<std::string> v = absl::StrSplit("a-b", '-');
+//
+//   using absl::ByChar;
+//   std::vector<std::string> v = absl::StrSplit("a-b", ByChar('-'));
+//
+class ByChar {
+ public:
+  explicit ByChar(char c) : c_(c) {}
+  absl::string_view Find(absl::string_view text, size_t pos) const;
+
+ private:
+  char c_;  // The delimiter character supplied to the constructor.
+};
+
+// ByAnyChar
+//
+// A delimiter that will match any of the given byte-sized characters within
+// its provided string.
+//
+// Note: this delimiter works with single-byte string data, but does not work
+// with variable-width encodings, such as UTF-8.
+//
+// Example:
+//
+//   using absl::ByAnyChar;
+//   std::vector<std::string> v = absl::StrSplit("a,b=c", ByAnyChar(",="));
+//   // v[0] == "a", v[1] == "b", v[2] == "c"
+//
+// If `ByAnyChar` is given the empty string, it behaves exactly like
+// `ByString` and matches each individual character in the input string.
+//
+class ByAnyChar {
+ public:
+  explicit ByAnyChar(absl::string_view sp);
+  absl::string_view Find(absl::string_view text, size_t pos) const;
+
+ private:
+  const std::string delimiters_;  // Owned; set by the out-of-line constructor.
+};
+
+// ByLength
+//
+// A delimiter for splitting into equal-length strings. The length argument to
+// the constructor must be greater than 0.
+//
+// Note: this delimiter works with single-byte string data, but does not work
+// with variable-width encodings, such as UTF-8.
+//
+// Example:
+//
+//   using absl::ByLength;
+//   std::vector<std::string> v = absl::StrSplit("123456789", ByLength(3));
+//
+//   // v[0] == "123", v[1] == "456", v[2] == "789"
+//
+// Note that the string's length does not have to be a multiple of the fixed
+// split length. In such a case, the last substring will be shorter.
+//
+//   using absl::ByLength;
+//   std::vector<std::string> v = absl::StrSplit("12345", ByLength(2));
+//
+//   // v[0] == "12", v[1] == "34", v[2] == "5"
+class ByLength {
+ public:
+  explicit ByLength(ptrdiff_t length);
+  absl::string_view Find(absl::string_view text, size_t pos) const;
+
+ private:
+  const ptrdiff_t length_;  // Set by the out-of-line constructor.
+};
+
+namespace strings_internal {
+
+// A traits-like metafunction for selecting the default Delimiter object type
+// for a particular Delimiter type. The base case simply exposes type Delimiter
+// itself as the delimiter's Type. However, there are specializations that map
+// `char` to the ByChar delimiter and string-like types (`char*`,
+// `const char*`, `absl::string_view`, `std::string`) to ByString. This allows
+// functions like absl::StrSplit() and absl::MaxSplits() to accept plain
+// characters and strings (e.g., ',' or ",") as delimiter arguments.
+template <typename Delimiter>
+struct SelectDelimiter {
+  using type = Delimiter;
+};
+
+template <>
+struct SelectDelimiter<char> {
+  using type = ByChar;
+};
+template <>
+struct SelectDelimiter<char*> {
+  using type = ByString;
+};
+template <>
+struct SelectDelimiter<const char*> {
+  using type = ByString;
+};
+template <>
+struct SelectDelimiter<absl::string_view> {
+  using type = ByString;
+};
+template <>
+struct SelectDelimiter<std::string> {
+  using type = ByString;
+};
+
+// Wraps another delimiter and sets a max number of matches for that delimiter.
+template <typename Delimiter>
+class MaxSplitsImpl {
+ public:
+  MaxSplitsImpl(Delimiter delimiter, int limit)
+      : delimiter_(delimiter), limit_(limit), count_(0) {}
+  absl::string_view Find(absl::string_view text, size_t pos) {
+    if (count_++ == limit_) {  // True only on call number limit_ + 1.
+      return absl::string_view(text.end(), 0);  // No more matches.
+    }
+    return delimiter_.Find(text, pos);
+  }
+
+ private:
+  Delimiter delimiter_;  // The wrapped delimiter that performs the search.
+  const int limit_;  // Find() calls forwarded before reporting "no match".
+  int count_;  // Find() calls made so far; mutated, so Find() is non-const.
+};
+
+}  // namespace strings_internal
+
+// MaxSplits()
+//
+// A delimiter that limits the number of matches which can occur to the passed
+// `limit`. The last element in the returned collection will contain all
+// remaining unsplit pieces, which may contain instances of the delimiter.
+// The collection will contain at most `limit` + 1 elements.
+// Example:
+//
+//   using absl::MaxSplits;
+//   std::vector<std::string> v = absl::StrSplit("a,b,c", MaxSplits(',', 1));
+//
+//   // v[0] == "a", v[1] == "b,c"
+template <typename Delimiter>
+inline strings_internal::MaxSplitsImpl<
+    typename strings_internal::SelectDelimiter<Delimiter>::type>
+MaxSplits(Delimiter delimiter, int limit) {
+  typedef
+      typename strings_internal::SelectDelimiter<Delimiter>::type DelimiterType;
+  return strings_internal::MaxSplitsImpl<DelimiterType>(  // Wrapper w/ limit.
+      DelimiterType(delimiter), limit);  // e.g. a `char` becomes a ByChar.
+}
+
+//------------------------------------------------------------------------------
+// Predicates
+//------------------------------------------------------------------------------
+//
+// Predicates filter the results of a `StrSplit()` by determining whether or not
+// a resultant element is included in the result set. A predicate may be passed
+// as an optional third argument to the `StrSplit()` function.
+//
+// Predicates are unary functions (or functors) that take a single
+// `absl::string_view` argument and return a bool indicating whether the
+// argument should be included (`true`) or excluded (`false`).
+//
+// Predicates are useful when filtering out empty substrings. By default, empty
+// substrings may be returned by `StrSplit()`, which is similar to the way split
+// functions work in other programming languages.
+
+// AllowEmpty()
+//
+// Always returns `true`, indicating that all strings--including empty
+// strings--should be included in the split output. This predicate is not
+// strictly needed because this is the default behavior of `StrSplit()`;
+// however, it might be useful at some call sites to make the intent explicit.
+//
+// Example:
+//
+//  std::vector<std::string> v = absl::StrSplit(" a , ,,b,", ',', AllowEmpty());
+//
+//  // v[0] == " a ", v[1] == " ", v[2] == "", v[3] == "b", v[4] == ""
+struct AllowEmpty {
+  bool operator()(absl::string_view) const { return true; }
+};
+
+// SkipEmpty()
+//
+// Returns `false` if the given `absl::string_view` is empty, indicating that
+// `StrSplit()` should omit the empty string.
+//
+// Example:
+//
+//   std::vector<std::string> v = absl::StrSplit(",a,,b,", ',', SkipEmpty());
+//
+//   // v[0] == "a", v[1] == "b"
+//
+// Note: `SkipEmpty()` does not consider a string containing only whitespace
+// to be empty. To skip such whitespace as well, use the `SkipWhitespace()`
+// predicate.
+struct SkipEmpty {
+  bool operator()(absl::string_view sp) const { return !sp.empty(); }
+};
+
+// SkipWhitespace()
+//
+// Returns `false` if the given `absl::string_view` is empty *or* contains only
+// whitespace, indicating that `StrSplit()` should omit the string.
+//
+// Example:
+//
+//   std::vector<std::string> v = absl::StrSplit(" a , ,,b,",
+//                                               ',', SkipWhitespace());
+//   // v[0] == " a ", v[1] == "b"
+//
+//   // SkipEmpty() would return whitespace elements
+//   std::vector<std::string> v = absl::StrSplit(" a , ,,b,", ',', SkipEmpty());
+//   // v[0] == " a ", v[1] == " ", v[2] == "b"
+struct SkipWhitespace {
+  bool operator()(absl::string_view sp) const {
+    sp = absl::StripAsciiWhitespace(sp);  // Reassigns the by-value copy only.
+    return !sp.empty();
+  }
+};
+
+//------------------------------------------------------------------------------
+//                                  StrSplit()
+//------------------------------------------------------------------------------
+
+// StrSplit()
+//
+// Splits a given string based on the provided `Delimiter` object,
+// returning the elements within the type specified by the caller. Optionally,
+// you may also pass a `Predicate` to `StrSplit()` indicating whether to include
+// or exclude the resulting element within the final result set. (See the
+// overviews for Delimiters and Predicates above.)
+//
+// Example:
+//
+//   std::vector<std::string> v = absl::StrSplit("a,b,c,d", ',');
+//   // v[0] == "a", v[1] == "b", v[2] == "c", v[3] == "d"
+//
+// You can also provide an explicit `Delimiter` object:
+//
+// Example:
+//
+//   using absl::ByAnyChar;
+//   std::vector<std::string> v = absl::StrSplit("a,b=c", ByAnyChar(",="));
+//   // v[0] == "a", v[1] == "b", v[2] == "c"
+//
+// See above for more information on delimiters.
+//
+// By default, empty strings are included in the result set. You can optionally
+// include a third `Predicate` argument to apply a test for whether the
+// resultant element should be included in the result set:
+//
+// Example:
+//
+//   std::vector<std::string> v = absl::StrSplit(" a , ,,b,",
+//                                               ',', SkipWhitespace());
+//   // v[0] == " a ", v[1] == "b"
+//
+// See above for more information on predicates.
+//
+//------------------------------------------------------------------------------
+// StrSplit() Return Types
+//------------------------------------------------------------------------------
+//
+// The `StrSplit()` function adapts the returned collection to the collection
+// specified by the caller (e.g. `std::vector` above). The returned collections
+// may contain `std::string`, `absl::string_view` (in which case the original
+// string being split must ensure that it outlives the collection), or any
+// object that can be explicitly created from an `absl::string_view`. This
+// behavior works for:
+//
+// 1) All standard STL containers including `std::vector`, `std::list`,
+//    `std::deque`, `std::set`, `std::multiset`, `std::map`, and `std::multimap`
+// 2) `std::pair` (which is not actually a container). See below.
+//
+// Example:
+//
+//   // The results are returned as `absl::string_view` objects. Note that we
+//   // have to ensure that the input string outlives any results.
+//   std::vector<absl::string_view> v = absl::StrSplit("a,b,c", ',');
+//
+//   // Stores results in a std::set<std::string>, which also performs
+//   // de-duplication and orders the elements in ascending order.
+//   std::set<std::string> s = absl::StrSplit("b,a,c,a,b", ',');
+//   // s contains "a", "b", "c" (in that order)
+//
+//   // `StrSplit()` can be used within a range-based for loop, in which case
+//   // each element will be of type `absl::string_view`.
+//   std::vector<std::string> v;
+//   for (const auto sv : absl::StrSplit("a,b,c", ',')) {
+//     if (sv != "b") v.emplace_back(sv);
+//   }
+//   // v[0] == "a", v[1] == "c"
+//
+//   // Stores results in a map. The map implementation assumes that the input
+//   // is provided as a series of key/value pairs. For example, the 0th element
+//   // resulting from the split will be stored as a key to the 1st element. If
+//   // an odd number of elements are resolved, the last element is paired with
+//   // a default-constructed value (e.g., empty string).
+//   std::map<std::string, std::string> m = absl::StrSplit("a,b,c", ',');
+//   // m["a"] == "b", m["c"] == ""     // last component value equals ""
+//
+// Splitting to `std::pair` is an interesting case because it can hold only two
+// elements and is not a collection type. When splitting to a `std::pair` the
+// first two split strings become the `std::pair` `.first` and `.second`
+// members, respectively. The remaining split substrings are discarded. If there
+// are fewer than two split substrings, the empty string is used for the
+// corresponding
+// `std::pair` member.
+//
+// Example:
+//
+//   // Stores first two split strings as the members in a std::pair.
+//   std::pair<std::string, std::string> p = absl::StrSplit("a,b,c", ',');
+//   // p.first == "a", p.second == "b"       // "c" is omitted.
+//
+// The `StrSplit()` function can be used multiple times to perform more
+// complicated splitting logic, such as intelligently parsing key-value pairs.
+//
+// Example:
+//
+//   // The input string "a=b=c,d=e,f=,g" becomes
+//   // { "a" => "b=c", "d" => "e", "f" => "", "g" => "" }
+//   std::map<std::string, std::string> m;
+//   for (absl::string_view sp : absl::StrSplit("a=b=c,d=e,f=,g", ',')) {
+//     m.insert(absl::StrSplit(sp, absl::MaxSplits('=', 1)));
+//   }
+//   EXPECT_EQ("b=c", m.find("a")->second);
+//   EXPECT_EQ("e", m.find("d")->second);
+//   EXPECT_EQ("", m.find("f")->second);
+//   EXPECT_EQ("", m.find("g")->second);
+//
+// WARNING: Due to a legacy bug that is maintained for backward compatibility,
+// splitting the following empty string_views produces different results:
+//
+//   absl::StrSplit(absl::string_view(""), '-');  // {""}
+//   absl::StrSplit(absl::string_view(), '-');    // {}, but should be {""}
+//
+// Try not to depend on this distinction because the bug may one day be fixed.
+template <typename Delimiter>
+strings_internal::Splitter<
+    typename strings_internal::SelectDelimiter<Delimiter>::type, AllowEmpty>
+StrSplit(strings_internal::ConvertibleToStringView text, Delimiter d) {
+  using DelimiterType =
+      typename strings_internal::SelectDelimiter<Delimiter>::type;
+  return strings_internal::Splitter<DelimiterType, AllowEmpty>(
+      std::move(text), DelimiterType(d), AllowEmpty());  // keeps empty pieces
+}
+
+template <typename Delimiter, typename Predicate>  // Predicate overload.
+strings_internal::Splitter<
+    typename strings_internal::SelectDelimiter<Delimiter>::type, Predicate>
+StrSplit(strings_internal::ConvertibleToStringView text, Delimiter d,
+         Predicate p) {  // `p` decides which pieces are kept (see Predicates).
+  using DelimiterType =
+      typename strings_internal::SelectDelimiter<Delimiter>::type;
+  return strings_internal::Splitter<DelimiterType, Predicate>(
+      std::move(text), DelimiterType(d), std::move(p));
+}
+
+}  // namespace absl
+
+#endif  // ABSL_STRINGS_STR_SPLIT_H_