about summary refs log tree commit diff
path: root/absl/strings/ascii.h
diff options
context:
space:
mode:
authormisterg <misterg@google.com>2017-09-19T20·54-0400
committermisterg <misterg@google.com>2017-09-19T20·54-0400
commitc2e754829628d1e9b7a16b3389cfdace76950fdf (patch)
tree5a7f056f44e27c30e10025113b644f0b3b5801fc /absl/strings/ascii.h
Initial Commit
Diffstat (limited to 'absl/strings/ascii.h')
-rw-r--r--absl/strings/ascii.h239
1 files changed, 239 insertions, 0 deletions
diff --git a/absl/strings/ascii.h b/absl/strings/ascii.h
new file mode 100644
index 000000000000..fc2bb33e0d77
--- /dev/null
+++ b/absl/strings/ascii.h
@@ -0,0 +1,239 @@
+//
+// Copyright 2017 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// -----------------------------------------------------------------------------
+// File: ascii.h
+// -----------------------------------------------------------------------------
+//
+// This package contains functions operating on characters and strings
+// restricted to standard ASCII. These include character classification
+// functions analogous to those found in the ANSI C Standard Library <ctype.h>
+// header file.
+//
+// C++ implementations provide <ctype.h> functionality based on their
+// C environment locale. In general, reliance on such a locale is not ideal, as
+// the locale standard is problematic (and may not return invariant information
+// for the same character set, for example). These `ascii_*()` functions are
+// hard-wired for standard ASCII, much faster, and guaranteed to behave
+// consistently.  They will never be overloaded, nor will their function
+// signature change.
+//
+// `ascii_isalnum()`, `ascii_isalpha()`, `ascii_isascii()`, `ascii_isblank()`,
+// `ascii_iscntrl()`, `ascii_isdigit()`, `ascii_isgraph()`, `ascii_islower()`,
+// `ascii_isprint()`, `ascii_ispunct()`, `ascii_isspace()`, `ascii_isupper()`,
+// `ascii_isxdigit()`
+//   Analagous to the <ctype.h> functions with similar names, these
+//   functions take an unsigned char and return a bool, based on whether the
+//   character matches the condition specified.
+//
+//   If the input character has a numerical value greater than 127, these
+//   functions return `false`.
+//
+// `ascii_tolower()`, `ascii_toupper()`
+//   Analagous to the <ctype.h> functions with similar names, these functions
+//   take an unsigned char and return a char.
+//
+//   If the input character is not an ASCII {lower,upper}-case letter (including
+//   numerical values greater than 127) then the functions return the same value
+//   as the input character.
+
+#ifndef ABSL_STRINGS_ASCII_H_
+#define ABSL_STRINGS_ASCII_H_
+
+#include <algorithm>
+#include <string>
+
+#include "absl/base/attributes.h"
+#include "absl/strings/string_view.h"
+
+namespace absl {
+namespace ascii_internal {
+
+// Declaration for an array of bitfields holding character information.
+extern const unsigned char kPropertyBits[256];
+
+// Declaration for the array of characters to upper-case characters.
+extern const char kToUpper[256];
+
+// Declaration for the array of characters to lower-case characters.
+extern const char kToLower[256];
+
+}  // namespace ascii_internal
+
+// ascii_isalpha()
+//
+// Determines whether the given character is an alphabetic character.
+inline bool ascii_isalpha(unsigned char c) {
+  return (ascii_internal::kPropertyBits[c] & 0x01) != 0;
+}
+
+// ascii_isalnum()
+//
+// Determines whether the given character is an alphanumeric character.
+inline bool ascii_isalnum(unsigned char c) {
+  return (ascii_internal::kPropertyBits[c] & 0x04) != 0;
+}
+
+// ascii_isspace()
+//
+// Determines whether the given character is a whitespace character (space,
+// tab, vertical tab, formfeed, linefeed, or carriage return).
+inline bool ascii_isspace(unsigned char c) {
+  return (ascii_internal::kPropertyBits[c] & 0x08) != 0;
+}
+
+// ascii_ispunct()
+//
+// Determines whether the given character is a punctuation character.
+inline bool ascii_ispunct(unsigned char c) {
+  return (ascii_internal::kPropertyBits[c] & 0x10) != 0;
+}
+
+// ascii_isblank()
+//
+// Determines whether the given character is a blank character (tab or space).
+inline bool ascii_isblank(unsigned char c) {
+  return (ascii_internal::kPropertyBits[c] & 0x20) != 0;
+}
+
+// ascii_iscntrl()
+//
+// Determines whether the given character is a control character.
+inline bool ascii_iscntrl(unsigned char c) {
+  return (ascii_internal::kPropertyBits[c] & 0x40) != 0;
+}
+
+// ascii_isxdigit()
+//
+// Determines whether the given character can be represented as a hexadecimal
+// digit character (i.e. {0-9} or {A-F}).
+inline bool ascii_isxdigit(unsigned char c) {
+  return (ascii_internal::kPropertyBits[c] & 0x80) != 0;
+}
+
+// ascii_isdigit()
+//
+// Determines whether the given character can be represented as a decimal
+// digit character (i.e. {0-9}).
+inline bool ascii_isdigit(unsigned char c) { return c >= '0' && c <= '9'; }
+
+// ascii_isprint()
+//
+// Determines whether the given character is printable, including whitespace.
+inline bool ascii_isprint(unsigned char c) { return c >= 32 && c < 127; }
+
+// ascii_isgraph()
+//
+// Determines whether the given character has a graphical representation.
+inline bool ascii_isgraph(unsigned char c) { return c > 32 && c < 127; }
+
+// ascii_isupper()
+//
+// Determines whether the given character is uppercase.
+inline bool ascii_isupper(unsigned char c) { return c >= 'A' && c <= 'Z'; }
+
+// ascii_islower()
+//
+// Determines whether the given character is lowercase.
+inline bool ascii_islower(unsigned char c) { return c >= 'a' && c <= 'z'; }
+
+// ascii_isascii()
+//
+// Determines whether the given character is ASCII.
+inline bool ascii_isascii(unsigned char c) { return c < 128; }
+
+// ascii_tolower()
+//
+// Returns an ASCII character, converting to lowercase if uppercase is
+// passed. Note that character values > 127 are simply returned.
+inline char ascii_tolower(unsigned char c) {
+  return ascii_internal::kToLower[c];
+}
+
+// Converts the characters in `s` to lowercase, changing the contents of `s`.
+void AsciiStrToLower(std::string* s);
+
+// Creates a lowercase std::string from a given absl::string_view.
+ABSL_MUST_USE_RESULT inline std::string AsciiStrToLower(absl::string_view s) {
+  std::string result(s);
+  absl::AsciiStrToLower(&result);
+  return result;
+}
+
+// ascii_toupper()
+//
+// Returns the ASCII character, converting to upper-case if lower-case is
+// passed. Note that characters values > 127 are simply returned.
+inline char ascii_toupper(unsigned char c) {
+  return ascii_internal::kToUpper[c];
+}
+
+// Converts the characters in `s` to uppercase, changing the contents of `s`.
+void AsciiStrToUpper(std::string* s);
+
+// Creates an uppercase std::string from a given absl::string_view.
+ABSL_MUST_USE_RESULT inline std::string AsciiStrToUpper(absl::string_view s) {
+  std::string result(s);
+  absl::AsciiStrToUpper(&result);
+  return result;
+}
+
+// Returns absl::string_view with whitespace stripped from the beginning of the
+// given string_view.
+ABSL_MUST_USE_RESULT inline absl::string_view StripLeadingAsciiWhitespace(
+    absl::string_view str) {
+  auto it = std::find_if_not(str.begin(), str.end(), absl::ascii_isspace);
+  return absl::string_view(it, str.end() - it);
+}
+
+// Strips in place whitespace from the beginning of the given std::string.
+inline void StripLeadingAsciiWhitespace(std::string* str) {
+  auto it = std::find_if_not(str->begin(), str->end(), absl::ascii_isspace);
+  str->erase(str->begin(), it);
+}
+
+// Returns absl::string_view with whitespace stripped from the end of the given
+// string_view.
+ABSL_MUST_USE_RESULT inline absl::string_view StripTrailingAsciiWhitespace(
+    absl::string_view str) {
+  auto it = std::find_if_not(str.rbegin(), str.rend(), absl::ascii_isspace);
+  return absl::string_view(str.begin(), str.rend() - it);
+}
+
+// Strips in place whitespace from the end of the given std::string
+inline void StripTrailingAsciiWhitespace(std::string* str) {
+  auto it = std::find_if_not(str->rbegin(), str->rend(), absl::ascii_isspace);
+  str->erase(str->rend() - it);
+}
+
+// Returns absl::string_view with whitespace stripped from both ends of the
+// given string_view.
+ABSL_MUST_USE_RESULT inline absl::string_view StripAsciiWhitespace(
+    absl::string_view str) {
+  return StripTrailingAsciiWhitespace(StripLeadingAsciiWhitespace(str));
+}
+
+// Strips in place whitespace from both ends of the given std::string
+inline void StripAsciiWhitespace(std::string* str) {
+  StripTrailingAsciiWhitespace(str);
+  StripLeadingAsciiWhitespace(str);
+}
+
+// Removes leading, trailing, and consecutive internal whitespace.
+void RemoveExtraAsciiWhitespace(std::string*);
+
+}  // namespace absl
+
+#endif  // ABSL_STRINGS_ASCII_H_