diff options
Diffstat (limited to 'absl/strings/strip.cc')
-rw-r--r-- | absl/strings/strip.cc | 269 |
1 files changed, 269 insertions, 0 deletions
diff --git a/absl/strings/strip.cc b/absl/strings/strip.cc new file mode 100644 index 000000000000..968c09c6fd31 --- /dev/null +++ b/absl/strings/strip.cc @@ -0,0 +1,269 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// This file contains functions that remove a defined part from the std::string, +// i.e., strip the std::string. + +#include "absl/strings/strip.h" + +#include <algorithm> +#include <cassert> +#include <cstring> +#include <string> + +#include "absl/strings/ascii.h" +#include "absl/strings/string_view.h" + +// ---------------------------------------------------------------------- +// ReplaceCharacters +// Replaces any occurrence of the character 'remove' (or the characters +// in 'remove') with the character 'replace_with'. +// ---------------------------------------------------------------------- +void ReplaceCharacters(char* str, size_t len, absl::string_view remove, + char replace_with) { + for (char* end = str + len; str != end; ++str) { + if (remove.find(*str) != absl::string_view::npos) { + *str = replace_with; + } + } +} + +void ReplaceCharacters(std::string* s, absl::string_view remove, char replace_with) { + for (char& ch : *s) { + if (remove.find(ch) != absl::string_view::npos) { + ch = replace_with; + } + } +} + +bool StripTrailingNewline(std::string* s) { + if (!s->empty() && (*s)[s->size() - 1] == '\n') { + if (s->size() > 1 && (*s)[s->size() - 2] == '\r') + s->resize(s->size() - 2); + else + s->resize(s->size() - 1); + return true; + } + return false; +} + +// ---------------------------------------------------------------------- +// Misc. stripping routines +// ---------------------------------------------------------------------- +void StripCurlyBraces(std::string* s) { + return StripBrackets('{', '}', s); +} + +void StripBrackets(char left, char right, std::string* s) { + std::string::iterator opencurly = std::find(s->begin(), s->end(), left); + while (opencurly != s->end()) { + std::string::iterator closecurly = std::find(opencurly, s->end(), right); + if (closecurly == s->end()) return; + opencurly = s->erase(opencurly, closecurly + 1); + opencurly = std::find(opencurly, s->end(), left); + } +} + +void StripMarkupTags(std::string* s) { + std::string::iterator output = std::find(s->begin(), s->end(), '<'); + std::string::iterator input = output; + while (input != s->end()) { + if (*input == '<') { + input = std::find(input, s->end(), '>'); + if (input == s->end()) break; + ++input; + } else { + *output++ = *input++; + } + } + s->resize(output - s->begin()); +} + +std::string OutputWithMarkupTagsStripped(const std::string& s) { + std::string result(s); + StripMarkupTags(&result); + return result; +} + +ptrdiff_t TrimStringLeft(std::string* s, absl::string_view remove) { + size_t i = 0; + while (i < s->size() && memchr(remove.data(), (*s)[i], remove.size())) { + ++i; + } + if (i > 0) s->erase(0, i); + return i; +} + +ptrdiff_t TrimStringRight(std::string* s, absl::string_view remove) { + size_t i = s->size(), trimmed = 0; + while (i > 0 && memchr(remove.data(), (*s)[i - 1], remove.size())) { + --i; + } + if (i < s->size()) { + trimmed = s->size() - i; + s->erase(i); + } + return trimmed; +} + +// Unfortunately, absl::string_view does not have erase, so we've to replicate +// the implementation with remove_prefix()/remove_suffix() +ptrdiff_t TrimStringLeft(absl::string_view* s, absl::string_view remove) { + size_t i = 0; + while (i < s->size() && memchr(remove.data(), (*s)[i], remove.size())) { + ++i; + } + if (i > 0) s->remove_prefix(i); + return i; +} + +ptrdiff_t TrimStringRight(absl::string_view* s, absl::string_view remove) { + size_t i = s->size(), trimmed = 0; + while (i > 0 && memchr(remove.data(), (*s)[i - 1], remove.size())) { + --i; + } + if (i < s->size()) { + trimmed = s->size() - i; + s->remove_suffix(trimmed); + } + return trimmed; +} + +// ---------------------------------------------------------------------- +// Various removal routines +// ---------------------------------------------------------------------- +ptrdiff_t strrm(char* str, char c) { + char* src; + char* dest; + for (src = dest = str; *src != '\0'; ++src) + if (*src != c) *(dest++) = *src; + *dest = '\0'; + return dest - str; +} + +ptrdiff_t memrm(char* str, ptrdiff_t strlen, char c) { + char* src; + char* dest; + for (src = dest = str; strlen-- > 0; ++src) + if (*src != c) *(dest++) = *src; + return dest - str; +} + +ptrdiff_t strrmm(char* str, const char* chars) { + char* src; + char* dest; + for (src = dest = str; *src != '\0'; ++src) { + bool skip = false; + for (const char* c = chars; *c != '\0'; c++) { + if (*src == *c) { + skip = true; + break; + } + } + if (!skip) *(dest++) = *src; + } + *dest = '\0'; + return dest - str; +} + +ptrdiff_t strrmm(std::string* str, const std::string& chars) { + size_t str_len = str->length(); + size_t in_index = str->find_first_of(chars); + if (in_index == std::string::npos) return str_len; + + size_t out_index = in_index++; + + while (in_index < str_len) { + char c = (*str)[in_index++]; + if (chars.find(c) == std::string::npos) (*str)[out_index++] = c; + } + + str->resize(out_index); + return out_index; +} + +// ---------------------------------------------------------------------- +// StripDupCharacters +// Replaces any repeated occurrence of the character 'dup_char' +// with single occurrence. e.g., +// StripDupCharacters("a//b/c//d", '/', 0) => "a/b/c/d" +// Return the number of characters removed +// ---------------------------------------------------------------------- +ptrdiff_t StripDupCharacters(std::string* s, char dup_char, ptrdiff_t start_pos) { + if (start_pos < 0) start_pos = 0; + + // remove dups by compaction in-place + ptrdiff_t input_pos = start_pos; // current reader position + ptrdiff_t output_pos = start_pos; // current writer position + const ptrdiff_t input_end = s->size(); + while (input_pos < input_end) { + // keep current character + const char curr_char = (*s)[input_pos]; + if (output_pos != input_pos) // must copy + (*s)[output_pos] = curr_char; + ++input_pos; + ++output_pos; + + if (curr_char == dup_char) { // skip subsequent dups + while ((input_pos < input_end) && ((*s)[input_pos] == dup_char)) + ++input_pos; + } + } + const ptrdiff_t num_deleted = input_pos - output_pos; + s->resize(s->size() - num_deleted); + return num_deleted; +} + +// ---------------------------------------------------------------------- +// TrimRunsInString +// Removes leading and trailing runs, and collapses middle +// runs of a set of characters into a single character (the +// first one specified in 'remove'). Useful for collapsing +// runs of repeated delimiters, whitespace, etc. E.g., +// TrimRunsInString(&s, " :,()") removes leading and trailing +// delimiter chars and collapses and converts internal runs +// of delimiters to single ' ' characters, so, for example, +// " a:(b):c " -> "a b c" +// "first,last::(area)phone, ::zip" -> "first last area phone zip" +// ---------------------------------------------------------------------- +void TrimRunsInString(std::string* s, absl::string_view remove) { + std::string::iterator dest = s->begin(); + std::string::iterator src_end = s->end(); + for (std::string::iterator src = s->begin(); src != src_end;) { + if (remove.find(*src) == absl::string_view::npos) { + *(dest++) = *(src++); + } else { + // Skip to the end of this run of chars that are in 'remove'. + for (++src; src != src_end; ++src) { + if (remove.find(*src) == absl::string_view::npos) { + if (dest != s->begin()) { + // This is an internal run; collapse it. + *(dest++) = remove[0]; + } + *(dest++) = *(src++); + break; + } + } + } + } + s->erase(dest, src_end); +} + +// ---------------------------------------------------------------------- +// RemoveNullsInString +// Removes any internal \0 characters from the std::string. +// ---------------------------------------------------------------------- +void RemoveNullsInString(std::string* s) { + s->erase(std::remove(s->begin(), s->end(), '\0'), s->end()); +} |