about summary refs log tree commit diff
path: root/absl/strings/internal/memutil_benchmark.cc
blob: 77915adb958ee58b5f5c5825cd87a4c97144da2d (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "absl/strings/internal/memutil.h"

#include <algorithm>
#include <cstdlib>

#include "benchmark/benchmark.h"
#include "absl/strings/ascii.h"

// We fill the haystack with aaaaaaaaaaaaaaaaaa...aaaab.
// That gives us:
// - an easy search: 'b'
// - a medium search: 'ab'.  That means every letter is a possible match.
// - a pathological search: 'aaaaaa.......aaaaab' (half as many a's as haytack)
// We benchmark case-sensitive and case-insensitive versions of
// three memmem implementations:
// - memmem() from memutil.h
// - search() from STL
// - memmatch(), a custom implementation using memchr and memcmp.
// Here are sample results:
//
// Run on (12 X 3800 MHz CPU s)
// CPU Caches:
//   L1 Data 32K (x6)
//   L1 Instruction 32K (x6)
//   L2 Unified 256K (x6)
//   L3 Unified 15360K (x1)
// ----------------------------------------------------------------
// Benchmark                           Time          CPU Iterations
// ----------------------------------------------------------------
// BM_Memmem                        3583 ns      3582 ns     196469  2.59966GB/s
// BM_MemmemMedium                 13743 ns     13742 ns      50901  693.986MB/s
// BM_MemmemPathological        13695030 ns  13693977 ns         51  713.133kB/s
// BM_Memcasemem                    3299 ns      3299 ns     212942  2.82309GB/s
// BM_MemcasememMedium             16407 ns     16406 ns      42170  581.309MB/s
// BM_MemcasememPathological    17267745 ns  17266030 ns         41  565.598kB/s
// BM_Search                        1610 ns      1609 ns     431321  5.78672GB/s
// BM_SearchMedium                 11111 ns     11110 ns      63001  858.414MB/s
// BM_SearchPathological        12117390 ns  12116397 ns         58  805.984kB/s
// BM_Searchcase                    3081 ns      3081 ns     229949  3.02313GB/s
// BM_SearchcaseMedium             16003 ns     16001 ns      44170  595.998MB/s
// BM_SearchcasePathological    15823413 ns  15821909 ns         44  617.222kB/s
// BM_Memmatch                       197 ns       197 ns    3584225  47.2951GB/s
// BM_MemmatchMedium               52333 ns     52329 ns      13280  182.244MB/s
// BM_MemmatchPathological        659799 ns    659727 ns       1058  14.4556MB/s
// BM_Memcasematch                  5460 ns      5460 ns     127606  1.70586GB/s
// BM_MemcasematchMedium           32861 ns     32857 ns      21258  290.248MB/s
// BM_MemcasematchPathological  15154243 ns  15153089 ns         46  644.464kB/s
// BM_MemmemStartup                    5 ns         5 ns  150821500
// BM_SearchStartup                    5 ns         5 ns  150644203
// BM_MemmatchStartup                  7 ns         7 ns   97068802
//
// Conclusions:
//
// The following recommendations are based on the sample results above. However,
// we have found that the performance of STL search can vary significantly
// depending on compiler and standard library implementation. We recommend you
// run the benchmarks for yourself on relevant platforms.
//
// If you need case-insensitive, STL search is slightly better than memmem for
// all cases.
//
// Case-sensitive is more subtle:
// Custom memmatch is _very_ fast at scanning, so if you have very few possible
// matches in your haystack, that's the way to go. Performance drops
// significantly with more matches.
//
// STL search is slightly faster than memmem in the medium and pathological
// benchmarks. However, the performance of memmem is currently more dependable
// across platforms and build configurations.

namespace {

constexpr int kHaystackSize = 10000;
constexpr int64_t kHaystackSize64 = kHaystackSize;
const char* MakeHaystack() {
  char* haystack = new char[kHaystackSize];
  for (int i = 0; i < kHaystackSize - 1; ++i) haystack[i] = 'a';
  haystack[kHaystackSize - 1] = 'b';
  return haystack;
}
const char* const kHaystack = MakeHaystack();

void BM_Memmem(benchmark::State& state) {
  for (auto _ : state) {
    benchmark::DoNotOptimize(
        absl::strings_internal::memmem(kHaystack, kHaystackSize, "b", 1));
  }
  state.SetBytesProcessed(kHaystackSize64 * state.iterations());
}
BENCHMARK(BM_Memmem);

void BM_MemmemMedium(benchmark::State& state) {
  for (auto _ : state) {
    benchmark::DoNotOptimize(
        absl::strings_internal::memmem(kHaystack, kHaystackSize, "ab", 2));
  }
  state.SetBytesProcessed(kHaystackSize64 * state.iterations());
}
BENCHMARK(BM_MemmemMedium);

void BM_MemmemPathological(benchmark::State& state) {
  for (auto _ : state) {
    benchmark::DoNotOptimize(absl::strings_internal::memmem(
        kHaystack, kHaystackSize, kHaystack + kHaystackSize / 2,
        kHaystackSize - kHaystackSize / 2));
  }
  state.SetBytesProcessed(kHaystackSize64 * state.iterations());
}
BENCHMARK(BM_MemmemPathological);

void BM_Memcasemem(benchmark::State& state) {
  for (auto _ : state) {
    benchmark::DoNotOptimize(
        absl::strings_internal::memcasemem(kHaystack, kHaystackSize, "b", 1));
  }
  state.SetBytesProcessed(kHaystackSize64 * state.iterations());
}
BENCHMARK(BM_Memcasemem);

void BM_MemcasememMedium(benchmark::State& state) {
  for (auto _ : state) {
    benchmark::DoNotOptimize(
        absl::strings_internal::memcasemem(kHaystack, kHaystackSize, "ab", 2));
  }
  state.SetBytesProcessed(kHaystackSize64 * state.iterations());
}
BENCHMARK(BM_MemcasememMedium);

void BM_MemcasememPathological(benchmark::State& state) {
  for (auto _ : state) {
    benchmark::DoNotOptimize(absl::strings_internal::memcasemem(
        kHaystack, kHaystackSize, kHaystack + kHaystackSize / 2,
        kHaystackSize - kHaystackSize / 2));
  }
  state.SetBytesProcessed(kHaystackSize64 * state.iterations());
}
BENCHMARK(BM_MemcasememPathological);

bool case_eq(const char a, const char b) {
  return absl::ascii_tolower(a) == absl::ascii_tolower(b);
}

void BM_Search(benchmark::State& state) {
  for (auto _ : state) {
    benchmark::DoNotOptimize(std::search(kHaystack, kHaystack + kHaystackSize,
                                         kHaystack + kHaystackSize - 1,
                                         kHaystack + kHaystackSize));
  }
  state.SetBytesProcessed(kHaystackSize64 * state.iterations());
}
BENCHMARK(BM_Search);

void BM_SearchMedium(benchmark::State& state) {
  for (auto _ : state) {
    benchmark::DoNotOptimize(std::search(kHaystack, kHaystack + kHaystackSize,
                                         kHaystack + kHaystackSize - 2,
                                         kHaystack + kHaystackSize));
  }
  state.SetBytesProcessed(kHaystackSize64 * state.iterations());
}
BENCHMARK(BM_SearchMedium);

void BM_SearchPathological(benchmark::State& state) {
  for (auto _ : state) {
    benchmark::DoNotOptimize(std::search(kHaystack, kHaystack + kHaystackSize,
                                         kHaystack + kHaystackSize / 2,
                                         kHaystack + kHaystackSize));
  }
  state.SetBytesProcessed(kHaystackSize64 * state.iterations());
}
BENCHMARK(BM_SearchPathological);

void BM_Searchcase(benchmark::State& state) {
  for (auto _ : state) {
    benchmark::DoNotOptimize(std::search(kHaystack, kHaystack + kHaystackSize,
                                         kHaystack + kHaystackSize - 1,
                                         kHaystack + kHaystackSize, case_eq));
  }
  state.SetBytesProcessed(kHaystackSize64 * state.iterations());
}
BENCHMARK(BM_Searchcase);

void BM_SearchcaseMedium(benchmark::State& state) {
  for (auto _ : state) {
    benchmark::DoNotOptimize(std::search(kHaystack, kHaystack + kHaystackSize,
                                         kHaystack + kHaystackSize - 2,
                                         kHaystack + kHaystackSize, case_eq));
  }
  state.SetBytesProcessed(kHaystackSize64 * state.iterations());
}
BENCHMARK(BM_SearchcaseMedium);

void BM_SearchcasePathological(benchmark::State& state) {
  for (auto _ : state) {
    benchmark::DoNotOptimize(std::search(kHaystack, kHaystack + kHaystackSize,
                                         kHaystack + kHaystackSize / 2,
                                         kHaystack + kHaystackSize, case_eq));
  }
  state.SetBytesProcessed(kHaystackSize64 * state.iterations());
}
BENCHMARK(BM_SearchcasePathological);

char* memcasechr(const char* s, int c, size_t slen) {
  c = absl::ascii_tolower(c);
  for (; slen; ++s, --slen) {
    if (absl::ascii_tolower(*s) == c) return const_cast<char*>(s);
  }
  return nullptr;
}

const char* memcasematch(const char* phaystack, size_t haylen,
                         const char* pneedle, size_t neelen) {
  if (0 == neelen) {
    return phaystack;  // even if haylen is 0
  }
  if (haylen < neelen) return nullptr;

  const char* match;
  const char* hayend = phaystack + haylen - neelen + 1;
  while ((match = static_cast<char*>(
              memcasechr(phaystack, pneedle[0], hayend - phaystack)))) {
    if (absl::strings_internal::memcasecmp(match, pneedle, neelen) == 0)
      return match;
    else
      phaystack = match + 1;
  }
  return nullptr;
}

void BM_Memmatch(benchmark::State& state) {
  for (auto _ : state) {
    benchmark::DoNotOptimize(
        absl::strings_internal::memmatch(kHaystack, kHaystackSize, "b", 1));
  }
  state.SetBytesProcessed(kHaystackSize64 * state.iterations());
}
BENCHMARK(BM_Memmatch);

void BM_MemmatchMedium(benchmark::State& state) {
  for (auto _ : state) {
    benchmark::DoNotOptimize(
        absl::strings_internal::memmatch(kHaystack, kHaystackSize, "ab", 2));
  }
  state.SetBytesProcessed(kHaystackSize64 * state.iterations());
}
BENCHMARK(BM_MemmatchMedium);

void BM_MemmatchPathological(benchmark::State& state) {
  for (auto _ : state) {
    benchmark::DoNotOptimize(absl::strings_internal::memmatch(
        kHaystack, kHaystackSize, kHaystack + kHaystackSize / 2,
        kHaystackSize - kHaystackSize / 2));
  }
  state.SetBytesProcessed(kHaystackSize64 * state.iterations());
}
BENCHMARK(BM_MemmatchPathological);

void BM_Memcasematch(benchmark::State& state) {
  for (auto _ : state) {
    benchmark::DoNotOptimize(memcasematch(kHaystack, kHaystackSize, "b", 1));
  }
  state.SetBytesProcessed(kHaystackSize64 * state.iterations());
}
BENCHMARK(BM_Memcasematch);

void BM_MemcasematchMedium(benchmark::State& state) {
  for (auto _ : state) {
    benchmark::DoNotOptimize(memcasematch(kHaystack, kHaystackSize, "ab", 2));
  }
  state.SetBytesProcessed(kHaystackSize64 * state.iterations());
}
BENCHMARK(BM_MemcasematchMedium);

void BM_MemcasematchPathological(benchmark::State& state) {
  for (auto _ : state) {
    benchmark::DoNotOptimize(memcasematch(kHaystack, kHaystackSize,
                                          kHaystack + kHaystackSize / 2,
                                          kHaystackSize - kHaystackSize / 2));
  }
  state.SetBytesProcessed(kHaystackSize64 * state.iterations());
}
BENCHMARK(BM_MemcasematchPathological);

void BM_MemmemStartup(benchmark::State& state) {
  for (auto _ : state) {
    benchmark::DoNotOptimize(absl::strings_internal::memmem(
        kHaystack + kHaystackSize - 10, 10, kHaystack + kHaystackSize - 1, 1));
  }
}
BENCHMARK(BM_MemmemStartup);

void BM_SearchStartup(benchmark::State& state) {
  for (auto _ : state) {
    benchmark::DoNotOptimize(
        std::search(kHaystack + kHaystackSize - 10, kHaystack + kHaystackSize,
                    kHaystack + kHaystackSize - 1, kHaystack + kHaystackSize));
  }
}
BENCHMARK(BM_SearchStartup);

void BM_MemmatchStartup(benchmark::State& state) {
  for (auto _ : state) {
    benchmark::DoNotOptimize(absl::strings_internal::memmatch(
        kHaystack + kHaystackSize - 10, 10, kHaystack + kHaystackSize - 1, 1));
  }
}
BENCHMARK(BM_MemmatchStartup);

}  // namespace