diff options
author | William Carroll <wpcarro@gmail.com> | 2020-11-17T22·28+0000 |
---|---|---|
committer | William Carroll <wpcarro@gmail.com> | 2020-11-17T22·28+0000 |
commit | 751b5327a92f4d53db6253e59c475e4e96cabcb6 (patch) | |
tree | 523d6efc2f8f7e52b665738224129bc76be8dd13 /scratch | |
parent | 572fb0fe5f8201376740c84a316407b75e96b1c9 (diff) |
Solve algorithms dealing with randomness
Tonight I learned that random sampling in which each element of the sampling corpus has an equal likelihood of being chosen belongs to a class of algorithms known as "reservoir sampling". - Implement random.shuffle(..) - Implement random.choice(..) Surprisingly, candidates are expected to encounter problems like this during interviews.
Diffstat (limited to 'scratch')
-rw-r--r-- | scratch/facebook/hard/fisher-yates.py | 7 | ||||
-rw-r--r-- | scratch/facebook/hard/random-choice.py | 39 |
2 files changed, 46 insertions, 0 deletions
# diff --git a/scratch/facebook/hard/fisher-yates.py b/scratch/facebook/hard/fisher-yates.py
# new file mode 100644, index 000000000000..200d1613ddcb
# diff --git a/scratch/facebook/hard/random-choice.py b/scratch/facebook/hard/random-choice.py
# new file mode 100644, index 000000000000..95029ceb80a4
#
# Both new files begin with `import random`; it is listed once here.
import random


# --- scratch/facebook/hard/fisher-yates.py ---

def shuffle(xs):
    """
    Shuffle the mutable sequence `xs` in place (Fisher-Yates).

    Position `i` is swapped with a position drawn uniformly from `i..n-1`, so
    every slot is filled from the elements that have not been placed yet.
    Returns `None`; an empty sequence is left untouched.
    """
    n = len(xs)
    for i in range(n):
        # random.randint is inclusive on both ends, hence `n - 1`.
        j = random.randint(i, n - 1)
        xs[i], xs[j] = xs[j], xs[i]


# --- scratch/facebook/hard/random-choice.py ---

# This class of problems is known as "reservoir sampling".
def choose_a(m, xs):
    """
    Randomly choose `m` elements from the sequence `xs`.

    The first `m` elements seed the reservoir. Each later element at index `i`
    then replaces a reservoir slot when a draw from `0..i` lands below `m`.
    `xs` is not modified and may be any sliceable sequence (list, tuple, str).

    This algorithm runs in linear time with respect to the size of `xs` and
    only allocates the `m`-element result.

    Returns a new list. When `m >= len(xs)` every element is returned in its
    original order; the order of the result is not itself shuffled.

    Raises ValueError when `m` is negative.
    """
    if m < 0:
        raise ValueError("m must be non-negative")
    # Copy into a list so the reservoir is mutable even when `xs` is a tuple
    # or a string, whose slices cannot be assigned into.
    result = list(xs[:m])
    for i in range(m, len(xs)):
        j = random.randint(0, i)
        if j < m:
            result[j] = xs[i]
    return result


def choose_b(m, xs):
    """
    Randomly choose `m` elements from `xs` by shuffling a copy of it.

    This algorithm, which copies `xs`, which runs in linear time, and then
    shuffles the copy, which also runs in linear time, achieves the same
    result as `choose_a` and both run in linear time.

    `choose_a` is still preferable since it has a coefficient of one, while
    this version has a coefficient of two because it copies + shuffles. This
    version also allocates a full copy of `xs` instead of only `m` slots.

    Returns a new list. When `m >= len(xs)` every element is returned, in
    shuffled order.

    Raises ValueError when `m` is negative; a negative slice bound would
    otherwise silently return `len(xs) + m` elements.
    """
    if m < 0:
        raise ValueError("m must be non-negative")
    # list(...) rather than xs[:] so that immutable sequences can be shuffled.
    ys = list(xs)
    random.shuffle(ys)
    return ys[:m]


# ROYGBIV
xs = [
    'red',
    'orange',
    'yellow',
    'green',
    'blue',
    'indigo',
    'violet',
]
print(choose_b(3, xs))