about summary refs log tree commit diff
path: root/scratch
diff options
context:
space:
mode:
author William Carroll <wpcarro@gmail.com> 2020-11-17T22·28+0000
committer William Carroll <wpcarro@gmail.com> 2020-11-17T22·28+0000
commit 751b5327a92f4d53db6253e59c475e4e96cabcb6 (patch)
tree 523d6efc2f8f7e52b665738224129bc76be8dd13 /scratch
parent 572fb0fe5f8201376740c84a316407b75e96b1c9 (diff)
Solve algorithms dealing with randomness
Tonight I learned that random sampling in which each element in the sampling
corpus has an equal likelihood of being chosen belongs to a class of algorithms
known as "reservoir sampling".

- Implement random.shuffle(..)
- Implement random.choice(..)

Surprisingly, candidates are expected to encounter problems like this during
interviews.
Diffstat (limited to 'scratch')
-rw-r--r-- scratch/facebook/hard/fisher-yates.py 7
-rw-r--r-- scratch/facebook/hard/random-choice.py 39
2 files changed, 46 insertions, 0 deletions
diff --git a/scratch/facebook/hard/fisher-yates.py b/scratch/facebook/hard/fisher-yates.py
new file mode 100644
index 000000000000..200d1613ddcb
--- /dev/null
+++ b/scratch/facebook/hard/fisher-yates.py
@@ -0,0 +1,7 @@
+import random
+
+def shuffle(xs):
+    n = len(xs)
+    for i in range(n):
+        j = random.randint(i, n - 1)
+        xs[i], xs[j] = xs[j], xs[i]
diff --git a/scratch/facebook/hard/random-choice.py b/scratch/facebook/hard/random-choice.py
new file mode 100644
index 000000000000..95029ceb80a4
--- /dev/null
+++ b/scratch/facebook/hard/random-choice.py
@@ -0,0 +1,39 @@
+import random
+
+# This class of problems is known as "reservoir sampling".
+def choose_a(m, xs):
+    """
+    Randomly choose `m` elements from `xs`.
+    This algorithm runs in linear time with respect to the size of `xs`.
+    """
+    result = xs[:m]
+    for i in range(m, len(xs)):
+        j = random.randint(0, i)
+        if j < m:
+            result[j] = xs[i]
+    return result
+
+def choose_b(m, xs):
+    """
+    This algorithm copies `xs`, which runs in linear time, and then shuffles
+    the copy, which also runs in linear time. It achieves the same result as
+    `choose_a`, and both run in linear time.
+
+    `choose_a` is still preferable since it has a coefficient of one, while this
+    version has a coefficient of two because it copies + shuffles.
+    """
+    ys = xs[:]
+    random.shuffle(ys)
+    return ys[:m]
+
+# ROYGBIV
+xs = [
+    'red',
+    'orange',
+    'yellow',
+    'green',
+    'blue',
+    'indigo',
+    'violet',
+]
+print(choose_b(3, xs))