about summary refs log tree commit diff
path: root/scratch/facebook/hard/random-choice.py
blob: a5c6e4e6ee81b5cf6f1b747288c51c69c41bc653 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import random

# This class of problems is known as "resevoir sampling".
def choose_a(m, xs):
    """
    Randomly choose `m` elements from `xs`.
    This algorithm runs in linear time with respect to the size of `xs`.
    """
    result = [None] * m
    for i in range(len(xs)):
        j = random.randint(0, i)
        if j < m:
            result[j] = xs[i]
    return result

def choose_b(m, xs):
    """
    This algorithm, which copies `xs`, which runs in linear time, and then
    shuffles the copies, which also runs in linear time, achieves the same
    result as `choose_a` and both run in linear time.

    `choose_a` is still preferable since it has a coefficient of one, while this
    version has a coefficient of two because it copies + shuffles.
    """
    ys = xs[:]
    random.shuffle(ys)
    return ys[:m]

def choose_c(m, xs):
    """
    This is one, possibly inefficient, way to randomly sample `m` elements from
    `xs`.
    """
    choices = set()
    while len(choices) < m:
        choices.add(random.randint(0, len(xs) - 1))
    return [xs[i] for i in choices]

# ROYGBIV
xs = [
    'red',
    'orange',
    'yellow',
    'green',
    'blue',
    'indigo',
    'violet',
]
print(choose_b(3, xs))
print(choose_c(3, xs))