about summary refs log tree commit diff
path: root/scratch/facebook/hard/random-choice.py
blob: 95029ceb80a465d479c1049a882abb090312c976 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import random

# This class of problems is known as "resevoir sampling".
def choose_a(m, xs):
    """
    Randomly choose `m` elements from `xs`.
    This algorithm runs in linear time with respect to the size of `xs`.
    """
    result = xs[:m]
    for i in range(m, len(xs)):
        j = random.randint(0, i)
        if j < m:
            result[j] = xs[i]
    return result

def choose_b(m, xs):
    """
    This algorithm, which copies `xs`, which runs in linear time, and then
    shuffles the copies, which also runs in linear time, achieves the same
    result as `choose_a` and both run in linear time.

    `choose_a` is still preferable since it has a coefficient of one, while this
    version has a coefficient of two because it copies + shuffles.
    """
    ys = xs[:]
    random.shuffle(ys)
    return ys[:m]

# ROYGBIV
xs = [
    'red',
    'orange',
    'yellow',
    'green',
    'blue',
    'indigo',
    'violet',
]
print(choose_b(3, xs))