about summary refs log blame commit diff
path: root/scratch/facebook/hard/random-choice.py
blob: a5c6e4e6ee81b5cf6f1b747288c51c69c41bc653 (plain) (tree)
1
2
3
4
5
6
7
8
9
10







                                                                        

                            

















                                                                                









                                                                               










                      
                      
import random

# This class of problems is known as "resevoir sampling".
def choose_a(m, xs):
    """
    Randomly choose `m` elements from `xs`.
    This algorithm runs in linear time with respect to the size of `xs`.
    """
    result = [None] * m
    for i in range(len(xs)):
        j = random.randint(0, i)
        if j < m:
            result[j] = xs[i]
    return result

def choose_b(m, xs):
    """
    This algorithm, which copies `xs`, which runs in linear time, and then
    shuffles the copies, which also runs in linear time, achieves the same
    result as `choose_a` and both run in linear time.

    `choose_a` is still preferable since it has a coefficient of one, while this
    version has a coefficient of two because it copies + shuffles.
    """
    ys = xs[:]
    random.shuffle(ys)
    return ys[:m]

def choose_c(m, xs):
    """
    This is one, possibly inefficient, way to randomly sample `m` elements from
    `xs`.
    """
    choices = set()
    while len(choices) < m:
        choices.add(random.randint(0, len(xs) - 1))
    return [xs[i] for i in choices]

# ROYGBIV
xs = [
    'red',
    'orange',
    'yellow',
    'green',
    'blue',
    'indigo',
    'violet',
]
print(choose_b(3, xs))
print(choose_c(3, xs))