summary refs log tree commit diff
path: root/third_party/bazel/rules_haskell/haskell/gen_ghc_bindist.py
blob: 9e52896eb913a1898b1b745b0fdd90617be15bba (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
#!/usr/bin/env python

# This is a happy-path tool for maintainers: it generates the
# download URLs and hashes of the GHC bindists.
# It uses the hashes provided by downloads.haskell.org.

from __future__ import print_function

import pprint
import sys
import urllib2

# All GHC versions we generate, newest first.
# `version` is the version number.
# `distribution_version` (optional) is the corrected name to look for in the
# hashsum file (sometimes bindists have errors and are replaced by new
# bindists); it defaults to `version`.
# `ignore_prefixes` (optional) lists file name prefixes to ignore.
# `ignore_suffixes` (optional) lists file name suffixes to ignore.
VERSIONS = [
    {"version": "8.6.5"},
    {"version": "8.6.4"},
    {"version": "8.6.3"},
    {"version": "8.6.2"},
    {"version": "8.4.4"},
    {"version": "8.4.3"},
    {"version": "8.4.2"},
    {"version": "8.4.1"},
    {"version": "8.2.2"},
    {
        "version": "8.0.2",
        "ignore_suffixes": [".patch"],
    },
    {
        "version": "7.10.3",
        "distribution_version": "7.10.3b",
        "ignore_prefixes": ["ghc-7.10.3-", "ghc-7.10.3a-"],
        "ignore_suffixes": [".bz2", ".patch"],
    },
]

# All architectures we generate.
# `bazel` is the bazel name of the platform.
# `upstream` is the name used in the file names on downloads.haskell.org.
ARCHES = [
    {
        "bazel": "linux_amd64",
        "upstream": "x86_64-deb8-linux",
    },
    {
        "bazel": "darwin_amd64",
        "upstream": "x86_64-apple-darwin",
    },
    {
        "bazel": "windows_amd64",
        "upstream": "x86_64-unknown-mingw32",
    },
]


# Builds the URL of the bindist tarball for one upstream architecture name
# (e.g. "x86_64-deb8-linux") and one version string (e.g. "8.6.5").
def link_for_tarball(arch, version):
    template = "https://downloads.haskell.org/~ghc/{ver}/ghc-{ver}-{arch}.tar.xz"
    return template.format(arch=arch, ver=version)

# Builds the URL of the SHA256SUMS file of a version.
# That single file lists the hashsums of the tarballs for all arches.
def link_for_sha256_file(version):
    template = "https://downloads.haskell.org/~ghc/{ver}/SHA256SUMS"
    return template.format(ver=version)

# Parses the tarball hashsum file for a distribution version.
#
# content: iterable of the text lines of the hashsum file
# version: an entry of VERSIONS (a dict with `version` and the optional
#          `distribution_version` / `ignore_prefixes` / `ignore_suffixes`)
# url:     where `content` came from; only used in the error message
#
# Returns { arch: sha256 } for every `ghc-<ver>-<arch>.tar.xz` entry, where
# <ver> is `distribution_version` if given, `version` otherwise.
# Blank lines and ignored files are skipped. Every other line that cannot be
# parsed is reported on stderr and the process then exits with status 1.
def parse_sha256_file(content, version, url):
    res = {}
    errs = []

    # These do not depend on the line, so compute them once.
    prefix = "ghc-{ver}-".format(ver = version.get("distribution_version", version['version']))
    suffix = ".tar.xz"
    # str.startswith/endswith accept a tuple; an empty tuple never matches.
    ignore_prefixes = tuple(version.get("ignore_prefixes", []))
    ignore_suffixes = tuple(version.get("ignore_suffixes", []))

    for line in content:
        entry = line.strip()
        if not entry:
            # e.g. a trailing empty line
            continue

        # f5763983a26dedd88b65a0b17267359a3981b83a642569b26334423f684f8b8c  ./ghc-8.4.3-i386-deb8-linux.tar.xz
        (digest, sep, file_) = entry.partition("  ./")

        if sep:
            # filter ignored files
            if file_.startswith(ignore_prefixes) or file_.endswith(ignore_suffixes):
                continue

            if file_.startswith(prefix) and file_.endswith(suffix):
                # i386-deb8-linux
                name = file_[len(prefix):-len(suffix)]
                res[name] = digest
                continue

        # Either the separator is missing or the file name has an
        # unexpected shape: collect it so all problems are reported at once.
        errs.append("Can't parse the sha256 field for {ver}: {entry}".format(
            ver = version['version'], entry = entry))

    if errs:
        eprint("Errors parsing file at " + url + ". Either fix or ignore the lines (ignore_suffixes/ignore_prefixes).")
        for e in errs:
            eprint(e)
        # sys.exit, not the `exit` helper that only exists when `site` is loaded.
        sys.exit(1)

    return res

# Writes a message, followed by a newline, to stderr
# (stdout is reserved for the generated output).
def eprint(mes):
    stream = sys.stderr
    print(mes, file=stream)

# Main: fetches the hashsum file of every entry in VERSIONS, checks that each
# one has a hash for every architecture in ARCHES, and prints the resulting
# GHC_BINDIST dict to stdout. Progress and errors go to stderr; on any error
# the process exits with status 1 and nothing is printed to stdout.
def main():
    # Fetch all hashsum files
    # grab : { version: { arch: sha256 } }
    grab = {}
    for ver in VERSIONS:
        eprint("fetching " + ver['version'])
        url = link_for_sha256_file(ver['version'])
        res = urllib2.urlopen(url)
        try:
            if res.getcode() != 200:
                eprint("download of {} failed with status {}".format(url, res.getcode()))
                sys.exit(1)
            grab[ver['version']] = parse_sha256_file(res, ver, url)
        finally:
            # always release the connection, also when exiting early
            res.close()

    # check whether any version is missing arches we need
    # errs : { version: set(missing_arches) }
    needed_arches = frozenset([a['upstream'] for a in ARCHES])
    errs = {}
    for ver, hashes in grab.items():
        missing_arches = needed_arches.difference(hashes)
        if missing_arches:
            errs[ver] = missing_arches
    if errs:
        for ver, missing in sorted(errs.items()):
            eprint("version {ver} is missing hashes for required architectures {arches}".format(
                ver = ver,
                arches = missing))
        # Stop here: building the dict below would otherwise fail with a
        # bare KeyError on the first missing architecture.
        sys.exit(1)

    # fetch the arches we need and create the GHC_BINDISTS dict
    # ghc_bindists : { version: { bazel_arch: (tarball_url, sha256_hash) } }
    ghc_bindists = {}
    for ver, hashes in grab.items():
        # { bazel_arch: (tarball_url, sha256_hash) }
        arch_dists = {}
        for arch in ARCHES:
            arch_dists[arch['bazel']] = (
                link_for_tarball(arch['upstream'], ver),
                hashes[arch['upstream']]
            )
        ghc_bindists[ver] = arch_dists

    # Print to stdout. Be aware that you can't `> foo.bzl`,
    # because that truncates the source file which is needed
    # for bazel to run in the first place.
    print(""" \
# Generated with `bazel run @io_tweag_rules_haskell//haskell:gen-ghc-bindist | sponge haskell/private/ghc_bindist_generated.bzl`
# To add a version or architecture, edit the constants in haskell/gen_ghc_bindist.py
GHC_BINDIST = \\""")
    pprint.pprint(ghc_bindists)


if __name__ == "__main__":
    main()