about summary refs log tree commit diff
path: root/third_party/bazel/rules_haskell/haskell/private/actions/link.bzl
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/bazel/rules_haskell/haskell/private/actions/link.bzl')
-rw-r--r--third_party/bazel/rules_haskell/haskell/private/actions/link.bzl667
1 files changed, 667 insertions, 0 deletions
diff --git a/third_party/bazel/rules_haskell/haskell/private/actions/link.bzl b/third_party/bazel/rules_haskell/haskell/private/actions/link.bzl
new file mode 100644
index 0000000000..65cd2c6e43
--- /dev/null
+++ b/third_party/bazel/rules_haskell/haskell/private/actions/link.bzl
@@ -0,0 +1,667 @@
+"""Actions for linking object code produced by compilation"""
+
+load(":private/packages.bzl", "expose_packages", "pkg_info_to_compile_flags")
+load("@bazel_skylib//lib:paths.bzl", "paths")
+load(
+    ":private/path_utils.bzl",
+    "get_lib_name",
+    "is_shared_library",
+    "is_static_library",
+    "ln",
+)
+load(":private/pkg_id.bzl", "pkg_id")
+load(":private/set.bzl", "set")
+load(":private/list.bzl", "list")
+
+# tests in /tests/unit_tests/BUILD
+def parent_dir_path(path):
+    """Returns the path of the parent directory.
+    For a relative path with just a file, "." is returned.
+    The path is not normalized.
+
+    foo => .
+    foo/ => foo
+    foo/bar => foo
+    foo/bar/baz => foo/bar
+    foo/../bar => foo/..
+
+    Args:
+      a path string
+
+    Returns:
+      A path list of the form `["foo", "bar"]`
+    """
+    path_dir = paths.dirname(path)
+
+    # dirname returns "" if there is no parent directory
+    # In that case we return the identity path, which is ".".
+    if path_dir == "":
+        return ["."]
+    else:
+        return path_dir.split("/")
+
+def __check_dots(target, path):
+    # there’s still (non-leading) .. in split
+    if ".." in path:
+        fail("the short_path of target {} (which is {}) contains more dots than loading `../`. We can’t handle that.".format(
+            target,
+            target.short_path,
+        ))
+
+# skylark doesn’t allow nested defs, which is a mystery.
+def _get_target_parent_dir(target):
+    """get the parent dir and handle leading short_path dots,
+    which signify that the target is in an external repository.
+
+    Args:
+      target: a target, .short_path is used
+    Returns:
+      (is_external, parent_dir)
+      `is_external`: Bool whether the path points to an external repository
+      `parent_dir`: The parent directory, either up to the runfiles toplel,
+                    up to the external repository toplevel.
+                    Is `[]` if there is no parent dir.
+    """
+
+    parent_dir = parent_dir_path(target.short_path)
+
+    if parent_dir[0] == "..":
+        __check_dots(target, parent_dir[1:])
+        return (True, parent_dir[1:])
+    elif parent_dir[0] == ".":
+        return (False, [])
+    else:
+        __check_dots(target, parent_dir)
+        return (False, parent_dir)
+
+# tests in /tests/unit_tests/BUILD
+def create_rpath_entry(
+        binary,
+        dependency,
+        keep_filename,
+        prefix = ""):
+    """Return a (relative) path that points from `binary` to `dependecy`
+    while not leaving the current bazel runpath, taking into account weird
+    corner cases of `.short_path` concerning external repositories.
+    The resulting entry should be able to be inserted into rpath or similar.
+
+    Examples:
+
+      bin.short_path=foo/a.so and dep.short_path=bar/b.so
+        => create_rpath_entry(bin, dep, False) = ../bar
+           and
+           create_rpath_entry(bin, dep, True) = ../bar/b.so
+           and
+           create_rpath_entry(bin, dep, True, "$ORIGIN") = $ORIGIN/../bar/b.so
+
+    Args:
+      binary: target of current binary
+      dependency: target of dependency to relatively point to
+      keep_filename: whether to point to the filename or its parent dir
+      prefix: string path prefix to add before the relative path
+
+    Returns:
+      relative path string
+    """
+
+    (bin_is_external, bin_parent_dir) = _get_target_parent_dir(binary)
+    (dep_is_external, dep_parent_dir) = _get_target_parent_dir(dependency)
+
+    # backup through parent directories of the binary,
+    # to the runfiles directory
+    bin_backup = [".."] * len(bin_parent_dir)
+
+    # external repositories live in `target.runfiles/external`,
+    # while the internal repository lives in `target.runfiles`.
+    # The `.short_path`s of external repositories are strange,
+    # they start with `../`, but you cannot just append that in
+    # order to find the correct runpath. Instead you have to use
+    # the following logic to construct the correct runpaths:
+    if bin_is_external:
+        if dep_is_external:
+            # stay in `external`
+            path_segments = bin_backup
+        else:
+            # backup out of `external`
+            path_segments = [".."] + bin_backup
+    elif dep_is_external:
+        # go into `external`
+        path_segments = bin_backup + ["external"]
+    else:
+        # no special external traversal
+        path_segments = bin_backup
+
+    # then add the parent dir to our dependency
+    path_segments.extend(dep_parent_dir)
+
+    # optionally add the filename
+    if keep_filename:
+        path_segments.append(
+            paths.basename(dependency.short_path),
+        )
+
+    # normalize for good measure and create the final path
+    path = paths.normalize("/".join(path_segments))
+
+    # and add the prefix if applicable
+    if prefix == "":
+        return path
+    else:
+        return prefix + "/" + path
+
+def _merge_parameter_files(hs, file1, file2):
+    """Merge two GHC parameter files into one.
+
+    Args:
+      hs: Haskell context.
+      file1: The first parameter file.
+      file2: The second parameter file.
+
+    Returns:
+      File: A new parameter file containing the parameters of both input files.
+        The file name is based on the file names of the input files. The file
+        is located next to the first input file.
+    """
+    params_file = hs.actions.declare_file(
+        file1.basename + ".and." + file2.basename,
+        sibling = file1,
+    )
+    hs.actions.run_shell(
+        inputs = [file1, file2],
+        outputs = [params_file],
+        command = """
+            cat {file1} {file2} > {out}
+        """.format(
+            file1 = file1.path,
+            file2 = file2.path,
+            out = params_file.path,
+        ),
+    )
+    return params_file
+
+def _darwin_create_extra_linker_flags_file(hs, cc, objects_dir, executable, dynamic, solibs):
+    """Write additional linker flags required on MacOS to a parameter file.
+
+    Args:
+      hs: Haskell context.
+      cc: CcInteropInfo, information about C dependencies.
+      objects_dir: Directory storing object files.
+        Used to determine output file location.
+      executable: The executable being built.
+      dynamic: Bool: Whether to link dynamically or statically.
+      solibs: List of dynamic library dependencies.
+
+    Returns:
+      File: Parameter file with additional linker flags. To be passed to GHC.
+    """
+
+    # On Darwin GHC will pass the dead_strip_dylibs flag to the linker. This
+    # flag will remove any shared library loads from the binary's header that
+    # are not directly resolving undefined symbols in the binary. I.e. any
+    # indirect shared library dependencies will be removed. This conflicts with
+    # Bazel's builtin cc rules, which assume that the final binary will load
+    # all transitive shared library dependencies. In particlar shared libraries
+    # produced by Bazel's cc rules never load shared libraries themselves. This
+    # causes missing symbols at runtime on MacOS, see #170.
+    #
+    # The following work-around applies the `-u` flag to the linker for any
+    # symbol that is undefined in any transitive shared library dependency.
+    # This forces the linker to resolve these undefined symbols in all
+    # transitive shared library dependencies and keep the corresponding load
+    # commands in the binary's header.
+    #
+    # Unfortunately, this prohibits elimination of any truly redundant shared
+    # library dependencies. Furthermore, the transitive closure of shared
+    # library dependencies can be large, so this makes it more likely to exceed
+    # the MACH-O header size limit on MacOS.
+    #
+    # This is a horrendous hack, but it seems to be forced on us by how Bazel
+    # builds dynamic cc libraries.
+    suffix = ".dynamic.linker_flags" if dynamic else ".static.linker_flags"
+    linker_flags_file = hs.actions.declare_file(
+        executable.basename + suffix,
+        sibling = objects_dir,
+    )
+
+    hs.actions.run_shell(
+        inputs = solibs,
+        outputs = [linker_flags_file],
+        command = """
+        touch {out}
+        for lib in {solibs}; do
+            {nm} -u "$lib" | sed 's/^/-optl-Wl,-u,/' >> {out}
+        done
+        """.format(
+            nm = cc.tools.nm,
+            solibs = " ".join(["\"" + l.path + "\"" for l in solibs]),
+            out = linker_flags_file.path,
+        ),
+    )
+    return linker_flags_file
+
+def _create_objects_dir_manifest(hs, objects_dir, dynamic, with_profiling):
+    suffix = ".dynamic.manifest" if dynamic else ".static.manifest"
+    objects_dir_manifest = hs.actions.declare_file(
+        objects_dir.basename + suffix,
+        sibling = objects_dir,
+    )
+
+    if with_profiling:
+        ext = "p_o"
+    elif dynamic:
+        ext = "dyn_o"
+    else:
+        ext = "o"
+    hs.actions.run_shell(
+        inputs = [objects_dir],
+        outputs = [objects_dir_manifest],
+        command = """
+        find {dir} -name '*.{ext}' > {out}
+        """.format(
+            dir = objects_dir.path,
+            ext = ext,
+            out = objects_dir_manifest.path,
+        ),
+        use_default_shell_env = True,
+    )
+
+    return objects_dir_manifest
+
+def _link_dependencies(hs, dep_info, dynamic, binary, args):
+    """Configure linker flags and inputs.
+
+    Configure linker flags for C library dependencies and runtime dynamic
+    library dependencies. And collect the C libraries to pass as inputs to
+    the linking action.
+
+    Args:
+      hs: Haskell context.
+      dep_info: HaskellInfo provider.
+      dynamic: Bool: Whether to link dynamically, or statically.
+      binary: Final linked binary.
+      args: Arguments to the linking action.
+
+    Returns:
+      depset: C library dependencies to provide as input to the linking action.
+    """
+
+    # Pick linking context based on linking mode.
+    if dynamic:
+        link_ctx = dep_info.cc_dependencies.dynamic_linking
+        trans_link_ctx = dep_info.transitive_cc_dependencies.dynamic_linking
+    else:
+        link_ctx = dep_info.cc_dependencies.static_linking
+        trans_link_ctx = dep_info.transitive_cc_dependencies.static_linking
+
+    # Direct C library dependencies to link.
+    # I.e. not indirect through another Haskell dependency.
+    # Such indirect dependencies are linked by GHC based on the extra-libraries
+    # fields in the dependency's package configuration file.
+    libs_to_link = link_ctx.libraries_to_link.to_list()
+    _add_external_libraries(args, libs_to_link)
+
+    # Transitive library dependencies to have in scope for linking.
+    trans_libs_to_link = trans_link_ctx.libraries_to_link.to_list()
+
+    # Libraries to pass as inputs to linking action.
+    cc_link_libs = depset(transitive = [
+        depset(trans_libs_to_link),
+    ])
+
+    # Transitive dynamic library dependencies to have in RUNPATH.
+    cc_solibs = trans_link_ctx.dynamic_libraries_for_runtime.to_list()
+
+    # Collect Haskell dynamic library dependencies in common RUNPATH.
+    # This is to keep the number of RUNPATH entries low, for faster loading
+    # and to avoid exceeding the MACH-O header size limit on MacOS.
+    hs_solibs = []
+    if dynamic:
+        hs_solibs_prefix = "_hssolib_%s" % hs.name
+        for dep in set.to_list(dep_info.dynamic_libraries):
+            dep_link = hs.actions.declare_file(
+                paths.join(hs_solibs_prefix, dep.basename),
+                sibling = binary,
+            )
+            ln(hs, dep, dep_link)
+            hs_solibs.append(dep_link)
+
+    # Configure RUNPATH.
+    rpaths = _infer_rpaths(
+        hs.toolchain.is_darwin,
+        binary,
+        trans_link_ctx.dynamic_libraries_for_runtime.to_list() +
+        hs_solibs,
+    )
+    for rpath in set.to_list(rpaths):
+        args.add("-optl-Wl,-rpath," + rpath)
+
+    return (cc_link_libs, cc_solibs, hs_solibs)
+
+def link_binary(
+        hs,
+        cc,
+        dep_info,
+        extra_srcs,
+        compiler_flags,
+        objects_dir,
+        dynamic,
+        with_profiling,
+        version):
+    """Link Haskell binary from static object files.
+
+    Returns:
+      File: produced executable
+    """
+
+    exe_name = hs.name + (".exe" if hs.toolchain.is_windows else "")
+    executable = hs.actions.declare_file(exe_name)
+
+    args = hs.actions.args()
+    args.add_all(["-optl" + f for f in cc.linker_flags])
+    if with_profiling:
+        args.add("-prof")
+    args.add_all(hs.toolchain.compiler_flags)
+    args.add_all(compiler_flags)
+
+    # By default, GHC will produce mostly-static binaries, i.e. in which all
+    # Haskell code is statically linked and foreign libraries and system
+    # dependencies are dynamically linked. If linkstatic is false, i.e. the user
+    # has requested fully dynamic linking, we must therefore add flags to make
+    # sure that GHC dynamically links Haskell code too. The one exception to
+    # this is when we are compiling for profiling, which currently does not play
+    # nicely with dynamic linking.
+    if dynamic:
+        if with_profiling:
+            print("WARNING: dynamic linking and profiling don't mix. Omitting -dynamic.\nSee https://ghc.haskell.org/trac/ghc/ticket/15394")
+        else:
+            args.add_all(["-pie", "-dynamic"])
+
+    # When compiling with `-threaded`, GHC needs to link against
+    # the pthread library when linking against static archives (.a).
+    # We assume it’s not a problem to pass it for other cases,
+    # so we just default to passing it.
+    args.add("-optl-pthread")
+
+    args.add_all(["-o", executable.path])
+
+    # De-duplicate optl calls while preserving ordering: we want last
+    # invocation of an object to remain last. That is `-optl foo -optl
+    # bar -optl foo` becomes `-optl bar -optl foo`. Do this by counting
+    # number of occurrences. That way we only build dict and add to args
+    # directly rather than doing multiple reversals with temporary
+    # lists.
+
+    args.add_all(pkg_info_to_compile_flags(expose_packages(
+        dep_info,
+        lib_info = None,
+        use_direct = True,
+        use_my_pkg_id = None,
+        custom_package_databases = None,
+        version = version,
+    )))
+
+    (cc_link_libs, cc_solibs, hs_solibs) = _link_dependencies(
+        hs = hs,
+        dep_info = dep_info,
+        dynamic = dynamic,
+        binary = executable,
+        args = args,
+    )
+
+    # XXX: Suppress a warning that Clang prints due to GHC automatically passing
+    # "-pie" or "-no-pie" to the C compiler.
+    # This is linked to https://ghc.haskell.org/trac/ghc/ticket/15319
+    args.add_all([
+        "-optc-Wno-unused-command-line-argument",
+        "-optl-Wno-unused-command-line-argument",
+    ])
+
+    objects_dir_manifest = _create_objects_dir_manifest(
+        hs,
+        objects_dir,
+        dynamic = dynamic,
+        with_profiling = with_profiling,
+    )
+
+    extra_linker_flags_file = None
+    if hs.toolchain.is_darwin:
+        args.add("-optl-Wl,-headerpad_max_install_names")
+
+        # Nixpkgs commit 3513034208a introduces -liconv in NIX_LDFLAGS on
+        # Darwin. We don't currently handle NIX_LDFLAGS in any special
+        # way, so a hack is to simply do what NIX_LDFLAGS is telling us we
+        # should do always when using a toolchain from Nixpkgs.
+        # TODO remove this gross hack.
+        args.add("-liconv")
+
+        extra_linker_flags_file = _darwin_create_extra_linker_flags_file(
+            hs,
+            cc,
+            objects_dir,
+            executable,
+            dynamic,
+            cc_solibs,
+        )
+
+    if extra_linker_flags_file != None:
+        params_file = _merge_parameter_files(hs, objects_dir_manifest, extra_linker_flags_file)
+    else:
+        params_file = objects_dir_manifest
+
+    hs.toolchain.actions.run_ghc(
+        hs,
+        cc,
+        inputs = depset(transitive = [
+            depset(extra_srcs),
+            set.to_depset(dep_info.package_databases),
+            set.to_depset(dep_info.dynamic_libraries),
+            depset(dep_info.static_libraries),
+            depset(dep_info.static_libraries_prof),
+            depset([objects_dir]),
+            cc_link_libs,
+        ]),
+        outputs = [executable],
+        mnemonic = "HaskellLinkBinary",
+        arguments = args,
+        params_file = params_file,
+    )
+
+    return (executable, cc_solibs + hs_solibs)
+
+def _add_external_libraries(args, ext_libs):
+    """Add options to `args` that allow us to link to `ext_libs`.
+
+    Args:
+      args: Args object.
+      ext_libs: C library dependencies.
+    """
+
+    # Deduplicate the list of ext_libs based on their
+    # library name (file name stripped of lib prefix and endings).
+    # This keeps the command lines short, e.g. when a C library
+    # like `liblz4.so` appears in multiple dependencies.
+    # XXX: this is only done in here
+    # Shouldn’t the deduplication be applied to *all* external libraries?
+    deduped = list.dedup_on(get_lib_name, ext_libs)
+
+    for lib in deduped:
+        args.add_all([
+            "-L{0}".format(
+                paths.dirname(lib.path),
+            ),
+            "-l{0}".format(
+                # technically this is the second call to get_lib_name,
+                #  but the added clarity makes up for it.
+                get_lib_name(lib),
+            ),
+        ])
+
+def _infer_rpaths(is_darwin, target, solibs):
+    """Return set of RPATH values to be added to target so it can find all
+    solibs
+
+    The resulting paths look like:
+    $ORIGIN/../../path/to/solib/dir
+    This means: "go upwards to your runfiles directory, then descend into
+    the parent folder of the solib".
+
+    Args:
+      is_darwin: Whether we're compiling on and for Darwin.
+      target: File, executable or library we're linking.
+      solibs: A list of Files, shared objects that the target needs.
+
+    Returns:
+      Set of strings: rpaths to add to target.
+    """
+    r = set.empty()
+
+    if is_darwin:
+        prefix = "@loader_path"
+    else:
+        prefix = "$ORIGIN"
+
+    for solib in solibs:
+        rpath = create_rpath_entry(
+            binary = target,
+            dependency = solib,
+            keep_filename = False,
+            prefix = prefix,
+        )
+        set.mutable_insert(r, rpath)
+
+    return r
+
+def _so_extension(hs):
+    """Returns the extension for shared libraries.
+
+    Args:
+      hs: Haskell rule context.
+
+    Returns:
+      string of extension.
+    """
+    return "dylib" if hs.toolchain.is_darwin else "so"
+
+def link_library_static(hs, cc, dep_info, objects_dir, my_pkg_id, with_profiling):
+    """Link a static library for the package using given object files.
+
+    Returns:
+      File: Produced static library.
+    """
+    static_library = hs.actions.declare_file(
+        "lib{0}.a".format(pkg_id.library_name(hs, my_pkg_id, prof_suffix = with_profiling)),
+    )
+    objects_dir_manifest = _create_objects_dir_manifest(
+        hs,
+        objects_dir,
+        dynamic = False,
+        with_profiling = with_profiling,
+    )
+    args = hs.actions.args()
+    inputs = [objects_dir, objects_dir_manifest] + cc.files
+
+    if hs.toolchain.is_darwin:
+        # On Darwin, ar doesn't support params files.
+        args.add_all([
+            static_library,
+            objects_dir_manifest.path,
+        ])
+
+        # TODO Get ar location from the CC toolchain. This is
+        # complicated by the fact that the CC toolchain does not
+        # always use ar, and libtool has an entirely different CLI.
+        # See https://github.com/bazelbuild/bazel/issues/5127
+        hs.actions.run_shell(
+            inputs = inputs,
+            outputs = [static_library],
+            mnemonic = "HaskellLinkStaticLibrary",
+            command = "{ar} qc $1 $(< $2)".format(ar = cc.tools.ar),
+            arguments = [args],
+
+            # Use the default macosx toolchain
+            env = {"SDKROOT": "macosx"},
+        )
+    else:
+        args.add_all([
+            "qc",
+            static_library,
+            "@" + objects_dir_manifest.path,
+        ])
+        hs.actions.run(
+            inputs = inputs,
+            outputs = [static_library],
+            mnemonic = "HaskellLinkStaticLibrary",
+            executable = cc.tools.ar,
+            arguments = [args],
+        )
+
+    return static_library
+
+def link_library_dynamic(hs, cc, dep_info, extra_srcs, objects_dir, my_pkg_id):
+    """Link a dynamic library for the package using given object files.
+
+    Returns:
+      File: Produced dynamic library.
+    """
+
+    dynamic_library = hs.actions.declare_file(
+        "lib{0}-ghc{1}.{2}".format(
+            pkg_id.library_name(hs, my_pkg_id),
+            hs.toolchain.version,
+            _so_extension(hs),
+        ),
+    )
+
+    args = hs.actions.args()
+    args.add_all(["-optl" + f for f in cc.linker_flags])
+    args.add_all(["-shared", "-dynamic"])
+
+    # Work around macOS linker limits.  This fix has landed in GHC HEAD, but is
+    # not yet in a release; plus, we still want to support older versions of
+    # GHC.  For details, see: https://phabricator.haskell.org/D4714
+    if hs.toolchain.is_darwin:
+        args.add("-optl-Wl,-dead_strip_dylibs")
+
+    args.add_all(pkg_info_to_compile_flags(expose_packages(
+        dep_info,
+        lib_info = None,
+        use_direct = True,
+        use_my_pkg_id = None,
+        custom_package_databases = None,
+        version = my_pkg_id.version if my_pkg_id else None,
+    )))
+
+    (cc_link_libs, _cc_solibs, _hs_solibs) = _link_dependencies(
+        hs = hs,
+        dep_info = dep_info,
+        dynamic = True,
+        binary = dynamic_library,
+        args = args,
+    )
+
+    args.add_all(["-o", dynamic_library.path])
+
+    # Profiling not supported for dynamic libraries.
+    objects_dir_manifest = _create_objects_dir_manifest(
+        hs,
+        objects_dir,
+        dynamic = True,
+        with_profiling = False,
+    )
+
+    hs.toolchain.actions.run_ghc(
+        hs,
+        cc,
+        inputs = depset([objects_dir], transitive = [
+            depset(extra_srcs),
+            set.to_depset(dep_info.package_databases),
+            set.to_depset(dep_info.dynamic_libraries),
+            cc_link_libs,
+        ]),
+        outputs = [dynamic_library],
+        mnemonic = "HaskellLinkDynamicLibrary",
+        arguments = args,
+        params_file = objects_dir_manifest,
+    )
+
+    return dynamic_library