diff options
Diffstat (limited to 'third_party/bazel/rules_haskell/haskell/private/actions/link.bzl')
-rw-r--r-- | third_party/bazel/rules_haskell/haskell/private/actions/link.bzl | 667 |
1 files changed, 667 insertions, 0 deletions
diff --git a/third_party/bazel/rules_haskell/haskell/private/actions/link.bzl b/third_party/bazel/rules_haskell/haskell/private/actions/link.bzl new file mode 100644 index 000000000000..65cd2c6e4327 --- /dev/null +++ b/third_party/bazel/rules_haskell/haskell/private/actions/link.bzl @@ -0,0 +1,667 @@ +"""Actions for linking object code produced by compilation""" + +load(":private/packages.bzl", "expose_packages", "pkg_info_to_compile_flags") +load("@bazel_skylib//lib:paths.bzl", "paths") +load( + ":private/path_utils.bzl", + "get_lib_name", + "is_shared_library", + "is_static_library", + "ln", +) +load(":private/pkg_id.bzl", "pkg_id") +load(":private/set.bzl", "set") +load(":private/list.bzl", "list") + +# tests in /tests/unit_tests/BUILD +def parent_dir_path(path): + """Returns the path of the parent directory. + For a relative path with just a file, "." is returned. + The path is not normalized. + + foo => . + foo/ => foo + foo/bar => foo + foo/bar/baz => foo/bar + foo/../bar => foo/.. + + Args: + a path string + + Returns: + A path list of the form `["foo", "bar"]` + """ + path_dir = paths.dirname(path) + + # dirname returns "" if there is no parent directory + # In that case we return the identity path, which is ".". + if path_dir == "": + return ["."] + else: + return path_dir.split("/") + +def __check_dots(target, path): + # there’s still (non-leading) .. in split + if ".." in path: + fail("the short_path of target {} (which is {}) contains more dots than loading `../`. We can’t handle that.".format( + target, + target.short_path, + )) + +# skylark doesn’t allow nested defs, which is a mystery. +def _get_target_parent_dir(target): + """get the parent dir and handle leading short_path dots, + which signify that the target is in an external repository. + + Args: + target: a target, .short_path is used + Returns: + (is_external, parent_dir) + `is_external`: Bool whether the path points to an external repository + `parent_dir`: The parent directory, either up to the runfiles toplel, + up to the external repository toplevel. + Is `[]` if there is no parent dir. + """ + + parent_dir = parent_dir_path(target.short_path) + + if parent_dir[0] == "..": + __check_dots(target, parent_dir[1:]) + return (True, parent_dir[1:]) + elif parent_dir[0] == ".": + return (False, []) + else: + __check_dots(target, parent_dir) + return (False, parent_dir) + +# tests in /tests/unit_tests/BUILD +def create_rpath_entry( + binary, + dependency, + keep_filename, + prefix = ""): + """Return a (relative) path that points from `binary` to `dependecy` + while not leaving the current bazel runpath, taking into account weird + corner cases of `.short_path` concerning external repositories. + The resulting entry should be able to be inserted into rpath or similar. + + Examples: + + bin.short_path=foo/a.so and dep.short_path=bar/b.so + => create_rpath_entry(bin, dep, False) = ../bar + and + create_rpath_entry(bin, dep, True) = ../bar/b.so + and + create_rpath_entry(bin, dep, True, "$ORIGIN") = $ORIGIN/../bar/b.so + + Args: + binary: target of current binary + dependency: target of dependency to relatively point to + keep_filename: whether to point to the filename or its parent dir + prefix: string path prefix to add before the relative path + + Returns: + relative path string + """ + + (bin_is_external, bin_parent_dir) = _get_target_parent_dir(binary) + (dep_is_external, dep_parent_dir) = _get_target_parent_dir(dependency) + + # backup through parent directories of the binary, + # to the runfiles directory + bin_backup = [".."] * len(bin_parent_dir) + + # external repositories live in `target.runfiles/external`, + # while the internal repository lives in `target.runfiles`. + # The `.short_path`s of external repositories are strange, + # they start with `../`, but you cannot just append that in + # order to find the correct runpath. Instead you have to use + # the following logic to construct the correct runpaths: + if bin_is_external: + if dep_is_external: + # stay in `external` + path_segments = bin_backup + else: + # backup out of `external` + path_segments = [".."] + bin_backup + elif dep_is_external: + # go into `external` + path_segments = bin_backup + ["external"] + else: + # no special external traversal + path_segments = bin_backup + + # then add the parent dir to our dependency + path_segments.extend(dep_parent_dir) + + # optionally add the filename + if keep_filename: + path_segments.append( + paths.basename(dependency.short_path), + ) + + # normalize for good measure and create the final path + path = paths.normalize("/".join(path_segments)) + + # and add the prefix if applicable + if prefix == "": + return path + else: + return prefix + "/" + path + +def _merge_parameter_files(hs, file1, file2): + """Merge two GHC parameter files into one. + + Args: + hs: Haskell context. + file1: The first parameter file. + file2: The second parameter file. + + Returns: + File: A new parameter file containing the parameters of both input files. + The file name is based on the file names of the input files. The file + is located next to the first input file. + """ + params_file = hs.actions.declare_file( + file1.basename + ".and." + file2.basename, + sibling = file1, + ) + hs.actions.run_shell( + inputs = [file1, file2], + outputs = [params_file], + command = """ + cat {file1} {file2} > {out} + """.format( + file1 = file1.path, + file2 = file2.path, + out = params_file.path, + ), + ) + return params_file + +def _darwin_create_extra_linker_flags_file(hs, cc, objects_dir, executable, dynamic, solibs): + """Write additional linker flags required on MacOS to a parameter file. + + Args: + hs: Haskell context. + cc: CcInteropInfo, information about C dependencies. + objects_dir: Directory storing object files. + Used to determine output file location. + executable: The executable being built. + dynamic: Bool: Whether to link dynamically or statically. + solibs: List of dynamic library dependencies. + + Returns: + File: Parameter file with additional linker flags. To be passed to GHC. + """ + + # On Darwin GHC will pass the dead_strip_dylibs flag to the linker. This + # flag will remove any shared library loads from the binary's header that + # are not directly resolving undefined symbols in the binary. I.e. any + # indirect shared library dependencies will be removed. This conflicts with + # Bazel's builtin cc rules, which assume that the final binary will load + # all transitive shared library dependencies. In particlar shared libraries + # produced by Bazel's cc rules never load shared libraries themselves. This + # causes missing symbols at runtime on MacOS, see #170. + # + # The following work-around applies the `-u` flag to the linker for any + # symbol that is undefined in any transitive shared library dependency. + # This forces the linker to resolve these undefined symbols in all + # transitive shared library dependencies and keep the corresponding load + # commands in the binary's header. + # + # Unfortunately, this prohibits elimination of any truly redundant shared + # library dependencies. Furthermore, the transitive closure of shared + # library dependencies can be large, so this makes it more likely to exceed + # the MACH-O header size limit on MacOS. + # + # This is a horrendous hack, but it seems to be forced on us by how Bazel + # builds dynamic cc libraries. + suffix = ".dynamic.linker_flags" if dynamic else ".static.linker_flags" + linker_flags_file = hs.actions.declare_file( + executable.basename + suffix, + sibling = objects_dir, + ) + + hs.actions.run_shell( + inputs = solibs, + outputs = [linker_flags_file], + command = """ + touch {out} + for lib in {solibs}; do + {nm} -u "$lib" | sed 's/^/-optl-Wl,-u,/' >> {out} + done + """.format( + nm = cc.tools.nm, + solibs = " ".join(["\"" + l.path + "\"" for l in solibs]), + out = linker_flags_file.path, + ), + ) + return linker_flags_file + +def _create_objects_dir_manifest(hs, objects_dir, dynamic, with_profiling): + suffix = ".dynamic.manifest" if dynamic else ".static.manifest" + objects_dir_manifest = hs.actions.declare_file( + objects_dir.basename + suffix, + sibling = objects_dir, + ) + + if with_profiling: + ext = "p_o" + elif dynamic: + ext = "dyn_o" + else: + ext = "o" + hs.actions.run_shell( + inputs = [objects_dir], + outputs = [objects_dir_manifest], + command = """ + find {dir} -name '*.{ext}' > {out} + """.format( + dir = objects_dir.path, + ext = ext, + out = objects_dir_manifest.path, + ), + use_default_shell_env = True, + ) + + return objects_dir_manifest + +def _link_dependencies(hs, dep_info, dynamic, binary, args): + """Configure linker flags and inputs. + + Configure linker flags for C library dependencies and runtime dynamic + library dependencies. And collect the C libraries to pass as inputs to + the linking action. + + Args: + hs: Haskell context. + dep_info: HaskellInfo provider. + dynamic: Bool: Whether to link dynamically, or statically. + binary: Final linked binary. + args: Arguments to the linking action. + + Returns: + depset: C library dependencies to provide as input to the linking action. + """ + + # Pick linking context based on linking mode. + if dynamic: + link_ctx = dep_info.cc_dependencies.dynamic_linking + trans_link_ctx = dep_info.transitive_cc_dependencies.dynamic_linking + else: + link_ctx = dep_info.cc_dependencies.static_linking + trans_link_ctx = dep_info.transitive_cc_dependencies.static_linking + + # Direct C library dependencies to link. + # I.e. not indirect through another Haskell dependency. + # Such indirect dependencies are linked by GHC based on the extra-libraries + # fields in the dependency's package configuration file. + libs_to_link = link_ctx.libraries_to_link.to_list() + _add_external_libraries(args, libs_to_link) + + # Transitive library dependencies to have in scope for linking. + trans_libs_to_link = trans_link_ctx.libraries_to_link.to_list() + + # Libraries to pass as inputs to linking action. + cc_link_libs = depset(transitive = [ + depset(trans_libs_to_link), + ]) + + # Transitive dynamic library dependencies to have in RUNPATH. + cc_solibs = trans_link_ctx.dynamic_libraries_for_runtime.to_list() + + # Collect Haskell dynamic library dependencies in common RUNPATH. + # This is to keep the number of RUNPATH entries low, for faster loading + # and to avoid exceeding the MACH-O header size limit on MacOS. + hs_solibs = [] + if dynamic: + hs_solibs_prefix = "_hssolib_%s" % hs.name + for dep in set.to_list(dep_info.dynamic_libraries): + dep_link = hs.actions.declare_file( + paths.join(hs_solibs_prefix, dep.basename), + sibling = binary, + ) + ln(hs, dep, dep_link) + hs_solibs.append(dep_link) + + # Configure RUNPATH. + rpaths = _infer_rpaths( + hs.toolchain.is_darwin, + binary, + trans_link_ctx.dynamic_libraries_for_runtime.to_list() + + hs_solibs, + ) + for rpath in set.to_list(rpaths): + args.add("-optl-Wl,-rpath," + rpath) + + return (cc_link_libs, cc_solibs, hs_solibs) + +def link_binary( + hs, + cc, + dep_info, + extra_srcs, + compiler_flags, + objects_dir, + dynamic, + with_profiling, + version): + """Link Haskell binary from static object files. + + Returns: + File: produced executable + """ + + exe_name = hs.name + (".exe" if hs.toolchain.is_windows else "") + executable = hs.actions.declare_file(exe_name) + + args = hs.actions.args() + args.add_all(["-optl" + f for f in cc.linker_flags]) + if with_profiling: + args.add("-prof") + args.add_all(hs.toolchain.compiler_flags) + args.add_all(compiler_flags) + + # By default, GHC will produce mostly-static binaries, i.e. in which all + # Haskell code is statically linked and foreign libraries and system + # dependencies are dynamically linked. If linkstatic is false, i.e. the user + # has requested fully dynamic linking, we must therefore add flags to make + # sure that GHC dynamically links Haskell code too. The one exception to + # this is when we are compiling for profiling, which currently does not play + # nicely with dynamic linking. + if dynamic: + if with_profiling: + print("WARNING: dynamic linking and profiling don't mix. Omitting -dynamic.\nSee https://ghc.haskell.org/trac/ghc/ticket/15394") + else: + args.add_all(["-pie", "-dynamic"]) + + # When compiling with `-threaded`, GHC needs to link against + # the pthread library when linking against static archives (.a). + # We assume it’s not a problem to pass it for other cases, + # so we just default to passing it. + args.add("-optl-pthread") + + args.add_all(["-o", executable.path]) + + # De-duplicate optl calls while preserving ordering: we want last + # invocation of an object to remain last. That is `-optl foo -optl + # bar -optl foo` becomes `-optl bar -optl foo`. Do this by counting + # number of occurrences. That way we only build dict and add to args + # directly rather than doing multiple reversals with temporary + # lists. + + args.add_all(pkg_info_to_compile_flags(expose_packages( + dep_info, + lib_info = None, + use_direct = True, + use_my_pkg_id = None, + custom_package_databases = None, + version = version, + ))) + + (cc_link_libs, cc_solibs, hs_solibs) = _link_dependencies( + hs = hs, + dep_info = dep_info, + dynamic = dynamic, + binary = executable, + args = args, + ) + + # XXX: Suppress a warning that Clang prints due to GHC automatically passing + # "-pie" or "-no-pie" to the C compiler. + # This is linked to https://ghc.haskell.org/trac/ghc/ticket/15319 + args.add_all([ + "-optc-Wno-unused-command-line-argument", + "-optl-Wno-unused-command-line-argument", + ]) + + objects_dir_manifest = _create_objects_dir_manifest( + hs, + objects_dir, + dynamic = dynamic, + with_profiling = with_profiling, + ) + + extra_linker_flags_file = None + if hs.toolchain.is_darwin: + args.add("-optl-Wl,-headerpad_max_install_names") + + # Nixpkgs commit 3513034208a introduces -liconv in NIX_LDFLAGS on + # Darwin. We don't currently handle NIX_LDFLAGS in any special + # way, so a hack is to simply do what NIX_LDFLAGS is telling us we + # should do always when using a toolchain from Nixpkgs. + # TODO remove this gross hack. + args.add("-liconv") + + extra_linker_flags_file = _darwin_create_extra_linker_flags_file( + hs, + cc, + objects_dir, + executable, + dynamic, + cc_solibs, + ) + + if extra_linker_flags_file != None: + params_file = _merge_parameter_files(hs, objects_dir_manifest, extra_linker_flags_file) + else: + params_file = objects_dir_manifest + + hs.toolchain.actions.run_ghc( + hs, + cc, + inputs = depset(transitive = [ + depset(extra_srcs), + set.to_depset(dep_info.package_databases), + set.to_depset(dep_info.dynamic_libraries), + depset(dep_info.static_libraries), + depset(dep_info.static_libraries_prof), + depset([objects_dir]), + cc_link_libs, + ]), + outputs = [executable], + mnemonic = "HaskellLinkBinary", + arguments = args, + params_file = params_file, + ) + + return (executable, cc_solibs + hs_solibs) + +def _add_external_libraries(args, ext_libs): + """Add options to `args` that allow us to link to `ext_libs`. + + Args: + args: Args object. + ext_libs: C library dependencies. + """ + + # Deduplicate the list of ext_libs based on their + # library name (file name stripped of lib prefix and endings). + # This keeps the command lines short, e.g. when a C library + # like `liblz4.so` appears in multiple dependencies. + # XXX: this is only done in here + # Shouldn’t the deduplication be applied to *all* external libraries? + deduped = list.dedup_on(get_lib_name, ext_libs) + + for lib in deduped: + args.add_all([ + "-L{0}".format( + paths.dirname(lib.path), + ), + "-l{0}".format( + # technically this is the second call to get_lib_name, + # but the added clarity makes up for it. + get_lib_name(lib), + ), + ]) + +def _infer_rpaths(is_darwin, target, solibs): + """Return set of RPATH values to be added to target so it can find all + solibs + + The resulting paths look like: + $ORIGIN/../../path/to/solib/dir + This means: "go upwards to your runfiles directory, then descend into + the parent folder of the solib". + + Args: + is_darwin: Whether we're compiling on and for Darwin. + target: File, executable or library we're linking. + solibs: A list of Files, shared objects that the target needs. + + Returns: + Set of strings: rpaths to add to target. + """ + r = set.empty() + + if is_darwin: + prefix = "@loader_path" + else: + prefix = "$ORIGIN" + + for solib in solibs: + rpath = create_rpath_entry( + binary = target, + dependency = solib, + keep_filename = False, + prefix = prefix, + ) + set.mutable_insert(r, rpath) + + return r + +def _so_extension(hs): + """Returns the extension for shared libraries. + + Args: + hs: Haskell rule context. + + Returns: + string of extension. + """ + return "dylib" if hs.toolchain.is_darwin else "so" + +def link_library_static(hs, cc, dep_info, objects_dir, my_pkg_id, with_profiling): + """Link a static library for the package using given object files. + + Returns: + File: Produced static library. + """ + static_library = hs.actions.declare_file( + "lib{0}.a".format(pkg_id.library_name(hs, my_pkg_id, prof_suffix = with_profiling)), + ) + objects_dir_manifest = _create_objects_dir_manifest( + hs, + objects_dir, + dynamic = False, + with_profiling = with_profiling, + ) + args = hs.actions.args() + inputs = [objects_dir, objects_dir_manifest] + cc.files + + if hs.toolchain.is_darwin: + # On Darwin, ar doesn't support params files. + args.add_all([ + static_library, + objects_dir_manifest.path, + ]) + + # TODO Get ar location from the CC toolchain. This is + # complicated by the fact that the CC toolchain does not + # always use ar, and libtool has an entirely different CLI. + # See https://github.com/bazelbuild/bazel/issues/5127 + hs.actions.run_shell( + inputs = inputs, + outputs = [static_library], + mnemonic = "HaskellLinkStaticLibrary", + command = "{ar} qc $1 $(< $2)".format(ar = cc.tools.ar), + arguments = [args], + + # Use the default macosx toolchain + env = {"SDKROOT": "macosx"}, + ) + else: + args.add_all([ + "qc", + static_library, + "@" + objects_dir_manifest.path, + ]) + hs.actions.run( + inputs = inputs, + outputs = [static_library], + mnemonic = "HaskellLinkStaticLibrary", + executable = cc.tools.ar, + arguments = [args], + ) + + return static_library + +def link_library_dynamic(hs, cc, dep_info, extra_srcs, objects_dir, my_pkg_id): + """Link a dynamic library for the package using given object files. + + Returns: + File: Produced dynamic library. + """ + + dynamic_library = hs.actions.declare_file( + "lib{0}-ghc{1}.{2}".format( + pkg_id.library_name(hs, my_pkg_id), + hs.toolchain.version, + _so_extension(hs), + ), + ) + + args = hs.actions.args() + args.add_all(["-optl" + f for f in cc.linker_flags]) + args.add_all(["-shared", "-dynamic"]) + + # Work around macOS linker limits. This fix has landed in GHC HEAD, but is + # not yet in a release; plus, we still want to support older versions of + # GHC. For details, see: https://phabricator.haskell.org/D4714 + if hs.toolchain.is_darwin: + args.add("-optl-Wl,-dead_strip_dylibs") + + args.add_all(pkg_info_to_compile_flags(expose_packages( + dep_info, + lib_info = None, + use_direct = True, + use_my_pkg_id = None, + custom_package_databases = None, + version = my_pkg_id.version if my_pkg_id else None, + ))) + + (cc_link_libs, _cc_solibs, _hs_solibs) = _link_dependencies( + hs = hs, + dep_info = dep_info, + dynamic = True, + binary = dynamic_library, + args = args, + ) + + args.add_all(["-o", dynamic_library.path]) + + # Profiling not supported for dynamic libraries. + objects_dir_manifest = _create_objects_dir_manifest( + hs, + objects_dir, + dynamic = True, + with_profiling = False, + ) + + hs.toolchain.actions.run_ghc( + hs, + cc, + inputs = depset([objects_dir], transitive = [ + depset(extra_srcs), + set.to_depset(dep_info.package_databases), + set.to_depset(dep_info.dynamic_libraries), + cc_link_libs, + ]), + outputs = [dynamic_library], + mnemonic = "HaskellLinkDynamicLibrary", + arguments = args, + params_file = objects_dir_manifest, + ) + + return dynamic_library |