about summary refs log tree commit diff
path: root/presentations/bootstrapping-2018
diff options
context:
space:
mode:
author Vincent Ambo <mail@tazj.in> 2020-06-26T19·38+0100
committer tazjin <mail@tazj.in> 2020-06-26T19·51+0000
commit 2e3b03b5ae04cc9d4da0001aff07962bf4107d42 (patch)
tree 75d929acb15720bc8eb1182d105e2ecaa2626ba0 /presentations/bootstrapping-2018
parent 1d0e421cb86861c64b58d5aa66dce295ffe28af5 (diff)
chore(tazjin): Move my presentations to my user directory r/1090
Change-Id: I72b25680e7167c3a55477111c28b1d4936c60e2c
Reviewed-on: https://cl.tvl.fyi/c/depot/+/606
Reviewed-by: tazjin <mail@tazj.in>
Diffstat (limited to 'presentations/bootstrapping-2018')
-rw-r--r-- presentations/bootstrapping-2018/README.md 5
-rw-r--r-- presentations/bootstrapping-2018/default.nix 50
-rw-r--r-- presentations/bootstrapping-2018/drake-meme.png bin 246872 -> 0 bytes
-rw-r--r-- presentations/bootstrapping-2018/nixos-logo.png bin 90542 -> 0 bytes
-rw-r--r-- presentations/bootstrapping-2018/notes.org 89
-rw-r--r-- presentations/bootstrapping-2018/presentation.pdf bin 527371 -> 0 bytes
-rw-r--r-- presentations/bootstrapping-2018/presentation.tex 251
-rw-r--r-- presentations/bootstrapping-2018/quine-relay.png bin 52350 -> 0 bytes
-rw-r--r-- presentations/bootstrapping-2018/result.pdfpc 142
9 files changed, 0 insertions, 537 deletions
diff --git a/presentations/bootstrapping-2018/README.md b/presentations/bootstrapping-2018/README.md
deleted file mode 100644
index e9573ae3f2..0000000000
--- a/presentations/bootstrapping-2018/README.md
+++ /dev/null
@@ -1,5 +0,0 @@
-These are the slides for a talk I gave at the Norwegian Unix User Group on
-2018-03-13.
-
-There is more information and a recording on the [event
-page](https://www.nuug.no/aktiviteter/20180313-reproduible-compiler/).
diff --git a/presentations/bootstrapping-2018/default.nix b/presentations/bootstrapping-2018/default.nix
deleted file mode 100644
index 0dff14b2a1..0000000000
--- a/presentations/bootstrapping-2018/default.nix
+++ /dev/null
@@ -1,50 +0,0 @@
-# This derivation builds the LaTeX presentation.
-
-{ pkgs, ... }:
-
-with pkgs;
-
-let tex = texlive.combine {
-  inherit (texlive)
-    beamer
-    beamertheme-metropolis
-    etoolbox
-    euenc
-    extsizes
-    fontspec
-    lualibs
-    luaotfload
-    luatex
-    minted
-    ms
-    pgfopts
-    scheme-basic
-    translator;
-};
-in stdenv.mkDerivation {
-  name = "nuug-bootstrapping-slides";
-  src = ./.;
-
-  FONTCONFIG_FILE = makeFontsConf {
-    fontDirectories = [ fira fira-code fira-mono ];
-  };
-
-  buildInputs = [ tex fira fira-code fira-mono ];
-  buildPhase = ''
-    # LaTeX needs a cache folder in /home/ ...
-    mkdir home
-    export HOME=$PWD/home
-    # ${tex}/bin/luaotfload-tool -ufv
-
-    # As usual, TeX needs to be run twice ...
-    function run() {
-      ${tex}/bin/lualatex presentation.tex
-    }
-    run && run
-  '';
-
-  installPhase = ''
-    mkdir -p $out
-    cp presentation.pdf $out/
-  '';
-}
diff --git a/presentations/bootstrapping-2018/drake-meme.png b/presentations/bootstrapping-2018/drake-meme.png
deleted file mode 100644
index 4b03675438..0000000000
--- a/presentations/bootstrapping-2018/drake-meme.png
+++ /dev/null
Binary files differ
diff --git a/presentations/bootstrapping-2018/nixos-logo.png b/presentations/bootstrapping-2018/nixos-logo.png
deleted file mode 100644
index ce0c98c2ca..0000000000
--- a/presentations/bootstrapping-2018/nixos-logo.png
+++ /dev/null
Binary files differ
diff --git a/presentations/bootstrapping-2018/notes.org b/presentations/bootstrapping-2018/notes.org
deleted file mode 100644
index 363d75352e..0000000000
--- a/presentations/bootstrapping-2018/notes.org
+++ /dev/null
@@ -1,89 +0,0 @@
-#+TITLE: Bootstrapping, reproducibility, etc.
-#+AUTHOR: Vincent Ambo
-#+DATE: <2018-03-10 Sat>
-
-* Compiler bootstrapping
-  This section contains notes about compiler bootstrapping, the
-  history thereof, which compilers need it - and so on:
-
-** C
-
-** Haskell
-   - self-hosted compiler (GHC)
-
-** Common Lisp
-   CL is fairly interesting in this space because it is a language
-   that is defined via an ANSI standard that compiler implementations
-   normally actually follow!
-
-   CL has several ecosystem components that focus on abstracting
-   away implementation-specific calls, and if a self-hosted
-   compiler is written in CL using those components it can be
-   cross-bootstrapped.
-
-** Python
-
-* A note on runtimes
-  Sometimes the compiler just isn't enough ...
-
-** LLVM
-** JVM
-
-* References
-  https://github.com/mame/quine-relay
-  https://manishearth.github.io/blog/2016/12/02/reflections-on-rusting-trust/
-  https://tests.reproducible-builds.org/debian/reproducible.html
-
-* Slide thoughts:
-  1. Hardware trust has been discussed here a bunch, most recently
-     during the puri.sm talk. Hardware trust is important, as we see
-     with IME, but it's striking that people often take a leap to "I'm
-     now on my trusted Debian with free software".
-
-     Unless you built it yourself from scratch (Spoiler: you haven't)
-     you're placing trust in what is basically foreign binary blobs.
-
-     Agenda: Implications/attack vectors of this, state of the chicken
-     & egg, the topic of reproducibility, what can you do? (Nix!)
-
-  2. Chicken-and-egg issue
-
-     It's an important milestone for a language to become self-hosted:
-     You begin doing a kind of dogfooding, you begin to enforce
-     reliability & consistency guarantees to avoid having to redo your
-     own codebase constantly and so on.
-
-     However, the implication is now that you need your own compiler
-     to compile itself.
-
-     Common examples:
-     - C/C++ compilers needed to build C/C++ compilers:
-
-       GCC 4.7 was the last version of GCC that could be built with a
-       standard C-compiler, nowadays it is mostly written in C++.
-
-       Certain versions of GCC can be built with LLVM/Clang.
-
-       Clang/LLVM can be compiled by itself and also GCC.
-
-     - Rust was originally written in OCaml but moved to being
-       self-hosted in 2011. Currently rustc-releases are always built
-       with a copy of the previous release.
-
-       It's relatively new so we can build the chain all the way.
-
-     Notable exceptions: Some popular languages are not self-hosted,
-     for example Clojure. Languages also have runtimes, which may be
-     written in something else (e.g. Haskell -> C runtime)
-* How to help:
-  Most of this advice is about reproducible builds, not bootstrapping,
-  as that is a much harder project.
-
-  - fix reproducibility issues listed in Debian's issue tracker (focus
-    on non-Debian specific ones though)
-  - experiment with NixOS / GuixSD to get a better grasp on the
-    problem space of reproducibility
-
-  If you want to contribute to bootstrapping, look at
-  bootstrappable.org and their wiki. Several initiatives such as MES
-  could need help!
diff --git a/presentations/bootstrapping-2018/presentation.pdf b/presentations/bootstrapping-2018/presentation.pdf
deleted file mode 100644
index 7f435fe5b5..0000000000
--- a/presentations/bootstrapping-2018/presentation.pdf
+++ /dev/null
Binary files differ
diff --git a/presentations/bootstrapping-2018/presentation.tex b/presentations/bootstrapping-2018/presentation.tex
deleted file mode 100644
index d3aa613375..0000000000
--- a/presentations/bootstrapping-2018/presentation.tex
+++ /dev/null
@@ -1,251 +0,0 @@
-\documentclass[12pt]{beamer}
-\usetheme{metropolis}
-\newenvironment{code}{\ttfamily}{\par}
-\title{Where does \textit{your} compiler come from?}
-\date{2018-03-13}
-\author{Vincent Ambo}
-\institute{Norwegian Unix User Group}
-\begin{document}
-  \maketitle
-
-  %% Slide 1:
-  \section{Introduction}
-
-  %% Slide 2:
-  \begin{frame}{Chicken and egg}
-    Self-hosted compilers are often built using themselves, for example:
-
-    \begin{itemize}
-    \item C-family compilers bootstrap themselves \& each other
-    \item (Some!) Common Lisp compilers can bootstrap each other
-    \item \texttt{rustc} bootstraps itself with a previous version
-    \item ... same for many other languages!
-    \end{itemize}
-  \end{frame}
-
-  \begin{frame}{Chicken, egg and ... lizard?}
-    It's not just compilers: Languages have runtimes, too.
-
-    \begin{itemize}
-    \item JVM is implemented in C++
-    \item Erlang-VM is C
-    \item Haskell runtime is C
-    \end{itemize}
-
-    ... we can't ever get away from C, can we?
-  \end{frame}
-
-  %% Slide 3:
-  \begin{frame}{Trusting Trust}
-    \begin{center}
-      \huge{Could this be exploited?}
-    \end{center}
-  \end{frame}
-
-  %% Slide 4:
-  \begin{frame}{Short interlude: A quine}
-    \begin{center}
-      \begin{code}
-        ((lambda (x) (list x (list 'quote x)))
-        \newline\vspace*{6mm} '(lambda (x) (list x (list 'quote x))))
-      \end{code}
-    \end{center}
-  \end{frame}
-
-  %% Slide 5:
-  \begin{frame}{Short interlude: Quine Relay}
-    \begin{center}
-      \includegraphics[
-        keepaspectratio=true,
-        height=\textheight
-      ]{quine-relay.png}
-    \end{center}
-  \end{frame}
-
-  %% Slide 6:
-  \begin{frame}{Trusting Trust}
-    An attack described by Ken Thompson in 1983:
-
-    \begin{enumerate}
-    \item Modify a compiler to detect when it's compiling itself.
-    \item Let the modification insert \textit{itself} into the new compiler.
-    \item Add arbitrary attack code to the modification.
-    \item \textit{Optional!} Remove the attack from the source after compilation.
-    \end{enumerate}
-  \end{frame}
-
-  %% Slide 7:
-  \begin{frame}{Damage potential?}
-    \begin{center}
-      \large{Let your imagination run wild!}
-    \end{center}
-  \end{frame}
-
-  %% Slide 8:
-  \section{Countermeasures}
-
-  %% Slide 9:
-  \begin{frame}{Diverse Double-Compiling}
-    Assume we have:
-
-    \begin{itemize}
-    \item Target language compilers $A$ and $T$
-    \item The source code of $A$: $ S_{A} $
-    \end{itemize}
-  \end{frame}
-
-  %% Slide 10:
-  \begin{frame}{Diverse Double-Compiling}
-    Apply the first stage (functional equivalence):
-
-    \begin{itemize}
-    \item $ X = A(S_{A})$
-    \item $ Y = T(S_{A})$
-    \end{itemize}
-
-    Apply the second stage (bit-for-bit equivalence):
-
-    \begin{itemize}
-    \item $ V = X(S_{A})$
-    \item $ W = Y(S_{A})$
-    \end{itemize}
-
-    Now we have a new problem: Reproducibility!
-  \end{frame}
-
-  %% Slide 11:
-  \begin{frame}{Reproducibility}
-    Bit-for-bit equivalent output is hard, for example:
-
-    \begin{itemize}
-    \item Timestamps in output artifacts
-    \item Non-deterministic linking order in concurrent builds
-    \item Non-deterministic VM \& memory states in outputs
-    \item Randomness in builds (sic!)
-    \end{itemize}
-  \end{frame}
-
-  \begin{frame}{Reproducibility}
-    \begin{center}
-      Without reproducibility, we can never trust that any shipped
-      binary matches the source code!
-    \end{center}
-  \end{frame}
-
-  %% Slide 12:
-  \section{(Partial) State of the Union}
-
-  \begin{frame}{The Desired State}
-    \begin{center}
-      \begin{enumerate}
-      \item Full-source bootstrap!
-      \item All packages reproducible!
-      \end{enumerate}
-    \end{center}
-  \end{frame}
-
-  %% Slide 13:
-  \begin{frame}{Bootstrapping Debian}
-    \begin{itemize}
-    \item Sparse information on the Debian-wiki
-    \item Bootstrapping discussions mostly revolve around new architectures
-    \item GCC is compiled by depending on previous versions of GCC
-    \end{itemize}
-  \end{frame}
-
-  \begin{frame}{Reproducing Debian}
-    Debian has a very active effort for reproducible builds:
-
-    \begin{itemize}
-    \item Organised information about reproducibility status
-    \item Over 90\% reproducibility in Debian package base!
-    \end{itemize}
-  \end{frame}
-
-  \begin{frame}{Short interlude: Nix}
-    \begin{center}
-      \includegraphics[
-        keepaspectratio=true,
-        height=0.7\textheight
-      ]{nixos-logo.png}
-    \end{center}
-  \end{frame}
-
-  \begin{frame}{Short interlude: Nix}
-    \begin{center}
-      \includegraphics[
-        keepaspectratio=true,
-        height=0.90\textheight
-      ]{drake-meme.png}
-    \end{center}
-  \end{frame}
-
-  \begin{frame}{Short interlude: Nix}
-    \begin{center}
-      \includegraphics[
-        keepaspectratio=true,
-        height=0.7\textheight
-      ]{nixos-logo.png}
-    \end{center}
-  \end{frame}
-
-  \begin{frame}{Bootstrapping NixOS}
-    Nix evaluation can not recurse forever: The bootstrap can not
-    simply depend on a previous GCC.
-
-    Workaround: \texttt{bootstrap-tools} tarball from a previous
-    binary cache is fetched and used.
-
-    An unfortunate magic binary blob ...
-  \end{frame}
-
-  \begin{frame}{Reproducing NixOS}
-    Not all reproducibility patches have been ported from Debian.
-
-    However: Builds are fully repeatable via the Nix fundamentals!
-  \end{frame}
-
-  \section{Future Developments}
-
-  \begin{frame}{Bootstrappable: stage0}
-    Hand-rolled ``Cthulhu's Path to Madness'' hex-programs:
-
-    \begin{itemize}
-    \item No non-auditable binary blobs
-    \item Aims for understandability by 70\% of programmers
-    \item End goal is a full-source bootstrap of GCC
-    \end{itemize}
-  \end{frame}
-
-
-  \begin{frame}{Bootstrappable: MES}
-    Bootstrapping the ``Maxwell Equations of Software'':
-
-    \begin{itemize}
-    \item Minimal C-compiler written in Scheme
-    \item Minimal Scheme-interpreter (currently in C, but intended to
-      be rewritten in stage0 macros)
-    \item End goal is full-source bootstrap of the entire GuixSD
-    \end{itemize}
-  \end{frame}
-
-  \begin{frame}{Other platforms}
-    \begin{itemize}
-    \item Nix for Darwin is actively maintained
-    \item F-Droid Android repository works towards fully reproducible
-      builds of (open) Android software
-    \item Mobile devices (phones, tablets, etc.) are a lost cause at
-      the moment
-    \end{itemize}
-  \end{frame}
-
-  \begin{frame}{Thanks!}
-    Resources:
-    \begin{itemize}
-    \item bootstrappable.org
-    \item reproducible-builds.org
-    \end{itemize}
-
-    @tazjin | mail@tazj.in
-  \end{frame}
-\end{document}
diff --git a/presentations/bootstrapping-2018/quine-relay.png b/presentations/bootstrapping-2018/quine-relay.png
deleted file mode 100644
index 5644dc3900..0000000000
--- a/presentations/bootstrapping-2018/quine-relay.png
+++ /dev/null
Binary files differ
diff --git a/presentations/bootstrapping-2018/result.pdfpc b/presentations/bootstrapping-2018/result.pdfpc
deleted file mode 100644
index b0fa6c9a0e..0000000000
--- a/presentations/bootstrapping-2018/result.pdfpc
+++ /dev/null
@@ -1,142 +0,0 @@
-[file]
-result
-[last_saved_slide]
-10
-[font_size]
-20000
-[notes]
-### 1
-- previous discussions of hardware trust (e.g. purism presentation)
-- people leap to "now I'm on my trusted Debian!"
-- unless you built it from scratch (spoiler: you haven't) you're *trusting* someone
-
-Agenda: Implications of trust with focus on bootstrap paths and reproducibility, plus how you can help.### 2
-self-hosting:
-- C-family: GCC pre/post 4.7, Clang
-- Common Lisp: Sunshine land! (with SBCL)
-- rustc: Bootstrap based on previous versions (C++ transpiler underway!)
-- many other languages also work this way!
-
-(Notable counterexample: Clojure is written in Java!)### 3
-
-- compilers are just one bit, the various runtimes exist, too!### 4
-
-Could this be exploited?
-
-People don't think about where their compiler comes from.
-
-Even if they do, they may only go so far as to say "I'll just recompile it using <other compiler>".
-
-Unfortunately, spoiler alert, life isn't that easy in the computer world and yes, exploitation is possible.### 5
-
-- describe what a quine is
-- classic Lisp quine
-- explain demo quine
-- demo demo quine
-
-- this is interesting, but not useful - can quines do more than that?### 6
-
-- quine-relay: "art project" with 128-language circular quine
-
-- show source of quine-relay
-
-- (demo quine relay?)
-
-- side-note: this program is very, very trustworthy!### 7
-
-Ken Thompson (designer of UNIX and a couple other things!) received Turing award in 1983, and described attack in speech.
-
-- figure out how to detect self-compilation
-- make that modification a quine
-- insert modification into new compiler
-- add attack code to modification
-- remove attack from source, distributed binary will still be compromised! it's like evolution :)### 8
-
-damage potential is basically infinite:
-
-- classic "login" attack
-=> also applicable to other credentials
-
-- attack (weaken) crypto algorithms
-
-- you can probably think of more!### 10
-
-idea being: potential vulnerability would have to work across compilers:
-
-the more compilers we can introduce (e.g. more architectures, different versions, different compilers), the harder it gets for a vulnerability to survive all of those
-
-The more compilers, the merrier! Lisps are pretty good at this.### 11
-
-if we get a bit-mismatch after DDC, not all hope is lost: Maybe the thing just isn't reproducible!
-
-- many reasons for failures
-- timestamps are a classic! artifacts can be build logs, metadata in ZIP-files or whatever
-- non-determinism is the devil
-- sometimes people actively introduce build-randomness (NaCl)### 12
-
-- Does that binary download on the project's website really match the source?
-
-- Your Linux packages are signed by someone - cool - but what does that mean?### 13
-
-Two things should be achieved - gross oversimplification - to get to the ideal "desired state of the union":
-
-1. full-source bootstrap: without ever introducing any binaries, go from nothing to a full Linux distribution
-
-2. when packages are distributed, we should be able to know the expected output of a source package beforehand
-
-=> suddenly binary distributions become a cache! But more on Nix later.### 14
-
-- Debian project does not seem as concerned with bootstrapping as with reproducibility
-- Debian mostly bootstraps on new architectures (using cross-compilation and similar techniques, from an existing binary base)
-- core bootstrap (GCC & friends) is performed with previous Debian version and depending on GCC### 15
-
-... however! Debian cares about reproducibility.
-
-- automated testing of reproducibility
-- information about the status of all packages is made available in repos
-- Over 90% of packages are reproducible!
-
-< show reproducible builds website >
-
-Debian is still fundamentally a binary distribution though, but it doesn't have to be that way.### 16
-
-Nix - a purely functional package manager
-
-It's not a new project (10+ years), been discussed here before, has multiple components: package manager, language, NixOS.
-
-Instead of describing *how* to build a thing, Nix describes *what* to build:### 17
-### 19
-
-In Nix, it's impossible to say "GCC is the result of applying GCC to the GCC source", because that happens to be infinite recursion.
-
-Bootstrapping in Nix works by introducing a binary pinned by its full-hash, which was built on some previous Nix version.
-
-Unfortunately also just a magic binary blob ... ### 20
-
-NixOS is not actively porting all of Debian's reproducibility patches, but builds are fully repeatable:
-
-- introducing a malicious compiler would produce a different input hash -> different package
-
-Future slide: hope is not lost! Things are underway.### 21
-
-- bootstrappable.org (demo?) is an umbrella page for several projects working on bootstrappability
-
-- stage0 is an important piece: manually written, small, auditable Hex programs to get to a Hex macro expander
-
-- end goal is a full-source bootstrap, but pieces are missing### 22
-
-MES is out of the GuixSD circles (explain Guix, GNU Hurd joke)
-
-- idea being that once you have a Lisp, you have all of computing (as Alan Kay said)
-
-- includes MesCC in Scheme -> can *almost* make a working tinyCC -> can *almost* make a working gcc 4.7
-
-- minimal Scheme interpreter, currently built in C to get the higher-level stuff to work, goal is rewrite in hex
-- bootstrapping Guix is the end goal### 23
-
-- userspace in Darwin has a Nix project
-- unsure about other BSDs, but if anyone knows - input welcome!
-- F-Droid has reproducible Android packages, but that's also userspace only
-- All other mobile platforms are a lost cause
-
-Generally, all closed-source software is impossible to trust.