blob: 75fe4a91de45cda888c10f26e33ae886f5538900 (
plain) (
blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
|
\documentclass[12pt]{beamer}
\usetheme{metropolis}
\newenvironment{code}{\ttfamily}{\par}
\title{Where does \textit{your} compiler come from?}
\date{2018-03-13}
\author{Vincent Ambo}
\institute{Norwegian Unix User Group}
\begin{document}
\maketitle
%% Slide 1:
\section{Introduction}
%% Slide 2:
\begin{frame}{Chicken and egg}
Self-hosted compilers are often built using themselves, for example:
\begin{itemize}
\item C-family compilers bootstrap themselves \& each other
\item (Some!) Common Lisp compilers can bootstrap each other
\item \texttt{rustc} bootstraps itself with a previous version
\item ... same for many other languages!
\end{itemize}
\end{frame}
%% Slide 3:
\begin{frame}{Trusting Trust}
\begin{center}
Could this be exploited?
\end{center}
\end{frame}
%% Slide 4:
\begin{frame}{Short interlude: A quine}
\begin{center}
\begin{code}
((lambda (x) (list x (list 'quote x)))
\newline\vspace*{6mm} '(lambda (x) (list x (list 'quote x))))
\end{code}
\end{center}
\end{frame}
%% Slide 5:
\begin{frame}{Short interlude: Quine Relay}
\begin{center}
\includegraphics[
keepaspectratio=true,
height=\textheight
]{quine-relay.png}
\end{center}
\end{frame}
%% Slide 6:
\begin{frame}{Trusting Trust}
An attack described by Ken Thompson in 1984:
\begin{enumerate}
\item Modify a compiler to detect when it's compiling itself.
\item Let the modification insert \textit{itself} into the new compiler.
\item Add arbitrary attack code to the modification.
\end{enumerate}
\end{frame}
%% Slide 7:
\begin{frame}{Damage potential?}
\begin{center}
Let your imagination run wild!
\end{center}
\end{frame}
%% Slide 8:
\section{Countermeasures}
%% Slide 9:
\begin{frame}{Diverse Double-Compiling}
Assume we have:
\begin{itemize}
\item Target language compilers $A$ and $T$
\item The source code of $A$: $ S_{A} $
\end{itemize}
\end{frame}
%% Slide 10:
\begin{frame}{Diverse Double-Compiling}
Apply the first stage (functional equivalence):
\begin{itemize}
\item $ X = A(S_{A})$
\item $ Y = T(S_{A})$
\end{itemize}
Apply the second stage (bit-for-bit equivalence):
\begin{itemize}
\item $ V = X(S_{A})$
\item $ W = Y(S_{A})$
\end{itemize}
Now we have a new problem: Reproducibility!
\end{frame}
%% Slide 11:
\begin{frame}{Reproducibility}
Bit-for-bit equivalent output is hard, for example:
\begin{itemize}
\item Timestamps in output artifacts
\item Non-deterministic linking order in concurrent builds
\item Non-deterministic VM & memory states in outputs
\item Randomness in builds (sic!)
\end{itemize}
\end{frame}
%% Slide 12:
\section{State of (some part of) the Union}
\end{document}
|