about summary refs log tree commit diff
path: root/tvix/store/src/proto.rs
use anyhow::Result;
use std::collections::HashSet;
use thiserror::Error;

use prost::Message;

// Pulls in the Rust code generated for the `tvix.store.v1` protobuf package.
// NOTE(review): presumably this is where `Directory`, `DirectoryNode`,
// `FileNode` and `SymlinkNode` used below come from — confirm against the
// .proto definitions.
tonic::include_proto!("tvix.store.v1");

/// Errors that can occur during the validation of Directory messages.
#[derive(Debug, Error, PartialEq)]
pub enum ValidateDirectoryError {
    /// Elements are not in sorted order
    #[error("{0} is not sorted")]
    WrongSorting(String),
    /// Multiple elements with the same name encountered
    #[error("{0} is a duplicate name")]
    DuplicateName(String),
    /// Invalid name encountered
    #[error("Invalid name in {0}")]
    InvalidName(String),
    /// Invalid digest length encountered
    #[error("Ivalid Digest length: {0}")]
    InvalidDigestLen(usize),
}

/// Checks a name for validity.
///
/// We disallow slashes, null bytes, `.`, `..` and the empty string.
/// Depending on the context, a [DirectoryNode], [FileNode] or [SymlinkNode]
/// message with an empty string as name is allowed, but they don't occur
/// inside a Directory message.
///
/// # Errors
///
/// Returns [ValidateDirectoryError::InvalidName] carrying a copy of `name`
/// when the name is rejected.
fn validate_node_name(name: &str) -> Result<(), ValidateDirectoryError> {
    // Names that are reserved as a whole, as opposed to forbidden characters.
    let is_reserved = matches!(name, "" | "." | "..");

    if is_reserved || name.contains('\0') || name.contains('/') {
        return Err(ValidateDirectoryError::InvalidName(name.to_string()));
    }
    Ok(())
}

/// Checks a digest for validity.
///
/// Digests are 32 bytes long, as we store blake3 digests. Only the length is
/// checked; the content of the digest is not inspected.
///
/// Takes a slice so that both `&Vec<u8>` and `&[u8]` callers work.
///
/// # Errors
///
/// Returns [ValidateDirectoryError::InvalidDigestLen] carrying the observed
/// length when it is not exactly 32.
fn validate_digest(digest: &[u8]) -> Result<(), ValidateDirectoryError> {
    /// Expected digest length in bytes.
    const DIGEST_LEN: usize = 32;

    match digest.len() {
        DIGEST_LEN => Ok(()),
        len => Err(ValidateDirectoryError::InvalidDigestLen(len)),
    }
}

/// Accepts a name, and a mutable reference to the previous name.
///
/// If the passed name does not sort before the previous one (it compares
/// greater than *or equal to* it), the reference is updated to point at the
/// passed name. Equal names are deliberately let through here; this function
/// only checks ordering.
///
/// # Errors
///
/// Returns [ValidateDirectoryError::WrongSorting] carrying a copy of `name`
/// when `name` sorts strictly before the previous name. In that case
/// `prev_name` is left untouched.
fn update_if_lt_prev<'n>(
    prev_name: &mut &'n str,
    name: &'n str,
) -> Result<(), ValidateDirectoryError> {
    if name < *prev_name {
        return Err(ValidateDirectoryError::WrongSorting(name.to_string()));
    }
    *prev_name = name;
    Ok(())
}

/// Inserts the given name into a HashSet if it's not already in there.
///
/// # Errors
///
/// Returns [ValidateDirectoryError::DuplicateName] carrying a copy of `name`
/// when the set already contains it; the set is left unchanged in that case.
fn insert_once<'n>(
    seen_names: &mut HashSet<&'n str>,
    name: &'n str,
) -> Result<(), ValidateDirectoryError> {
    // `HashSet::insert` reports whether the value was newly added, so a single
    // call both performs the membership test and records the name.
    if !seen_names.insert(name) {
        return Err(ValidateDirectoryError::DuplicateName(name.to_string()));
    }
    Ok(())
}

impl Directory {
    /// Returns the size of this directory.
    ///
    /// The size of a directory is the number of all regular and symlink
    /// elements, the number of directory elements, and their size fields.
    ///
    /// NOTE(review): list lengths are narrowed with `as u32` and the sums use
    /// plain `+`, so a result that does not fit into `u32` panics in builds
    /// with overflow checks enabled and wraps otherwise.
    pub fn size(&self) -> u32 {
        self.files.len() as u32
            + self.symlinks.len() as u32
            + self
                .directories
                .iter()
                // Each child directory counts as one element plus its own size.
                .fold(0, |acc: u32, e| acc + 1 + e.size)
    }

    /// Returns the blake3 digest of the encoded form of this message
    /// (as produced by `encode_to_vec`), as an owned byte vector.
    pub fn digest(&self) -> Vec<u8> {
        let mut hasher = blake3::Hasher::new();
        hasher.update(&self.encode_to_vec());

        hasher.finalize().as_bytes().to_vec()
    }

    /// validate checks the directory for invalid data, such as:
    /// - violations of name restrictions
    /// - invalid digest lengths
    /// - not properly sorted lists
    /// - duplicate names in the three lists
    ///
    /// Directories, files and symlinks are checked in that order, and each
    /// list only needs to be sorted in itself. Name uniqueness is enforced
    /// across all three lists. Checking stops at the first problem found.
    ///
    /// # Errors
    ///
    /// Returns the [ValidateDirectoryError] describing the first violation.
    pub fn validate(&self) -> Result<(), ValidateDirectoryError> {
        // Shared across all three lists, so a name may only appear once overall.
        let mut seen_names: HashSet<&str> = HashSet::new();

        // Sorting is tracked per list; "" sorts before every valid name.
        let mut last_directory_name: &str = "";
        let mut last_file_name: &str = "";
        let mut last_symlink_name: &str = "";

        // check directories
        for directory_node in &self.directories {
            let name = directory_node.name.as_str();

            validate_node_name(name)?;
            validate_digest(&directory_node.digest)?;

            update_if_lt_prev(&mut last_directory_name, name)?;
            insert_once(&mut seen_names, name)?;
        }

        // check files
        for file_node in &self.files {
            let name = file_node.name.as_str();

            validate_node_name(name)?;
            validate_digest(&file_node.digest)?;

            update_if_lt_prev(&mut last_file_name, name)?;
            insert_once(&mut seen_names, name)?;
        }

        // check symlinks (they carry no digest, so only the name is checked)
        for symlink_node in &self.symlinks {
            let name = symlink_node.name.as_str();

            validate_node_name(name)?;

            update_if_lt_prev(&mut last_symlink_name, name)?;
            insert_once(&mut seen_names, name)?;
        }

        Ok(())
    }
}

#[cfg(test)]
mod tests {
    use super::{Directory, DirectoryNode, FileNode, SymlinkNode, ValidateDirectoryError};

    /// An all-zero digest of the 32-byte length that `validate` accepts.
    const DUMMY_DIGEST: [u8; 32] = [0; 32];

    /// Builds a directory node with the given name and size and a dummy digest.
    fn dir_node(name: &str, size: u32) -> DirectoryNode {
        DirectoryNode {
            name: name.to_string(),
            digest: DUMMY_DIGEST.to_vec(),
            size,
        }
    }

    /// Builds a non-executable file node with the given name, a dummy digest
    /// and an arbitrary size.
    fn file_node(name: &str) -> FileNode {
        FileNode {
            name: name.to_string(),
            digest: DUMMY_DIGEST.to_vec(),
            size: 42,
            executable: false,
        }
    }

    /// Builds a symlink node with the given name and target.
    fn symlink_node(name: &str, target: &str) -> SymlinkNode {
        SymlinkNode {
            name: name.to_string(),
            target: target.to_string(),
        }
    }

    #[test]
    fn test_directory_size() {
        // An empty directory has size 0.
        assert_eq!(Directory::default().size(), 0);

        // An empty child directory counts as one element.
        let d = Directory {
            directories: vec![dir_node("foo", 0)],
            ..Default::default()
        };
        assert_eq!(d.size(), 1);

        // A child directory contributes itself plus its own size field.
        let d = Directory {
            directories: vec![dir_node("foo", 4)],
            ..Default::default()
        };
        assert_eq!(d.size(), 5);

        // A file counts as one element, regardless of its size field.
        let d = Directory {
            files: vec![file_node("foo")],
            ..Default::default()
        };
        assert_eq!(d.size(), 1);

        // A symlink counts as one element.
        let d = Directory {
            symlinks: vec![symlink_node("foo", "bar")],
            ..Default::default()
        };
        assert_eq!(d.size(), 1);
    }

    #[test]
    fn test_digest() {
        let d = Directory::default();

        assert_eq!(
            d.digest(),
            vec![
                0xaf, 0x13, 0x49, 0xb9, 0xf5, 0xf9, 0xa1, 0xa6, 0xa0, 0x40, 0x4d, 0xea, 0x36, 0xdc,
                0xc9, 0x49, 0x9b, 0xcb, 0x25, 0xc9, 0xad, 0xc1, 0x12, 0xb7, 0xcc, 0x9a, 0x93, 0xca,
                0xe4, 0x1f, 0x32, 0x62
            ]
        )
    }

    #[test]
    fn test_directory_validate_empty() {
        let d = Directory::default();
        assert_eq!(d.validate(), Ok(()));
    }

    #[test]
    fn test_directory_validate_invalid_names() {
        // The empty string is not a valid name.
        let d = Directory {
            directories: vec![dir_node("", 42)],
            ..Default::default()
        };
        assert_eq!(
            d.validate(),
            Err(ValidateDirectoryError::InvalidName("".to_string()))
        );

        // "." is not a valid name.
        let d = Directory {
            directories: vec![dir_node(".", 42)],
            ..Default::default()
        };
        assert_eq!(
            d.validate(),
            Err(ValidateDirectoryError::InvalidName(".".to_string()))
        );

        // ".." is not a valid name.
        let d = Directory {
            files: vec![file_node("..")],
            ..Default::default()
        };
        assert_eq!(
            d.validate(),
            Err(ValidateDirectoryError::InvalidName("..".to_string()))
        );

        // Names must not contain null bytes.
        let d = Directory {
            symlinks: vec![symlink_node("\x00", "foo")],
            ..Default::default()
        };
        assert_eq!(
            d.validate(),
            Err(ValidateDirectoryError::InvalidName("\x00".to_string()))
        );

        // Names must not contain slashes.
        let d = Directory {
            symlinks: vec![symlink_node("foo/bar", "foo")],
            ..Default::default()
        };
        assert_eq!(
            d.validate(),
            Err(ValidateDirectoryError::InvalidName("foo/bar".to_string()))
        );
    }

    #[test]
    fn test_directory_validate_invalid_digest() {
        let d = Directory {
            directories: vec![DirectoryNode {
                name: "foo".to_string(),
                digest: vec![0x00, 0x42], // invalid length
                size: 42,
            }],
            ..Default::default()
        };
        assert_eq!(
            d.validate(),
            Err(ValidateDirectoryError::InvalidDigestLen(2))
        );
    }

    #[test]
    fn test_directory_validate_sorting() {
        // "b" comes before "a", bad.
        let d = Directory {
            directories: vec![dir_node("b", 42), dir_node("a", 42)],
            ..Default::default()
        };
        assert_eq!(
            d.validate(),
            Err(ValidateDirectoryError::WrongSorting("a".to_string()))
        );

        // "a" exists twice, bad.
        let d = Directory {
            directories: vec![dir_node("a", 42), dir_node("a", 42)],
            ..Default::default()
        };
        assert_eq!(
            d.validate(),
            Err(ValidateDirectoryError::DuplicateName("a".to_string()))
        );

        // "a" comes before "b", all good.
        let d = Directory {
            directories: vec![dir_node("a", 42), dir_node("b", 42)],
            ..Default::default()
        };
        d.validate().expect("validate shouldn't error");

        // [b, c] and [a] are both properly sorted.
        let d = Directory {
            directories: vec![dir_node("b", 42), dir_node("c", 42)],
            symlinks: vec![symlink_node("a", "foo")],
            ..Default::default()
        };
        d.validate().expect("validate shouldn't error");
    }

    #[test]
    fn test_directory_validate_duplicate_across_lists() {
        // The same name in two different lists is a duplicate, too.
        let d = Directory {
            directories: vec![dir_node("a", 42)],
            files: vec![file_node("a")],
            ..Default::default()
        };
        assert_eq!(
            d.validate(),
            Err(ValidateDirectoryError::DuplicateName("a".to_string()))
        );
    }
}