diff options
author | Vincent Ambo <tazjin@google.com> | 2020-05-22T16·46+0100 |
---|---|---|
committer | Vincent Ambo <tazjin@google.com> | 2020-05-22T16·46+0100 |
commit | 5229c9b232de5bfa959ad6ebbb4c8192ac513352 (patch) | |
tree | 8539e7e23682cac110900f946f034ae44162cacd /third_party/git/path.c | |
parent | f2b211131f2347342dde63975b09cf603149f1a3 (diff) | |
parent | 8518a7a51faaf50f830646d4c3585f51236b9349 (diff) |
merge(3p/git): Merge git upstream at v2.26.2 r/808
Diffstat (limited to 'third_party/git/path.c')
-rw-r--r-- | third_party/git/path.c | 265 |
1 files changed, 173 insertions, 92 deletions
diff --git a/third_party/git/path.c b/third_party/git/path.c index 25e97b8c3f76..88cf59300738 100644 --- a/third_party/git/path.c +++ b/third_party/git/path.c @@ -11,6 +11,7 @@ #include "path.h" #include "packfile.h" #include "object-store.h" +#include "lockfile.h" static int get_st_mode_bits(const char *path, int *mode) { @@ -101,36 +102,36 @@ struct common_dir { /* Not considered garbage for report_linked_checkout_garbage */ unsigned ignore_garbage:1; unsigned is_dir:1; - /* Not common even though its parent is */ - unsigned exclude:1; - const char *dirname; + /* Belongs to the common dir, though it may contain paths that don't */ + unsigned is_common:1; + const char *path; }; static struct common_dir common_list[] = { - { 0, 1, 0, "branches" }, - { 0, 1, 0, "common" }, - { 0, 1, 0, "hooks" }, - { 0, 1, 0, "info" }, - { 0, 0, 1, "info/sparse-checkout" }, - { 1, 1, 0, "logs" }, - { 1, 1, 1, "logs/HEAD" }, - { 0, 1, 1, "logs/refs/bisect" }, - { 0, 1, 1, "logs/refs/rewritten" }, - { 0, 1, 1, "logs/refs/worktree" }, - { 0, 1, 0, "lost-found" }, - { 0, 1, 0, "objects" }, - { 0, 1, 0, "refs" }, - { 0, 1, 1, "refs/bisect" }, - { 0, 1, 1, "refs/rewritten" }, - { 0, 1, 1, "refs/worktree" }, - { 0, 1, 0, "remotes" }, - { 0, 1, 0, "worktrees" }, - { 0, 1, 0, "rr-cache" }, - { 0, 1, 0, "svn" }, - { 0, 0, 0, "config" }, - { 1, 0, 0, "gc.pid" }, - { 0, 0, 0, "packed-refs" }, - { 0, 0, 0, "shallow" }, + { 0, 1, 1, "branches" }, + { 0, 1, 1, "common" }, + { 0, 1, 1, "hooks" }, + { 0, 1, 1, "info" }, + { 0, 0, 0, "info/sparse-checkout" }, + { 1, 1, 1, "logs" }, + { 1, 0, 0, "logs/HEAD" }, + { 0, 1, 0, "logs/refs/bisect" }, + { 0, 1, 0, "logs/refs/rewritten" }, + { 0, 1, 0, "logs/refs/worktree" }, + { 0, 1, 1, "lost-found" }, + { 0, 1, 1, "objects" }, + { 0, 1, 1, "refs" }, + { 0, 1, 0, "refs/bisect" }, + { 0, 1, 0, "refs/rewritten" }, + { 0, 1, 0, "refs/worktree" }, + { 0, 1, 1, "remotes" }, + { 0, 1, 1, "worktrees" }, + { 0, 1, 1, "rr-cache" }, + { 0, 1, 1, "svn" }, + { 0, 0, 1, "config" }, + { 1, 0, 1, "gc.pid" }, + { 0, 0, 1, "packed-refs" }, + { 0, 0, 1, "shallow" }, { 0, 0, 0, NULL } }; @@ -236,30 +237,41 @@ static void *add_to_trie(struct trie *root, const char *key, void *value) return old; } -typedef int (*match_fn)(const char *unmatched, void *data, void *baton); +typedef int (*match_fn)(const char *unmatched, void *value, void *baton); /* * Search a trie for some key. Find the longest /-or-\0-terminated - * prefix of the key for which the trie contains a value. Call fn - * with the unmatched portion of the key and the found value, and - * return its return value. If there is no such prefix, return -1. + * prefix of the key for which the trie contains a value. If there is + * no such prefix, return -1. Otherwise call fn with the unmatched + * portion of the key and the found value. If fn returns 0 or + * positive, then return its return value. If fn returns negative, + * then call fn with the next-longest /-terminated prefix of the key + * (i.e. a parent directory) for which the trie contains a value, and + * handle its return value the same way. If there is no shorter + * /-terminated prefix with a value left, then return the negative + * return value of the most recent fn invocation. * * The key is partially normalized: consecutive slashes are skipped. * - * For example, consider the trie containing only [refs, - * refs/worktree] (both with values). - * - * | key | unmatched | val from node | return value | - * |-----------------|------------|---------------|--------------| - * | a | not called | n/a | -1 | - * | refs | \0 | refs | as per fn | - * | refs/ | / | refs | as per fn | - * | refs/w | /w | refs | as per fn | - * | refs/worktree | \0 | refs/worktree | as per fn | - * | refs/worktree/ | / | refs/worktree | as per fn | - * | refs/worktree/a | /a | refs/worktree | as per fn | - * |-----------------|------------|---------------|--------------| + * For example, consider the trie containing only [logs, + * logs/refs/bisect], both with values, but not logs/refs. * + * | key | unmatched | prefix to node | return value | + * |--------------------|----------------|------------------|--------------| + * | a | not called | n/a | -1 | + * | logstore | not called | n/a | -1 | + * | logs | \0 | logs | as per fn | + * | logs/ | / | logs | as per fn | + * | logs/refs | /refs | logs | as per fn | + * | logs/refs/ | /refs/ | logs | as per fn | + * | logs/refs/b | /refs/b | logs | as per fn | + * | logs/refs/bisected | /refs/bisected | logs | as per fn | + * | logs/refs/bisect | \0 | logs/refs/bisect | as per fn | + * | logs/refs/bisect/ | / | logs/refs/bisect | as per fn | + * | logs/refs/bisect/a | /a | logs/refs/bisect | as per fn | + * | (If fn in the previous line returns -1, then fn is called once more:) | + * | logs/refs/bisect/a | /refs/bisect/a | logs | as per fn | + * |--------------------|----------------|------------------|--------------| */ static int trie_find(struct trie *root, const char *key, match_fn fn, void *baton) @@ -288,9 +300,13 @@ static int trie_find(struct trie *root, const char *key, match_fn fn, /* Matched the entire compressed section */ key += i; - if (!*key) + if (!*key) { /* End of key */ - return fn(key, root->value, baton); + if (root->value) + return fn(key, root->value, baton); + else + return -1; + } /* Partial path normalization: skip consecutive slashes */ while (key[0] == '/' && key[1] == '/') @@ -320,8 +336,8 @@ static void init_common_trie(void) if (common_trie_done_setup) return; - for (p = common_list; p->dirname; p++) - add_to_trie(&common_trie, p->dirname, p); + for (p = common_list; p->path; p++) + add_to_trie(&common_trie, p->path, p); common_trie_done_setup = 1; } @@ -334,14 +350,11 @@ static int check_common(const char *unmatched, void *value, void *baton) { struct common_dir *dir = value; - if (!dir) - return 0; - if (dir->is_dir && (unmatched[0] == 0 || unmatched[0] == '/')) - return !dir->exclude; + return dir->is_common; if (!dir->is_dir && unmatched[0] == 0) - return !dir->exclude; + return dir->is_common; return 0; } @@ -350,9 +363,14 @@ static void update_common_dir(struct strbuf *buf, int git_dir_len, const char *common_dir) { char *base = buf->buf + git_dir_len; + int has_lock_suffix = strbuf_strip_suffix(buf, LOCK_SUFFIX); + init_common_trie(); if (trie_find(&common_trie, base, check_common, NULL) > 0) replace_dir(buf, git_dir_len, common_dir); + + if (has_lock_suffix) + strbuf_addstr(buf, LOCK_SUFFIX); } void report_linked_checkout_garbage(void) @@ -365,8 +383,8 @@ void report_linked_checkout_garbage(void) return; strbuf_addf(&sb, "%s/", get_git_dir()); len = sb.len; - for (p = common_list; p->dirname; p++) { - const char *path = p->dirname; + for (p = common_list; p->path; p++) { + const char *path = p->path; if (p->ignore_garbage) continue; strbuf_setlen(&sb, len); @@ -1059,6 +1077,8 @@ const char *remove_leading_path(const char *in, const char *prefix) /* * It is okay if dst == src, but they should not overlap otherwise. + * The "dst" buffer must be at least as long as "src"; normalizing may shrink + * the size of the path, but will never grow it. * * Performs the following normalizations on src, storing the result in dst: * - Ensures that components are separated by '/' (Windows only) @@ -1221,31 +1241,52 @@ static inline int chomp_trailing_dir_sep(const char *path, int len) } /* - * If path ends with suffix (complete path components), returns the - * part before suffix (sans trailing directory separators). - * Otherwise returns NULL. + * If path ends with suffix (complete path components), returns the offset of + * the last character in the path before the suffix (sans trailing directory + * separators), and -1 otherwise. */ -char *strip_path_suffix(const char *path, const char *suffix) +static ssize_t stripped_path_suffix_offset(const char *path, const char *suffix) { int path_len = strlen(path), suffix_len = strlen(suffix); while (suffix_len) { if (!path_len) - return NULL; + return -1; if (is_dir_sep(path[path_len - 1])) { if (!is_dir_sep(suffix[suffix_len - 1])) - return NULL; + return -1; path_len = chomp_trailing_dir_sep(path, path_len); suffix_len = chomp_trailing_dir_sep(suffix, suffix_len); } else if (path[--path_len] != suffix[--suffix_len]) - return NULL; + return -1; } if (path_len && !is_dir_sep(path[path_len - 1])) - return NULL; - return xstrndup(path, chomp_trailing_dir_sep(path, path_len)); + return -1; + return chomp_trailing_dir_sep(path, path_len); +} + +/* + * Returns true if the path ends with components, considering only complete path + * components, and false otherwise. + */ +int ends_with_path_components(const char *path, const char *components) +{ + return stripped_path_suffix_offset(path, components) != -1; +} + +/* + * If path ends with suffix (complete path components), returns the + * part before suffix (sans trailing directory separators). + * Otherwise returns NULL. + */ +char *strip_path_suffix(const char *path, const char *suffix) +{ + ssize_t offset = stripped_path_suffix_offset(path, suffix); + + return offset == -1 ? NULL : xstrndup(path, offset); } int daemon_avoid_alias(const char *p) @@ -1295,37 +1336,77 @@ int daemon_avoid_alias(const char *p) } } -static int only_spaces_and_periods(const char *path, size_t len, size_t skip) +/* + * On NTFS, we need to be careful to disallow certain synonyms of the `.git/` + * directory: + * + * - For historical reasons, file names that end in spaces or periods are + * automatically trimmed. Therefore, `.git . . ./` is a valid way to refer + * to `.git/`. + * + * - For other historical reasons, file names that do not conform to the 8.3 + * format (up to eight characters for the basename, three for the file + * extension, certain characters not allowed such as `+`, etc) are associated + * with a so-called "short name", at least on the `C:` drive by default. + * Which means that `git~1/` is a valid way to refer to `.git/`. + * + * Note: Technically, `.git/` could receive the short name `git~2` if the + * short name `git~1` were already used. In Git, however, we guarantee that + * `.git` is the first item in a directory, therefore it will be associated + * with the short name `git~1` (unless short names are disabled). + * + * - For yet other historical reasons, NTFS supports so-called "Alternate Data + * Streams", i.e. metadata associated with a given file, referred to via + * `<filename>:<stream-name>:<stream-type>`. There exists a default stream + * type for directories, allowing `.git/` to be accessed via + * `.git::$INDEX_ALLOCATION/`. + * + * When this function returns 1, it indicates that the specified file/directory + * name refers to a `.git` file or directory, or to any of these synonyms, and + * Git should therefore not track it. + * + * For performance reasons, _all_ Alternate Data Streams of `.git/` are + * forbidden, not just `::$INDEX_ALLOCATION`. + * + * This function is intended to be used by `git fsck` even on platforms where + * the backslash is a regular filename character, therefore it needs to handle + * backlash characters in the provided `name` specially: they are interpreted + * as directory separators. + */ +int is_ntfs_dotgit(const char *name) { - if (len < skip) + char c; + + /* + * Note that when we don't find `.git` or `git~1` we end up with `name` + * advanced partway through the string. That's okay, though, as we + * return immediately in those cases, without looking at `name` any + * further. + */ + c = *(name++); + if (c == '.') { + /* .git */ + if (((c = *(name++)) != 'g' && c != 'G') || + ((c = *(name++)) != 'i' && c != 'I') || + ((c = *(name++)) != 't' && c != 'T')) + return 0; + } else if (c == 'g' || c == 'G') { + /* git ~1 */ + if (((c = *(name++)) != 'i' && c != 'I') || + ((c = *(name++)) != 't' && c != 'T') || + *(name++) != '~' || + *(name++) != '1') + return 0; + } else return 0; - len -= skip; - path += skip; - while (len-- > 0) { - char c = *(path++); - if (c != ' ' && c != '.') + + for (;;) { + c = *(name++); + if (!c || c == '\\' || c == '/' || c == ':') + return 1; + if (c != '.' && c != ' ') return 0; } - return 1; -} - -int is_ntfs_dotgit(const char *name) -{ - size_t len; - - for (len = 0; ; len++) - if (!name[len] || name[len] == '\\' || is_dir_sep(name[len])) { - if (only_spaces_and_periods(name, len, 4) && - !strncasecmp(name, ".git", 4)) - return 1; - if (only_spaces_and_periods(name, len, 5) && - !strncasecmp(name, "git~1", 5)) - return 1; - if (name[len] != '\\') - return 0; - name += len + 1; - len = -1; - } } static int is_ntfs_dot_generic(const char *name, @@ -1341,7 +1422,7 @@ static int is_ntfs_dot_generic(const char *name, only_spaces_and_periods: for (;;) { char c = name[i++]; - if (!c) + if (!c || c == ':') return 1; if (c != ' ' && c != '.') return 0; |