From 1b593e1ea4d2af0f6444d9a7788d5d99abd6fde5 Mon Sep 17 00:00:00 2001 From: Vincent Ambo Date: Sat, 11 Jan 2020 23:36:56 +0000 Subject: Squashed 'third_party/git/' content from commit cb71568594 git-subtree-dir: third_party/git git-subtree-split: cb715685942260375e1eb8153b0768a376e4ece7 --- perl/Git/SVN/Editor.pm | 605 ++++++++++++++++++++++++++++++++++++ perl/Git/SVN/Fetcher.pm | 622 +++++++++++++++++++++++++++++++++++++ perl/Git/SVN/GlobSpec.pm | 65 ++++ perl/Git/SVN/Log.pm | 400 ++++++++++++++++++++++++ perl/Git/SVN/Memoize/YAML.pm | 93 ++++++ perl/Git/SVN/Migration.pm | 265 ++++++++++++++++ perl/Git/SVN/Prompt.pm | 184 +++++++++++ perl/Git/SVN/Ra.pm | 708 +++++++++++++++++++++++++++++++++++++++++++ perl/Git/SVN/Utils.pm | 232 ++++++++++++++ 9 files changed, 3174 insertions(+) create mode 100644 perl/Git/SVN/Editor.pm create mode 100644 perl/Git/SVN/Fetcher.pm create mode 100644 perl/Git/SVN/GlobSpec.pm create mode 100644 perl/Git/SVN/Log.pm create mode 100644 perl/Git/SVN/Memoize/YAML.pm create mode 100644 perl/Git/SVN/Migration.pm create mode 100644 perl/Git/SVN/Prompt.pm create mode 100644 perl/Git/SVN/Ra.pm create mode 100644 perl/Git/SVN/Utils.pm (limited to 'perl/Git/SVN') diff --git a/perl/Git/SVN/Editor.pm b/perl/Git/SVN/Editor.pm new file mode 100644 index 000000000000..0df16ed72688 --- /dev/null +++ b/perl/Git/SVN/Editor.pm @@ -0,0 +1,605 @@ +package Git::SVN::Editor; +use vars qw/@ISA $_rmdir $_cp_similarity $_find_copies_harder $_rename_limit/; +use strict; +use warnings; +use SVN::Core; +use SVN::Delta; +use Carp qw/croak/; +use Git qw/command command_oneline command_noisy command_output_pipe + command_input_pipe command_close_pipe + command_bidi_pipe command_close_bidi_pipe + get_record/; + +BEGIN { + @ISA = qw(SVN::Delta::Editor); +} + +sub new { + my ($class, $opts) = @_; + foreach (qw/svn_path r ra tree_a tree_b log editor_cb/) { + die "$_ required!\n" unless (defined $opts->{$_}); + } + + my $pool = SVN::Pool->new; + my $mods = generate_diff($opts->{tree_a}, $opts->{tree_b}); + my $types = check_diff_paths($opts->{ra}, $opts->{svn_path}, + $opts->{r}, $mods); + + # $opts->{ra} functions should not be used after this: + my @ce = $opts->{ra}->get_commit_editor($opts->{log}, + $opts->{editor_cb}, $pool); + my $self = SVN::Delta::Editor->new(@ce, $pool); + bless $self, $class; + foreach (qw/svn_path r tree_a tree_b/) { + $self->{$_} = $opts->{$_}; + } + $self->{url} = $opts->{ra}->{url}; + $self->{mods} = $mods; + $self->{types} = $types; + $self->{pool} = $pool; + $self->{bat} = { '' => $self->open_root($self->{r}, $self->{pool}) }; + $self->{rm} = { }; + $self->{path_prefix} = length $self->{svn_path} ? + "$self->{svn_path}/" : ''; + $self->{config} = $opts->{config}; + $self->{mergeinfo} = $opts->{mergeinfo}; + $self->{pathnameencoding} = Git::config('svn.pathnameencoding'); + return $self; +} + +sub generate_diff { + my ($tree_a, $tree_b) = @_; + my @diff_tree = qw(diff-tree -z -r); + if ($_cp_similarity) { + push @diff_tree, "-C$_cp_similarity"; + } else { + push @diff_tree, '-C'; + } + push @diff_tree, '--find-copies-harder' if $_find_copies_harder; + push @diff_tree, "-l$_rename_limit" if defined $_rename_limit; + push @diff_tree, $tree_a, $tree_b; + my ($diff_fh, $ctx) = command_output_pipe(@diff_tree); + my $state = 'meta'; + my @mods; + while (defined($_ = get_record($diff_fh, "\0"))) { + if ($state eq 'meta' && /^:(\d{6})\s(\d{6})\s + ($::sha1)\s($::sha1)\s + ([MTCRAD])\d*$/xo) { + push @mods, { mode_a => $1, mode_b => $2, + sha1_a => $3, sha1_b => $4, + chg => $5 }; + if ($5 =~ /^(?:C|R)$/) { + $state = 'file_a'; + } else { + $state = 'file_b'; + } + } elsif ($state eq 'file_a') { + my $x = $mods[$#mods] or croak "Empty array\n"; + if ($x->{chg} !~ /^(?:C|R)$/) { + croak "Error parsing $_, $x->{chg}\n"; + } + $x->{file_a} = $_; + $state = 'file_b'; + } elsif ($state eq 'file_b') { + my $x = $mods[$#mods] or croak "Empty array\n"; + if (exists $x->{file_a} && $x->{chg} !~ /^(?:C|R)$/) { + croak "Error parsing $_, $x->{chg}\n"; + } + if (!exists $x->{file_a} && $x->{chg} =~ /^(?:C|R)$/) { + croak "Error parsing $_, $x->{chg}\n"; + } + $x->{file_b} = $_; + $state = 'meta'; + } else { + croak "Error parsing $_\n"; + } + } + command_close_pipe($diff_fh, $ctx); + \@mods; +} + +sub check_diff_paths { + my ($ra, $pfx, $rev, $mods) = @_; + my %types; + $pfx .= '/' if length $pfx; + + sub type_diff_paths { + my ($ra, $types, $path, $rev) = @_; + my @p = split m#/+#, $path; + my $c = shift @p; + unless (defined $types->{$c}) { + $types->{$c} = $ra->check_path($c, $rev); + } + while (@p) { + $c .= '/' . shift @p; + next if defined $types->{$c}; + $types->{$c} = $ra->check_path($c, $rev); + } + } + + foreach my $m (@$mods) { + foreach my $f (qw/file_a file_b/) { + next unless defined $m->{$f}; + my ($dir) = ($m->{$f} =~ m#^(.*?)/?(?:[^/]+)$#); + if (length $pfx.$dir && ! defined $types{$dir}) { + type_diff_paths($ra, \%types, $pfx.$dir, $rev); + } + } + } + \%types; +} + +sub split_path { + return ($_[0] =~ m#^(.*?)/?([^/]+)$#); +} + +sub repo_path { + my ($self, $path) = @_; + if (my $enc = $self->{pathnameencoding}) { + require Encode; + Encode::from_to($path, $enc, 'UTF-8'); + } + $self->{path_prefix}.(defined $path ? $path : ''); +} + +sub url_path { + my ($self, $path) = @_; + $path = $self->repo_path($path); + if ($self->{url} =~ m#^https?://#) { + # characters are taken from subversion/libsvn_subr/path.c + $path =~ s#([^~a-zA-Z0-9_./!$&'()*+,-])#sprintf("%%%02X",ord($1))#eg; + } + $self->{url} . '/' . $path; +} + +sub rmdirs { + my ($self) = @_; + my $rm = $self->{rm}; + delete $rm->{''}; # we never delete the url we're tracking + return unless %$rm; + + foreach (keys %$rm) { + my @d = split m#/#, $_; + my $c = shift @d; + $rm->{$c} = 1; + while (@d) { + $c .= '/' . shift @d; + $rm->{$c} = 1; + } + } + delete $rm->{$self->{svn_path}}; + delete $rm->{''}; # we never delete the url we're tracking + return unless %$rm; + + my ($fh, $ctx) = command_output_pipe(qw/ls-tree --name-only -r -z/, + $self->{tree_b}); + while (defined($_ = get_record($fh, "\0"))) { + my @dn = split m#/#, $_; + while (pop @dn) { + delete $rm->{join '/', @dn}; + } + unless (%$rm) { + close $fh; + return; + } + } + command_close_pipe($fh, $ctx); + + my ($r, $p, $bat) = ($self->{r}, $self->{pool}, $self->{bat}); + foreach my $d (sort { $b =~ tr#/#/# <=> $a =~ tr#/#/# } keys %$rm) { + $self->close_directory($bat->{$d}, $p); + my ($dn) = ($d =~ m#^(.*?)/?(?:[^/]+)$#); + print "\tD+\t$d/\n" unless $::_q; + $self->SUPER::delete_entry($d, $r, $bat->{$dn}, $p); + delete $bat->{$d}; + } +} + +sub open_or_add_dir { + my ($self, $full_path, $baton, $deletions) = @_; + my $t = $self->{types}->{$full_path}; + if (!defined $t) { + die "$full_path not known in r$self->{r} or we have a bug!\n"; + } + { + no warnings 'once'; + # SVN::Node::none and SVN::Node::file are used only once, + # so we're shutting up Perl's warnings about them. + if ($t == $SVN::Node::none || defined($deletions->{$full_path})) { + return $self->add_directory($full_path, $baton, + undef, -1, $self->{pool}); + } elsif ($t == $SVN::Node::dir) { + return $self->open_directory($full_path, $baton, + $self->{r}, $self->{pool}); + } # no warnings 'once' + print STDERR "$full_path already exists in repository at ", + "r$self->{r} and it is not a directory (", + ($t == $SVN::Node::file ? 'file' : 'unknown'),"/$t)\n"; + } # no warnings 'once' + exit 1; +} + +sub ensure_path { + my ($self, $path, $deletions) = @_; + my $bat = $self->{bat}; + my $repo_path = $self->repo_path($path); + return $bat->{''} unless (length $repo_path); + + my @p = split m#/+#, $repo_path; + my $c = shift @p; + $bat->{$c} ||= $self->open_or_add_dir($c, $bat->{''}, $deletions); + while (@p) { + my $c0 = $c; + $c .= '/' . shift @p; + $bat->{$c} ||= $self->open_or_add_dir($c, $bat->{$c0}, $deletions); + } + return $bat->{$c}; +} + +# Subroutine to convert a globbing pattern to a regular expression. +# From perl cookbook. +sub glob2pat { + my $globstr = shift; + my %patmap = ('*' => '.*', '?' => '.', '[' => '[', ']' => ']'); + $globstr =~ s{(.)} { $patmap{$1} || "\Q$1" }ge; + return '^' . $globstr . '$'; +} + +sub check_autoprop { + my ($self, $pattern, $properties, $file, $fbat) = @_; + # Convert the globbing pattern to a regular expression. + my $regex = glob2pat($pattern); + # Check if the pattern matches the file name. + if($file =~ m/($regex)/) { + # Parse the list of properties to set. + my @props = split(/;/, $properties); + foreach my $prop (@props) { + # Parse 'name=value' syntax and set the property. + if ($prop =~ /([^=]+)=(.*)/) { + my ($n,$v) = ($1,$2); + for ($n, $v) { + s/^\s+//; s/\s+$//; + } + $self->change_file_prop($fbat, $n, $v); + } + } + } +} + +sub apply_autoprops { + my ($self, $file, $fbat) = @_; + my $conf_t = ${$self->{config}}{'config'}; + no warnings 'once'; + # Check [miscellany]/enable-auto-props in svn configuration. + if (SVN::_Core::svn_config_get_bool( + $conf_t, + $SVN::_Core::SVN_CONFIG_SECTION_MISCELLANY, + $SVN::_Core::SVN_CONFIG_OPTION_ENABLE_AUTO_PROPS, + 0)) { + # Auto-props are enabled. Enumerate them to look for matches. + my $callback = sub { + $self->check_autoprop($_[0], $_[1], $file, $fbat); + }; + SVN::_Core::svn_config_enumerate( + $conf_t, + $SVN::_Core::SVN_CONFIG_SECTION_AUTO_PROPS, + $callback); + } +} + +sub check_attr { + my ($attr,$path) = @_; + my $val = command_oneline("check-attr", $attr, "--", $path); + if ($val) { $val =~ s/^[^:]*:\s*[^:]*:\s*(.*)\s*$/$1/; } + return $val; +} + +sub apply_manualprops { + my ($self, $file, $fbat) = @_; + my $pending_properties = check_attr( "svn-properties", $file ); + if ($pending_properties eq "") { return; } + # Parse the list of properties to set. + my @props = split(/;/, $pending_properties); + # TODO: get existing properties to compare to + # - this fails for add so currently not done + # my $existing_props = ::get_svnprops($file); + my $existing_props = {}; + # TODO: caching svn properties or storing them in .gitattributes + # would make that faster + foreach my $prop (@props) { + # Parse 'name=value' syntax and set the property. + if ($prop =~ /([^=]+)=(.*)/) { + my ($n,$v) = ($1,$2); + for ($n, $v) { + s/^\s+//; s/\s+$//; + } + my $existing = $existing_props->{$n}; + if (!defined($existing) || $existing ne $v) { + $self->change_file_prop($fbat, $n, $v); + } + } + } +} + +sub A { + my ($self, $m, $deletions) = @_; + my ($dir, $file) = split_path($m->{file_b}); + my $pbat = $self->ensure_path($dir, $deletions); + my $fbat = $self->add_file($self->repo_path($m->{file_b}), $pbat, + undef, -1); + print "\tA\t$m->{file_b}\n" unless $::_q; + $self->apply_autoprops($file, $fbat); + $self->apply_manualprops($m->{file_b}, $fbat); + $self->chg_file($fbat, $m); + $self->close_file($fbat,undef,$self->{pool}); +} + +sub C { + my ($self, $m, $deletions) = @_; + my ($dir, $file) = split_path($m->{file_b}); + my $pbat = $self->ensure_path($dir, $deletions); + # workaround for a bug in svn serf backend (v1.8.5 and below): + # store third argument to ->add_file() in a local variable, to make it + # have the same lifetime as $fbat + my $upa = $self->url_path($m->{file_a}); + my $fbat = $self->add_file($self->repo_path($m->{file_b}), $pbat, + $upa, $self->{r}); + print "\tC\t$m->{file_a} => $m->{file_b}\n" unless $::_q; + $self->apply_manualprops($m->{file_b}, $fbat); + $self->chg_file($fbat, $m); + $self->close_file($fbat,undef,$self->{pool}); +} + +sub delete_entry { + my ($self, $path, $pbat) = @_; + my $rpath = $self->repo_path($path); + my ($dir, $file) = split_path($rpath); + $self->{rm}->{$dir} = 1; + $self->SUPER::delete_entry($rpath, $self->{r}, $pbat, $self->{pool}); +} + +sub R { + my ($self, $m, $deletions) = @_; + my ($dir, $file) = split_path($m->{file_b}); + my $pbat = $self->ensure_path($dir, $deletions); + # workaround for a bug in svn serf backend, see comment in C() above + my $upa = $self->url_path($m->{file_a}); + my $fbat = $self->add_file($self->repo_path($m->{file_b}), $pbat, + $upa, $self->{r}); + print "\tR\t$m->{file_a} => $m->{file_b}\n" unless $::_q; + $self->apply_autoprops($file, $fbat); + $self->apply_manualprops($m->{file_b}, $fbat); + $self->chg_file($fbat, $m); + $self->close_file($fbat,undef,$self->{pool}); + + ($dir, $file) = split_path($m->{file_a}); + $pbat = $self->ensure_path($dir, $deletions); + $self->delete_entry($m->{file_a}, $pbat); +} + +sub M { + my ($self, $m, $deletions) = @_; + my ($dir, $file) = split_path($m->{file_b}); + my $pbat = $self->ensure_path($dir, $deletions); + my $fbat = $self->open_file($self->repo_path($m->{file_b}), + $pbat,$self->{r},$self->{pool}); + print "\t$m->{chg}\t$m->{file_b}\n" unless $::_q; + $self->apply_manualprops($m->{file_b}, $fbat); + $self->chg_file($fbat, $m); + $self->close_file($fbat,undef,$self->{pool}); +} + +sub T { + my ($self, $m, $deletions) = @_; + + # Work around subversion issue 4091: toggling the "is a + # symlink" property requires removing and re-adding a + # file or else "svn up" on affected clients trips an + # assertion and aborts. + if (($m->{mode_b} =~ /^120/ && $m->{mode_a} !~ /^120/) || + ($m->{mode_b} !~ /^120/ && $m->{mode_a} =~ /^120/)) { + $self->D({ + mode_a => $m->{mode_a}, mode_b => '000000', + sha1_a => $m->{sha1_a}, sha1_b => '0' x 40, + chg => 'D', file_b => $m->{file_b} + }, $deletions); + $self->A({ + mode_a => '000000', mode_b => $m->{mode_b}, + sha1_a => '0' x 40, sha1_b => $m->{sha1_b}, + chg => 'A', file_b => $m->{file_b} + }, $deletions); + return; + } + + $self->M($m, $deletions); +} + +sub change_file_prop { + my ($self, $fbat, $pname, $pval) = @_; + $self->SUPER::change_file_prop($fbat, $pname, $pval, $self->{pool}); +} + +sub change_dir_prop { + my ($self, $pbat, $pname, $pval) = @_; + $self->SUPER::change_dir_prop($pbat, $pname, $pval, $self->{pool}); +} + +sub _chg_file_get_blob ($$$$) { + my ($self, $fbat, $m, $which) = @_; + my $fh = $::_repository->temp_acquire("git_blob_$which"); + if ($m->{"mode_$which"} =~ /^120/) { + print $fh 'link ' or croak $!; + $self->change_file_prop($fbat,'svn:special','*'); + } elsif ($m->{mode_a} =~ /^120/ && $m->{"mode_$which"} !~ /^120/) { + $self->change_file_prop($fbat,'svn:special',undef); + } + my $blob = $m->{"sha1_$which"}; + return ($fh,) if ($blob =~ /^0{40}$/); + my $size = $::_repository->cat_blob($blob, $fh); + croak "Failed to read object $blob" if ($size < 0); + $fh->flush == 0 or croak $!; + seek $fh, 0, 0 or croak $!; + + my $exp = ::md5sum($fh); + seek $fh, 0, 0 or croak $!; + return ($fh, $exp); +} + +sub chg_file { + my ($self, $fbat, $m) = @_; + if ($m->{mode_b} =~ /755$/ && $m->{mode_a} !~ /755$/) { + $self->change_file_prop($fbat,'svn:executable','*'); + } elsif ($m->{mode_b} !~ /755$/ && $m->{mode_a} =~ /755$/) { + $self->change_file_prop($fbat,'svn:executable',undef); + } + my ($fh_a, $exp_a) = _chg_file_get_blob $self, $fbat, $m, 'a'; + my ($fh_b, $exp_b) = _chg_file_get_blob $self, $fbat, $m, 'b'; + my $pool = SVN::Pool->new; + my $atd = $self->apply_textdelta($fbat, $exp_a, $pool); + if (-s $fh_a) { + my $txstream = SVN::TxDelta::new ($fh_a, $fh_b, $pool); + my $res = SVN::TxDelta::send_txstream($txstream, @$atd, $pool); + if (defined $res) { + die "Unexpected result from send_txstream: $res\n", + "(SVN::Core::VERSION: $SVN::Core::VERSION)\n"; + } + } else { + my $got = SVN::TxDelta::send_stream($fh_b, @$atd, $pool); + die "Checksum mismatch\nexpected: $exp_b\ngot: $got\n" + if ($got ne $exp_b); + } + Git::temp_release($fh_b, 1); + Git::temp_release($fh_a, 1); + $pool->clear; +} + +sub D { + my ($self, $m, $deletions) = @_; + my ($dir, $file) = split_path($m->{file_b}); + my $pbat = $self->ensure_path($dir, $deletions); + print "\tD\t$m->{file_b}\n" unless $::_q; + $self->delete_entry($m->{file_b}, $pbat); +} + +sub close_edit { + my ($self) = @_; + my ($p,$bat) = ($self->{pool}, $self->{bat}); + foreach (sort { $b =~ tr#/#/# <=> $a =~ tr#/#/# } keys %$bat) { + next if $_ eq ''; + $self->close_directory($bat->{$_}, $p); + } + $self->close_directory($bat->{''}, $p); + $self->SUPER::close_edit($p); + $p->clear; +} + +sub abort_edit { + my ($self) = @_; + $self->SUPER::abort_edit($self->{pool}); +} + +sub DESTROY { + my $self = shift; + $self->SUPER::DESTROY(@_); + $self->{pool}->clear; +} + +# this drives the editor +sub apply_diff { + my ($self) = @_; + my $mods = $self->{mods}; + my %o = ( D => 0, C => 1, R => 2, A => 3, M => 4, T => 5 ); + my %deletions; + + foreach my $m (@$mods) { + if ($m->{chg} eq "D") { + $deletions{$m->{file_b}} = 1; + } + } + + foreach my $m (sort { $o{$a->{chg}} <=> $o{$b->{chg}} } @$mods) { + my $f = $m->{chg}; + if (defined $o{$f}) { + $self->$f($m, \%deletions); + } else { + fatal("Invalid change type: $f"); + } + } + + if (defined($self->{mergeinfo})) { + $self->change_dir_prop($self->{bat}{''}, "svn:mergeinfo", + $self->{mergeinfo}); + } + $self->rmdirs if $_rmdir; + if (@$mods == 0 && !defined($self->{mergeinfo})) { + $self->abort_edit; + } else { + $self->close_edit; + } + return scalar @$mods; +} + +1; +__END__ + +=head1 NAME + +Git::SVN::Editor - commit driver for "git svn set-tree" and dcommit + +=head1 SYNOPSIS + + use Git::SVN::Editor; + use Git::SVN::Ra; + + my $ra = Git::SVN::Ra->new($url); + my %opts = ( + r => 19, + log => "log message", + ra => $ra, + config => SVN::Core::config_get_config($svn_config_dir), + tree_a => "$commit^", + tree_b => "$commit", + editor_cb => sub { print "Committed r$_[0]\n"; }, + mergeinfo => "/branches/foo:1-10", + svn_path => "trunk" + ); + Git::SVN::Editor->new(\%opts)->apply_diff or print "No changes\n"; + + my $re = Git::SVN::Editor::glob2pat("trunk/*"); + if ($branchname =~ /$re/) { + print "matched!\n"; + } + +=head1 DESCRIPTION + +This module is an implementation detail of the "git svn" command. +Do not use it unless you are developing git-svn. + +This module adapts the C object returned by +C and drives it to convey the +difference between two git tree objects to a remote Subversion +repository. + +The interface will change as git-svn evolves. + +=head1 DEPENDENCIES + +Subversion perl bindings, +the core L module, +and git's L helper module. + +C has not been tested using callers other than +B itself. + +=head1 SEE ALSO + +L, +L. + +=head1 INCOMPATIBILITIES + +None reported. + +=head1 BUGS + +None. diff --git a/perl/Git/SVN/Fetcher.pm b/perl/Git/SVN/Fetcher.pm new file mode 100644 index 000000000000..64e900a0e910 --- /dev/null +++ b/perl/Git/SVN/Fetcher.pm @@ -0,0 +1,622 @@ +package Git::SVN::Fetcher; +use vars qw/@ISA $_ignore_regex $_include_regex $_preserve_empty_dirs + $_placeholder_filename @deleted_gpath %added_placeholder + $repo_id/; +use strict; +use warnings; +use SVN::Delta; +use Carp qw/croak/; +use File::Basename qw/dirname/; +use Git qw/command command_oneline command_noisy command_output_pipe + command_input_pipe command_close_pipe + command_bidi_pipe command_close_bidi_pipe + get_record/; +BEGIN { + @ISA = qw(SVN::Delta::Editor); +} + +# file baton members: path, mode_a, mode_b, pool, fh, blob, base +sub new { + my ($class, $git_svn, $switch_path) = @_; + my $self = SVN::Delta::Editor->new; + bless $self, $class; + if (exists $git_svn->{last_commit}) { + $self->{c} = $git_svn->{last_commit}; + $self->{empty_symlinks} = + _mark_empty_symlinks($git_svn, $switch_path); + } + + # some options are read globally, but can be overridden locally + # per [svn-remote "..."] section. Command-line options will *NOT* + # override options set in an [svn-remote "..."] section + $repo_id = $git_svn->{repo_id}; + my $k = "svn-remote.$repo_id.ignore-paths"; + my $v = eval { command_oneline('config', '--get', $k) }; + $self->{ignore_regex} = $v; + + $k = "svn-remote.$repo_id.include-paths"; + $v = eval { command_oneline('config', '--get', $k) }; + $self->{include_regex} = $v; + + $k = "svn-remote.$repo_id.preserve-empty-dirs"; + $v = eval { command_oneline('config', '--get', '--bool', $k) }; + if ($v && $v eq 'true') { + $_preserve_empty_dirs = 1; + $k = "svn-remote.$repo_id.placeholder-filename"; + $v = eval { command_oneline('config', '--get', $k) }; + $_placeholder_filename = $v; + } + + # Load the list of placeholder files added during previous invocations. + $k = "svn-remote.$repo_id.added-placeholder"; + $v = eval { command_oneline('config', '--get-all', $k) }; + if ($_preserve_empty_dirs && $v) { + # command() prints errors to stderr, so we only call it if + # command_oneline() succeeded. + my @v = command('config', '--get-all', $k); + $added_placeholder{ dirname($_) } = $_ foreach @v; + } + + $self->{empty} = {}; + $self->{dir_prop} = {}; + $self->{file_prop} = {}; + $self->{absent_dir} = {}; + $self->{absent_file} = {}; + require Git::IndexInfo; + $self->{gii} = $git_svn->tmp_index_do(sub { Git::IndexInfo->new }); + $self->{pathnameencoding} = Git::config('svn.pathnameencoding'); + $self; +} + +# this uses the Ra object, so it must be called before do_{switch,update}, +# not inside them (when the Git::SVN::Fetcher object is passed) to +# do_{switch,update} +sub _mark_empty_symlinks { + my ($git_svn, $switch_path) = @_; + my $bool = Git::config_bool('svn.brokenSymlinkWorkaround'); + return {} if (!defined($bool)) || (defined($bool) && ! $bool); + + my %ret; + my ($rev, $cmt) = $git_svn->last_rev_commit; + return {} unless ($rev && $cmt); + + # allow the warning to be printed for each revision we fetch to + # ensure the user sees it. The user can also disable the workaround + # on the repository even while git svn is running and the next + # revision fetched will skip this expensive function. + my $printed_warning; + chomp(my $empty_blob = `git hash-object -t blob --stdin < /dev/null`); + my ($ls, $ctx) = command_output_pipe(qw/ls-tree -r -z/, $cmt); + my $pfx = defined($switch_path) ? $switch_path : $git_svn->path; + $pfx .= '/' if length($pfx); + while (defined($_ = get_record($ls, "\0"))) { + s/\A100644 blob $empty_blob\t//o or next; + unless ($printed_warning) { + print STDERR "Scanning for empty symlinks, ", + "this may take a while if you have ", + "many empty files\n", + "You may disable this with `", + "git config svn.brokenSymlinkWorkaround ", + "false'.\n", + "This may be done in a different ", + "terminal without restarting ", + "git svn\n"; + $printed_warning = 1; + } + my $path = $_; + my (undef, $props) = + $git_svn->ra->get_file($pfx.$path, $rev, undef); + if ($props->{'svn:special'}) { + $ret{$path} = 1; + } + } + command_close_pipe($ls, $ctx); + \%ret; +} + +# returns true if a given path is inside a ".git" directory +sub in_dot_git { + $_[0] =~ m{(?:^|/)\.git(?:/|$)}; +} + +# return value: 0 -- don't ignore, 1 -- ignore +# This will also check whether the path is explicitly included +sub is_path_ignored { + my ($self, $path) = @_; + return 1 if in_dot_git($path); + return 1 if defined($self->{ignore_regex}) && + $path =~ m!$self->{ignore_regex}!; + return 0 if defined($self->{include_regex}) && + $path =~ m!$self->{include_regex}!; + return 0 if defined($_include_regex) && + $path =~ m!$_include_regex!; + return 1 if defined($self->{include_regex}); + return 1 if defined($_include_regex); + return 0 unless defined($_ignore_regex); + return 1 if $path =~ m!$_ignore_regex!o; + return 0; +} + +sub set_path_strip { + my ($self, $path) = @_; + $self->{path_strip} = qr/^\Q$path\E(\/|$)/ if length $path; +} + +sub open_root { + { path => '' }; +} + +sub open_directory { + my ($self, $path, $pb, $rev) = @_; + { path => $path }; +} + +sub git_path { + my ($self, $path) = @_; + if (my $enc = $self->{pathnameencoding}) { + require Encode; + Encode::from_to($path, 'UTF-8', $enc); + } + if ($self->{path_strip}) { + $path =~ s!$self->{path_strip}!! or + die "Failed to strip path '$path' ($self->{path_strip})\n"; + } + $path; +} + +sub delete_entry { + my ($self, $path, $rev, $pb) = @_; + return undef if $self->is_path_ignored($path); + + my $gpath = $self->git_path($path); + return undef if ($gpath eq ''); + + # remove entire directories. + my ($tree) = (command('ls-tree', '-z', $self->{c}, "./$gpath") + =~ /\A040000 tree ([a-f\d]{40})\t\Q$gpath\E\0/); + if ($tree) { + my ($ls, $ctx) = command_output_pipe(qw/ls-tree + -r --name-only -z/, + $tree); + while (defined($_ = get_record($ls, "\0"))) { + my $rmpath = "$gpath/$_"; + $self->{gii}->remove($rmpath); + print "\tD\t$rmpath\n" unless $::_q; + } + print "\tD\t$gpath/\n" unless $::_q; + command_close_pipe($ls, $ctx); + } else { + $self->{gii}->remove($gpath); + print "\tD\t$gpath\n" unless $::_q; + } + # Don't add to @deleted_gpath if we're deleting a placeholder file. + push @deleted_gpath, $gpath unless $added_placeholder{dirname($path)}; + $self->{empty}->{$path} = 0; + undef; +} + +sub open_file { + my ($self, $path, $pb, $rev) = @_; + my ($mode, $blob); + + goto out if $self->is_path_ignored($path); + + my $gpath = $self->git_path($path); + ($mode, $blob) = (command('ls-tree', '-z', $self->{c}, "./$gpath") + =~ /\A(\d{6}) blob ([a-f\d]{40})\t\Q$gpath\E\0/); + unless (defined $mode && defined $blob) { + die "$path was not found in commit $self->{c} (r$rev)\n"; + } + if ($mode eq '100644' && $self->{empty_symlinks}->{$path}) { + $mode = '120000'; + } +out: + { path => $path, mode_a => $mode, mode_b => $mode, blob => $blob, + pool => SVN::Pool->new, action => 'M' }; +} + +sub add_file { + my ($self, $path, $pb, $cp_path, $cp_rev) = @_; + my $mode; + + if (!$self->is_path_ignored($path)) { + my ($dir, $file) = ($path =~ m#^(.*?)/?([^/]+)$#); + delete $self->{empty}->{$dir}; + $mode = '100644'; + + if ($added_placeholder{$dir}) { + # Remove our placeholder file, if we created one. + delete_entry($self, $added_placeholder{$dir}) + unless $path eq $added_placeholder{$dir}; + delete $added_placeholder{$dir} + } + } + + { path => $path, mode_a => $mode, mode_b => $mode, + pool => SVN::Pool->new, action => 'A' }; +} + +sub add_directory { + my ($self, $path, $cp_path, $cp_rev) = @_; + goto out if $self->is_path_ignored($path); + my $gpath = $self->git_path($path); + if ($gpath eq '') { + my ($ls, $ctx) = command_output_pipe(qw/ls-tree + -r --name-only -z/, + $self->{c}); + while (defined($_ = get_record($ls, "\0"))) { + $self->{gii}->remove($_); + print "\tD\t$_\n" unless $::_q; + push @deleted_gpath, $gpath; + } + command_close_pipe($ls, $ctx); + $self->{empty}->{$path} = 0; + } + my ($dir, $file) = ($path =~ m#^(.*?)/?([^/]+)$#); + delete $self->{empty}->{$dir}; + $self->{empty}->{$path} = 1; + + if ($added_placeholder{$dir}) { + # Remove our placeholder file, if we created one. + delete_entry($self, $added_placeholder{$dir}); + delete $added_placeholder{$dir} + } + +out: + { path => $path }; +} + +sub change_dir_prop { + my ($self, $db, $prop, $value) = @_; + return undef if $self->is_path_ignored($db->{path}); + $self->{dir_prop}->{$db->{path}} ||= {}; + $self->{dir_prop}->{$db->{path}}->{$prop} = $value; + undef; +} + +sub absent_directory { + my ($self, $path, $pb) = @_; + return undef if $self->is_path_ignored($path); + $self->{absent_dir}->{$pb->{path}} ||= []; + push @{$self->{absent_dir}->{$pb->{path}}}, $path; + undef; +} + +sub absent_file { + my ($self, $path, $pb) = @_; + return undef if $self->is_path_ignored($path); + $self->{absent_file}->{$pb->{path}} ||= []; + push @{$self->{absent_file}->{$pb->{path}}}, $path; + undef; +} + +sub change_file_prop { + my ($self, $fb, $prop, $value) = @_; + return undef if $self->is_path_ignored($fb->{path}); + if ($prop eq 'svn:executable') { + if ($fb->{mode_b} != 120000) { + $fb->{mode_b} = defined $value ? 100755 : 100644; + } + } elsif ($prop eq 'svn:special') { + $fb->{mode_b} = defined $value ? 120000 : 100644; + } else { + $self->{file_prop}->{$fb->{path}} ||= {}; + $self->{file_prop}->{$fb->{path}}->{$prop} = $value; + } + undef; +} + +sub apply_textdelta { + my ($self, $fb, $exp) = @_; + return undef if $self->is_path_ignored($fb->{path}); + my $suffix = 0; + ++$suffix while $::_repository->temp_is_locked("svn_delta_${$}_$suffix"); + my $fh = $::_repository->temp_acquire("svn_delta_${$}_$suffix"); + # $fh gets auto-closed() by SVN::TxDelta::apply(), + # (but $base does not,) so dup() it for reading in close_file + open my $dup, '<&', $fh or croak $!; + my $base = $::_repository->temp_acquire("git_blob_${$}_$suffix"); + # close_file may call temp_acquire on 'svn_hash', but because of the + # call chain, if the temp_acquire call from close_file ends up being the + # call that first creates the 'svn_hash' temp file, then the FileHandle + # that's created as a result will end up in an SVN::Pool that we clear + # in SVN::Ra::gs_fetch_loop_common. Avoid that by making sure the + # 'svn_hash' FileHandle is already created before close_file is called. + my $tmp_fh = $::_repository->temp_acquire('svn_hash'); + $::_repository->temp_release($tmp_fh, 1); + + if ($fb->{blob}) { + my ($base_is_link, $size); + + if ($fb->{mode_a} eq '120000' && + ! $self->{empty_symlinks}->{$fb->{path}}) { + print $base 'link ' or die "print $!\n"; + $base_is_link = 1; + } + retry: + $size = $::_repository->cat_blob($fb->{blob}, $base); + die "Failed to read object $fb->{blob}" if ($size < 0); + + if (defined $exp) { + seek $base, 0, 0 or croak $!; + my $got = ::md5sum($base); + if ($got ne $exp) { + my $err = "Checksum mismatch: ". + "$fb->{path} $fb->{blob}\n" . + "expected: $exp\n" . + " got: $got\n"; + if ($base_is_link) { + warn $err, + "Retrying... (possibly ", + "a bad symlink from SVN)\n"; + $::_repository->temp_reset($base); + $base_is_link = 0; + goto retry; + } + die $err; + } + } + } + seek $base, 0, 0 or croak $!; + $fb->{fh} = $fh; + $fb->{base} = $base; + [ SVN::TxDelta::apply($base, $dup, undef, $fb->{path}, $fb->{pool}) ]; +} + +sub close_file { + my ($self, $fb, $exp) = @_; + return undef if $self->is_path_ignored($fb->{path}); + + my $hash; + my $path = $self->git_path($fb->{path}); + if (my $fh = $fb->{fh}) { + if (defined $exp) { + seek($fh, 0, 0) or croak $!; + my $got = ::md5sum($fh); + if ($got ne $exp) { + die "Checksum mismatch: $path\n", + "expected: $exp\n got: $got\n"; + } + } + if ($fb->{mode_b} == 120000) { + sysseek($fh, 0, 0) or croak $!; + my $rd = sysread($fh, my $buf, 5); + + if (!defined $rd) { + croak "sysread: $!\n"; + } elsif ($rd == 0) { + warn "$path has mode 120000", + " but it points to nothing\n", + "converting to an empty file with mode", + " 100644\n"; + $fb->{mode_b} = '100644'; + } elsif ($buf ne 'link ') { + warn "$path has mode 120000", + " but is not a link\n"; + } else { + my $tmp_fh = $::_repository->temp_acquire( + 'svn_hash'); + my $res; + while ($res = sysread($fh, my $str, 1024)) { + my $out = syswrite($tmp_fh, $str, $res); + defined($out) && $out == $res + or croak("write ", + Git::temp_path($tmp_fh), + ": $!\n"); + } + defined $res or croak $!; + + ($fh, $tmp_fh) = ($tmp_fh, $fh); + Git::temp_release($tmp_fh, 1); + } + } + + $hash = $::_repository->hash_and_insert_object( + Git::temp_path($fh)); + $hash =~ /^[a-f\d]{40}$/ or die "not a sha1: $hash\n"; + + Git::temp_release($fb->{base}, 1); + Git::temp_release($fh, 1); + } else { + $hash = $fb->{blob} or die "no blob information\n"; + } + $fb->{pool}->clear; + $self->{gii}->update($fb->{mode_b}, $hash, $path) or croak $!; + print "\t$fb->{action}\t$path\n" if $fb->{action} && ! $::_q; + undef; +} + +sub abort_edit { + my $self = shift; + $self->{nr} = $self->{gii}->{nr}; + delete $self->{gii}; + $self->SUPER::abort_edit(@_); +} + +sub close_edit { + my $self = shift; + + if ($_preserve_empty_dirs) { + my @empty_dirs; + + # Any entry flagged as empty that also has an associated + # dir_prop represents a newly created empty directory. + foreach my $i (keys %{$self->{empty}}) { + push @empty_dirs, $i if exists $self->{dir_prop}->{$i}; + } + + # Search for directories that have become empty due subsequent + # file deletes. + push @empty_dirs, $self->find_empty_directories(); + + # Finally, add a placeholder file to each empty directory. + $self->add_placeholder_file($_) foreach (@empty_dirs); + + $self->stash_placeholder_list(); + } + + $self->{git_commit_ok} = 1; + $self->{nr} = $self->{gii}->{nr}; + delete $self->{gii}; + $self->SUPER::close_edit(@_); +} + +sub find_empty_directories { + my ($self) = @_; + my @empty_dirs; + my %dirs = map { dirname($_) => 1 } @deleted_gpath; + + foreach my $dir (sort keys %dirs) { + next if $dir eq "."; + + # If there have been any additions to this directory, there is + # no reason to check if it is empty. + my $skip_added = 0; + foreach my $t (qw/dir_prop file_prop/) { + foreach my $path (keys %{ $self->{$t} }) { + if (exists $self->{$t}->{dirname($path)}) { + $skip_added = 1; + last; + } + } + last if $skip_added; + } + next if $skip_added; + + # Use `git ls-tree` to get the filenames of this directory + # that existed prior to this particular commit. + my $ls = command('ls-tree', '-z', '--name-only', + $self->{c}, "$dir/"); + my %files = map { $_ => 1 } split(/\0/, $ls); + + # Remove the filenames that were deleted during this commit. + delete $files{$_} foreach (@deleted_gpath); + + # Report the directory if there are no filenames left. + push @empty_dirs, $dir unless (scalar %files); + } + @empty_dirs; +} + +sub add_placeholder_file { + my ($self, $dir) = @_; + my $path = "$dir/$_placeholder_filename"; + my $gpath = $self->git_path($path); + + my $fh = $::_repository->temp_acquire($gpath); + my $hash = $::_repository->hash_and_insert_object(Git::temp_path($fh)); + Git::temp_release($fh, 1); + $self->{gii}->update('100644', $hash, $gpath) or croak $!; + + # The directory should no longer be considered empty. + delete $self->{empty}->{$dir} if exists $self->{empty}->{$dir}; + + # Keep track of any placeholder files we create. + $added_placeholder{$dir} = $path; +} + +sub stash_placeholder_list { + my ($self) = @_; + my $k = "svn-remote.$repo_id.added-placeholder"; + my $v = eval { command_oneline('config', '--get-all', $k) }; + command_noisy('config', '--unset-all', $k) if $v; + foreach (values %added_placeholder) { + command_noisy('config', '--add', $k, $_); + } +} + +1; +__END__ + +=head1 NAME + +Git::SVN::Fetcher - tree delta consumer for "git svn fetch" + +=head1 SYNOPSIS + + use SVN::Core; + use SVN::Ra; + use Git::SVN; + use Git::SVN::Fetcher; + use Git; + + my $gs = Git::SVN->find_by_url($url); + my $ra = SVN::Ra->new(url => $url); + my $editor = Git::SVN::Fetcher->new($gs); + my $reporter = $ra->do_update($SVN::Core::INVALID_REVNUM, '', + 1, $editor); + $reporter->set_path('', $old_rev, 0); + $reporter->finish_report; + my $tree = $gs->tmp_index_do(sub { command_oneline('write-tree') }); + + foreach my $path (keys %{$editor->{dir_prop}) { + my $props = $editor->{dir_prop}{$path}; + foreach my $prop (keys %$props) { + print "property $prop at $path changed to $props->{$prop}\n"; + } + } + foreach my $path (keys %{$editor->{empty}) { + my $action = $editor->{empty}{$path} ? 'added' : 'removed'; + print "empty directory $path $action\n"; + } + foreach my $path (keys %{$editor->{file_prop}) { ... } + foreach my $parent (keys %{$editor->{absent_dir}}) { + my @children = @{$editor->{abstent_dir}{$parent}}; + print "cannot fetch directory $parent/$_: not authorized?\n" + foreach @children; + } + foreach my $parent (keys %{$editor->{absent_file}) { ... } + +=head1 DESCRIPTION + +This is a subclass of C, which means it implements +callbacks to act as a consumer of Subversion tree deltas. This +particular implementation of those callbacks is meant to store +information about the resulting content which B could +use to populate new commits and new entries for F. +More specifically: + +=over + +=item * Additions, removals, and modifications of files are propagated +to git-svn's index file F<$GIT_DIR/svn/$refname/index> using +B. + +=item * Changes in Subversion path properties are recorded in the +C and C fields (which are hashes). + +=item * Addition and removal of empty directories are indicated by +entries with value 1 and 0 respectively in the C hash. + +=item * Paths that are present but cannot be conveyed (presumably due +to permissions) are recorded in the C and +C hashes. For each key, the corresponding value is +a list of paths under that directory that were present but +could not be conveyed. + +=back + +The interface is unstable. Do not use this module unless you are +developing git-svn. + +=head1 DEPENDENCIES + +L from the Subversion perl bindings, +the core L and L modules, +and git's L helper module. + +C has not been tested using callers other than +B itself. + +=head1 SEE ALSO + +L, +L. + +=head1 INCOMPATIBILITIES + +None reported. + +=head1 BUGS + +None. diff --git a/perl/Git/SVN/GlobSpec.pm b/perl/Git/SVN/GlobSpec.pm new file mode 100644 index 000000000000..a0a8d1762150 --- /dev/null +++ b/perl/Git/SVN/GlobSpec.pm @@ -0,0 +1,65 @@ +package Git::SVN::GlobSpec; +use strict; +use warnings; + +sub new { + my ($class, $glob, $pattern_ok) = @_; + my $re = $glob; + $re =~ s!/+$!!g; # no need for trailing slashes + my (@left, @right, @patterns); + my $state = "left"; + my $die_msg = "Only one set of wildcards " . + "(e.g. '*' or '*/*/*') is supported: $glob\n"; + for my $part (split(m|/|, $glob)) { + if ($pattern_ok && $part =~ /[{}]/ && + $part !~ /^\{[^{}]+\}/) { + die "Invalid pattern in '$glob': $part\n"; + } + my $nstars = $part =~ tr/*//; + if ($nstars > 1) { + die "Only one '*' is allowed in a pattern: '$part'\n"; + } + if ($part =~ /(.*)\*(.*)/) { + die $die_msg if $state eq "right"; + my ($l, $r) = ($1, $2); + $state = "pattern"; + my $pat = quotemeta($l) . '[^/]*' . quotemeta($r); + push(@patterns, $pat); + } elsif ($pattern_ok && $part =~ /^\{(.*)\}$/) { + die $die_msg if $state eq "right"; + $state = "pattern"; + my $p = quotemeta($1); + $p =~ s/\\,/|/g; + push(@patterns, "(?:$p)"); + } else { + if ($state eq "left") { + push(@left, $part); + } else { + push(@right, $part); + $state = "right"; + } + } + } + my $depth = @patterns; + if ($depth == 0) { + die "One '*' is needed in glob: '$glob'\n"; + } + my $left = join('/', @left); + my $right = join('/', @right); + $re = join('/', @patterns); + $re = join('\/', + grep(length, quotemeta($left), + "($re)(?=/|\$)", + quotemeta($right))); + my $left_re = qr/^\/\Q$left\E(\/|$)/; + bless { left => $left, right => $right, left_regex => $left_re, + regex => qr/$re/, glob => $glob, depth => $depth }, $class; +} + +sub full_path { + my ($self, $path) = @_; + return (length $self->{left} ? "$self->{left}/" : '') . + $path . (length $self->{right} ? "/$self->{right}" : ''); +} + +1; diff --git a/perl/Git/SVN/Log.pm b/perl/Git/SVN/Log.pm new file mode 100644 index 000000000000..664105357c00 --- /dev/null +++ b/perl/Git/SVN/Log.pm @@ -0,0 +1,400 @@ +package Git::SVN::Log; +use strict; +use warnings; +use Git::SVN::Utils qw(fatal); +use Git qw(command + command_oneline + command_output_pipe + command_close_pipe + get_tz_offset); +use POSIX qw/strftime/; +use constant commit_log_separator => ('-' x 72) . "\n"; +use vars qw/$TZ $limit $color $pager $non_recursive $verbose $oneline + %rusers $show_commit $incremental/; + +# Option set in git-svn +our $_git_format; + +sub cmt_showable { + my ($c) = @_; + return 1 if defined $c->{r}; + + # big commit message got truncated by the 16k pretty buffer in rev-list + if ($c->{l} && $c->{l}->[-1] eq "...\n" && + $c->{a_raw} =~ /\@([a-f\d\-]+)>$/) { + @{$c->{l}} = (); + my @log = command(qw/cat-file commit/, $c->{c}); + + # shift off the headers + shift @log while ($log[0] ne ''); + shift @log; + + # TODO: make $c->{l} not have a trailing newline in the future + @{$c->{l}} = map { "$_\n" } grep !/^git-svn-id: /, @log; + + (undef, $c->{r}, undef) = ::extract_metadata( + (grep(/^git-svn-id: /, @log))[-1]); + } + return defined $c->{r}; +} + +sub log_use_color { + return $color || Git->repository->get_colorbool('color.diff'); +} + +sub git_svn_log_cmd { + my ($r_min, $r_max, @args) = @_; + my $head = 'HEAD'; + my (@files, @log_opts); + foreach my $x (@args) { + if ($x eq '--' || @files) { + push @files, $x; + } else { + if (::verify_ref("$x^0")) { + $head = $x; + } else { + push @log_opts, $x; + } + } + } + + my ($url, $rev, $uuid, $gs) = ::working_head_info($head); + + require Git::SVN; + $gs ||= Git::SVN->_new; + my @cmd = (qw/log --abbrev-commit --pretty=raw --default/, + $gs->refname); + push @cmd, '-r' unless $non_recursive; + push @cmd, qw/--raw --name-status/ if $verbose; + push @cmd, '--color' if log_use_color(); + push @cmd, @log_opts; + if (defined $r_max && $r_max == $r_min) { + push @cmd, '--max-count=1'; + if (my $c = $gs->rev_map_get($r_max)) { + push @cmd, $c; + } + } elsif (defined $r_max) { + if ($r_max < $r_min) { + ($r_min, $r_max) = ($r_max, $r_min); + } + my (undef, $c_max) = $gs->find_rev_before($r_max, 1, $r_min); + my (undef, $c_min) = $gs->find_rev_after($r_min, 1, $r_max); + # If there are no commits in the range, both $c_max and $c_min + # will be undefined. If there is at least 1 commit in the + # range, both will be defined. + return () if !defined $c_min || !defined $c_max; + if ($c_min eq $c_max) { + push @cmd, '--max-count=1', $c_min; + } else { + push @cmd, '--boundary', "$c_min..$c_max"; + } + } + return (@cmd, @files); +} + +# adapted from pager.c +sub config_pager { + if (! -t *STDOUT) { + $ENV{GIT_PAGER_IN_USE} = 'false'; + $pager = undef; + return; + } + chomp($pager = command_oneline(qw(var GIT_PAGER))); + if ($pager eq 'cat') { + $pager = undef; + } + $ENV{GIT_PAGER_IN_USE} = defined($pager); +} + +sub run_pager { + return unless defined $pager; + pipe my ($rfd, $wfd) or return; + defined(my $pid = fork) or fatal "Can't fork: $!"; + if (!$pid) { + open STDOUT, '>&', $wfd or + fatal "Can't redirect to stdout: $!"; + return; + } + open STDIN, '<&', $rfd or fatal "Can't redirect stdin: $!"; + $ENV{LESS} ||= 'FRX'; + $ENV{LV} ||= '-c'; + exec $pager or fatal "Can't run pager: $! ($pager)"; +} + +sub format_svn_date { + my $t = shift || time; + require Git::SVN; + my $gmoff = get_tz_offset($t); + return strftime("%Y-%m-%d %H:%M:%S $gmoff (%a, %d %b %Y)", localtime($t)); +} + +sub parse_git_date { + my ($t, $tz) = @_; + # Date::Parse isn't in the standard Perl distro :( + if ($tz =~ s/^\+//) { + $t += tz_to_s_offset($tz); + } elsif ($tz =~ s/^\-//) { + $t -= tz_to_s_offset($tz); + } + return $t; +} + +sub set_local_timezone { + if (defined $TZ) { + $ENV{TZ} = $TZ; + } else { + delete $ENV{TZ}; + } +} + +sub tz_to_s_offset { + my ($tz) = @_; + $tz =~ s/(\d\d)$//; + return ($1 * 60) + ($tz * 3600); +} + +sub get_author_info { + my ($dest, $author, $t, $tz) = @_; + $author =~ s/(?:^\s*|\s*$)//g; + $dest->{a_raw} = $author; + my $au; + if ($::_authors) { + $au = $rusers{$author} || undef; + } + if (!$au) { + ($au) = ($author =~ /<([^>]+)\@[^>]+>$/); + } + $dest->{t} = $t; + $dest->{tz} = $tz; + $dest->{a} = $au; + $dest->{t_utc} = parse_git_date($t, $tz); +} + +sub process_commit { + my ($c, $r_min, $r_max, $defer) = @_; + if (defined $r_min && defined $r_max) { + if ($r_min == $c->{r} && $r_min == $r_max) { + show_commit($c); + return 0; + } + return 1 if $r_min == $r_max; + if ($r_min < $r_max) { + # we need to reverse the print order + return 0 if (defined $limit && --$limit < 0); + push @$defer, $c; + return 1; + } + if ($r_min != $r_max) { + return 1 if ($r_min < $c->{r}); + return 1 if ($r_max > $c->{r}); + } + } + return 0 if (defined $limit && --$limit < 0); + show_commit($c); + return 1; +} + +my $l_fmt; +sub show_commit { + my $c = shift; + if ($oneline) { + my $x = "\n"; + if (my $l = $c->{l}) { + while ($l->[0] =~ /^\s*$/) { shift @$l } + $x = $l->[0]; + } + $l_fmt ||= 'A' . length($c->{r}); + print 'r',pack($l_fmt, $c->{r}),' | '; + print "$c->{c} | " if $show_commit; + print $x; + } else { + show_commit_normal($c); + } +} + +sub show_commit_changed_paths { + my ($c) = @_; + return unless $c->{changed}; + print "Changed paths:\n", @{$c->{changed}}; +} + +sub show_commit_normal { + my ($c) = @_; + print commit_log_separator, "r$c->{r} | "; + print "$c->{c} | " if $show_commit; + print "$c->{a} | ", format_svn_date($c->{t_utc}), ' | '; + my $nr_line = 0; + + if (my $l = $c->{l}) { + while ($l->[$#$l] eq "\n" && $#$l > 0 + && $l->[($#$l - 1)] eq "\n") { + pop @$l; + } + $nr_line = scalar @$l; + if (!$nr_line) { + print "1 line\n\n\n"; + } else { + if ($nr_line == 1) { + $nr_line = '1 line'; + } else { + $nr_line .= ' lines'; + } + print $nr_line, "\n"; + show_commit_changed_paths($c); + print "\n"; + print $_ foreach @$l; + } + } else { + print "1 line\n"; + show_commit_changed_paths($c); + print "\n"; + + } + foreach my $x (qw/raw stat diff/) { + if ($c->{$x}) { + print "\n"; + print $_ foreach @{$c->{$x}} + } + } +} + +sub cmd_show_log { + my (@args) = @_; + my ($r_min, $r_max); + my $r_last = -1; # prevent dupes + set_local_timezone(); + if (defined $::_revision) { + if ($::_revision =~ /^(\d+):(\d+)$/) { + ($r_min, $r_max) = ($1, $2); + } elsif ($::_revision =~ /^\d+$/) { + $r_min = $r_max = $::_revision; + } else { + fatal "-r$::_revision is not supported, use ", + "standard 'git log' arguments instead"; + } + } + + config_pager(); + @args = git_svn_log_cmd($r_min, $r_max, @args); + if (!@args) { + print commit_log_separator unless $incremental || $oneline; + return; + } + my $log = command_output_pipe(@args); + run_pager(); + my (@k, $c, $d, $stat); + my $esc_color = qr/(?:\033\[(?:(?:\d+;)*\d*)?m)*/; + while (<$log>) { + if (/^${esc_color}commit (?:- )?($::sha1_short)/o) { + my $cmt = $1; + if ($c && cmt_showable($c) && $c->{r} != $r_last) { + $r_last = $c->{r}; + process_commit($c, $r_min, $r_max, \@k) or + goto out; + } + $d = undef; + $c = { c => $cmt }; + } elsif (/^${esc_color}author (.+) (\d+) ([\-\+]?\d+)$/o) { + get_author_info($c, $1, $2, $3); + } elsif (/^${esc_color}(?:tree|parent|committer) /o) { + # ignore + } elsif (/^${esc_color}:\d{6} \d{6} $::sha1_short/o) { + push @{$c->{raw}}, $_; + } elsif (/^${esc_color}[ACRMDT]\t/) { + # we could add $SVN->{svn_path} here, but that requires + # remote access at the moment (repo_path_split)... + s#^(${esc_color})([ACRMDT])\t#$1 $2 #o; + push @{$c->{changed}}, $_; + } elsif (/^${esc_color}diff /o) { + $d = 1; + push @{$c->{diff}}, $_; + } elsif ($d) { + push @{$c->{diff}}, $_; + } elsif (/^\ .+\ \|\s*\d+\ $esc_color[\+\-]* + $esc_color*[\+\-]*$esc_color$/x) { + $stat = 1; + push @{$c->{stat}}, $_; + } elsif ($stat && /^ \d+ files changed, \d+ insertions/) { + push @{$c->{stat}}, $_; + $stat = undef; + } elsif (/^${esc_color} (git-svn-id:.+)$/o) { + ($c->{url}, $c->{r}, undef) = ::extract_metadata($1); + } elsif (s/^${esc_color} //o) { + push @{$c->{l}}, $_; + } + } + if ($c && defined $c->{r} && $c->{r} != $r_last) { + $r_last = $c->{r}; + process_commit($c, $r_min, $r_max, \@k); + } + if (@k) { + ($r_min, $r_max) = ($r_max, $r_min); + process_commit($_, $r_min, $r_max) foreach reverse @k; + } +out: + close $log; + print commit_log_separator unless $incremental || $oneline; +} + +sub cmd_blame { + my $path = pop; + + config_pager(); + run_pager(); + + my ($fh, $ctx, $rev); + + if ($_git_format) { + ($fh, $ctx) = command_output_pipe('blame', @_, $path); + while (my $line = <$fh>) { + if ($line =~ /^\^?([[:xdigit:]]+)\s/) { + # Uncommitted edits show up as a rev ID of + # all zeros, which we can't look up with + # cmt_metadata + if ($1 !~ /^0+$/) { + (undef, $rev, undef) = + ::cmt_metadata($1); + $rev = '0' if (!$rev); + } else { + $rev = '0'; + } + $rev = sprintf('%-10s', $rev); + $line =~ s/^\^?[[:xdigit:]]+(\s)/$rev$1/; + } + print $line; + } + } else { + ($fh, $ctx) = command_output_pipe('blame', '-p', @_, 'HEAD', + '--', $path); + my ($sha1); + my %authors; + my @buffer; + my %dsha; #distinct sha keys + + while (my $line = <$fh>) { + push @buffer, $line; + if ($line =~ /^([[:xdigit:]]{40})\s\d+\s\d+/) { + $dsha{$1} = 1; + } + } + + my $s2r = ::cmt_sha2rev_batch([keys %dsha]); + + foreach my $line (@buffer) { + if ($line =~ /^([[:xdigit:]]{40})\s\d+\s\d+/) { + $rev = $s2r->{$1}; + $rev = '0' if (!$rev) + } + elsif ($line =~ /^author (.*)/) { + $authors{$rev} = $1; + $authors{$rev} =~ s/\s/_/g; + } + elsif ($line =~ /^\t(.*)$/) { + printf("%6s %10s %s\n", $rev, $authors{$rev}, $1); + } + } + } + command_close_pipe($fh, $ctx); +} + +1; diff --git a/perl/Git/SVN/Memoize/YAML.pm b/perl/Git/SVN/Memoize/YAML.pm new file mode 100644 index 000000000000..9676b8f2f735 --- /dev/null +++ b/perl/Git/SVN/Memoize/YAML.pm @@ -0,0 +1,93 @@ +package Git::SVN::Memoize::YAML; +use warnings; +use strict; +use YAML::Any (); + +# based on Memoize::Storable. + +sub TIEHASH { + my $package = shift; + my $filename = shift; + my $truehash = (-e $filename) ? YAML::Any::LoadFile($filename) : {}; + my $self = {FILENAME => $filename, H => $truehash}; + bless $self => $package; +} + +sub STORE { + my $self = shift; + $self->{H}{$_[0]} = $_[1]; +} + +sub FETCH { + my $self = shift; + $self->{H}{$_[0]}; +} + +sub EXISTS { + my $self = shift; + exists $self->{H}{$_[0]}; +} + +sub DESTROY { + my $self = shift; + YAML::Any::DumpFile($self->{FILENAME}, $self->{H}); +} + +sub SCALAR { + my $self = shift; + scalar(%{$self->{H}}); +} + +sub FIRSTKEY { + 'Fake hash from Git::SVN::Memoize::YAML'; +} + +sub NEXTKEY { + undef; +} + +1; +__END__ + +=head1 NAME + +Git::SVN::Memoize::YAML - store Memoized data in YAML format + +=head1 SYNOPSIS + + use Memoize; + use Git::SVN::Memoize::YAML; + + tie my %cache => 'Git::SVN::Memoize::YAML', $filename; + memoize('slow_function', SCALAR_CACHE => [HASH => \%cache]); + slow_function(arguments); + +=head1 DESCRIPTION + +This module provides a class that can be used to tie a hash to a +YAML file. The file is read when the hash is initialized and +rewritten when the hash is destroyed. + +The intent is to allow L to back its cache with a file in +YAML format, just like L allows L to +back its cache with a file in Storable format. Unlike the Storable +format, the YAML format is platform-independent and fairly stable. + +Carps on error. + +=head1 DIAGNOSTICS + +See L. + +=head1 DEPENDENCIES + +L from CPAN. + +=head1 INCOMPATIBILITIES + +None reported. + +=head1 BUGS + +The entire cache is read into a Perl hash when loading the file, +so this is not very scalable. diff --git a/perl/Git/SVN/Migration.pm b/perl/Git/SVN/Migration.pm new file mode 100644 index 000000000000..dc90f6a62142 --- /dev/null +++ b/perl/Git/SVN/Migration.pm @@ -0,0 +1,265 @@ +package Git::SVN::Migration; +# these version numbers do NOT correspond to actual version numbers +# of git or git-svn. They are just relative. +# +# v0 layout: .git/$id/info/url, refs/heads/$id-HEAD +# +# v1 layout: .git/$id/info/url, refs/remotes/$id +# +# v2 layout: .git/svn/$id/info/url, refs/remotes/$id +# +# v3 layout: .git/svn/$id, refs/remotes/$id +# - info/url may remain for backwards compatibility +# - this is what we migrate up to this layout automatically, +# - this will be used by git svn init on single branches +# v3.1 layout (auto migrated): +# - .rev_db => .rev_db.$UUID, .rev_db will remain as a symlink +# for backwards compatibility +# +# v4 layout: .git/svn/$repo_id/$id, refs/remotes/$repo_id/$id +# - this is only created for newly multi-init-ed +# repositories. Similar in spirit to the +# --use-separate-remotes option in git-clone (now default) +# - we do not automatically migrate to this (following +# the example set by core git) +# +# v5 layout: .rev_db.$UUID => .rev_map.$UUID +# - newer, more-efficient format that uses 24-bytes per record +# with no filler space. +# - use xxd -c24 < .rev_map.$UUID to view and debug +# - This is a one-way migration, repositories updated to the +# new format will not be able to use old git-svn without +# rebuilding the .rev_db. Rebuilding the rev_db is not +# possible if noMetadata or useSvmProps are set; but should +# be no problem for users that use the (sensible) defaults. +use strict; +use warnings; +use Carp qw/croak/; +use File::Path qw/mkpath/; +use File::Basename qw/dirname basename/; + +our $_minimize; +use Git qw( + command + command_noisy + command_output_pipe + command_close_pipe + command_oneline +); +use Git::SVN; + +sub migrate_from_v0 { + my $git_dir = $ENV{GIT_DIR}; + return undef unless -d $git_dir; + my ($fh, $ctx) = command_output_pipe(qw/rev-parse --symbolic --all/); + my $migrated = 0; + while (<$fh>) { + chomp; + my ($id, $orig_ref) = ($_, $_); + next unless $id =~ s#^refs/heads/(.+)-HEAD$#$1#; + my $info_url = command_oneline(qw(rev-parse --git-path), + "$id/info/url"); + next unless -f $info_url; + my $new_ref = "refs/remotes/$id"; + if (::verify_ref("$new_ref^0")) { + print STDERR "W: $orig_ref is probably an old ", + "branch used by an ancient version of ", + "git-svn.\n", + "However, $new_ref also exists.\n", + "We will not be able ", + "to use this branch until this ", + "ambiguity is resolved.\n"; + next; + } + print STDERR "Migrating from v0 layout...\n" if !$migrated; + print STDERR "Renaming ref: $orig_ref => $new_ref\n"; + command_noisy('update-ref', $new_ref, $orig_ref); + command_noisy('update-ref', '-d', $orig_ref, $orig_ref); + $migrated++; + } + command_close_pipe($fh, $ctx); + print STDERR "Done migrating from v0 layout...\n" if $migrated; + $migrated; +} + +sub migrate_from_v1 { + my $git_dir = $ENV{GIT_DIR}; + my $migrated = 0; + return $migrated unless -d $git_dir; + my $svn_dir = Git::SVN::svn_dir(); + + # just in case somebody used 'svn' as their $id at some point... + return $migrated if -d $svn_dir && ! -f "$svn_dir/info/url"; + + print STDERR "Migrating from a git-svn v1 layout...\n"; + mkpath([$svn_dir]); + print STDERR "Data from a previous version of git-svn exists, but\n\t", + "$svn_dir\n\t(required for this version ", + "($::VERSION) of git-svn) does not exist.\n"; + my ($fh, $ctx) = command_output_pipe(qw/rev-parse --symbolic --all/); + while (<$fh>) { + my $x = $_; + next unless $x =~ s#^refs/remotes/##; + chomp $x; + my $info_url = command_oneline(qw(rev-parse --git-path), + "$x/info/url"); + next unless -f $info_url; + my $u = eval { ::file_to_s($info_url) }; + next unless $u; + my $dn = dirname("$svn_dir/$x"); + mkpath([$dn]) unless -d $dn; + if ($x eq 'svn') { # they used 'svn' as GIT_SVN_ID: + mkpath(["$svn_dir/svn"]); + print STDERR " - $git_dir/$x/info => ", + "$svn_dir/$x/info\n"; + rename "$git_dir/$x/info", "$svn_dir/$x/info" or + croak "$!: $x"; + # don't worry too much about these, they probably + # don't exist with repos this old (save for index, + # and we can easily regenerate that) + foreach my $f (qw/unhandled.log index .rev_db/) { + rename "$git_dir/$x/$f", "$svn_dir/$x/$f"; + } + } else { + print STDERR " - $git_dir/$x => $svn_dir/$x\n"; + rename "$git_dir/$x", "$svn_dir/$x" or croak "$!: $x"; + } + $migrated++; + } + command_close_pipe($fh, $ctx); + print STDERR "Done migrating from a git-svn v1 layout\n"; + $migrated; +} + +sub read_old_urls { + my ($l_map, $pfx, $path) = @_; + my @dir; + foreach (<$path/*>) { + if (-r "$_/info/url") { + $pfx .= '/' if $pfx && $pfx !~ m!/$!; + my $ref_id = $pfx . basename $_; + my $url = ::file_to_s("$_/info/url"); + $l_map->{$ref_id} = $url; + } elsif (-d $_) { + push @dir, $_; + } + } + my $svn_dir = Git::SVN::svn_dir(); + foreach (@dir) { + my $x = $_; + $x =~ s!^\Q$svn_dir\E/!!o; + read_old_urls($l_map, $x, $_); + } +} + +sub migrate_from_v2 { + my @cfg = command(qw/config -l/); + return if grep /^svn-remote\..+\.url=/, @cfg; + my %l_map; + read_old_urls(\%l_map, '', Git::SVN::svn_dir()); + my $migrated = 0; + + require Git::SVN; + foreach my $ref_id (sort keys %l_map) { + eval { Git::SVN->init($l_map{$ref_id}, '', undef, $ref_id) }; + if ($@) { + Git::SVN->init($l_map{$ref_id}, '', $ref_id, $ref_id); + } + $migrated++; + } + $migrated; +} + +sub minimize_connections { + require Git::SVN; + require Git::SVN::Ra; + + my $r = Git::SVN::read_all_remotes(); + my $new_urls = {}; + my $root_repos = {}; + foreach my $repo_id (keys %$r) { + my $url = $r->{$repo_id}->{url} or next; + my $fetch = $r->{$repo_id}->{fetch} or next; + my $ra = Git::SVN::Ra->new($url); + + # skip existing cases where we already connect to the root + if (($ra->url eq $ra->{repos_root}) || + ($ra->{repos_root} eq $repo_id)) { + $root_repos->{$ra->url} = $repo_id; + next; + } + + my $root_ra = Git::SVN::Ra->new($ra->{repos_root}); + my $root_path = $ra->url; + $root_path =~ s#^\Q$ra->{repos_root}\E(/|$)##; + foreach my $path (keys %$fetch) { + my $ref_id = $fetch->{$path}; + my $gs = Git::SVN->new($ref_id, $repo_id, $path); + + # make sure we can read when connecting to + # a higher level of a repository + my ($last_rev, undef) = $gs->last_rev_commit; + if (!defined $last_rev) { + $last_rev = eval { + $root_ra->get_latest_revnum; + }; + next if $@; + } + my $new = $root_path; + $new .= length $path ? "/$path" : ''; + eval { + $root_ra->get_log([$new], $last_rev, $last_rev, + 0, 0, 1, sub { }); + }; + next if $@; + $new_urls->{$ra->{repos_root}}->{$new} = + { ref_id => $ref_id, + old_repo_id => $repo_id, + old_path => $path }; + } + } + + my @emptied; + foreach my $url (keys %$new_urls) { + # see if we can re-use an existing [svn-remote "repo_id"] + # instead of creating a(n ugly) new section: + my $repo_id = $root_repos->{$url} || $url; + + my $fetch = $new_urls->{$url}; + foreach my $path (keys %$fetch) { + my $x = $fetch->{$path}; + Git::SVN->init($url, $path, $repo_id, $x->{ref_id}); + my $pfx = "svn-remote.$x->{old_repo_id}"; + + my $old_fetch = quotemeta("$x->{old_path}:". + "$x->{ref_id}"); + command_noisy(qw/config --unset/, + "$pfx.fetch", '^'. $old_fetch . '$'); + delete $r->{$x->{old_repo_id}}-> + {fetch}->{$x->{old_path}}; + if (!keys %{$r->{$x->{old_repo_id}}->{fetch}}) { + command_noisy(qw/config --unset/, + "$pfx.url"); + push @emptied, $x->{old_repo_id} + } + } + } + if (@emptied) { + my $file = $ENV{GIT_CONFIG} || + command_oneline(qw(rev-parse --git-path config)); + print STDERR <flush; + } + $cred->username($default_username); + } else { + username($cred, $realm, $may_save, $pool); + } + $cred->password(_read_password("Password for '" . + $cred->username . "': ", $realm)); + $cred->may_save($may_save); + $SVN::_Core::SVN_NO_ERROR; +} + +sub ssl_server_trust { + my ($cred, $realm, $failures, $cert_info, $may_save, $pool) = @_; + $may_save = undef if $_no_auth_cache; + print STDERR "Error validating server certificate for '$realm':\n"; + { + no warnings 'once'; + # All variables SVN::Auth::SSL::* are used only once, + # so we're shutting up Perl warnings about this. + if ($failures & $SVN::Auth::SSL::UNKNOWNCA) { + print STDERR " - The certificate is not issued ", + "by a trusted authority. Use the\n", + " fingerprint to validate ", + "the certificate manually!\n"; + } + if ($failures & $SVN::Auth::SSL::CNMISMATCH) { + print STDERR " - The certificate hostname ", + "does not match.\n"; + } + if ($failures & $SVN::Auth::SSL::NOTYETVALID) { + print STDERR " - The certificate is not yet valid.\n"; + } + if ($failures & $SVN::Auth::SSL::EXPIRED) { + print STDERR " - The certificate has expired.\n"; + } + if ($failures & $SVN::Auth::SSL::OTHER) { + print STDERR " - The certificate has ", + "an unknown error.\n"; + } + } # no warnings 'once' + printf STDERR + "Certificate information:\n". + " - Hostname: %s\n". + " - Valid: from %s until %s\n". + " - Issuer: %s\n". + " - Fingerprint: %s\n", + map $cert_info->$_, qw(hostname valid_from valid_until + issuer_dname fingerprint); + my $choice; +prompt: + my $options = $may_save ? + "(R)eject, accept (t)emporarily or accept (p)ermanently? " : + "(R)eject or accept (t)emporarily? "; + STDERR->flush; + $choice = lc(substr(Git::prompt("Certificate problem.\n" . $options) || 'R', 0, 1)); + if ($choice eq 't') { + $cred->may_save(undef); + } elsif ($choice eq 'r') { + return -1; + } elsif ($may_save && $choice eq 'p') { + $cred->may_save($may_save); + } else { + goto prompt; + } + $cred->accepted_failures($failures); + $SVN::_Core::SVN_NO_ERROR; +} + +sub ssl_client_cert { + my ($cred, $realm, $may_save, $pool) = @_; + $may_save = undef if $_no_auth_cache; + print STDERR "Client certificate filename: "; + STDERR->flush; + chomp(my $filename = ); + $cred->cert_file($filename); + $cred->may_save($may_save); + $SVN::_Core::SVN_NO_ERROR; +} + +sub ssl_client_cert_pw { + my ($cred, $realm, $may_save, $pool) = @_; + $may_save = undef if $_no_auth_cache; + $cred->password(_read_password("Password: ", $realm)); + $cred->may_save($may_save); + $SVN::_Core::SVN_NO_ERROR; +} + +sub username { + my ($cred, $realm, $may_save, $pool) = @_; + $may_save = undef if $_no_auth_cache; + if (defined $realm && length $realm) { + print STDERR "Authentication realm: $realm\n"; + } + my $username; + if (defined $_username) { + $username = $_username; + } else { + $username = Git::prompt("Username: "); + } + $cred->username($username); + $cred->may_save($may_save); + $SVN::_Core::SVN_NO_ERROR; +} + +sub _read_password { + my ($prompt, $realm) = @_; + my $password = Git::prompt($prompt, 1); + $password; +} + +1; +__END__ + +=head1 NAME + +Git::SVN::Prompt - authentication callbacks for git-svn + +=head1 SYNOPSIS + + use Git::SVN::Prompt qw(simple ssl_client_cert ssl_client_cert_pw + ssl_server_trust username); + use SVN::Client (); + + my $cached_simple = SVN::Client::get_simple_provider(); + my $git_simple = SVN::Client::get_simple_prompt_provider(\&simple, 2); + my $cached_ssl = SVN::Client::get_ssl_server_trust_file_provider(); + my $git_ssl = SVN::Client::get_ssl_server_trust_prompt_provider( + \&ssl_server_trust); + my $cached_cert = SVN::Client::get_ssl_client_cert_file_provider(); + my $git_cert = SVN::Client::get_ssl_client_cert_prompt_provider( + \&ssl_client_cert, 2); + my $cached_cert_pw = SVN::Client::get_ssl_client_cert_pw_file_provider(); + my $git_cert_pw = SVN::Client::get_ssl_client_cert_pw_prompt_provider( + \&ssl_client_cert_pw, 2); + my $cached_username = SVN::Client::get_username_provider(); + my $git_username = SVN::Client::get_username_prompt_provider( + \&username, 2); + + my $ctx = new SVN::Client( + auth => [ + $cached_simple, $git_simple, + $cached_ssl, $git_ssl, + $cached_cert, $git_cert, + $cached_cert_pw, $git_cert_pw, + $cached_username, $git_username + ]); + +=head1 DESCRIPTION + +This module is an implementation detail of the "git svn" command. +It implements git-svn's authentication policy. Do not use it unless +you are developing git-svn. + +The interface will change as git-svn evolves. + +=head1 DEPENDENCIES + +L. + +=head1 SEE ALSO + +L. + +=head1 INCOMPATIBILITIES + +None reported. + +=head1 BUGS + +None. diff --git a/perl/Git/SVN/Ra.pm b/perl/Git/SVN/Ra.pm new file mode 100644 index 000000000000..56ad9870bcfd --- /dev/null +++ b/perl/Git/SVN/Ra.pm @@ -0,0 +1,708 @@ +package Git::SVN::Ra; +use vars qw/@ISA $config_dir $_ignore_refs_regex $_log_window_size/; +use strict; +use warnings; +use Memoize; +use Git::SVN::Utils qw( + canonicalize_url + canonicalize_path + add_path_to_url +); + +use SVN::Ra; +BEGIN { + @ISA = qw(SVN::Ra); +} + +my ($ra_invalid, $can_do_switch, %ignored_err, $RA); + +BEGIN { + # enforce temporary pool usage for some simple functions + no strict 'refs'; + for my $f (qw/rev_proplist get_latest_revnum get_uuid get_repos_root + get_file/) { + my $SUPER = "SUPER::$f"; + *$f = sub { + my $self = shift; + my $pool = SVN::Pool->new; + my @ret = $self->$SUPER(@_,$pool); + $pool->clear; + wantarray ? @ret : $ret[0]; + }; + } +} + +# serf has a bug that leads to a coredump upon termination if the +# remote access object is left around (not fixed yet in serf 1.3.1). +# Explicitly free it to work around the issue. +END { + $RA = undef; + $ra_invalid = 1; +} + +sub _auth_providers () { + require SVN::Client; + my @rv = ( + SVN::Client::get_simple_provider(), + SVN::Client::get_ssl_server_trust_file_provider(), + SVN::Client::get_simple_prompt_provider( + \&Git::SVN::Prompt::simple, 2), + SVN::Client::get_ssl_client_cert_file_provider(), + SVN::Client::get_ssl_client_cert_prompt_provider( + \&Git::SVN::Prompt::ssl_client_cert, 2), + SVN::Client::get_ssl_client_cert_pw_file_provider(), + SVN::Client::get_ssl_client_cert_pw_prompt_provider( + \&Git::SVN::Prompt::ssl_client_cert_pw, 2), + SVN::Client::get_username_provider(), + SVN::Client::get_ssl_server_trust_prompt_provider( + \&Git::SVN::Prompt::ssl_server_trust), + SVN::Client::get_username_prompt_provider( + \&Git::SVN::Prompt::username, 2) + ); + + # earlier 1.6.x versions would segfault, and <= 1.5.x didn't have + # this function + if (::compare_svn_version('1.6.15') >= 0) { + my $config = SVN::Core::config_get_config($config_dir); + my ($p, @a); + # config_get_config returns all config files from + # ~/.subversion, auth_get_platform_specific_client_providers + # just wants the config "file". + @a = ($config->{'config'}, undef); + $p = SVN::Core::auth_get_platform_specific_client_providers(@a); + # Insert the return value from + # auth_get_platform_specific_providers + unshift @rv, @$p; + } + \@rv; +} + +sub prepare_config_once { + SVN::_Core::svn_config_ensure($config_dir, undef); + my ($baton, $callbacks) = SVN::Core::auth_open_helper(_auth_providers); + my $config = SVN::Core::config_get_config($config_dir); + my $conf_t = $config->{'config'}; + + no warnings 'once'; + # The usage of $SVN::_Core::SVN_CONFIG_* variables + # produces warnings that variables are used only once. + # I had not found the better way to shut them up, so + # the warnings of type 'once' are disabled in this block. + if (SVN::_Core::svn_config_get_bool($conf_t, + $SVN::_Core::SVN_CONFIG_SECTION_AUTH, + $SVN::_Core::SVN_CONFIG_OPTION_STORE_PASSWORDS, + 1) == 0) { + my $val = '1'; + if (::compare_svn_version('1.9.0') < 0) { # pre-SVN r1553823 + my $dont_store_passwords = 1; + $val = bless \$dont_store_passwords, "_p_void"; + } + SVN::_Core::svn_auth_set_parameter($baton, + $SVN::_Core::SVN_AUTH_PARAM_DONT_STORE_PASSWORDS, + $val); + } + if (SVN::_Core::svn_config_get_bool($conf_t, + $SVN::_Core::SVN_CONFIG_SECTION_AUTH, + $SVN::_Core::SVN_CONFIG_OPTION_STORE_AUTH_CREDS, + 1) == 0) { + $Git::SVN::Prompt::_no_auth_cache = 1; + } + + return ($config, $baton, $callbacks); +} # no warnings 'once' + +INIT { + Memoize::memoize '_auth_providers'; + Memoize::memoize 'prepare_config_once'; +} + +sub new { + my ($class, $url) = @_; + $url = canonicalize_url($url); + return $RA if ($RA && $RA->url eq $url); + + ::_req_svn(); + + $RA = undef; + my ($config, $baton, $callbacks) = prepare_config_once(); + my $self = SVN::Ra->new(url => $url, auth => $baton, + config => $config, + pool => SVN::Pool->new, + auth_provider_callbacks => $callbacks); + $RA = bless $self, $class; + + # Make sure its canonicalized + $self->url($url); + $self->{svn_path} = $url; + $self->{repos_root} = $self->get_repos_root; + $self->{svn_path} =~ s#^\Q$self->{repos_root}\E(/|$)##; + $self->{cache} = { check_path => { r => 0, data => {} }, + get_dir => { r => 0, data => {} } }; + + return $RA; +} + +sub url { + my $self = shift; + + if (@_) { + my $url = shift; + $self->{url} = canonicalize_url($url); + return; + } + + return $self->{url}; +} + +sub check_path { + my ($self, $path, $r) = @_; + my $cache = $self->{cache}->{check_path}; + if ($r == $cache->{r} && exists $cache->{data}->{$path}) { + return $cache->{data}->{$path}; + } + my $pool = SVN::Pool->new; + my $t = $self->SUPER::check_path($path, $r, $pool); + $pool->clear; + if ($r != $cache->{r}) { + %{$cache->{data}} = (); + $cache->{r} = $r; + } + $cache->{data}->{$path} = $t; +} + +sub get_dir { + my ($self, $dir, $r) = @_; + my $cache = $self->{cache}->{get_dir}; + if ($r == $cache->{r}) { + if (my $x = $cache->{data}->{$dir}) { + return wantarray ? @$x : $x->[0]; + } + } + my $pool = SVN::Pool->new; + my ($d, undef, $props); + + if (::compare_svn_version('1.4.0') >= 0) { + # n.b. in addition to being potentially more efficient, + # this works around what appears to be a bug in some + # SVN 1.8 versions + my $kind = 1; # SVN_DIRENT_KIND + ($d, undef, $props) = $self->get_dir2($dir, $r, $kind, $pool); + } else { + ($d, undef, $props) = $self->SUPER::get_dir($dir, $r, $pool); + } + my %dirents = map { $_ => { kind => $d->{$_}->kind } } keys %$d; + $pool->clear; + if ($r != $cache->{r}) { + %{$cache->{data}} = (); + $cache->{r} = $r; + } + $cache->{data}->{$dir} = [ \%dirents, $r, $props ]; + wantarray ? (\%dirents, $r, $props) : \%dirents; +} + +# get_log(paths, start, end, limit, +# discover_changed_paths, strict_node_history, receiver) +sub get_log { + my ($self, @args) = @_; + my $pool = SVN::Pool->new; + + # svn_log_changed_path_t objects passed to get_log are likely to be + # overwritten even if only the refs are copied to an external variable, + # so we should dup the structures in their entirety. Using an + # externally passed pool (instead of our temporary and quickly cleared + # pool in Git::SVN::Ra) does not help matters at all... + my $receiver = pop @args; + my $prefix = "/".$self->{svn_path}; + $prefix =~ s#/+($)##; + my $prefix_regex = qr#^\Q$prefix\E#; + push(@args, sub { + my ($paths) = $_[0]; + return &$receiver(@_) unless $paths; + $_[0] = (); + foreach my $p (keys %$paths) { + my $i = $paths->{$p}; + # Make path relative to our url, not repos_root + $p =~ s/$prefix_regex//; + my %s = map { $_ => $i->$_; } + qw/copyfrom_path copyfrom_rev action/; + if ($s{'copyfrom_path'}) { + $s{'copyfrom_path'} =~ s/$prefix_regex//; + $s{'copyfrom_path'} = canonicalize_path($s{'copyfrom_path'}); + } + $_[0]{$p} = \%s; + } + &$receiver(@_); + }); + + + # the limit parameter was not supported in SVN 1.1.x, so we + # drop it. Therefore, the receiver callback passed to it + # is made aware of this limitation by being wrapped if + # the limit passed to is being wrapped. + if (::compare_svn_version('1.2.0') <= 0) { + my $limit = splice(@args, 3, 1); + if ($limit > 0) { + my $receiver = pop @args; + push(@args, sub { &$receiver(@_) if (--$limit >= 0) }); + } + } + my $ret = $self->SUPER::get_log(@args, $pool); + $pool->clear; + $ret; +} + +# uncommon, only for ancient SVN (<= 1.4.2) +sub trees_match { + require IO::File; + require SVN::Client; + my ($self, $url1, $rev1, $url2, $rev2) = @_; + my $ctx = SVN::Client->new(auth => _auth_providers); + my $out = IO::File->new_tmpfile; + + # older SVN (1.1.x) doesn't take $pool as the last parameter for + # $ctx->diff(), so we'll create a default one + my $pool = SVN::Pool->new_default_sub; + + $ra_invalid = 1; # this will open a new SVN::Ra connection to $url1 + $ctx->diff([], $url1, $rev1, $url2, $rev2, 1, 1, 0, $out, $out); + $out->flush; + my $ret = (($out->stat)[7] == 0); + close $out or croak $!; + + $ret; +} + +sub get_commit_editor { + my ($self, $log, $cb, $pool) = @_; + + my @lock = (::compare_svn_version('1.2.0') >= 0) ? (undef, 0) : (); + $self->SUPER::get_commit_editor($log, $cb, @lock, $pool); +} + +sub gs_do_update { + my ($self, $rev_a, $rev_b, $gs, $editor) = @_; + my $new = ($rev_a == $rev_b); + my $path = $gs->path; + + if ($new && -e $gs->{index}) { + unlink $gs->{index} or die + "Couldn't unlink index: $gs->{index}: $!\n"; + } + my $pool = SVN::Pool->new; + $editor->set_path_strip($path); + my (@pc) = split m#/#, $path; + my $reporter = $self->do_update($rev_b, (@pc ? shift @pc : ''), + 1, $editor, $pool); + my @lock = (::compare_svn_version('1.2.0') >= 0) ? (undef) : (); + + # Since we can't rely on svn_ra_reparent being available, we'll + # just have to do some magic with set_path to make it so + # we only want a partial path. + my $sp = ''; + my $final = join('/', @pc); + while (@pc) { + $reporter->set_path($sp, $rev_b, 0, @lock, $pool); + $sp .= '/' if length $sp; + $sp .= shift @pc; + } + die "BUG: '$sp' != '$final'\n" if ($sp ne $final); + + $reporter->set_path($sp, $rev_a, $new, @lock, $pool); + + $reporter->finish_report($pool); + $pool->clear; + $editor->{git_commit_ok}; +} + +# this requires SVN 1.4.3 or later (do_switch didn't work before 1.4.3, and +# svn_ra_reparent didn't work before 1.4) +sub gs_do_switch { + my ($self, $rev_a, $rev_b, $gs, $url_b, $editor) = @_; + my $path = $gs->path; + my $pool = SVN::Pool->new; + + my $old_url = $self->url; + my $full_url = add_path_to_url( $self->url, $path ); + my ($ra, $reparented); + + if ($old_url =~ m#^svn(\+\w+)?://# || + ($full_url =~ m#^https?://# && + canonicalize_url($full_url) ne $full_url)) { + $_[0] = undef; + $self = undef; + $RA = undef; + $ra = Git::SVN::Ra->new($full_url); + $ra_invalid = 1; + } elsif ($old_url ne $full_url) { + SVN::_Ra::svn_ra_reparent( + $self->{session}, + canonicalize_url($full_url), + $pool + ); + $self->url($full_url); + $reparented = 1; + } + + $ra ||= $self; + $url_b = canonicalize_url($url_b); + my $reporter = $ra->do_switch($rev_b, '', 1, $url_b, $editor, $pool); + my @lock = (::compare_svn_version('1.2.0') >= 0) ? (undef) : (); + $reporter->set_path('', $rev_a, 0, @lock, $pool); + $reporter->finish_report($pool); + + if ($reparented) { + SVN::_Ra::svn_ra_reparent($self->{session}, $old_url, $pool); + $self->url($old_url); + } + + $pool->clear; + $editor->{git_commit_ok}; +} + +sub longest_common_path { + my ($gsv, $globs) = @_; + my %common; + my $common_max = scalar @$gsv; + + foreach my $gs (@$gsv) { + my @tmp = split m#/#, $gs->path; + my $p = ''; + foreach (@tmp) { + $p .= length($p) ? "/$_" : $_; + $common{$p} ||= 0; + $common{$p}++; + } + } + $globs ||= []; + $common_max += scalar @$globs; + foreach my $glob (@$globs) { + my @tmp = split m#/#, $glob->{path}->{left}; + my $p = ''; + foreach (@tmp) { + $p .= length($p) ? "/$_" : $_; + $common{$p} ||= 0; + $common{$p}++; + } + } + + my $longest_path = ''; + foreach (sort {length $b <=> length $a} keys %common) { + if ($common{$_} == $common_max) { + $longest_path = $_; + last; + } + } + $longest_path; +} + +sub gs_fetch_loop_common { + my ($self, $base, $head, $gsv, $globs) = @_; + return if ($base > $head); + # Make sure the cat_blob open2 FileHandle is created before calling + # SVN::Pool::new_default so that it does not incorrectly end up in the pool. + $::_repository->_open_cat_blob_if_needed; + my $gpool = SVN::Pool->new_default; + my $ra_url = $self->url; + my $reload_ra = sub { + $_[0] = undef; + $self = undef; + $RA = undef; + $gpool->clear; + $self = Git::SVN::Ra->new($ra_url); + $ra_invalid = undef; + }; + my $inc = $_log_window_size; + my ($min, $max) = ($base, $head < $base + $inc ? $head : $base + $inc); + my $longest_path = longest_common_path($gsv, $globs); + my $find_trailing_edge; + while (1) { + my %revs; + my $err; + my $err_handler = $SVN::Error::handler; + $SVN::Error::handler = sub { + ($err) = @_; + skip_unknown_revs($err); + }; + sub _cb { + my ($paths, $r, $author, $date, $log) = @_; + [ $paths, + { author => $author, date => $date, log => $log } ]; + } + $self->get_log([$longest_path], $min, $max, 0, 1, 1, + sub { $revs{$_[1]} = _cb(@_) }); + if ($err) { + print "Checked through r$max\r"; + } else { + $find_trailing_edge = 1; + } + if ($err and $find_trailing_edge) { + print STDERR "Path '$longest_path' ", + "was probably deleted:\n", + $err->expanded_message, + "\nWill attempt to follow ", + "revisions r$min .. r$max ", + "committed before the deletion\n"; + my $hi = $max; + while (--$hi >= $min) { + my $ok; + $self->get_log([$longest_path], $min, $hi, + 0, 1, 1, sub { + $ok = $_[1]; + $revs{$_[1]} = _cb(@_) }); + if ($ok) { + print STDERR "r$min .. r$ok OK\n"; + last; + } + } + $find_trailing_edge = 0; + } + $SVN::Error::handler = $err_handler; + + my %exists = map { $_->path => $_ } @$gsv; + foreach my $r (sort {$a <=> $b} keys %revs) { + my ($paths, $logged) = @{delete $revs{$r}}; + + foreach my $gs ($self->match_globs(\%exists, $paths, + $globs, $r)) { + if ($gs->rev_map_max >= $r) { + next; + } + next unless $gs->match_paths($paths, $r); + $gs->{logged_rev_props} = $logged; + if (my $last_commit = $gs->last_commit) { + $gs->assert_index_clean($last_commit); + } + my $log_entry = $gs->do_fetch($paths, $r); + if ($log_entry) { + $gs->do_git_commit($log_entry); + } + $Git::SVN::INDEX_FILES{$gs->{index}} = 1; + } + foreach my $g (@$globs) { + my $k = "svn-remote.$g->{remote}." . + "$g->{t}-maxRev"; + Git::SVN::tmp_config($k, $r); + } + $reload_ra->() if $ra_invalid; + } + # pre-fill the .rev_db since it'll eventually get filled in + # with '0' x40 if something new gets committed + foreach my $gs (@$gsv) { + next if $gs->rev_map_max >= $max; + next if defined $gs->rev_map_get($max); + $gs->rev_map_set($max, 0 x40); + } + foreach my $g (@$globs) { + my $k = "svn-remote.$g->{remote}.$g->{t}-maxRev"; + Git::SVN::tmp_config($k, $max); + } + last if $max >= $head; + $min = $max + 1; + $max += $inc; + $max = $head if ($max > $head); + + $reload_ra->(); + } + Git::SVN::gc(); +} + +sub get_dir_globbed { + my ($self, $left, $depth, $r) = @_; + + my @x = eval { $self->get_dir($left, $r) }; + return unless scalar @x == 3; + my $dirents = $x[0]; + my @finalents; + foreach my $de (keys %$dirents) { + next if $dirents->{$de}->{kind} != $SVN::Node::dir; + if ($depth > 1) { + my @args = ("$left/$de", $depth - 1, $r); + foreach my $dir ($self->get_dir_globbed(@args)) { + push @finalents, "$de/$dir"; + } + } else { + push @finalents, $de; + } + } + @finalents; +} + +# return value: 0 -- don't ignore, 1 -- ignore +sub is_ref_ignored { + my ($g, $p) = @_; + my $refname = $g->{ref}->full_path($p); + return 1 if defined($g->{ignore_refs_regex}) && + $refname =~ m!$g->{ignore_refs_regex}!; + return 0 unless defined($_ignore_refs_regex); + return 1 if $refname =~ m!$_ignore_refs_regex!o; + return 0; +} + +sub match_globs { + my ($self, $exists, $paths, $globs, $r) = @_; + + sub get_dir_check { + my ($self, $exists, $g, $r) = @_; + + my @dirs = $self->get_dir_globbed($g->{path}->{left}, + $g->{path}->{depth}, + $r); + + foreach my $de (@dirs) { + my $p = $g->{path}->full_path($de); + next if $exists->{$p}; + next if (length $g->{path}->{right} && + ($self->check_path($p, $r) != + $SVN::Node::dir)); + next unless $p =~ /$g->{path}->{regex}/; + $exists->{$p} = Git::SVN->init($self->url, $p, undef, + $g->{ref}->full_path($de), 1); + } + } + foreach my $g (@$globs) { + if (my $path = $paths->{"/$g->{path}->{left}"}) { + if ($path->{action} =~ /^[AR]$/) { + get_dir_check($self, $exists, $g, $r); + } + } + foreach (keys %$paths) { + if (/$g->{path}->{left_regex}/ && + !/$g->{path}->{regex}/) { + next if $paths->{$_}->{action} !~ /^[AR]$/; + get_dir_check($self, $exists, $g, $r); + } + next unless /$g->{path}->{regex}/; + my $p = $1; + my $pathname = $g->{path}->full_path($p); + next if is_ref_ignored($g, $p); + next if $exists->{$pathname}; + next if ($self->check_path($pathname, $r) != + $SVN::Node::dir); + $exists->{$pathname} = Git::SVN->init( + $self->url, $pathname, undef, + $g->{ref}->full_path($p), 1); + } + my $c = ''; + foreach (split m#/#, $g->{path}->{left}) { + $c .= "/$_"; + next unless ($paths->{$c} && + ($paths->{$c}->{action} =~ /^[AR]$/)); + get_dir_check($self, $exists, $g, $r); + } + } + values %$exists; +} + +sub minimize_url { + my ($self) = @_; + return $self->url if ($self->url eq $self->{repos_root}); + my $url = $self->{repos_root}; + my @components = split(m!/!, $self->{svn_path}); + my $c = ''; + do { + $url = add_path_to_url($url, $c); + eval { + my $ra = (ref $self)->new($url); + my $latest = $ra->get_latest_revnum; + $ra->get_log("", $latest, 0, 1, 0, 1, sub {}); + }; + } while ($@ && defined($c = shift @components)); + + return canonicalize_url($url); +} + +sub can_do_switch { + my $self = shift; + unless (defined $can_do_switch) { + my $pool = SVN::Pool->new; + my $rep = eval { + $self->do_switch(1, '', 0, $self->url, + SVN::Delta::Editor->new, $pool); + }; + if ($@) { + $can_do_switch = 0; + } else { + $rep->abort_report($pool); + $can_do_switch = 1; + } + $pool->clear; + } + $can_do_switch; +} + +sub skip_unknown_revs { + my ($err) = @_; + my $errno = $err->apr_err(); + # Maybe the branch we're tracking didn't + # exist when the repo started, so it's + # not an error if it doesn't, just continue + # + # Wonderfully consistent library, eh? + # 160013 - svn:// and file:// + # 175002 - http(s):// + # 175007 - http(s):// (this repo required authorization, too...) + # More codes may be discovered later... + if ($errno == 175007 || $errno == 175002 || $errno == 160013) { + my $err_key = $err->expanded_message; + # revision numbers change every time, filter them out + $err_key =~ s/\d+/\0/g; + $err_key = "$errno\0$err_key"; + unless ($ignored_err{$err_key}) { + warn "W: Ignoring error from SVN, path probably ", + "does not exist: ($errno): ", + $err->expanded_message,"\n"; + warn "W: Do not be alarmed at the above message ", + "git-svn is just searching aggressively for ", + "old history.\n", + "This may take a while on large repositories\n"; + $ignored_err{$err_key} = 1; + } + return; + } + die "Error from SVN, ($errno): ", $err->expanded_message,"\n"; +} + +1; +__END__ + +=head1 NAME + +Git::SVN::Ra - Subversion remote access functions for git-svn + +=head1 SYNOPSIS + + use Git::SVN::Ra; + + my $ra = Git::SVN::Ra->new($branchurl); + my ($dirents, $fetched_revnum, $props) = + $ra->get_dir('.', $SVN::Core::INVALID_REVNUM); + +=head1 DESCRIPTION + +This is a wrapper around the L module for use by B. +It fills in some default parameters (such as the authentication +scheme), smooths over incompatibilities between libsvn versions, adds +caching, and implements some functions specific to B. + +Do not use it unless you are developing git-svn. The interface will +change as git-svn evolves. + +=head1 DEPENDENCIES + +Subversion perl bindings, +L. + +C has not been tested using callers other than +B itself. + +=head1 SEE ALSO + +L. + +=head1 INCOMPATIBILITIES + +None reported. + +=head1 BUGS + +None. diff --git a/perl/Git/SVN/Utils.pm b/perl/Git/SVN/Utils.pm new file mode 100644 index 000000000000..3d1a0933a2eb --- /dev/null +++ b/perl/Git/SVN/Utils.pm @@ -0,0 +1,232 @@ +package Git::SVN::Utils; + +use strict; +use warnings; + +use SVN::Core; + +use base qw(Exporter); + +our @EXPORT_OK = qw( + fatal + can_compress + canonicalize_path + canonicalize_url + join_paths + add_path_to_url +); + + +=head1 NAME + +Git::SVN::Utils - utility functions used across Git::SVN + +=head1 SYNOPSIS + + use Git::SVN::Utils qw(functions to import); + +=head1 DESCRIPTION + +This module contains functions which are useful across many different +parts of Git::SVN. Mostly it's a place to put utility functions +rather than duplicate the code or have classes grabbing at other +classes. + +=head1 FUNCTIONS + +All functions can be imported only on request. + +=head3 fatal + + fatal(@message); + +Display a message and exit with a fatal error code. + +=cut + +# Note: not certain why this is in use instead of die. Probably because +# the exit code of die is 255? Doesn't appear to be used consistently. +sub fatal (@) { print STDERR "@_\n"; exit 1 } + + +=head3 can_compress + + my $can_compress = can_compress; + +Returns true if Compress::Zlib is available, false otherwise. + +=cut + +my $can_compress; +sub can_compress { + return $can_compress if defined $can_compress; + + return $can_compress = eval { require Compress::Zlib; }; +} + + +=head3 canonicalize_path + + my $canoncalized_path = canonicalize_path($path); + +Converts $path into a canonical form which is safe to pass to the SVN +API as a file path. + +=cut + +# Turn foo/../bar into bar +sub _collapse_dotdot { + my $path = shift; + + 1 while $path =~ s{/[^/]+/+\.\.}{}; + 1 while $path =~ s{[^/]+/+\.\./}{}; + 1 while $path =~ s{[^/]+/+\.\.}{}; + + return $path; +} + + +sub canonicalize_path { + my $path = shift; + my $rv; + + # The 1.7 way to do it + if ( defined &SVN::_Core::svn_dirent_canonicalize ) { + $path = _collapse_dotdot($path); + $rv = SVN::_Core::svn_dirent_canonicalize($path); + } + # The 1.6 way to do it + # This can return undef on subversion-perl-1.4.2-2.el5 (CentOS 5.2) + elsif ( defined &SVN::_Core::svn_path_canonicalize ) { + $path = _collapse_dotdot($path); + $rv = SVN::_Core::svn_path_canonicalize($path); + } + + return $rv if defined $rv; + + # No SVN API canonicalization is available, or the SVN API + # didn't return a successful result, do it ourselves + return _canonicalize_path_ourselves($path); +} + + +sub _canonicalize_path_ourselves { + my ($path) = @_; + my $dot_slash_added = 0; + if (substr($path, 0, 1) ne "/") { + $path = "./" . $path; + $dot_slash_added = 1; + } + $path =~ s#/+#/#g; + $path =~ s#/\.(?:/|$)#/#g; + $path = _collapse_dotdot($path); + $path =~ s#/$##g; + $path =~ s#^\./## if $dot_slash_added; + $path =~ s#^\.$##; + return $path; +} + + +=head3 canonicalize_url + + my $canonicalized_url = canonicalize_url($url); + +Converts $url into a canonical form which is safe to pass to the SVN +API as a URL. + +=cut + +sub canonicalize_url { + my $url = shift; + + # The 1.7 way to do it + if ( defined &SVN::_Core::svn_uri_canonicalize ) { + return SVN::_Core::svn_uri_canonicalize($url); + } + # There wasn't a 1.6 way to do it, so we do it ourself. + else { + return _canonicalize_url_ourselves($url); + } +} + + +sub _canonicalize_url_path { + my ($uri_path) = @_; + + my @parts; + foreach my $part (split m{/+}, $uri_path) { + $part =~ s/([^!\$%&'()*+,.\/\w:=\@_`~-]|%(?![a-fA-F0-9]{2}))/sprintf("%%%02X",ord($1))/eg; + push @parts, $part; + } + + return join('/', @parts); +} + +sub _canonicalize_url_ourselves { + my ($url) = @_; + if ($url =~ m#^([^:]+)://([^/]*)(.*)$#) { + my ($scheme, $domain, $uri) = ($1, $2, _canonicalize_url_path(canonicalize_path($3))); + $url = "$scheme://$domain$uri"; + } + $url; +} + + +=head3 join_paths + + my $new_path = join_paths(@paths); + +Appends @paths together into a single path. Any empty paths are ignored. + +=cut + +sub join_paths { + my @paths = @_; + + @paths = grep { defined $_ && length $_ } @paths; + + return '' unless @paths; + return $paths[0] if @paths == 1; + + my $new_path = shift @paths; + $new_path =~ s{/+$}{}; + + my $last_path = pop @paths; + $last_path =~ s{^/+}{}; + + for my $path (@paths) { + $path =~ s{^/+}{}; + $path =~ s{/+$}{}; + $new_path .= "/$path"; + } + + return $new_path .= "/$last_path"; +} + + +=head3 add_path_to_url + + my $new_url = add_path_to_url($url, $path); + +Appends $path onto the $url. If $path is empty, $url is returned unchanged. + +=cut + +sub add_path_to_url { + my($url, $path) = @_; + + return $url if !defined $path or !length $path; + + # Strip trailing and leading slashes so we don't + # wind up with http://x.com///path + $url =~ s{/+$}{}; + $path =~ s{^/+}{}; + + # If a path has a % in it, URI escape it so it's not + # mistaken for a URI escape later. + $path =~ s{%}{%25}g; + + return join '/', $url, $path; +} + +1; -- cgit 1.4.1