#!/usr/bin/perl -w
$ID = q$Id: mvto,v 1.15 2006-12-03 22:02:25 eagle Exp $;
#
# mvto -- Move an AFS volume from anywhere, intelligently.
#
# Written by Russ Allbery
# Based on code by Neil Crellin
# Copyright 1998, 1999, 2001, 2003, 2004, 2005, 2006
#     Board of Trustees, Leland Stanford Jr. University
#
# This program is free software; you may redistribute it and/or modify it
# under the same terms as Perl itself.
#
# A smart vos move, or a vos move that assumes you know what you're doing,
# depending on your point of view.  mvto parses the output of vos examine
# for a volume to figure out where it is and then puts it where you want it.
# It also supports replicated volumes, and is able to figure out how the
# replication pattern of a volume differs from what you want it to be and
# correct it.

##############################################################################
# Modules and declarations
##############################################################################

require 5.004;

use strict;
use vars qw($ID $JUSTPRINT $THRESHOLD $VOS);
use subs qw(system);

use Date::Parse qw(str2time);
use Getopt::Long qw(GetOptions);

# The threshold up to which mvto is willing to fill a partition, expressed as
# a ratio of its total available space.
$THRESHOLD = '0.90';

# Find a usable version of vos.  If neither candidate is executable, fall
# back on the last one so that later error messages name a concrete path.
($VOS) = grep { -x $_ } qw(/usr/bin/vos /usr/pubsw/bin/vos);
$VOS ||= '/usr/pubsw/bin/vos';

##############################################################################
# Overrides
##############################################################################

# We override system to honor the global $JUSTPRINT variable.  It otherwise
# works the same way as system normally does, except that a non-zero exit
# status is fatal.  Returns 0 in just-print mode and a true value after a
# successful command.
sub system {
    if ($JUSTPRINT) {
        print "@_\n";
        return 0;
    } else {
        CORE::system (@_) == 0
            or die "$0: @_ failed (status " . ($? >> 8) . ")\n";
    }
}

##############################################################################
# AFS information
##############################################################################

# Given a server name and a partition, fully qualify both and return them as a
# list of ($server, $partition).  An all-numeric server gets "afssvr"
# prepended.  The partition may be a letter, vicepX, or /vicepX.  It may also
# be "." to pick the partition with the most free space on that server, or a
# string of letters and letter ranges (such as "ace-g") to pick the partition
# with the most free space from that set.  Dies if the result is not a
# single-letter /vicepX partition or if vos partinfo reports no candidates.
sub findpartition {
    my ($server, $part) = @_;
    die "$0: server and partition must both be given\n"
        unless (defined $server && defined $part);
    $server = 'afssvr' . $server if ($server =~ /^\d+$/);
    $part =~ s%^/?vicep%%;
    if ($part eq '.' || (length ($part) > 1 && $part =~ /^[a-z-]+$/)) {
        $part = 'a-z' if $part eq '.';
        open (INFO, "$VOS partinfo $server |")
            or die "$0: can't fork: $!\n";
        my @free;
        local $_;
        while (<INFO>) {
            # $part has been validated above to contain only letters and
            # dashes, so it is safe to use as a character class.
            if (m%^Free space on partition (/vicep[$part]): (\d+) K %) {
                push (@free, [ $1, $2 ]);
            } elsif (m%^Free space on partition (/vicep.)%) {
                next;
            } else {
                die "$0: vos partinfo said $_";
            }
        }
        close INFO;
        die "$0: no partition matching [$part] found on $server\n"
            unless @free;

        # Most free space first.
        @free = sort { $$b[1] <=> $$a[1] } @free;
        $part = $free[0][0];
    } else {
        $part = "/vicep$part";
    }
    die "$0: invalid partition $part\n" if ($part !~ m%^/vicep[a-z]$%);
    return ($server, $part);
}

# Get the available space on one particular AFS partition, used as a safeguard
# to make sure that we don't move a volume to somewhere where there isn't
# sufficient space.  Returns the space (in KB) available before the partition
# reaches $THRESHOLD.  A threshold of 1.0 or more disables the check, in
# which case an infinite amount of space is reported so that the callers'
# "available < size" comparisons can never trip.
sub available_space {
    my ($server, $partition) = @_;
    return 9**9**9 if $THRESHOLD >= 1.0;
    open (INFO, "$VOS partinfo $server $partition |")
        or die "$0: can't fork: $!\n";
    local $_;
    while (<INFO>) {
        if (m%^Free space on \S+ \S+: (\d+) K blocks out of total (\d+)%) {
            my ($free, $total) = ($1, $2);
            close INFO;
            return ($free - $total * (1 - $THRESHOLD));
        } else {
            die "$0: vos partinfo said $_";
        }
    }
    close INFO;
    die "$0: no valid output from vos partinfo\n";
}

# Given a volume name, determines various characteristics of the volume and
# returns them in a hash.
# 'size' gets the volume size in KB, 'rwserver' and 'rwpart' get the server
# and partition for the read-write volume, 'ro' gets a hash of server and
# partition values for the replicas (always a hash reference, empty if the
# volume is unreplicated), 'sites' gets a count of the number of sites the
# volume is replicated on, and 'unreleased' gets a boolean value saying
# whether there are unreleased changes.  The second argument is accepted but
# not used.  Dies if the volume is not a read/write volume or if the output
# of vos examine cannot be parsed.
sub volinfo {
    my ($volume, $checkro) = @_;
    my (%results, $rotime, $rwtime);
    open (VEX, "$VOS examine $volume |")
        or die "$0: can't fork $VOS examine: $!\n";
    local $_;
    $results{sites} = 0;
    $results{ro} = {};
    while (<VEX>) {
        if (/^\Q$volume\E\s+\d+ (RW|RO|BK)\s+(\d+) K\s+On-line\s*$/) {
            die "$0: $volume is $1, not RW\n" unless $1 eq 'RW';
            $results{size} = $2;
        } elsif (/^\s+server ([^.\s]+)\.\S+ partition (\S+) RW Site\s*/) {
            die "$0: saw two RW sites for $volume\n" if $results{rwserver};
            $results{rwserver} = $1;
            $results{rwpart} = $2;
        } elsif (/^\s+server ([^.\s]+)\.\S+ partition (\S+) RO Site\s*/) {
            $results{ro}{$1} = $2;
            $results{sites}++;
        } elsif (/^\s+Last Update (.*)/) {
            # Copy $1 before calling out, since the callee may run its own
            # regex matches.
            my $tmp = $1;
            $rwtime = str2time($tmp);
        }
    }
    close VEX;
    die "$0: unable to parse vos examine $volume\n"
        unless ($results{rwserver} && defined $results{size});

    # For replicated volumes, compare the last update time of the read/write
    # volume against that of the read-only volume to detect unreleased
    # changes.  A time that could not be parsed is treated as 0.
    if ($results{sites}) {
        open (VEX, "$VOS examine $volume.readonly |")
            or die "$0: can't fork $VOS examine for readonly: $!\n";
        while (<VEX>) {
            if (/^\s+Last Update (.*)/) {
                my $tmp = $1;
                $rotime = str2time($tmp);
            }
        }
        close VEX;
        if (($rwtime || 0) > ($rotime || 0)) { $results{unreleased} = 1 }
    }
    return %results;
}

##############################################################################
# Information display
##############################################################################

# Print information about a volume so that the user can see what's going to be
# moved.  Takes the volume name and a reference to the hash of volume
# information.  Replicas are listed in sorted server order.
sub print_volinfo {
    my ($volume, $volinfo) = @_;
    print "$volume on $$volinfo{rwserver} $$volinfo{rwpart}"
        . " ($$volinfo{size} KB)";
    print " with unreleased changes" if $$volinfo{unreleased};
    print "\n";
    my $replicas = $$volinfo{ro} || {};
    for (sort keys %$replicas) {
        print " replica on $_ $$replicas{$_}\n";
    }
    print "\n";
}

##############################################################################
# Volume moving
##############################################################################

# Each of the following returns a command as a reference to an array of the
# program and its arguments, suitable for passing to system.

# Return a command to add a site for a volume on a given server and partition.
sub addsite {
    my ($server, $part, $volume) = @_;
    return [ $VOS, 'addsite', $server, $part, $volume ];
}

# Return a command to move a read/write volume from a given server and
# partition to another server and partition.
sub move {
    my ($volume, @locations) = @_;
    return [ $VOS, 'move', '-v', $volume, @locations ];
}

# Return a command to back up a volume.
sub backup {
    my ($volume) = @_;
    return [ $VOS, 'backup', $volume ];
}

# Return a command to remove a volume.
sub remove {
    my ($server, $part, $volume) = @_;
    return [ $VOS, 'remove', $server, $part, $volume ];
}

# Return a command to release a volume.
sub release {
    my ($volume) = @_;
    return [ $VOS, 'release', '-v', $volume ];
}

# Build the commands required to move the read/write site of a volume.  Takes
# the volume name, the volume information hash, the server to move the volume
# to, and the partition on that server to move the volume to, and returns a
# flag saying whether the volume needs a release followed by a list of
# commands, each of which is an anonymous array giving a command to run.
# Modifies the provided volinfo hash to delete the read-only replicas that
# have been accounted for.
sub move_rw {
    # Arguments: volume name, reference to the volinfo hash, and the
    # destination server and partition (in any form findpartition accepts).
    # Returns ($needrelease, @commands).  Dies if the move would overfill
    # the destination partition.
    my ($volume, $volinfo, @location) = @_;
    my (@commands, $needrelease);
    my ($toserver, $topart) = findpartition (@location);
    my @to = ($toserver, $topart);
    my ($fromserver, $frompart) = ($$volinfo{rwserver}, $$volinfo{rwpart});
    my @from = ($fromserver, $frompart);

    # Work from a copy of the replica list; an unreplicated volume may have
    # no replica hash at all.
    my %ro = %{ $$volinfo{ro} || {} };

    # If the volume is replicated and the read/write is already on the right
    # server, we won't actually move it.  Just make sure there's also a
    # replica on the same partition if the volume is replicated.  Otherwise,
    # move the read/write site and its corresponding replica if applicable.
    # If the volume is not replicated, we're always willing to move the
    # read/write site, even to another partition on the same server.
    if ($fromserver eq $toserver && $$volinfo{sites} > 0) {
        print "$volume is already on $fromserver\n";
        if ($ro{$toserver}) {
            delete $$volinfo{ro}{$toserver};
        } else {
            push (@commands, addsite (@to, $volume));
            $needrelease++;
        }
    } else {
        # An undefined result means the space check is disabled.
        my $space = available_space (@to);
        if (defined ($space) && $space < $$volinfo{size}) {
            die "$0: moving $volume would overfill @to\n";
        }
        push (@commands, move ($volume, @from, @to));
        push (@commands, backup ($volume));
        if ($$volinfo{sites} > 0) {
            if ($ro{$toserver} && $ro{$toserver} ne $topart) {
                # Replica on the right server but the wrong partition:
                # replace it with one next to the read/write volume.
                my $ro = "$volume.readonly";
                push (@commands, remove ($toserver, $ro{$toserver}, $ro));
                delete $$volinfo{ro}{$toserver};
                push (@commands, addsite (@to, $volume));
            } elsif ($ro{$toserver}) {
                # Replica is already exactly where it should be.  Mark it as
                # accounted for so that the caller doesn't remove it as an
                # unwanted replica.
                delete $$volinfo{ro}{$toserver};
            } else {
                push (@commands, addsite (@to, $volume));
            }
        }
        $needrelease++;
    }
    return ($needrelease, @commands);
}

# Build the commands required to place the read-only replicas of a volume.
# Takes the volume name, the volume information hash, and a list of server and
# partition pairs onto which to put the replicas.  Returns a flag saying
# whether the volume needs a release followed by a list of commands, each of
# which is an anonymous array giving a command to run.  Modifies the provided
# volinfo hash to delete the read-only replicas that have been accounted for.
sub move_ros {
    # Arguments: volume name, reference to the volinfo hash, and a flat list
    # of server and partition pairs for the replicas.  Returns
    # ($needrelease, @commands).  Dies if adding a replica would overfill a
    # partition or if the number of replication sites would change.
    my ($volume, $volinfo, @location) = @_;

    # Start at one to count the replica that lives with the read/write.
    my $replicas = 1;

    # Work from a copy of the replica list; an unreplicated volume may have
    # no replica hash at all.
    my %ro = %{ $$volinfo{ro} || {} };
    my ($needrelease, @commands);

    # Walk the list of replica sites.  Delete out of the replica list in
    # volinfo any that are already in the right place, and generate commands
    # to add the rest.
    while (@location) {
        my ($server, $part) = findpartition (splice (@location, 0, 2));
        if ($ro{$server}) {
            print "$volume is already on $server\n";
            delete $$volinfo{ro}{$server};
        } else {
            # An undefined result means the space check is disabled.
            my $space = available_space ($server, $part);
            if (defined ($space) && $space < $$volinfo{size}) {
                die "$0: adding $volume replica would overfill"
                    . " $server $part\n";
            }
            push (@commands, addsite ($server, $part, $volume));
            $needrelease++;
        }
        $replicas++;
    }

    # Do sanity checking to make sure that we don't change the number of
    # replicas.
    if ($$volinfo{sites} > 0 && $replicas != $$volinfo{sites}) {
        my $dir = ($replicas < $$volinfo{sites}) ? 'reduce' : 'increase';
        die "$0: would $dir replication from $$volinfo{sites}"
            . " to $replicas sites\n";
    }

    # Return the flag and the commands.
    return ($needrelease, @commands);
}

# Given a volume name, a flag saying whether to force releases, and then a
# list of server and partition pairs onto which to put it, take the necessary
# actions to move the volume.  The first pair is the location for the
# read/write volume and any further pairs are replica sites.
sub move_volume {
    my ($volume, $force, @location) = @_;
    my @rw = splice (@location, 0, 2);
    my @ro = @location;

    # Get and display information about the volume.
    my %volinfo = volinfo ($volume);
    print_volinfo ($volume, \%volinfo);

    # Make sure we're not newly replicating the volume as part of the move.
    # The read/write pair has already been removed from the list, so any
    # remaining pair is a replica site.
    die "$0: replica sites given and $volume is unreplicated\n"
        if (!$volinfo{sites} && @ro > 0);

    # Build the list of commands to implement the move and also set the flag
    # saying whether we need to do a volume release.
    my ($needrelease, @commands) = move_rw ($volume, \%volinfo, @rw);
    my @result = move_ros ($volume, \%volinfo, @ro);
    $needrelease = shift (@result) || $needrelease;
    push (@commands, @result);

    # Refuse to release a volume with unreleased changes unless --force was
    # given on the command line.  Note that there is a race condition here, so
    # this check is not completely reliable (someone may have changed the
    # volume since after we ran vos examine).
    $needrelease &&= ($volinfo{sites} > 0);
    die "$0: volume has unreleased changes, use --force to force a release\n"
        if ($volinfo{unreleased} && $needrelease && !$force);

    # Add the volume release to the commands if necessary.
    push (@commands, release ($volume)) if $needrelease;

    # Clean up any unwanted replicas.  All wanted replicas have already been
    # deleted out of the hash.
    my $unwanted = $volinfo{ro} || {};
    for (sort keys %$unwanted) {
        push (@commands, remove ($_, $$unwanted{$_}, "$volume.readonly"));
    }

    # Okay, run the commands.  In order to get our spacing correct, print out
    # a newline if there are fewer commands than the number of sites plus two,
    # since that means that at least one replica was already on the right
    # server and we printed that out.
    if (@commands < $volinfo{sites} + 2) {
        print "\n";
    }
    for (@commands) {
        print "@$_\n";
        system @$_;
    }
}

# Given the name of the volume, a flag saying whether to force a release, a
# server and partition pair for the source, and a server and partition pair
# for the destination, move a single site of a volume.
sub move_single {
    my ($volume, $force, @location) = @_;
    my @source = findpartition (splice (@location, 0, 2));
    my @dest = findpartition (@location);

    # Get and display information about the volume.
    my %volinfo = volinfo ($volume);
    print_volinfo ($volume, \%volinfo);
    my $replicas = $volinfo{ro} || {};

    # Build commands to move the appropriate portion of the volume.  An
    # undefined result from available_space means the check is disabled.
    my ($needrelease, @commands);
    if ($source[0] eq $volinfo{rwserver} && $source[1] eq $volinfo{rwpart}) {
        my $space = available_space (@dest);
        if (defined ($space) && $space < $volinfo{size}) {
            die "$0: moving $volume would overfill @dest\n";
        }
        push (@commands, move ($volume, @source, @dest));
        push (@commands, backup ($volume));
        if ($volinfo{sites} > 0) {
            if ($$replicas{$dest[0]}) {
                die "$0: $volume already has a replica on $dest[0]\n";
            }
            push (@commands, addsite (@dest, $volume));
            $needrelease++;
        }
    } elsif ($volinfo{sites} > 0 && $$replicas{$source[0]}) {
        if ($source[1] ne $$replicas{$source[0]}) {
            die "$0: $volume not on @source\n";
        }
        if ($$replicas{$dest[0]}) {
            die "$0: $volume already has a replica on $dest[0]\n";
        }
        my $space = available_space (@dest);
        if (defined ($space) && $space < $volinfo{size}) {
            die "$0: adding $volume replica would overfill @dest\n"
        }
        push (@commands, addsite (@dest, $volume));
        $needrelease++;
    } else {
        die "$0: $volume not on @source\n";
    }

    # Refuse to release a volume with unreleased changes unless --force was
    # given on the command line.  Note that there is a race condition here, so
    # this check is not completely reliable (someone may have changed the
    # volume since after we ran vos examine).
    $needrelease &&= ($volinfo{sites} > 0);
    die "$0: volume has unreleased changes, use --force to force a release\n"
        if ($volinfo{unreleased} && $needrelease && !$force);

    # If we need a release, add that and also the removal of the old site.
    if ($needrelease) {
        push (@commands, release ($volume));
        if ($$replicas{$source[0]}) {
            my $part = $$replicas{$source[0]};
            push (@commands, remove ($source[0], $part, "$volume.readonly"));
        }
    }

    # Run the commands.
    for (@commands) {
        print "@$_\n";
        system @$_;
    }
}

##############################################################################
# Implementation
##############################################################################

# Usage message, in case the command line syntax is wrong.
sub usage {
    die "Usage: $0 [-fns] <volume> <server> <partition>"
        . " [<server> <partition> ...]\n";
}

# Trim extraneous garbage from the path.
# Keep the full path around so that --help can hand this file to perldoc.
my $fullpath = $0;
$0 =~ s%.*/%%;

# Make sure we get output in the right order.
$| = 1;

# Parse our options.
my ($help, $force, $list, $location, $single, $version);
Getopt::Long::config ('bundling', 'no_ignore_case');
GetOptions ('f|force'               => \$force,
            'h|help'                => \$help,
            'l|list=s'              => \$list,
            'L|location-list=s'     => \$location,
            'n|dry-run|just-print'  => \$JUSTPRINT,
            's|single'              => \$single,
            't|threshold=f'         => \$THRESHOLD,
            'v|version'             => \$version) or exit 1;
if ($help) {
    print "Feeding myself to perldoc, please wait....\n";
    exec ('perldoc', '-t', $fullpath)
        or die "$0: cannot exec perldoc: $!\n";
} elsif ($version) {
    # Turn the RCS Id string into "name version (date)".
    my $banner = join (' ', (split (' ', $ID))[1..3]);
    $banner =~ s/,v\b//;
    $banner =~ s/(\S+)$/($1)/;
    $banner =~ tr%/%-%;
    print $banner, "\n";
    exit 0;
}

# Volume name is always the first argument unless -l or -L was given, and the
# rest of our arguments must be server and partition pairs.
my $volume;
unless ($list || $location) {
    usage () if (@ARGV < 3);
    $volume = shift;
}
usage () if (@ARGV % 2 != 0);

# It doesn't make sense to combine -l and -L.
die "$0: both -l and -L may not be specified at the same time\n"
    if ($list && $location);

# If -l is given, at least one server and partition pair must follow, since
# the list file only contains volume names.
usage () if ($list && @ARGV < 2);

# If -L is given, there should be no other command-line arguments.
die "Usage: $0 [-fns] [-t <threshold>] -L <file>\n"
    if ($location && @ARGV > 0);

# If -s was given, there must be exactly four arguments left, a pair giving
# the current server and partition, and a pair saying where that replica of
# the volume should be moved to.
die "Usage: $0 -s (-l <file> | <volume>) <from-server> <from-partition>"
    . " <to-server> <to-partition>\n"
    if ($single && @ARGV != 4 && !$location);

# Do the work.  If -l or -L was given, this means looping on the volumes
# listed in that file; otherwise, just move the single volume.
$list ||= $location;
if ($list) {
    # Give an explicit read mode so that an unusual file name can never be
    # interpreted as an open mode or a command.
    open (LIST, "< $list") or die "$0: cannot open $list: $!\n";
    my @volumes = <LIST>;
    close LIST;
    chomp @volumes;

    # Ignore blank lines rather than trying to move a volume with no name.
    @volumes = grep { /\S/ } @volumes;
    my $total = scalar @volumes;
    my $i = 0;
    for my $line (@volumes) {
        $i++;
        my ($name, @where);
        if ($location) {
            # With -L, each line is a volume followed by its own locations.
            ($name, @where) = split (' ', $line);
            die "$0: invalid location information for $name in $list\n"
                if (@where < 2 || @where % 2 != 0
                    || ($single && @where != 4));
        } else {
            # With -l, each line is just a volume name and the locations
            # come from the command line.
            $name = $line;
            $name =~ s/^\s+//;
            $name =~ s/\s+$//;
            @where = @ARGV;
        }
        print "MOVING $name ($i of $total)\n\n";
        if ($single) {
            move_single ($name, $force, @where);
        } else {
            move_volume ($name, $force, @where);
        }
        print "\n";
        print "\n" unless ($i == $total);
    }
    print "FINISHED $list\n";
} else {
    if ($single) {
        move_single ($volume, $force, @ARGV);
    } else {
        move_volume ($volume, $force, @ARGV);
    }
}

__END__

##############################################################################
# Documentation
##############################################################################

=head1 NAME

mvto - Move an AFS volume from anywhere, intelligently

=head1 SYNOPSIS

mvto [B<-hfnsv>] [B<-t> I<threshold>] I<volume> I<server> I<partition>
    [I<server> I<partition> ...]

mvto [B<-fns>] B<-l> I<file> [B<-t> I<threshold>] I<server> I<partition>
    [I<server> I<partition> ...]

mvto [B<-fns>] B<-L> I<file> [B<-t> I<threshold>]

=head1 DESCRIPTION

B<mvto> is a smart B<vos move> that uses B<vos examine> to determine where
the volume is currently located and how it is currently replicated.  It
essentially allows the user to say "make the volume distribution look like
this" and it will make the changes necessary to do that.

For replicated volumes, the first server/partition pair is taken as the
location of the read/write and every additional server/partition pair is
taken as a site to put a replica.  (One replica is automatically put on the
same partition as the read/write, if any replication sites are specified, so
the result will be a replication site on every server/partition pair given.)

B<mvto> will check the available space on the partition to which a volume is
being moved and any partitions where replicas are being added and will
decline to do anything if its operation would take that partition over 90%
full.  This threshold can be changed with the B<-t> option.

If the volume is replicated and is already located on the same server as the destination, even if it's on a different partition, this is considered by B to be "good enough" and the volume will not be moved. Similarly with replication sites, if there is already a replication site on that server (even on a different partition), that replication site won't be moved or removed and will be counted as one of the replication sites for the volume. To move replicated volumes between partitions on the same server requires more finesse and special cases since one cannot have two replicas on the same server, so it should be done by hand. If any details about the replication of the volume had to be changed (and the volume is replicated), the volume will be released. In practice, this means that unless the volume is already located on all of the same servers given on the command line, already has a replication site on the same partition as the read/write, and already has the right number of replication sites, the volume will be released if replicated. If the volume needs to be released, B will check to see if it has any unreleased changes. If so, it will refuse to perform any operations unless the B<--force> (or B<-f>) command-line option is given to avoid accidentally releasing volumes with unreleased changes. Don't rely on this check completely, since the volume could be modified between when B checks and when it actually releases the volume. If the read/write volume has to be moved, B will run B on the volume after the move (since volume moves have a side effect of deleting the backup volume). Don't use this program on volumes that shouldn't have a backup volume. B will neither increase nor decrease the replication of a volume. If the number of replication sites should be changed, or if the volume is currently unreplicated and should be replicated, this should be done by hand before running B, using B and B. 
AFS servers may be specified as just a number; all numeric server names will have C prepended to them. As with all AFS commands, partitions may be specified as a simple letter, as C, or as C. More than 26 partitions on one server is not supported. Partitions may also be specified as C<.>, in which case the partition on that server with the most free space according to B is chosen, or as a string of letters and letter ranges such as C, in which case the partition of the set specified with the most free space is chosen. (In this example, the set is /vicepa, /vicepc, /vicepe through /vicepg, or /vicepm on the given server.) B passes the verbose flag to most B commands it runs, and passes the B<-f> flag to B. =head1 OPTIONS =over 4 =item B<-h>, B<--help> Print out this documentation (which is done simply by feeding the script to C). =item B<-f>, B<--force> Release a volume if a release is required, even if that volume has unreleased changes. Without this flag, B will refuse to release a volume that has unreleased changes. =item B<-l> I, B<--list>=I Rather than take a single volume name on the command line, read in a list of volumes to act on from the file I. I should contain a simple list of AFS volume names, one per line, and each volume will be moved as if it were specified on the B command line, using the location information given on the command line. =item B<-L> I, B<--location-list>=I Similar to B<-l> described above, but rather than taking just a list of volumes, one per line, instead expect a list of volumes followed by server and partition information for where to put those volumes. The server and partition information should be separated by whitespace and will be interpreted exactly as if it were given on the command line. For example, running C with a file named F that contains: user.rra afssvr12 . user.neilc afssvr10 e would be equivalent to running the two commands: mvto user.rra afssvr12 . 
mvto user.neilc afssvr10 e Command-line arguments cannot be given in the I file, only server and partition information. The B<-s> flag may still be given on the command line and will affect how the location information in the I file is interpreted. =item B<-n>, B<--dry-run>, B<--just-print> Print out volume status information and the commands that B would run, but don't execute any of them. =item B<-s>, B<--single> Rather than being a complete specification of the location at which the volume should be put, the command-line arguments are taken to be two server/partition pairs. The first is the current location of a site of the volume (either a replica or the read/write with its replica), and the second pair is the location to move that site to. This allows B to be used to evacuate a single replication site server without having to know where the other sites for each of those volumes are located, while still taking advantage of B's understanding of B, creation of backup volumes, keeping a replica with the read/write volume, and so forth. It generally doesn't make any sense to use this option with unreplicated volumes unless it's used in conjunction with B<-L> with mixed RO and RW volumes. Otherwise, there's no reason not to use the regular syntax. What's special about this option is that it leaves all other sites of a volume alone. =item B<-t> I, B<--threshold>=I The safety threshold. B will not move a volume or add a replica to a partition if by doing so it would increase the usage of that partition above this threshold (specified as a ratio of used space to total space). The default is 0.9 (90% full). To disable this safety check, use C<-t 1.0>. =item B<-v>, B<--version> Print out the version of B and exit. 
=back =head1 EXAMPLES Move the volume ls.trip.windlord, wherever it is, to afssvr3 /vicepd: mvto ls.trip.windlord afssvr3 /vicepd Move the volume ls to afssvr5 /vicepa, with replication sites on that same partition, on afssvr6 /vicepk, and on afssvr10 /vicepb: mvto ls 5 a 6 k 10 b Move the volume pubsw to the partition on afssvr10 with the most free space, with one replication site on afssvr11 on whichever partition of the first three has the most free space. This volume will be released even if it has unreleased changes. mvto -f pubsw 10 . 11 a-c Move all of the volumes in the file F to the most empty partition on afssvr9. Note that the check for the most empty partition is done anew for each individual volume move, so the volumes will be spread out across the server. mvto -l evacuate afssvr9 . For the volume pubsw, move only the site (whether a replica or a read/write with replica) on afssvr10 a to the most empty partition on afssvr11: mvto -s pubsw afssvr10 /vicepa 11 . Do the same for every volume listed in the file F: mvto -l evacuate -s 10 vicepa 11 . Move every volume in the file F to the locations specified on that line of the file, aborting if moving a volume would bring any partition to over 85% full: mvto -t 0.85 -L locations =head1 BUGS If a replicated volume has no RO replica on the same server as its RW site, moving the RW site with C will increase the replication of the volume by adding a new RO site at the destination for the RW site without removing one of the other RO sites. =head1 AUTHORS Russ Allbery , based on a much simpler script by Neil Crellin that only handled unreplicated volumes. =head1 COPYRIGHT AND LICENSE Copyright 1998, 1999, 2001, 2003, 2004, 2005 Board of Trustees, Leland Stanford Jr. University. This program is free software; you may redistribute it and/or modify it under the same terms as Perl itself. 
=head1 SEE ALSO vos(1), vos_addsite(1), vos_backup(1), vos_examine(1), vos_move(1), vos_release(1), vos_remove(1) The current version of this program is available from its web page at L. =cut