From 1b1e18cc10c2172d113b721f2603806780bda02c Mon Sep 17 00:00:00 2001 From: sageweil Date: Fri, 17 Aug 2007 16:39:38 +0000 Subject: [PATCH] git-svn-id: https://ceph.svn.sf.net/svnroot/ceph@1643 29311d96-e01e-0410-9327-a35deaab8ce9 --- branches/sage/mds/script/study_find.pl | 172 +++++++++++++++++++++++++ 1 file changed, 172 insertions(+) create mode 100755 branches/sage/mds/script/study_find.pl diff --git a/branches/sage/mds/script/study_find.pl b/branches/sage/mds/script/study_find.pl new file mode 100755 index 0000000000000..7e4c760968896 --- /dev/null +++ b/branches/sage/mds/script/study_find.pl @@ -0,0 +1,172 @@ +#!/usr/bin/perl + +use strict; + +my $name = shift @ARGV; +my $dsfn = shift @ARGV; +my $fnlenfn = shift @ARGV; +my $logfn = shift @ARGV; + +my $nfiles = 0; +my $ndirs = 0; +my $nreg = 0; +my $nhardlinks = 0; +my %nlinks; +my %names; +my %dirsize; +my %fnlen; +my $nfnchars; + +my $bytes; +my $ebytes; + +# +# output generated with +# +# find . -path ./.snapshot -prune -o -exec ls -dilsn --time-style=+%s \{\} \; +# +# find output looks like this: +#4495744 4 drwxrwxrwx 24 0 0 4096 1187290970 . +#2996320 8 drwxr-xr-x 189 0 1000 8192 1186594257 ./jangle +#28378499 4 drwxr-x--x 4 1068885 52673 4096 1162938122 ./jangle/cymcruise +#28378500 4 drwx--S--- 5 1068885 52673 4096 1162938122 ./jangle/cymcruise/Maildir +#28378501 4 drwx------ 2 1068885 52673 4096 1162938122 ./jangle/cymcruise/Maildir/tmp +#28378502 4 drwx------ 2 1068885 52673 4096 1162938122 ./jangle/cymcruise/Maildir/new +#28378503 4 drwx------ 2 1068885 52673 4096 1162938122 ./jangle/cymcruise/Maildir/cur +#28378504 4 -rw-r--r-- 1 1068885 52673 260 943743700 ./jangle/cymcruise/.alias +#999425 4 drwxr-xr-x 92 1125 100 4096 1186523060 . +#999426 0 lrwxrwxrwx 1 0 0 5 1177701093 ./root -> /root +#1015809 4 drwxr-xr-x 4 1289 1000 4096 1174584949 ./andrea +#541007 4 drwxr-xr-x 3 0 0 4096 1173111449 ./andrea/lux +#5014055 4 drwx--S--- 11 70228 51207 4096 1172250346 ./andrea/lux/Maildir + +# dirs we're currently counting in +my %numindir; + +sub finish_dir { + my $curdir = shift @_; + #print "finish_dir $curdir\n"; + $dirsize{$numindir{$curdir}}++; + delete $numindir{$curdir}; +} + +my $curdir; +$ndirs = 1; +while (<>) { + #print; + chomp; + my ($ino, $blah, $mode, $blah, $nlink, $uid, $gid, $size, $mtime, @path) = split(/ /,$_); + my $file = join(' ',@path); + ($file) = split(/ \-\> /, $file); + my @bits = split(/\//, $file); + my $f = pop @bits; + my $dir = join('/', @bits); + #print "file = '$file', dir = '$dir', curdir = '$curdir'\n"; + + if ($dir ne $curdir) { + for my $d (keys %numindir) { + #print "? $d vs $dir\n"; + &finish_dir($d) if ($d ne substr($dir, 0, length($d))); + } + $curdir = $dir; + } + + my $esize = 0; + $esize = int (($size-1)/4096)*4096 + 4096 if $size > 0; + $esize += 160; # for the inode? + $bytes += $size; + $ebytes += $esize; + + $nfiles++; + $numindir{$dir}++; + + my $fnlen = length($f); + $fnlen{$fnlen}++; + $nfnchars += $fnlen; + + if ($mode =~ /^d/) { + $ndirs++; + # find does depth-first search, so assume we descend, so that on empty dir we "back out" above and &finish_dir. + $numindir{$file} = 0; + $curdir = $file; + } else { + $nreg++ if $mode =~ /^f/; + if ($nlink > 1) { + #system "ls -aldi $file"; + $nhardlinks++; + $nlinks{$nlink}++; + push(@{$names{$ino}->{$dir}}, $file); + } + } +} +for my $d (keys %numindir) { + &finish_dir($d); +} + + + +my $nsamedir = 0; +open(LOG, ">$logfn") if $logfn; +for my $ino (keys %names) { + print LOG "# $ino\n"; + my @dirs = keys %{$names{$ino}}; + my $insamedir = 1 if scalar(@dirs) == 1; + for my $dir (@dirs) { + print LOG "#\t$dir\n"; + for my $fn (@{$names{$ino}->{$dir}}) { + print LOG "#\t\t$fn\n"; + $nsamedir++ if $insamedir; + } + } +} +close LOG; + +sub do_cdf { + my $hash = shift @_; + my $num = shift @_; + my $sum = shift @_; + my $fn = shift @_; + + open(CDF, ">$fn") if $fn; + print CDF "# $name\n# val\tnum\n"; + + my $median; + my $p = 0; + for my $v (sort {$a <=> $b} keys %$hash) { + print CDF "$v\t$hash->{$v}\n"; + $p += $hash->{$v} * $v; + if (!(defined $median) && + $p >= ($sum/2)) { + $median = $v; + } + } + my $avg = sprintf("%.2f", $sum/$num); + print CDF "# avg $avg, median $median, sum $sum, num $num\n"; + return ($avg, $median); +} +close DSLOG; + + +my ($avgdirsize, $mediandirsize) = &do_cdf(\%dirsize, $ndirs, $nfiles, $dsfn); +my ($avgfnlen, $medianfnlen) = &do_cdf(\%fnlen, $nfiles, $nfnchars, $fnlenfn); + + +# stat fs +#my $df = `df $base`; +#my $line = (split(/\n/,$df))[1]; # second line +#my ($kb) = $df =~ /\s+\d+\s+(\d+)/; +my $gb = sprintf("%.1f",($ebytes / 1024 / 1024 / 1024)); + +# final line +my $pad = '# ' . (' ' x (length($name)-2)); +print "$pad\tgb\tfiles\tdirs\tdsavg\tdsmed\tfnavg\tfnmed\treg\tnl>1\tsmdr\tnlink=2\t=3\t=4\t...\n"; +print "$name\t$gb\t$nfiles\t$ndirs\t$avgdirsize\t$mediandirsize\t$avgfnlen\t$medianfnlen\t$nreg\t$nhardlinks\t$nsamedir"; +my $i = 2; +for (sort {$a <=> $b} keys %nlinks) { + while ($_ < $i) { + print "\t0"; + } + print "\t$nlinks{$_}"; + $i = $_ + 1; +} +print "\n"; -- 2.39.5