the status of $CNF{project

#! /sw/bin/perl -w # Copyright (c) 2003 Henk Penning, all rights reserved. # penning@cs.uu.nl, http://www.cs.uu.nl/staff/henkp.html # Version 1.1 was donated to the Apache Software Foundation 2003 Jan 28 # $Id: mirmon,v 1.38 2007/08/18 15:00:07 henkp Exp henkp $ # Permission is hereby granted, free of charge, to any person obtaining a # copy of this software and associated documentation files (the "Software"), # to deal in the Software without restriction, including without limitation # the rights to use, copy, modify, merge, publish, distribute, sublicense, # and/or sell copies of the Software, and to permit persons to whom the # Software is furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER # DEALINGS IN THE SOFTWARE. # # Thanks to Klaus Heinz for sugestions ao htm_head my $PRG = 'mirmon' ; my $VER = '$Id: mirmon,v 1.38 2007/08/18 15:00:07 henkp Exp henkp $' ; use strict ; use IO::Pipe ; use IO::Select ; use Net::hostent ; my $DEF_CNF = "/etc/$PRG.conf" ; my %CNF = qw( timeout 300 max_probes 25 min_poll 1h max_poll 4h min_sync 1d max_sync 2d list_style plain put_histo top randomize 1 ) ; my @REQ_KEYS = qw( web_page state countries mirror_list probe project_name project_url icons ) ; my @OPT_KEYS = qw( project_logo min_poll min_sync max_sync list_style htm_top htm_foot htm_head put_histo ) ; my %CNF_KEYS ; for ( @REQ_KEYS, @OPT_KEYS, keys %CNF ) { $CNF_KEYS { $_ } ++ ; } my $TIM_PAT = '^(\d+)([smhd])$' ; my @LIST_STYLE = qw(plain apache) ; my @GET_OPTS = qw(all update) ; my @PUT_HGRAM = qw(top bottom nowhere) ; my $HIST = 14 ; my %APA_TYPES = () ; for ( qw(backup ftp http) ) { $APA_TYPES { $_ } ++ ; } my $prog = substr($0,rindex($0,'/')+1) ; my $Usage = <= 0 ; my %WGT ; my $GET = IO::Select -> new () ; my %URL ; my %RES ; my %OLD ; my %LST ; my %CCS ; my %HREF ; # sub exp_date { my @day = qw(Sun Mon Tue Wed Thu Fri Sat) ; my @mon = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec) ; my @gmt = gmtime time + 3600 ; sprintf "%s, %02d %s %4d %02d:%02d:%02d GMT" , $day [ $gmt [ 6 ] ] , $gmt [ 3 ] , $mon [ $gmt [ 4 ] ] , $gmt [ 5 ] + 1900 , @gmt [ 2, 1, 0 ] ; } sub find_conf { return $opt{c} if $opt{c} ; my $HOME = ( getpwuid $< ) [ 7 ] or Error "can get homedir '$<' ($!)" ; my @LIST = ( "$PRG.conf" , "$HOME/.$PRG.conf" , $DEF_CNF ) ; for my $conf ( @LIST ) { return $conf if -f $conf ; } Error sprintf "can't find a config file :\n %s" , join "\n ", @LIST ; } sub show_conf { print "vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv\n" ; for my $key ( sort keys %CNF ) { next if $key =~ m/^_/ ; print "show_conf : $key = '$CNF{$key}'\n" ; } for my $key ( sort keys %HREF ) { printf "show_conf : for site '%s' use instead\n '%s'\n", $key, $HREF { $key } if $opt{v} ; } printf "show_conf : included '%s'\n", join "', '", @{ $CNF{_include} } ; print "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n" ; } sub get_conf ; sub get_conf { my $FILE = shift ; if ( grep $_ eq $FILE, @{ $CNF {_include} } ) { Error "already included : '$FILE'" ; } else { push @{ $CNF {_include} }, $FILE ; } open FILE, $FILE or Error "can't open '$FILE' ($!)" ; my $CONF = join "\n", grep /./, ; close FILE ; $CONF =~ s/\t/ /g ; # replace tabs $CONF =~ s/^[+ ]+// ; # delete leading space, plus $CONF =~ s/\n\n\s+/ /g ; # glue continuation lines $CONF =~ s/\n\n\+\s+//g ; # glue concatenation lines $CONF =~ s/\n\n\./\n/g ; # glue concatenation lines chop $CONF ; print "--$CONF--\n" if $opt{d} ; for ( grep ! /^#/, split /\n\n/, $CONF ) { my ($key,$val) = split ' ', $_, 2 ; $val = '' unless defined $val ; print "conf '$FILE' : key '$key', val '$val'\n" if $opt{d} ; if ( exists $CNF_KEYS { $key } ) { $CNF { $key } = $val ; } elsif ( $key eq 'site_url' ) { my ( $site, $url ) = split ' ' , $val ; $url .= '/' unless $url =~ m!/$! ; $HREF { lc $site } = $url ; printf "config : for site '%s' use instead\n '%s'\n", $site, $url if $opt{v} ; } elsif ( $key eq 'env' ) { my ( $x, $y ) = split ' ' , $val ; $ENV { $x } = $y ; printf "config : setenv '%s'\n '%s'\n", $x, $y if $opt{v} ; } elsif ( $key eq 'no_randomize' ) { $CNF { randomize } = 0 ; } elsif ( $key eq 'include' ) { get_conf $val ; } elsif ( $key eq 'show' ) { show_conf unless $opt{q} ; } elsif ( $key eq 'exit' ) { Error 'exit per config directive' ; } elsif ( $key eq 'max_age' ) { $CNF { max_sync } = $val ; } else { show_conf ; Error "unknown keyword '$key' (value '$val')" ; } } } sub get_conf_opt { my $err = '' ; get_conf find_conf ; $CNF { timeout } = $opt{t} if $opt{t} ; for my $key ( @REQ_KEYS ) { unless ( exists $CNF { $key } ) { $err .= "$prog error: missing config for '$key'\n" ; } } for my $key ( qw(min_poll max_poll max_sync min_sync) ) { my $max = $CNF { $key } ; unless ( $max =~ /$TIM_PAT/o ) { $err .= "$prog error: $key ($max) doesn't match /$TIM_PAT/\n" ; } } unless ( grep $CNF { list_style } eq $_, @LIST_STYLE ) { $err .= sprintf "%s : error: unknown 'list_style' '%s'\n", $prog, $CNF { list_style } ; } unless ( grep $CNF { put_histo } eq $_, @PUT_HGRAM ) { $err .= sprintf "%s : error: unknown 'put_histo' '%s'\n", $prog, $CNF { put_histo } ; } if ( $opt { get } and not grep $opt { get } eq $_, @GET_OPTS ) { $err .= sprintf "%s : error: unknown 'get option' '%s'\n", $prog, $opt { get } ; } Error $err if $err ; $opt{q} = 0 if $opt{v} ; } sub tim_to_s { my $tim = shift ; my %tab = ( 's' => 1, 'm' => 60, 'h' => 60 * 60, 'd' => 60 * 60 * 24 ) ; Error "wrong time '$tim'" unless $tim =~ /$TIM_PAT/o ; my $m = $1 ; my $u = $2 ; return $m * $tab { $u } ; } sub aprx_eq { my ( $t1, $t2 ) = @_ ; abs ( $t1 - $t2 ) < 60 ; } sub aprx_ge { my ( $t1, $t2 ) = @_ ; $t1 > $t2 or aprx_eq $t1, $t2 ; } sub aprx_le { my ( $t1, $t2 ) = @_ ; $t1 < $t2 or aprx_eq $t1, $t2 ; } sub aprx_gt { my ( $t1, $t2 ) = @_ ; $t1 > $t2 and not aprx_eq $t1, $t2 ; } sub aprx_lt { my ( $t1, $t2 ) = @_ ; $t1 < $t2 and not aprx_eq $t1, $t2 ; } sub pr_interval { my $s = shift ; my ( $magn, $unit ) ; my $mins = $s / 60 ; my $m = int ( $mins + 0.5 ) ; my $hours = $s / ( 60 * 60 ) ; my $h = int ( $hours + 0.5 ) ; if ( $s < 50 ) { $magn = $s ; $unit = 'second' ; } elsif ( $m < 50 ) { $magn = $m ; $unit = 'minute' ; } elsif ( $h < 36 ) { $magn = $h ; $unit = 'hour' ; } else { $magn = sprintf "%.1f", $hours / 24 ; $unit = 'day' ; } $unit .= 's' unless $magn == 1 ; return "$magn $unit" ; } sub max_age1 { ( tim_to_s $CNF { min_sync } ) + ( tim_to_s $CNF { max_poll } ) ; } sub max_age2 { ( tim_to_s $CNF { max_sync } ) + ( tim_to_s $CNF { max_poll } ) ; } sub max_vrfy { ( tim_to_s $CNF { min_poll } ) + ( tim_to_s $CNF { max_poll } ) ; } sub age_code { my $time = shift ; return 'z' unless $time =~ /^\d+$/ ; return ( ( aprx_ge ( $time, $^T - max_age1 ) ) ? 's' : ( aprx_ge ( $time, $^T - max_age2 ) ? 'b' : 'f' ) ) ; } sub err { my $url = shift ; my $stat = shift ; printf "*** %-10s %s\n", $stat, $url unless $opt{q} ; my ( $time, $vrfy, $hstp, $hsts ) ; if ( exists $OLD { $url } ) { $time = $OLD { $url } [ 0 ] ; $vrfy = $OLD { $url } [ 2 ] ; $hstp = substr $OLD { $url } [ 3 ], 1 - $HIST ; $hsts = $OLD { $url } [ 4 ] ; } else { $time = 'undef' ; $vrfy = 'undef' ; $hstp = '' ; $hsts = '' ; } $RES { $url } = [ $time, $stat, $vrfy, $hstp . 'f', $hsts, $^T ] ; } sub res { my $url = shift ; my $time = shift ; my $stat = shift ; my $hstp = ( exists $OLD { $url } ? substr ( $OLD { $url } [ 3 ], 1 - $HIST ) : '' ) ; my $hsts = ( exists $OLD { $url } ? $OLD { $url } [ 4 ] : '') ; printf "result %d %s\n", $time, $url if $opt{v} ; $RES { $url } = [ $time, $stat, $^T, $hstp . 's', $hsts, $^T ] ; } sub get_state { my $STT = shift ; open STT, $STT or Error "can't open '$STT' ($!)" ; while ( ) { chop ; my ( $url, $time, $stat, $vrfy, $hstp, $hsts, $lprb ) = split ' ' ; $stat =~ s/_/ /g ; $hstp = '' unless defined $hstp ; $hsts = '' unless defined $hsts ; $hsts = '' if $hsts eq 'undef' ; $lprb = 'undef' unless defined $lprb ; $OLD { $url } = [ $time, $stat, $vrfy, $hstp, $hsts, $lprb ] ; } close STT ; } sub check_hist { my $time = shift ; my $hsts = shift ; printf "check_hist: last '$time' hsts '$hsts'\n" if $opt{d} ; my $res = $hsts ; my ( $stmp, $hist ) ; if ( $hsts eq '' ) { $stmp = 0 ; $hist = '' ; } else { ( $stmp, $hist ) = split '-', $hsts ; } if ( aprx_le $stmp, $^T - tim_to_s '1d' ) { $res = sprintf "%s-%s%s" , $^T , substr ( $hist, 1 - $HIST ) , age_code ( $time ) ; } return $res ; } sub put_state { my $STT = shift ; my $TMP = "$STT.tmp" ; open TMP, ">$TMP" or Error "can't write '$TMP' ($!)" ; for my $url ( sort keys %RES ) { $RES { $url } [ 4 ] = check_hist $RES { $url } [ 0 ], $RES { $url } [ 4 ] ; my @OUT = @{ $RES { $url } } ; $OUT [ 1 ] =~ s/\s/_/g ; printf TMP "%s %s\n", $url, join ' ', @OUT or Error "can't print to $TMP ($!)" ; } close TMP ; if ( -z $TMP ) { Warn "wrote empty state file; keeping previous version" ; } else { rename $TMP, $STT or Error "can't rename '$TMP', '$STT' ($!)" ; } } sub get_ccs { my $CCS = shift ; open CCS, $CCS or Error "can't open '$CCS' ($!)" ; while ( ) { chop ; next if /^#/ ; my ( $code, $dash, $reg ) = split ' ', $_, 3 ; $CCS { lc $code } = lc $reg ; } close CCS ; } sub type_site { my $url = shift ; my ( $type, $site, $home ) ; if ( $url =~ m!^(ftp|http)://([^/:]+)(:\d+)?/! ) { $type = $1 ; $site = $2 ; $home = $& ; } return $type, $site, $home ; } sub type { my ( $t, $s, $h) = type_site $_[0] ; $t ; } sub site { my ( $t, $s, $h) = type_site $_[0] ; $s ; } sub home { my ( $t, $s, $h) = type_site $_[0] ; $h ; } sub get_list { my $LST = shift ; my ( $reg, $url ) ; open LST, $LST or Error "can't open '$LST' ($!)" ; while ( ) { chop ; next if /^#/ ; next if /^\s*$/ ; if ( $CNF { list_style } eq 'plain' ) { ( $reg, $url ) = split ' ' ; unless ( $url =~ m!/$! ) { print "*** mirmon appended '/' to $url\n" unless $opt{q} ; $url .= '/' ; } } elsif ( $CNF { list_style } eq 'apache' ) { my $apache_type ; ( $apache_type, $reg, $url ) = split ' ' ; unless ( defined $APA_TYPES { $apache_type } ) { print "*** strange type : $apache_type\n" unless $opt{q} ; next ; } unless ( $url =~ m!/$! ) { print "*** missing '/' in $url\n" unless $opt{q} ; $url .= '/' ; } } my $site = site $url ; my $type = type $url ; unless ( defined $site ) { print "*** strange url : '$url'\n" unless $opt{q} ; next ; } $LST { $url } = [ $type , $site, $reg ] ; } } sub url { sprintf '%s', $_[0], $_[1] ; } sub nam { sprintf '%s', $_[0], $_[1] ; } sub SMA { sprintf "%s", $_[0] ; } sub BLD { sprintf "%s", $_[0] ; } sub NSS { sprintf SMA('%s site%s'), $_[0], ( $_[0] == 1 ? '' : 's' ) ; } sub TAB { sprintf "%s

", $_[0] ; } sub TR { sprintf "%s\n", $_[0] ; } sub TH { sprintf "%s\n", $_[0] ; } sub TD { sprintf "%s\n", $_[0] ; } sub TDr { sprintf "%s\n", $_[0] ; } sub RED { sprintf "%s", $_[0] ; } sub GRN { sprintf '%s', $_[0] ; } sub htmlquote { my $x = shift ; $x =~ s/&/&/g ; $x =~ s//>/g ; return $x ; } sub diff { my $time = shift ; my $max = shift ; my $res ; if ( $time == $^T ) { $res = BLD 'renewed' ; } else { $res = pr_interval $^T - $time ; $res = BLD RED $res if aprx_lt $time, $max ; } return $res ; } sub img_sf_cnt { sprintf '

' , $CNF { icons }, $_[0], $_[1] ; } sub img_sf { img_sf_cnt $_[0], 1 ; } sub show_hist { my $hst = shift ; return '' unless $hst =~ m/^[sbfz]+$/ ; if ( length $hst == $HIST and $hst =~ /^(s*b)s*$/ ) { return img_sf_cnt 'sb', length $1 ; } elsif ( length $hst == $HIST and $hst =~ /^(s*f)s*$/ ) { return img_sf_cnt 'sf', length $1 ; } elsif ( length $hst == $HIST and $hst =~ /^(s*b)fs*$/ ) { return img_sf_cnt 'sbf', length $1 ; } my $res = '' ; my $cnt = 1 ; my $prf = substr $hst, 0, 1 ; $hst = substr $hst, 1 ; while ( $hst ne '' ) { if ( substr ( $prf, 0, 1 ) eq substr ( $hst, 0, 1 ) ) { $cnt ++ ; $hst = substr $hst, 1 ; } else { $res .= img_sf_cnt $prf, $cnt ; $prf = substr $hst, 0, 1 ; $hst = substr $hst, 1 ; $cnt = 1 ; } } $res .= img_sf_cnt $prf, $cnt if $cnt ; return $res ; } sub show_hist_age { my $hsts = shift ; my $time = shift ; return '' if $hsts eq '' ; my ( $t, $h ) = split '-', $hsts ; if ( aprx_lt $t, $^T ) { $h .= age_code $time ; } return show_hist substr $h, - $HIST ; } sub gen_histogram_probes { my ( $time, $stat, $vrfy, $hstp, $hsts, $lprb ) ; my %tab = () ; my %bad = () ; my $res = '' ; my $s_cnt = 0 ; my $f_cnt = 0 ; my $hr_min ; my $hr_max ; return '' unless scalar keys %RES ; for my $url ( keys %RES ) { ( $time, $stat, $vrfy, $hstp, $hsts, $lprb ) = @{ $RES { $url } } ; my $hr = int ( ( $^T - $lprb ) / 3600 + 0.5 ) ; $hr_min = $hr if ! defined $hr_min or $hr < $hr_min ; $hr_max = $hr if ! defined $hr_max or $hr > $hr_max ; if ( $stat eq 'ok' ) { $tab { $hr } ++ ; $s_cnt ++ ; } else { $bad { $hr } ++ ; $f_cnt ++ ; } } $res = TR ( TH ( 'hours ago' ) . TH ( 'succ' ) . TH ( 'fail' ) . TH sprintf ( '%s %s, %s %s' , $s_cnt , GRN ( 'successful' ) , $f_cnt , RED ( 'failed' ) ) ) ; my $max = 0 ; for my $x ( keys %tab ) { my $tot = $tab { $x } + ( $bad { $x } || 0 ) ; $max = $tot if $max < $tot ; } ; return "

\nnothing yet\n

\n" unless $max ; for my $hr ( $hr_min .. $hr_max ) { my $x = $tab { $hr } || 0 ; my $y = $bad { $hr } || 0 ; my $n = int ( $x / $max * $HIST ) ; my $b = int ( $y / $max * $HIST ) ; $res .= TR ( TDr ( $hr ) . TDr ( $x ) . TDr ( $y ) . TD ( ( $n ? img_sf_cnt ( 's', $n ) : '' ) . ( $b ? img_sf_cnt ( 'f', $b ) : '' ) . ( ( $n + $b ) ? '' : ' ' ) ) ) ; } return "

\n" . TAB ( $res ) . "

\n" ; } sub age_avg { my @tab = () ; for my $url ( keys %RES ) { my $time = $RES { $url } [ 0 ] ; push @tab, $^T - $time if $time =~ /^\d+$/ ; } my $cnt = @tab ; return undef if $cnt == 0 ; @tab = sort { $a <=> $b } @tab ; my $tot = 0 ; for my $age ( @tab ) { $tot += $age ; } my $mean = $tot / $cnt ; my $median ; if ( $cnt % 2 ) { my $mid = int ( $#tab / 2 ) ; $median = ( $tab [ $mid ] + $tab [ $mid + 1 ] ) / 2 ; } else { my $mid = int ( $#tab / 2 ) ; $median = $tab [ $mid ] ; } if ( @tab < 2 ) { return $mean, $median, undef ; } my $sum = 0 ; for my $age ( @tab ) { $sum += ( $age - $mean ) ** 2 ; } my $stddev = sqrt ( $sum / ( $cnt - 1 ) ) ; return $mean, $median, $stddev ; } sub gen_histogram { my $MAX_H = max_age1 ; my $MAX_h = 1 + ( ( 20 * 3600 <= $MAX_H and $MAX_H <= 36 * 3600 ) ? int ( $MAX_H / 3600 ) : 25 ) ; my $MAX_O = max_age2 ; my $MAX_o = int ( $MAX_O / 3600 + 0.5 ) ; my $H = 18 ; my %W = ( 'old' => 1, 'ded' => 1, 'bad' => 1 ) ; my %Wmx = ( 'old' => 5, 'ded' => 3, 'bad' => 3 ) ; my %tab ; my %hst ; my $res ; for ( my $x = 0 ; $x < $MAX_h ; $x ++ ) { $tab { $x } = 0 ; } $tab { old } = 0 ; $tab { ded } = 0 ; $tab { bad } = 0 ; for my $url ( keys %RES ) { my $time = $RES { $url } [ 0 ] ; if ( $time =~ /^\d+$/ ) { my $s = $^T - $time ; my $hr = int ( $s / $MAX_H * ( $MAX_h - 1 ) + 0.5 ) ; if ( $s <= $MAX_H ) { $tab { $hr } ++ ; } elsif ( $s <= $MAX_O ) { $tab { old } ++ ; } else { $tab { ded } ++ ; } } else { $tab { bad } ++ ; } } my $max = 0 ; for ( grep ! exists $Wmx { $_ }, keys %tab ) { $max = $tab { $_ } if $tab { $_ } > $max ; } my %bad ; for my $aux ( keys %Wmx ) { $bad { $aux } = $tab { $aux } ; if ( $bad { $aux } > $max ) { $W { $aux } = $Wmx { $aux } ; my $d = int ( $bad { $aux } / $W { $aux } ) ; for ( my $i = 1 ; $i < $W { $aux } ; $i++ ) { $tab { $aux . $i } = $d ; if ( $bad { $aux } % $Wmx { $aux } > $i ) { $tab { $aux . $i } ++ ; $tab { $aux } -- ; } } $tab { $aux } -= ( $W { $aux } - 1 ) * $d ; $max = $tab { $aux } if $max < $tab { $aux } ; } } # if ( $opt{v} ) # { for my $hr ( keys %tab ) # { printf "tab '%s' = '%s'\n", $hr, $tab { $hr } ; } # } return 'nothing yet' unless $max ; $H = $max if 8 <= $max and $max <= 26 ; for ( keys %tab ) { $hst { $_ } = int ( $H * $tab { $_ } / $max + 0.5 ) ; } my @keys = sort { $a <=> $b } grep /^\d+$/, keys %hst ; my $tab_hr = 0 ; for my $hr ( @keys ) { $tab_hr += $tab { $hr } ; } push @keys , grep ( m/^old/, sort keys %tab ) , grep ( m/^ded/, sort keys %tab ) , grep ( m/^bad/, sort keys %tab ) ; for ( my $h = $H ; $h > 0 ; $h -- ) { $res .= "\n" ; $res .= sprintf "↑\n" if $h == $H ; $res .= sprintf '%s' . "\n" , $H-6, NSS ( $max ) if $h == $H - 3 ; $res .= sprintf "↓\n" if $h == 3 ; for my $x ( @keys ) { $res .= sprintf "%s\n" , ( ( $hst { $x } >= $h ) ? img_sf ( $x =~ /^\d+$/ ? 's' : ( $x =~ /^old/ ? 'b' : ( $x =~ /^ded/ ? 'f' : 'z' ) ) ) : ( ( $h == 1 and $hst { $x } == 0 ) ? sprintf ( '

' , $CNF { icons } ) : '' ) ) ; } $res .= "\n" ; } my $HR = '

' ; $res .= "\n" ; $res .= sprintf "$HR\n", 1 ; $res .= sprintf "$HR\n", $MAX_h ; $res .= sprintf "$HR\n", $W { old } ; $res .= sprintf "$HR\n", $W { ded } ; $res .= sprintf "$HR\n", $W { bad } ; $res .= "\n" ; $res .= "\n" ; $res .= ' age → ' ; $res .= "|\n" ; $res .= sprintf ( '' . '← 0 ≤ age ≤ %s →' . "\n" , $MAX_h - 2, pr_interval ( $MAX_H ) ) ; $res .= "|\n" ; $res .= sprintf ( '' . ' %sh < %s ≤ %sh ' . "\n" , $W { old }, int($MAX_H/60/60) , BLD ( 'age' ), $MAX_o ) ; $res .= sprintf ( '' . ' old ' . "\n" , $W { ded } ) ; $res .= sprintf ( '' . ' bad ' . "\n" , $W { bad } ) ; $res .= "\n" ; my $FRMT = ' %s ' ; $res .= "\n" ; $res .= sprintf "$FRMT\n", 1, NSS scalar keys %RES ; $res .= "|\n" ; $res .= sprintf "$FRMT\n", $MAX_h - 2, NSS $tab_hr ; $res .= "|\n" ; $res .= sprintf "$FRMT\n", $W { old }, NSS $bad { old } ; $res .= sprintf "$FRMT\n", $W { ded }, NSS $bad { ded } ; $res .= sprintf "$FRMT\n", $W { bad }, NSS $bad { bad } ; $res .= "\n" ; $res = "\n$res\n

\n" ; $res = sprintf "%s

\n" , "\n$res\n" ; if ( $max == $H ) { $res .= sprintf "
units %s %s %s %s represent one mirror site.\n" , img_sf ( 's' ), img_sf ( 'f' ), img_sf ( 'b' ), img_sf ( 'z' ) ; } else { $res .= sprintf "
each %s %s %s %s unit represents %s mirror sites.\n" , img_sf ( 's' ) , img_sf ( 'f' ), img_sf ( 'b' ) , img_sf ( 'z' ) , sprintf ( "%.1f", $max / $H ) } return $res ; } sub revdom { my $dom = shift ; join '.', reverse split /\./, $dom ; } sub by_type_site { my $a_type = $a -> [ 0 ] ; my $b_type = $b -> [ 0 ] ; my $a_site = $a -> [ 2 ] ; my $b_site = $b -> [ 2 ] ; ( revdom $a_site ) cmp ( revdom $b_site ) or $a_type cmp $b_type ; } sub by_CCS { ( $CCS { $a } || $a ) cmp ( $CCS { $b } || $b ) ; } sub legend ; sub gen_page { my $PPP = shift ; my $TMP = "$PPP.tmp" ; my %tab ; my $refs ; for my $url ( keys %LST ) { my ( $type , $site, $reg ) = @{ $LST { $url } } ; push @{ $tab { $reg } }, [ $type, $url, $site ] ; } my $bad = 0 ; my $old = 0 ; my $unr = 0 ; my %stats ; my @stats ; my $ok = 0 ; for my $url ( keys %RES ) { my ( $time, $stat, $vrfy ) = @{ $RES { $url } } ; if ( $stat eq 'ok' ) { $ok ++ ; } else { $stats { $stat } ++ ; } if ( $time eq 'undef' ) { $bad ++ ; } elsif ( 'f' eq age_code $time ) { $old ++ ; } if ( $vrfy eq 'undef' or aprx_lt $vrfy, $^T - max_vrfy ) { $unr ++ ; } } my $STAT = sprintf "%d bad -- %d older than %s -- %s unreachable for more than %s" , $bad , $old , pr_interval ( max_age2 ) , $unr , pr_interval ( max_vrfy ) ; my $PROB = 'last probes : ' ; push @stats, "$ok were ok" if $ok ; for my $stat ( sort keys %stats ) { push @stats, sprintf "%s had %s", $stats { $stat }, RED $stat ; } $PROB .= join ', ', @stats ; my ( $mean, $median, $stddev ) = age_avg ; my $AVGS = "mean mirror age is " ; unless ( defined $mean ) { $AVGS = "undefined" ; } else { $AVGS .= sprintf "%s", pr_interval $mean ; if ( defined $stddev ) { $AVGS .= sprintf ", std_dev %s", pr_interval $stddev ; } $AVGS .= sprintf ", median %s", pr_interval $median ; } for my $reg ( sort keys %tab ) { $refs .= sprintf " %s \n" , url "#$reg" , "$reg" ; } my $COLS = 5 ; my $LOGO = $CNF { project_logo } ? url ( $CNF { project_url } , sprintf ( '

' , $CNF { project_logo } , $CNF { project_name } ) ) : '' ; my $HTOP = $CNF{htm_top} ? $CNF{htm_top} . "\n" : '' ; my $FOOT = $CNF{htm_foot} ? $CNF{htm_foot} . "\n" : '' ; my $HEAD = $CNF{htm_head} ? $CNF{htm_head} . "\n" : '' ; my $TITL = url $CNF{project_url}, $CNF{project_name} ; my $EXPD = exp_date ; open PPP, ">$TMP" or Error "can't write $TMP ($!)" ; print PPP '' ; print PPP "\n" ; print PPP "\n" ; print PPP "the status of $CNF{project_name} mirrors\n" ; printf PPP "%s\n", '' ; print PPP "\n" ; print PPP "\n" ; print PPP $HEAD if $HEAD ; print PPP "\n" ; print PPP "\n" ; print PPP $LOGO ; print PPP "

the status of $TITL mirrors

\n" ; print PPP "\n" ; printf PPP "\n" , scalar gmtime $^T ; printf PPP "\n" , scalar gmtime ( $opt{get} ? $^T : ( stat $CNF { state } ) [9] ) ; print PPP "

date	:	%s (GMT)
last check	:	%s (GMT)

\n" ; printf PPP "

\n", $HTOP if $HTOP ; if ( $CNF { put_histo } eq 'top' ) { print PPP "

age histogram

\n" ; print PPP "

\n" ; print PPP gen_histogram ; print PPP "

\n" ; } print PPP "

regions

\n" ; print PPP "

\n" ; print PPP "\n" ; printf PPP "%s\n", $refs ; print PPP "\n" ; print PPP "

\n" ; print PPP "

report

\n" ; my $attr1 = "COLSPAN=$COLS BGCOLOR=\"LIME\"" ; my $attr2 = 'BGCOLOR="AQUA"' ; print PPP "

\n" ; print PPP "\n" ; printf PPP "\n" , scalar keys %LST , scalar keys %tab ; printf PPP "\n", $STAT ; printf PPP "\n", $PROB ; printf PPP "\n", $AVGS ; print PPP "\n" ; printf PPP " \n" , $CNF { project_name } ; printf PPP " \n", 'type' ; printf PPP " \n", 'mirror age,
daily stats' ; printf PPP " \n", 'last probe,
probe stats' ; printf PPP " \n", 'last stat' ; print PPP "\n" ; for my $reg ( sort by_CCS keys %tab ) { my $itms = $tab { $reg } ; my $ccs = exists $CCS { $reg } ? $CCS { $reg } : $reg ; $ccs = nam $reg, ( scalar @{ $itms } > 6 ? sprintf "%s - %d sites" , $ccs, scalar @{ $itms } : $ccs ) ; my $attr3 = "COLSPAN=$COLS BGCOLOR=\"YELLOW\"" ; printf PPP "\n" ; for my $itm ( sort by_type_site @{ $itms } ) { my ( $type, $url, $site ) = @{ $itm } ; my ( $time, $stat, $hstp, $hsts, $vrfy ) ; my ( $pr_time, $pr_last, $pr_hstp, $pr_hsts ) ; print PPP "\n" ; printf PPP " \n" . " \n" , url ( $url , $site ) , url ( home ( $url ), '@' ) , $type ; if ( exists $RES { $url } ) { ( $time, $stat, $vrfy, $hstp, $hsts ) = @{ $RES { $url } } ; $pr_time = $time =~ /^\d+$/ ? diff $time, $^T - max_age2 : ' ' ; $pr_last = $vrfy =~ /^\d+$/ ? diff $vrfy, $^T - max_vrfy : ' ' ; $pr_hstp = show_hist $hstp ; $pr_hsts = show_hist_age $hsts, $time ; } else { ( $pr_time, $pr_last, $pr_hstp, $pr_hsts, $stat ) = ( ' ', ' ', '', '', ' ' ) ; } $stat = RED $stat if $stat ne 'ok' ; printf PPP " \n" , $pr_time, $pr_hsts ; printf PPP " \n" , $pr_last, $pr_hstp ; printf PPP " \n", $stat ; print PPP "\n" ; } } print PPP "
%d sites in %d regions
%s
%s
%s
%s site -- home %s %s %s %s
$ccs
%s %s %s %s
%s %s
%s %s
\n" ; print PPP "

%d sites in %d regions
%s
%s
%s
%s site -- home	%s	%s	%s	%s
$ccs
%s %s	%s	%s %s	%s %s	%s

\n" ; if ( $CNF { put_histo } eq 'bottom' ) { print PPP "

age histogram

\n" ; print PPP "

\n" ; print PPP gen_histogram ; print PPP "

\n" ; } print PPP legend ; print PPP "

probe results

\n" ; print PPP gen_histogram_probes ; print PPP "

software

\n" ; print PPP "

\n" ; my $MIR_IMG = sprintf '' , $CNF { icons } ; print PPP sprintf "\n" , 'http://www.cs.uu.nl/people/henkp/mirmon/', $MIR_IMG ; print PPP "\n" ; print PPP "
%s $VER

%s	$VER

\n" ; print PPP $FOOT ; print PPP "\n" ; print PPP "" ; if ( print PPP "\n" ) { close PPP ; if ( -z $TMP ) { Warn "wrote empty html file; keeping previous version" ; } else { rename $TMP, $PPP or Error "can't rename $TMP, $PPP ($!)" ; } } else { Error "can't print to $TMP ($!)" ; } } sub legend { return <legend

project site -- home

project site is an url. The href is the href for the site in the list of mirrors, usually the root of the mirrored file tree. The text is the site of that url.
home (represented by the @-symbol) is an url pointing to the document root of the site. This pointer is useful if the project site url is invalid, possibly because the mirror site moved the archive.

type

Indicates the type (ftp or http) of the project site and home urls.

mirror age, daily stats

The mirror age is based upon the last successful probe.
Once a day the status of a mirror site is determined. The status (represented by a colored block) is appended to the right of the status history (right is recent). More precise, the status block is appended if the last status block was appended 24 (or more) hours ago.
The status of a mirror depends on its age and a few configuration parameters :

status age

this project in general

min max min max

fresh 0 @{[$CNF{min_sync}]} + @{[$CNF{max_poll}]} 0 min_sync + max_poll

oldish @{[$CNF{min_sync}]} + @{[$CNF{max_poll}]} @{[$CNF{max_sync}]} + @{[$CNF{max_poll}]} min_sync + max_poll max_sync + max_poll

old @{[$CNF{max_sync}]} + @{[$CNF{max_poll}]} ∞ max_sync + max_poll ∞

bad the site or mirror tree was never found

status	age
this project	in general
min	max	min	max
fresh	0	@{[$CNF{min_sync}]} + @{[$CNF{max_poll}]}	0	min_sync + max_poll
oldish	@{[$CNF{min_sync}]} + @{[$CNF{max_poll}]}	@{[$CNF{max_sync}]} + @{[$CNF{max_poll}]}	min_sync + max_poll	max_sync + max_poll
old	@{[$CNF{max_sync}]} + @{[$CNF{max_poll}]}	∞	max_sync + max_poll	∞
bad	the site or mirror tree was never found

last probe, probe stats

Last probe indicates when the last successful probe was made. Probe stats gives the probe history (right is recent). A probe is either a success or a failure.

last stat

Last stat gives the status of the last probe.

LEGENDA } sub start_date { my $url = shift ; my $CMD = shift ; my $TIMEOUT = $CNF { timeout } ; my $src = $HREF { lc site $url } || $url ; $CMD =~ s/%TIMEOUT%/$TIMEOUT/g ; $CMD =~ s/%URL%/$src/g ; printf "*** SUBSTITUTE site %s\n+ url %s\n+ %s\n", site($url), $HREF { lc site $url }, $CMD if $HREF { lc site $url } and $opt{v} ; my $WGT = new IO::Pipe ; my $res = $WGT -> reader ( split ' ', $CMD ) ; if ( $res ) { $WGT -> blocking ( 0 ) ; $GET -> add ( $WGT ) ; $URL { $WGT } = $url ; } else { err $url, 'no pipe' ; } } sub get_date { my $WGT = shift ; my $url = $URL { $WGT } ; my $time = undef ; $WGT -> blocking ( 1 ) ; unless ( $WGT -> eof () ) { $time = $WGT -> getline () ; } $GET -> remove ( $WGT ) ; $WGT -> flush ; $WGT -> close ; return err $url, 'no time' unless defined $time ; return err $url, "empty" if $time =~ /^\s*$/ ; $time = ( split ' ', $time ) [ 0 ] ; if ( $time !~ /^\d+$/ ) { $time = htmlquote $time ; $time = substr ( $time, 0, 15 ) . '..' if length $time > 15 ; err $url, "'$time'" ; } else { res $url, $time, 'ok' ; } } sub get_dates { my $CMD = shift ; my @QUE ; my $PAR = $CNF { max_probes } ; my $cnt_LST = scalar keys %LST ; for my $url ( sort keys %LST ) { if ( $opt{get} eq 'all' or ! exists $OLD { $url } ) { push @QUE, $url ; } elsif ( $opt{get} eq 'update' ) { my $stat = $OLD { $url } [ 1 ] ; my $vrfy = $OLD { $url } [ 2 ] ; my $lprb = $OLD { $url } [ 5 ] ; if ( ( $lprb eq 'undef' or aprx_le $lprb, $^T - tim_to_s $CNF { min_poll } ) and ( $stat ne 'ok' or aprx_le $vrfy, $^T - tim_to_s $CNF { max_poll } ) ) { push @QUE, $url ; } elsif ( $CNF { randomize } and 0 == int rand $cnt_LST ) { push @QUE, $url ; } else { $RES { $url } = $OLD { $url } ; } } else { Error "unknown opt_get '$opt{get}'" ; } } while ( @QUE ) { while ( $GET -> count () < $PAR and @QUE ) { my $url = shift @QUE ; if ( gethost site $url ) { start_date $url, $CMD ; } else { err $url, 'site not found' ; } } my @can_read = $GET -> can_read ( 0 ) ; printf "que %d, get %d, can %d\n", scalar @QUE, $GET -> count (), scalar @can_read if $opt{v} ; for my $can_read ( @can_read ) { get_date $can_read ; } sleep 1 ; } my $stop = time + $CNF { timeout } + 10 ; while ( $GET -> count () and time < $stop ) { sleep 1 ; my @can_read = $GET -> can_read ( 0 ) ; printf "wait %2d, get %d, can %d\n", $stop - scalar time, $GET -> count (), scalar @can_read if $opt{v} ; for my $can_read ( @can_read ) { get_date $can_read ; } } for my $WGT ( $GET -> handles () ) { my $url = $URL { $WGT } ; err $url, 'hangs' ; } } get_conf_opt ; get_ccs $CNF { countries } ; get_state $CNF { state } ; get_list $CNF { mirror_list } ; if ( $opt{get} ) { get_dates $CNF { probe } ; put_state $CNF { state } ; } else { %RES = %OLD } gen_page $CNF { web_page } ; __END__ =pod =head1 NAME mirmon - monitor the state of mirrors =head1 SYNOPSIS mirmon [ -v ] [ -q ] [ -t timeout ] [ -get opt ] [ -c conf ] =head1 OPTIONS option v : be verbose option q : be quiet option t : set timeout [ default 300 ] ; option get : 'all' : probe all sites : 'update' : probe a selection of the sites (see doc) option c : configuration file ; default list : ./mirmon.conf $HOME/.mirmon.conf /etc/mirmon.conf ------------------------------------------------------------------- Documentation : the program contains 'pod' style documentation. Extract the doc with 'pod2text mirmon' or 'pod2html mirmon OUT', etc. ------------------------------------------------------------------- =head1 USAGE The program is intended to be run by cron every hour. 42 * * * * perl /path/to/mirmon -q -get update It quietly probes a subset of the sites in a given list, writes the results in the 'state' file and generates a web page with the results. The subset contains the sites that are new, bad and/or not probed for a specified time. When no 'get' option is specified, the program just generates a new web page from the last known state. The program checks the mirrors by running a (user specified) program on a pipe. A (user specified) number of probes is run in parallel using nonblocking IO. When something can be read from the pipe, it switches the pipe to blocking IO and reads one line from the pipe. Then it flushes and closes the pipe. No attempt is made to kill the probe. The probe should return something that looks like "1043625600\n", that is, a timestamp followed by a newline. The exit status of the probe is ignored. =head1 CONFIG FILE =head2 location A config file can be specified with the -c option. If -c is not used, the program looks for a config file in -- ./mirmon.conf -- $HOME/.mirmon.conf -- /etc/mirmon.conf =head2 syntax A config file looks like this : +-------------------------------------------------- |# lines that start with '#' are comment |# blank lines are ignored too |# tabs are replaced by a space | |# the config entries are 'key' and 'value' pairs |# a 'key' begins in column 1 |# the 'value' is the rest of the line |somekey A_val B_val ... |otherkey X_val Y_val ... | |# indented lines are glued |# the next three lines mean 'somekey part1 part2 part3' |somekey part1 | part2 | part3 | |# lines starting with a '+' are concatenated |# the next three lines mean 'somekey part1part2part3' |somekey part1 |+ part2 |+ part3 | |# lines starting with a '.' are glued too |# don't use a '.' on a line by itself |# 'somekey' gets the value "part1\n part2\n part3" |somekey part1 |. part2 |. part3 +-------------------------------------------------- =head1 CONFIG FILE : required entries =head2 project_name Specify a short plaintext name for the project. project_name Apache project_name CTAN =head2 project_url Specify an url pointing to the 'home' of the project. project_url http://www.apache.org/ =head2 mirror_list Specify the file containing the mirrors to probe. Two formats are supported : -- plain : lines like us http://www.tux.org/ nl http://apache.cs.uu.nl/dist/ -- apache : lines like those in the apache mirrors.list ftp us ftp://ftp.tux.org/pub/net/apache/dist/ user@tux.org http nl http://apache.cs.uu.nl/dist/ user@cs.uu.nl Specify the required format with 'list_style' (see below). The default style is 'plain'. If the url part of a line doesn't end in a slash ('/'), mirmon adds a slash and issues a warning unless it is in quiet mode. =head2 web_page Specify where the html report page is written. =head2 icons Specify the directory where the icons can be found. =head2 probe Specify the program+args to probe the mirrors. Example: probe /sw/bin/wget -q -O - -T %TIMEOUT% -t 1 %URL%TIME Before the program is started, %TIMEOUT% and %URL% are substituted with the proper timeout and url values. Here it is assumed that each hour the root server writes a timestamp in /path/to/archive/TIME, for instance with a crontab entry like 42 * * * * perl -e 'printf "%s\n", time' > /path/to/archive/TIME Mirmon reads one line of output from the probe and interprets the first word on that line as a timestamp ; for example : 1043625600 1043625600 Mon Jan 27 00:00:00 2003 1043625600 www.apache.org Mon Jan 27 00:00:00 2003 =head2 state Specify where the file containing the state is written. The program reads this file on startup and writes the file when mirrors are probed (-get is specified). =head2 countries Specify the file containing the country codes; The file should contain lines like us - united states nl - netherlands The mirmon package contains a recent ISO list. =head1 CONFIG FILE : optional entries =head2 max_probes Optionally specify the number of parallel probes (default 25). =head2 timeout Optionally specify the timeout for the probes (default 300). After the last probe is started, the program waits for + 10 seconds, cleans up and exits. =head2 project_logo Optionally specify (the SRC of the IMG of) a logo to be placed top right on the page. project_logo /icons/apache.gif project_logo http://www.apache.org/icons/... =head2 htm_head Optionally specify some HTML to be placed before . htm_head =head2 htm_top Optionally specify some HTML to be placed near the top of the page. The supplied text is placed between

and

. htm_top testing 1, 2, 3 =head2 htm_foot Optionally specify HTML to be placed near the bottom of the page. htm_foot

=head2 put_histo top|bottom|nowhere Optionally specify where the age histogram must be placed. The default is 'top'. =head2 min_poll For 'min_poll' see next item. A is a number followed by a unit 's' (seconds), or 'm' (minutes), or 'h' (hours), or 'd' (days). For example '3d' (three days) or '36h' (36 hours). =head2 max_poll Optionally specify the maximum probe interval. When the program is called with option '-get update', all sites are probed which are : -- new : the site appears in the list, but there is no known state -- bad : the last probe of the site was unsuccessful -- old : the last probe was more than 'max_poll' ago. Sites are not probed if the last probe was less than 'min_poll' ago. So, if you specify min_poll 4h max_poll 12h the 'reachable' sites are probed twice daily and the 'unreachable' sites are probed at most six times a day. The default 'min_poll' is '1h' (1 hour). The default 'max_poll' is '4h' (4 hours). =head2 min_sync Optionally specify how often the mirrors are required to make an update. The default 'min_sync' is '1d' (1 day). =head2 max_sync Optionally specify the maximum allowable sync interval. Sites exceeding the limit will be considered 'old'. The default 'max_sync' is '2d' (2 days). =head2 no_randomize With a low probablility, mirmon probes mirrors that would otherwise not be probed. In the long run, this balances the number of mirror probes over the hourly mirmon runs. Specifically, if there are N mirrors in the list and some mirmon run would probe K sites, on average (N-K)/N extra sites will be probed. If you don't want this behaviour, use 'no_randomize'. =head2 list_style plain|apache Optionally specify the format ('plain' or 'apache') of the mirror-list. See the description of 'mirror_list' above. The default list_style is 'plain'. =head2 site_url Optionally specify a substitute url for a site. When access to a site is restricted (in Australia, for instance), another (sometimes secret) url can be used to probe the site. The of an url is the part between '://' and the first '/'. =head2 env Optionally specify an environment variable. =head2 include Optionally specify a file to include. The specified file is processed 'in situ'. After the specified file is read and processed, config processing is resumed in the file where the 'include' was encountered. The 'include' depth is unlimited. However, it is a fatal error to include a file twice under the same name. =head2 show When the config processor encounters the 'show' command, it dumps the content of the current config to standout, if option -v is specified. This is intented for debugging. =head2 exit When the config processor encounters the 'exit' command, it terminates the program. This is intented for debugging. =head1 STATE FILE FORMAT The state file consists of lines; one line per site. Each line consists of white space separated fields. The seven fields are : =head2 field 1 : url The url as given in the mirror list. =head2 field 2 : age The age of the site, or 'undef' if no probe was ever successful. =head2 field 3 : status last probe The status of the last probe. =head2 field 4 : time last succesful probe The timestamp of the last succesful probe or 'undef' if the site was never successfully probed. =head2 field 5 : probe history The probe history is a list of 's' (for success) and 'f' (for failure) characters indicating the result of the probe. New results are appended whenever the site is probed. =head2 field 6 : state history The state history consists of a timestamp, a '-' char, and a list of chars indicating a past status: 's' (fresh), 'b' (oldish), 'f' (old) or 'z' (bad). The timestamp indicates when the state history was last updated. The state history is updated when the state file is updated and the last update of the history state was 24 (or more) hours ago. The status is determined by the site's age and a few configuration parameters. The details are explained in the legend of the report page. =head2 field 7 : last probe The timestamp of the last probe. =head1 INSTALLATION =over =item * The '#!' path for perl is probably wrong. =back =head1 AUTHOR =begin html

© 2003 Henk P. Penning, Computer Science Department, Utrecht University
$Id: mirmon,v 1.38 2007/08/18 15:00:07 henkp Exp henkp $