#!/usr/bin/perl -sw

sub help {
    print q{
Name:
   niceload - run a program when the load is below a certain limit

Usage:
   niceload [-v] [-n=nice] [-l=load] [-t=time] [-s=time|-f=factor] command
   niceload [-v] [-n=nice] [-l=load] [-t=time] [-s=time|-f=factor] -p=PID

Description:
   niceload will run a program when the load average is below a 
   certain limit. When the limit is reached the program will be 
   suspended for some time. Then resumed again for some time.
   Then the load load average is checked again and we start over.

   If the load is 3.00 then the default settings will run a program
   like this:
      run 1 second, suspend (3.00-1.00) seconds, run 1 second, 
      suspend (3.00-1.00) seconds, run 1 second, ...

   -n nice-level (see nice(1))
   -l is the maximal load average before suspending command (Default:
      1.00)
   -t is the seconds to sleep before checking load again (Default: 1)
   -s is the seconds to suspend command when load average is too high
   -f sets -s dynamically as load average over limit * factor
      (Default: 1)
   -p is the process ID of a running process
   -v print some extra output

Tip:
   Use -v until you know what you are doing.

Example:
   In terminal 1 run 'top'. In terminal 2 run:

     niceload perl -e '$|=1;do{$l==$r or print "."; $l=$r}until(($r=time-$^T)>50)'

   This will print a '.' every second for 50 seconds and eat a lot of
   CPU. When the load rises to 1.0 the process is suspended.

Example:
   Run updatedb but keep load below 2.0:

     niceload -l=2 updatedb

Example:
   Run rsync but keep load below 3.4. If load reaches 7 sleep 
   for (7-3.4)*12 seconds:

     niceload -l=3.4 -f=12 rsync -Ha /home/ /backup/home/

Author:
   Copyright 2004-11-19 Ole Tange http://ole.tange.dk
   Copyright 2004-11-22 Ole Tange http://ole.tange.dk
   Copyright 2004-11-25 Ole Tange http://ole.tange.dk
   Copyright 2005-01-11 Ole Tange http://ole.tange.dk
   Copyright 2007-05-31 Ole Tange http://ole.tange.dk (ported to SunOS 5.10)
   Licensed under GPL
   Inspired by work from paulh at hamjudo.com
};

}

if($f and $s) {
    # You cannot have -s and -f
    help();
    exit;
}

my $nice       = $n || 0; # -n=0 Nice level (Default: 0)
my $max_load   = $l || 1; # -l=1 Max acceptable load average (Default: 1)
my $check_time = $t || 1; # -t=1 Seconds between checking load average (Default: 1)
my $wait_factor;
my $wait_time;
if($s) {
    $wait_time = $s;      # -s=sec Seconds to suspend process when load average is too high
} else {
    $wait_factor=$f || 1; # -f=1 compute wait_time dynamically as (load - limit) * factor
}
my $processid  = $p;      # Control this PID (Default: control the command)
my $verbose = $v || $debug;

@program = @ARGV;
$SIG{CHLD} = \&REAPER;

if($processid) {
    $Child::fork = $processid;
    init_signal_handling_attached_child();
    my $child_pgrp = getpgrp $Child::fork;
    suspend_resume($max_load,$check_time,$wait_time,$wait_factor,$child_pgrp);
} elsif(@ARGV) {
    if($Child::fork = fork) {
	sleep 1; # Give child time to setpgrp(0,0);
	init_signal_handling_my_child();
	my $child_pgrp = getpgrp $Child::fork;
	suspend_resume($max_load,$check_time,$wait_time,$wait_factor,$child_pgrp);
    } else {
	setpgrp(0,0);
	$debug and debug("Child pid: $$, pgrp: ",getpgrp $$,"\n");
	if($nice) {
	    unshift(@program,"nice","-n",$nice);
	}
	$debug and debug("@program\n");
	system(@program);
	$debug and debug("Child exit\n");
	exit;
    }
} else {
    help();
    exit;
}

sub debug {
    print STDERR @_;
}


sub init_signal_handling_attached_child {
    $SIG{INT}=\&sigint_attached_child;
}

sub sigint_attached_child {
    # Let the attached child continue when detaching
    kill_child_CONT();
    exit;
}

sub init_signal_handling_my_child {
    $SIG{INT}=\&kill_child_INT;
    $SIG{TSTP}=\&kill_child_TSTP;
    $SIG{CONT}=\&kill_child_CONT;
}

use POSIX ":sys_wait_h";

sub REAPER {
    my $stiff;
    while (($stiff = waitpid(-1, &WNOHANG)) > 0) {
        # do something with $stiff if you want
    }
    $SIG{CHLD} = \&REAPER;                  # install *after* calling waitpid
}

sub kill_child_CONT {
    $debug and debug("SIGCONT received. Killing $Child::fork\n");
    kill CONT => -getpgrp($Child::fork);
}

sub kill_child_TSTP {
    $debug and debug("SIGTSTP received. Killing $Child::fork and self\n");
    kill TSTP => -getpgrp($Child::fork);
    kill STOP => -$$;
}

sub kill_child_INT {
    $debug and debug("SIGINT received. Killing $Child::fork Exit\n");
    kill INT => -getpgrp($Child::fork);
    exit;
}

sub suspend_resume {
    my ($max_load,$check_time,$wait_time,$wait_factor,@pids) = @_;
    $debug and debug("suspend_resume these @pids\n");
    resume_pids(@pids);
    while (pids_exist(@pids)) {
	if ( loadavg() > $max_load ) {
	    if($wait_factor) {
		$wait_time = (loadavg()-$max_load) * $wait_factor;
	    }
	    $verbose and debug("suspending for $wait_time seconds\n");
	    suspend_pids(@pids);
	    sleep 1; # for some reason this statement is skipped
	    sleep $wait_time;
	    resume_pids(@pids);
	}
	$verbose and debug("running for $check_time second(s)\n");
	sleep($check_time);
    }
}
    
sub pids_exist {
    my (@pids) = @_;
    my ($exists) = 0;
    for $pid (@pids) {
	if(-e "/proc/".$pid)  { $exists++ }
	#if(kill 0 => $Child::fork) { $exists++ }
    }
    return $exists;
}

sub loadavg {
    my ($loadavg);
    if(open(IN,"/proc/loadavg")) {
	# Linux specific (but fast)
	my $upString = <IN>;
	if($upString =~ m/^(\d+\.\d+)/) {
	    $loadavg = $1;
	} else {
	    die;
	}
	close IN;
    } elsif (open(IN,"uptime|")) {
	my $upString = <IN>;
	if($upString =~ m/average.\s*(\d+\.\d+)/) {
	    $loadavg = $1;
	} else {
	    die;
	}	
	close IN;
    }
    return $loadavg;
}

sub suspend_pids {
    my @pids = @_;
    signal_pids("STOP",@pids);
}

sub resume_pids {
    my @pids = @_;
    signal_pids("CONT",@pids);
}

sub signal_pids {
    my ($signal,@pids) = @_;

    # local $SIG{$signal} = 'IGNORE';
    for $pid (@pids) {
	kill $signal => -$pid; # stop PID group
    }
}

$v=$f=$l=$h=$n=$t=$s=$p=$h=$processid; # Ignore perl -w
