package TKL::Task;

use Data::Dumper;
use TKL;
use strict;
use vars qw(@ISA);
use Getopt::Std;
use English;

@ISA = qw(TKL::File);

###################################
# Parsing of command-line arguments
# Actually nothing to do with the task files,
# but we need common interfaces to all robots.
# Normally you can just call the access functions
# below, they parse args as needed. But if you 
# have private options you parse somewhere else,
# call parse_cmd_args directly, with a string that
# defines the args you wish to be ignored here. 
# This saves a runtime warning.

my $spooldir;   # -D arg
my $pidfile;    # -p arg 
my $outfile;    # -o arg
my $verbose;    # -v 
my $cmd_args_parsed=0;

sub parse_cmd_args {
    return if $cmd_args_parsed;
    my $ignore=shift; # arguments that are parsed elsewhere 'xz:'
    $ignore="" unless defined($ignore);
    $cmd_args_parsed=1;
    my %args;
    my $program= $PROGRAM_NAME;
    $program =~ s/.*\/([^\/]+)$/$1/;
    getopts("D:s:l:r:v".$ignore, \%args);
    $verbose=defined($args{"v"});
    $spooldir=$args{"D"};
    die "No spooldir defined\n" unless (defined($spooldir));
    die "Spooldir $spooldir is not there" unless (-d $spooldir);
    die "Spooldir $spooldir is not writable" unless (-w $spooldir);
    $pidfile=$args{"p"};
    $pidfile="$program.pid" unless defined($pidfile);
    $pidfile="$spooldir/$pidfile" unless ($pidfile =~ /^\//);
    $outfile=$args{"o"};
    $outfile="$program.log" unless defined($outfile);
    $outfile="$spooldir/$outfile" unless ($outfile =~ /^\//);

}# parse_cmd_args

###########################
# Interface to arguments

sub getspooldir()
{
   parse_cmd_args();
   return $spooldir; 
}

sub getpidfile()
{
   parse_cmd_args();
   return $pidfile;
}

sub getlogfile()
{
   parse_cmd_args();
   return $outfile;
}
sub getverbose()
{
   parse_cmd_args();
   return $verbose;
}

########################
# Log file handling

sub logf
{ # 'log' is reserved for logarithms ;-(
    my $date=localtime();
    my $msg=join (' ',@_);
    open LOG, ">>$outfile" 
      or die "Could not write into log $outfile $!";
    print LOG "$date [$PID] $msg\n";
    close LOG;
    if ($verbose) { print "$msg\n"; }
}

sub debug
{
    return unless ($verbose);
    logf(@_);
}

#############################
# PID file handling

sub set_pid_file
{
    if ( -f $pidfile)
    {
        open F, $pidfile 
          or die "Can not read existing pidfile $pidfile: $!";
        my $otherpid=<F>;
        chomp($otherpid);
        close F;
        debug("TKL::Task::set_pid_file: "
	      ."PID file exists and points to process $otherpid");
        if ( $otherpid )
        { 
            if ( kill(0,$otherpid))
            {
                die "Program already running\n".
                    "PID=$otherpid, from file $pidfile\n";
            } else
            {
                logf("TKL::Task::set_pid_file: Found old PID file $pidfile"
		     .". Process $otherpid not running");
            }
        }
    }
    open F, ">$pidfile"
      or die "Can not open PID file $pidfile for writing: $!\n";
    print F "$PID\n"
      or die "Can not write PID file $pidfile:  $!\n";
    close F;
}

#############################
# General file checks

sub check_read_write_file {
    my $f = shift; 
    return ((-f $f) && (-w $f));
}
sub check_read_write_dir {
    my ($d) = @_;
    return ((-d $d) && (-w $d));
}

sub check_url {
    my ($url) = @_;
    return $url;
}


############################
# Spool file handling

sub gettasklist
{
  my @filelist;
  debug("TKL::Task::gettasklist: entering");
  foreach my $f ( <$spooldir/*.spl> ){
      if ( !check_read_write_file($f) )
      {
          debug("TKL::Task::gettasklist: no rw perms for spool file "
		.$f.". Skipping");
          next;
      }
      open F, $f;
      my $root=<F>;
      my $task=<F>;
      if ( (!$root) || (!$task))
      {
          debug("TKL::Task::gettasklist: bad spool file $f. Skipping");
          next;
      }
      chomp($root);
      chomp($task);
      my $tf = "$root/$task";
      if (! -f $tf)
      {
          debug("TKL::Task::gettasklist: no task at "
		.$tf." (from spool $f). Skipping");
          # FIXME - Should we delete this spool file here ??
          next;
      }
      if (! check_read_write_file($tf) )
      {
          debug("TKL::Task::gettasklist: unwritable task file "
		.$tf." (from spool $f). Skipping");
          next;
      }
      # ok, now I believe we have a decent file
      # you MUST pass $root, otherwise newing outside of portal fails
      my $tkl = new TKL::Task(file=>$tf,root=>$root,spoolfile=>$f);
      push @filelist, $tkl;
  }
  return @filelist;
}

##############################
# Task file handling (at last)

# check a lot of details about the task.
# If all well, return a hash with the key elements
# if not, returns an empty hash
sub verify_task
{
    my $self=shift;
    my $f = $self->{file}; # just to save typing
    my %taskinfo;
    debug("TKL::Task::verify_task: verifying $f");
    if (!$self->file_exists())
    {
        debug("TKL::Task::verify_task: task file $f has disappeared. Skipping");
        return %taskinfo;
    }
    if (!$self->file_writable())
    {
        debug("TKL::Task::verify_task: task file $f not writable. Skipping");
        return %taskinfo;
    }
    

    # get task type - take differencs between oaitask and task into account
    my $dom = $self->document_element();
    my $docelement = $dom->nodeName();
    my $type = "";

    if ($docelement eq "task"){
	$type = $dom->findvalue("tasktype");
	if (!$type)
	{
	    debug("TKL::Task::verify_task: task file $f has no type. Skipping");
	    return %taskinfo;
	}
    } elsif ($docelement eq "oaitask") {
	$type = "oai";
    } else {
        debug("TKL::Task::verify_task: task file $f is not a task. Skipping");
        return %taskinfo;
    }


    # get status
    my $status = $dom->findvalue("status");
    if ( !$status )
    {
        debug("TKL::Task::verify_task: task file $f has no status. Skipping");
    }
    
    # check url
    my $url = $dom->findvalue("url");  
    if (!check_url($url))
    {
        debug("TKL::Task::verify_task: task $f has a bad url: $url "
	      ." Skipping");
        return %taskinfo;
    }

    # check target
    my $target = $dom->findvalue("target");
    my $targetdir = $self->find_portal_root()."/".$target;  
    if (!check_read_write_dir($targetdir))
    {
        debug("TKL::Task::verify_task: task $f has relative $target"
	      .". Expands to unwriteble directory $targetdir}."
	      ."  Skipping");
        return %taskinfo;
    }

    # Figure out the handler path relative to the portal root
    my $handler = $dom->findvalue("handler");
    my $task_file = $self->portal_filename;
    $task_file =~ s/\/*[^\/]*$//;
    my $proot = $self->find_portal_root();
    my $abs_handler = "$proot/$task_file/handlers/$handler";

    if (!-x $abs_handler) {
	debug("TKL::Task::veryfy_task: Task handler [$abs_handler] is non-existent or not-executable");
	return %taskinfo;
    }

    # get postprocessing xslt stylesheet
    my $xslt = $self->find_portal_root()."/authorities/oai/".$dom->findvalue("xslt");
    debug("TKL::Task::verify_task: task $f uses postprocessing xslt: $xslt ");
    #if (!check_xslt($xslt))
    #{
    #    debug("TKL::Task::verify_task: task $f has a bad xslt: $xslt "
    #	      ." Skipping");
    #    return %taskinfo;
    #}
    
    my $set = $dom->findvalue("set");
    $set =~ s/^\s+//;
    $set =~ s/\s+$//;

    my $prefix = $dom->findvalue("prefix");
    $prefix =~ s/^\s+//;
    $prefix =~ s/\s+$//;
    $prefix =~ s/[^A-Za-z_\-]/_/g;

    $self->debug("TKL::Task::verify_task: verify $f ok:"
		 ." t=$type s=$status u=$url d=$targetdir xslt=$xslt");

    $taskinfo{"type"}=$type;
    $taskinfo{"prefix"}=$prefix;
    $taskinfo{"status"}=$status;
    $taskinfo{"url"}=$url;
    $taskinfo{"target"}=$target;
    $taskinfo{"targetdir"}=$targetdir;
    $taskinfo{"xslt"}=$xslt;
    $taskinfo{"handler"}=$abs_handler;
    $taskinfo{"set"}=$set;
    return %taskinfo;
}

sub change_task_status
{
    my $self=shift;
    my $newstat=shift;
    my $oai_dom = $self->document_element();
    my $oai_xmlnode = $oai_dom->findnodes("status")->[0];
    my $oai_xmltext = $oai_dom->findvalue("status");

    my @oai_xml_subnodes =  $oai_xmlnode->childNodes;
    foreach my $child ( @oai_xml_subnodes ) 
    {
       $oai_xmlnode->removeChild($child);
    }
    my $oai_textnode = XML::LibXML::Text->new( $newstat );
    $oai_xmlnode->appendChild($oai_textnode);
    $self->storefile($oai_dom);
    debug("TKL::Task::change_task_status: Changed status of "
	  .$self->{file}." to $newstat");
}


1;


__END__

=head1 NAME

TKL::Task - Perl package implementing methods for accessing a TKL version of a harvest task.

=head1 SYNOPSIS

  use TKL::Task;

  my $tkl = new TKL::Task();

=head1 DESCRIPTION

Access to the task files, and related things all robots are supposed 
to be needing.

=head1 AUTHOR

Anders Sønderberg Mortensen <sondberg@indexdata.dk>
Heikki Levanto <heikki@indexdata.dk>
Indexdata
2003/06/26

=head1 SEE-ALSO

perl(1).

=cut

