#!/usr/bin/perl -w
# $Id: parserd.in 2062 2008-08-27 16:45:40Z clerger $

package Parserd;

use 5.006;
use strict;
#use warnings;
#use Carp;

#use sigtrap;
use IO::Socket;
##use Net::hostent;              # for OO version of gethostbyaddr
use IPC::Open2;
##use IPC::Open3;

use IPC::Run qw/start kill_kill signal/;

use POSIX qw(setsid strftime :sys_wait_h);

##use FindBin qw($Bin $Script);
use Net::Server::Fork;
use Net::Server::SIG qw(register_sig check_sigs);
## use Net::Server;
use vars qw(@ISA $VERSION);

use IO::Handle;
#use IO::Pipe;
use IO::String;
##use IO::Poll qw(POLLRDNORM POLLWRNORM POLLIN POLLHUP);

use Time::HiRes qw( usleep gettimeofday tv_interval);

use IO::Compress::Bzip2 qw(bzip2 $Bzip2Error);

@ISA = qw(Net::Server::Fork);
$VERSION = '0.0.1';

use Forest::LP::Parser;
use Forest::RCG::Parser;
use Forest::XTAG::Parser;
use Forest::XML::Writer;
use Forest::HTML::Writer;
use Forest::Dot::Writer;
use Forest::Dependency::Writer;
use Forest::Dependency::Reader;
use Forest::Tagger::Writer;

use AppConfig qw/:argcount :expand/;

my $config = AppConfig->new({ CREATE           => '_(cmd|options|label|lexer|forest|examples|origin|grammar|easy|language|info|disambiguate)',
			      GLOBALS          => { ARGCOUNT => ARGCOUNT_ONE }
			    },
			      # package variables
                              "package=f",
                              "path=f@"        => { DEFAULT => [qw{/bin /usr/bin}] },
			      # Server Admin variables
                              "port|p=i"       => { DEFAULT => 8999 },
                              "setsid!"        => { DEFAULT => 1 },
			      "background!"    => { DEFAULT => 0 },
                              "verbose|v!"     => { DEFAULT => 0 },
                              "log_file=f"     => { DEFAULT => '/var/log/parserd.log'},
                              "log_level=i"    => { DEFAULT => 3},
                              "maxclients=i"   => { DEFAULT => 1 },
			    "maxsize=i"     => { DEFAULT => 400 },
                              "user=s"         => { DEFAULT => 'nobody'},
                              "group=s"        => { DEFAULT => 'nogroup'},
                              "pid_file=f"     => { DEFAULT => '/var/run/parserd.pid'},
                              "syslog_ident=s" => { DEFAULT => 'parserd' },
                              "admin=s"        => { DEFAULT => 'parserd'},
                              "server!"        => { DEFAULT => 0 },
			      "cache=i"        => { DEFAULT => 5 },
			      "launchd!"       => { DEFAULT => 0 },
			      # parserd specifics
			      "parsers=s"
			    );

my $conffile = "/etc/parserd.conf";

if (@ARGV && $ARGV[0] =~ m/^--config/) {
  shift @ARGV;
  $conffile = shift @ARGV;
  ##  print STDERR "USING config file $conffile\n";
}

# read configuration file
if (-r $conffile) {
  $config->file("$conffile")
    || die "can't open or process configuration file $conffile";
}

$config->args();                # parse remaining args

if ($config->server()){
  # Securize
  delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'};
}

if ($config->launchd()) {
  $config->setsid(0);
  $config->log_file(0);
}

# setting path
$ENV{'PATH'} = join(':',@{$config->path()});

my $verbose = $config->verbose();
my $parser_handle = IO::String->new;

print "PATH $ENV{PATH}\n" if $verbose;

### set up some server parameters
sub configure_hook {
  my $self = shift;
  unless ($config->launchd()) {
    open(STDIN, '</dev/null') || die "Can't close STDIN [$!]";
    open(STDOUT,'>/dev/null') || die "Can't close STDOUT [$!]";
    open(STDERR,'>/dev/null') || die "Can't close STDERR [$!]";
    #  open(STDERR,'>&STDOUT')   || die "Can't close STDERR [$!]";
  }
  $self->MyInit();
}

sub log {
    my $self  = shift;
    my $level = shift;
    my $msg   = shift;
    my $date  = strftime "[%F %H:%M:%S]", localtime;
    $self->SUPER::log($level,"$date ($$) $msg");
}

### this occurs after the request has been processed
### this is server type specific (actually applies to all by INET)
sub post_accept {
  my $self = shift;
  my $prop = $self->{server};

  $self->log(1,"New connection");
  ### duplicate some handles and flush them
  ### maybe we should save these somewhere - maybe not
  if( defined $prop->{client} ){
    *MYSTDIN  = \*{ $prop->{client} };
    *MYSTDOUT = \*{ $prop->{client} } if ! $prop->{client}->isa('IO::Socket::SSL');
##    *MYSTDERR = \*{ $prop->{client} } if ! $prop->{client}->isa('IO::Socket::SSL');
    MYSTDIN->autoflush(1);
    MYSTDOUT->autoflush(1);
##    MYSTDERR->autoflush(1);
    select(STDOUT);
  }else{
    $self->log(1,"Client socket information could not be determined!");
  }
  
  ### keep track of the requests
  $prop->{requests} ++;

}

### user customizable hook
sub post_client_connection_hook {
  my $self = shift;
  $self->log(1,"Post connection");
  $self->killgroup;
}

######################################################################
#    Configuration

my %cfg = (
	   'cmd'          => { 'mandatory' => 1},
	   'label'        => { 'mandatory' => 1},
	   'options'      => { 'default'   => [], 
			       'action'    => \&split_options },
	   'forest'       => { 'default'   => 'lp' },
	   'examples'     => { 'default'   => [],
			       'action'    => \&open_examples },
	   'lexer'        => { 'default'   => \&lexer, 
			       'action'    => \&get_lexer },
	   'grammar'      => {},
	   'easy'         => {},
	   'disambiguate' => {},
	   'language'     => { 'default' => 'french' },
	   'info'         => {}
	  );

sub MyInit {
  my $self = shift;

  $self->{last} = {
		   'time'     => 0, 
		   'ftime' => 0,
		   'sentence' => '',
		   'parser'   => '',
		   'forest'   => '',
		   'lexer'    => '',
		   'sid'      => undef,
		  };

  $self->{cache} = {};
  $self->{cachelist} = [];

  my $lastkeys = join('|',keys %{$self->{last}});
  $self->{lastkeys} = qr/($lastkeys)\b/o;

  $self->{parser} = {};
  foreach my $parser (split(/\s+/,$config->parsers())) {
    my %info = ();
    my %vars = $config->varlist("^${parser}_" => 1);
    foreach my $key (keys %cfg) {
      my $cfg = $cfg{$key};
##      my $label = $parser."_$key";
##      $config->define($label);
      if ( defined($cfg->{'mandatory'}) && !exists $vars{$key}) {
	die "Bad configuration file";
      } elsif (exists $vars{$key}) {
	my $val = $vars{$key};
	$val = &{$cfg->{'action'}}($self,$val,\%info) if (defined $cfg->{'action'});
	$info{$key} = $val;
      } elsif (defined $cfg->{'default'}) {
	$info{$key} = $cfg->{'default'};
      } 
    }
    $self->{parser}{$parser} = { %info };
  }

  my $parsers = join('|',keys %{$self->{parser}});
  $self->{parsers} = qr/($parsers)\b/o;
}

######################################################################


my %inputs = ( 'lp'          => \&Forest::LP::Parser::parse,
	       'rcg'         => \&Forest::RCG::Parser::parse,
	       'xtag'        => \&Forest::XTAG::Parser::parse,
	       'xrcg'        => \&Forest::LP::XRCG::Parser::parse,
#	       'line' => \&Forest::Line::Parser::parse,
	       );

my %outputs = (	'html'       => \&html_output,
		'xml'        => \&xml_output,
		'dot'        => \&dot_output,
		'dependency' => \&dependency_output,
		'xmldep'     => \&xmldep_output,
		'lpdep'      => \&lpdep_output,
		'stats'      => \&stats_output,
		'depnorm'    => \&depnorm_output,
		'tagger'     => \&tagger_output,
		'yesno'      => \&yesno_output,
		'easy'       => \&easy_output,
#		'line' => \&line_output,
		);

my $inputs = join('|',keys %inputs);
$inputs = qr/($inputs)/o;

my %options = ( 'forest' => { 'value'   => 'xml',
			      'default' => 'xml',
			      'help'    => 'forest format',
			      'values'  => [ sort ('raw',keys %outputs) ]
			      },
		'disambiguate' => { 'value'   => 'no',
				    'default' => 'no',
				    'help'    => 'forest disambiguate',
				    'values'  => [qw/no yes/]
				  },
		'mode'         => { 'value'   => 'std',
				    'default' => 'std',
				    'help'    => 'mode for returning data',
				    'values'  => [qw/std data bzip2/]
		                  },
		'adj_max'  => { 'value' => 10 ,
				'default' => 10,
				'help' => 'max adjunctions per node',
				'values' => 'any positive number'
			      }

	      );

######################################################################
# Server Loop with forking

sub set_soft_restart {
  my $self = shift;
  $SIG{'HUP'} = sub { $self->log(1,"Got signal HUP");
		      $self->{need_restart} = 1;
		    };
}

sub process_request {
    my $self=shift;
    $self->{timeout} ||= 60;  # by default give the user 60 seconds to type a line
    $self->{need_restart} = 0;

    register_sig(
		 CHLD =>  sub { while ((my $pid = waitpid(-1, WNOHANG)) > 0) {
		   delete $self->{child}{$pid};
		 }
			      },
		);

     $SIG{'INT'} 
       = $SIG{'TERM'} 
	 = $SIG{'PIPE'} 
	   = $SIG{'QUIT'} = sub { $self->log(1,"Got signal");
				  $self->killgroup;
				  exit;
				};

    $self->set_soft_restart;	# on HUP signals
      
    eval {
	local $SIG{ALRM} = sub { 
#	  print MYSTDOUT "Sorry! Timed Out!\n";
#	  $self->killgroup;
	  die "Timed Out!\n"; 
	};
##	local $SIG{CHLD} = 'IGNORE';
	my $previous_alarm = alarm($self->{timeout});
	my $prompt='';
	my $o;
	my $v;

	$self->{child} = {};

	while( <MYSTDIN> ){
	    s/\r?\n$//;
	    $prompt='yes', next unless /\S/;       # blank line
	    if    (/^(quit|exit)/oi)    { last; }
	    elsif (($o,$v) = /^set\s+(\w+)\s+(\w+)/oi) {
		$self->usage(), next 
		    unless (defined $options{$o} 
			    && ((ref($options{$o}{values}) ne 'ARRAY')
				|| grep( /^$v$/, @{$options{$o}{'values'}})));
		$options{$o}{'value'} = $v;
	    } elsif (($o) = /^show\s+(\w+)/) {
		$self->usage(), next unless (defined $options{$o});
		print MYSTDOUT "option $o: $options{$o}{value}\n";
	    } elsif (($o) = /^reset\s+(\w+)/) {
		$self->usage(), next unless (defined $options{$o});
		$options{$o}{'value'} = $options{$o}{'default'};
	    } elsif (($o) = /^input\s+$inputs\s+(\w+)\s+(.*)/) {
		$self->handle_input_forest($1,$2,$3);
	    } elsif (/^grammar\s+$self->{parsers}/) {
		next unless defined $self->{parser}{$1}{'grammar'};
		print MYSTDOUT "$self->{parser}{$1}{grammar}\n";
	    } elsif (/^examples\s+$self->{parsers}/) {
		next unless defined $self->{parser}{$1}{'examples'};
		foreach my $sentence (@{$self->{parser}{$1}{'examples'}}) {
		    print MYSTDOUT "$sentence\n";
		}
	    } elsif (/^language\s+$self->{parsers}/) {
		next unless defined $self->{parser}{$1}{'language'};
		print MYSTDOUT "$self->{parser}{$1}{'language'}\n";
	    } elsif (/^info\s+$self->{parsers}/) {
		next unless defined $self->{parser}{$1}{'info'};
		print MYSTDOUT "$self->{parser}{$1}{'info'}\n";
	    } elsif (/^timeout\s+(\d+)/) {
	      $self->{timeout} = $1;
	    } elsif (/^last\s+forest\s+(\w+)\s+(\w+)/) {
	      my $outputformat = $1;
	      my $mode = $2;
	      my $forest = $self->{last}{forest};
	      $self->log(3,"Require last forest $outputformat $mode");
	      next unless $forest;
	      if ($mode ne $options{mode}{value}) {
		$self->{saved_mode} = $options{mode}{value};
		$options{mode}{value} = $mode;
	      }
	      alarm($self->{timeout}); # reset timeout to its max if new sentence to process
	      my $t0 = [gettimeofday];  
	      $forest=$self->try_disambiguate($forest,
					      $self->{last}{parser},
					      $self->{last}{sentence},
					      $self->{last}{time}
					     );
	      $self->emit_forest($outputs{$outputformat},$forest,$self->{last}{parser});
	      my $elapsed = tv_interval($t0);
	      $self->{last}{ftime} = $elapsed;
	      $self->{cmd_time} = $elapsed;
	    } elsif (/^last $self->{lastkeys}/) {
		my $key = $1;
		if ($key eq 'forest') {
		    my $forest = $self->{last}{forest};
		    $self->log(3,"Require last forest $options{forest}{value}");
		    next unless $forest;
		    my $outputformat = $options{'forest'}{'value'};

		    alarm($self->{timeout}); # reset timeout to its max if new sentence to process
		    my $t0 = [gettimeofday];  
		    $forest=$self->try_disambiguate($forest,
						    $self->{last}{parser},
						    $self->{last}{sentence},
						    $self->{last}{time}
						    );
		    $self->emit_forest($outputs{$outputformat},$forest,$self->{last}{parser});
		    my $elapsed = tv_interval($t0);
		    $self->{last}{ftime} = $elapsed;
		    $self->{cmd_time} = $elapsed;
		} else {
		    next unless (defined $self->{last}{$key});
		    print MYSTDOUT "last $key=$self->{last}{$key}\n";
		}
	    } elsif (/^clean\s+cache/){
	      $self->{cache}     = {};
	      $self->{cachelist} = [];
	    } elsif (/^limit\s+cache\s+(\d+)/) {
	      $config->set('cache',$1);
	    } elsif (s/^$self->{parsers}//oi ) {
		$self->handle_parser($1,$_);
		##		last;
	    } elsif (s/^(\d+(?:\.\d+)?)\s+$self->{parsers}//oi) {
		$self->handle_parser($2,$_,$1);
	    } elsif (s/^dag\s+(\d+(?:\.\d+)?)\s+$self->{parsers}\s*//oi) {
		my $sid      = $1;
		my $parser   = $2;
		my $sentence = $_;
		my @dag      = ();
		while (<MYSTDIN>) {
		    chomp;
		    push(@dag,$_);
		    last if  ($_ =~ /^##\s*(DAG|MAF)\s+END/);
			  }
		    $self->handle_parser($parser,join("\n",@dag),$sid,$sentence);
		} else {
		    $self->usage();
		}
		alarm($self->{timeout});
	    } continue {
		$self->{child} = {};
		my $mode = $options{mode}{'value'};
		if ( $mode eq 'data' || $mode eq 'bzip2') {
		  my $time = '';
		  if (exists $self->{cmd_time}) {
		    $time = " time=$self->{cmd_time}";
		    delete $self->{cmd_time};
		  }
		  print MYSTDOUT "<END_DATA>$time\n" ;
		}
		if (exists $self->{saved_mode}) {
		  $options{mode}{value} = $self->{saved_mode};
		  delete $self->{saved_mode};
		}
		if ($self->{need_restart}) {
		  $self->log(1,"Stop after receiving HUP");
		  last;
		}
		if ($prompt) {
		    print MYSTDOUT "Welcome to $0; type help for command list.\n";
		    print MYSTDOUT "Command? ";
		}

	    }
	    alarm($previous_alarm);
	};

    if( $@=~/Timed\s+Out/io ){
      print MYSTDOUT "Sorry! Timed Out!\n";
      ## print MYSTDOUT "ALIVE @{$self->{child}}.\n";
      ##	  my $ctl= kill 14, @{$self->{child}}; 
      $self->log(3,"Timeout");
      $self->killgroup;
      ## print MYSTDOUT "KILL $ctl\n";
      return;
    } elsif ($@) {
      $self->log("Something strange: $@");
      $self->killgroup;
    }
}

sub usage {
  my $self=shift;
  print MYSTDOUT <<EOF ;

$0 is a server of parsers. Given a parser and a sentence, it returns
the derivation forest for the sentence. When possible, this forest is
emitted in XML.

Contact: Eric de la Clergerie <Eric.De_La_Clergerie\@inria.fr>

Commands: 
      quit 
    | help 
    | <parser> <sentence>
    | show <option>
    | set <option> <value>
    | reset <option>
    | (examples|grammar|language|info) <parser>
    | last (time|sentence|parser|forest|ftime)
    | clean cache
    | limit cache <limit>

Parsers:
EOF

    foreach my $parser (sort keys %{$self->{parser}}) {
	print MYSTDOUT "\t$parser\t-- $self->{parser}{$parser}{label}\n";
    }

print MYSTDOUT <<EOF;

Options:
EOF
   
    foreach my $option (sort keys %options) {
      my $values = (ref($options{$option}{values}) eq 'ARRAY') 
	? $options{$option}{values}
	  : [ $options{$option}{values} ]
	    ; 
	print MYSTDOUT <<EOF;
\t$option\t-- $options{$option}{help}
\t\tcurrent value: $options{$option}{value}
\t\tall values   : @$values
\t\tdefault      : $options{$option}{default}

EOF

    }

}

######################################################################
# Misc. Functions

# Parsing and producing a forest

sub handle_parser {
  my $self     = shift;
  my $parser   = shift;
  my $sentence = shift;
  my $sid      = shift;
 
  if ($sentence =~ /^\s*$/) {
    print "sentence? ";
    $sentence = <MYSTDIN>;
  }

  my $xsentence = shift || $sentence;

  return if ($config->get('cache') && $self->try_cache($sentence,$xsentence,$parser,$sid));

  alarm($self->{timeout}); # reset timeout to its max if new sentence to process

  $self->{last}{sid} = $sid;

  $self->log(3,"Running lexer on $xsentence");
  my $lexinfo = &{$self->{parser}{$parser}{lexer}}($self,0,$sentence,$parser);

  my $time = -1;
  my $output='';
  my $error_log='';
  
  ##  $self->log(4,"lexinfo: $lexinfo");

  while (1) {

    if (!exists $self->{persistent}{parser}{$parser}) {
      my $pparser = $self->{persistent}{parser}{$parser} = {};
      my $info = $self->{parser}{$parser};
      my $cmd = [$info->{cmd},@{$info->{options}}];
      $pparser->{in} = '';
      $pparser->{out} = '';
      $pparser->{err} = '';
      $pparser->{key} = $parser;
      $pparser->{nb} = 0;
      $pparser->{time} = 0;
      $self->log(3,"Starting persistent parser $parser");
      my $pid = $pparser->{pid} = 
	start $cmd,
	  '<',\$pparser->{in},
	    '>pty>',\$pparser->{out},
	      '2>',\$pparser->{err}
		|| die "couldn't run parser $parser";
    }
    my $pparser = $self->{persistent}{parser}{$parser};
    my $h = $pparser->{pid};
    
    eval {
      $self->log(3,"Using persistent parser $parser");
      $pparser->{in} = $lexinfo;
      $h->pump until ($pparser->{out} =~ /^Time\s+(\d+)/mo);
      $time = $self->{last}{time} = $1 / 1000;
      $output = $pparser->{out};
      $error_log .= $pparser->{err};
      $pparser->{out} = '';
      $pparser->{err} = '';
      my $nb = $pparser->{nb}++;
      my $atime = $pparser->{time} += $time;
      $self->log(3,"Done parsing (time=$time nb=$nb atime=$atime)");
      if ( $time > 2 || $nb > 200 || $atime > 20) {
	## kill persistent parsers when there is a risk
	## they use too much memory
	## because we do not directly access the memory, we use indirect hints
	$self->log(3,"Kill persistent parser $parser (time=$time nb=$nb atime=$atime)");
	kill_kill $h, grace => 1;
	delete $self->{persistent}{parser}{$parser};
      }
    };

    if ($@ =~ /Resource temporarily unavailable/) {
      ## Sometimes we got this message
      ## which seems to means the parser is not ready reading the input data
      ## => better to kill the parser and restart it
      my $x = $@;
      $self->log(1,"Retry with persistent parser $parser: $x");
      my $h = $pparser->{pid};
      kill_kill $h, grace=>1;
      delete $self->{persistent}{parser}{$parser};
      undef $pparser;
      next;
    } elsif ($@ || $error_log) {
      ## we wish some info about other cases of errors found in the parser
      my $x = $@;
      $self->log(1,"*** Error somewhere with parser $parser: $error_log\n") if ($error_log);
      $self->log(1,"Need restart persistent parser $parser: $x");
      # in that case, we raise an error that will kill the connection
      die $x;
    } else {
      last;
    }
  }

  $parser_handle->open($output);

  $self->handle_forest($parser_handle,
		       $self->{parser}{$parser}{'forest'},
		       $parser,
		       $xsentence,
		       $time,
		       $self->{parser}{$parser}{'grammar'},
		      );

  ## no cache if cache=0
  $self->push_cache if ($config->get('cache'));
  $self->{cmd_time} = $time;
}

sub try_cache {
  my $self      = shift;
  my $sentence  = shift;
  my $xsentence = shift;
  my $parser    = shift;
  my $sid       = shift;
  
  return 0 unless  (exists $self->{cache}{"$sentence"}{$sid}{$parser});
  my $info   = $self->{cache}{"$sentence"}{$sid}{$parser};
  my $forest = $info->{forest};
  return 1 unless $forest;
  $self->{last} = { sid      => $sid,
		    parser   => $parser,
		    forest   => $forest,
		    time     => $info->{time},
		    lexer    => $info->{lexer},
		    sentence => $sentence
		  };
  my $outputformat = $options{'forest'}{'value'};
  my $sparser = $self->{last}{parser};
  $forest=$self->try_disambiguate($forest,$sparser,$sentence,$info->{time});
  $self->emit_forest($outputs{$outputformat},$forest,$sparser);
  return 1;
}

sub push_cache {
  my $self     = shift;
  
  my $last     = $self->{last};
  my $sentence = $last->{sentence};
  my $parser   = $last->{parser};
  my $sid      = $last->{sid};
  my $new      = { time   => $last->{time},
		   lexer  => $last->{lexer},
		   forest => $last->{forest}
	       };
  my $size     = @{$self->{cachelist}};
  my $xsize    = (keys %{$self->{cache}});
  $self->log(4,"Size cache is $size/$xsize");
  if ($size > $config->get('cache')) {
    ## remove first element from cache
    my $first = shift @{$self->{cachelist}};
    delete $self->{cache}{$first->{sentence}};
  }
  if ($xsize > 5*$config->get('cache')) {
    $self->log(4,"*** Cache is growing too large: cleaning");
    $self->{cache}     = {};
    $self->{cachelist} = [];
  }
  push(@{$self->{cachelist}},
       { sentence => $sentence, 
	 sid      => $sid, 
	 parser   => $parser }
       );
  $self->{cache}{$sentence}{$sid}{$parser} = $new;
}

sub handle_error{
  my $self   = shift;
  my $stderr = shift;
  while (<$stderr>) {
    return $1 if /^user\s+(\S+)/;
  }
  return 0;
}

				# Reading directly the forest

sub handle_input_forest {
  my $self     = shift;
  #    my $handle = shift;
  my $input    = shift;
  my $parser   = shift;
  my $sentence = shift;

  chop $sentence if ($sentence =~ /\n$/);

  # this seems to be necessary
  $| = 1; # $| = 0; # Flush
  pipe(*READER, *WRITER) || die "couldn't pipe";

  while (<STDIN>) {
    last if /^END INPUT FOREST/;
    print WRITER;
  }
  close WRITER || die "cannot close: $!";
  
  $self->handle_forest(\*READER,$input,$parser,$sentence,-1,undef);
}

				# Parsing the forest and emitting it in the selected format

sub handle_forest {
  my $self        = shift;
  my $handle      = shift;
  my $inputformat = shift;
  my $sparser     = shift;
  my $parser      = $self->{parser}{$sparser}{label} || $sparser;
  my $sentence    = shift;
  my $time        = shift;
  my $grammar     = shift;

  $self->{last}{time}     = $time;
  $self->{last}{sentence} = $sentence;
  $self->{last}{parser}   = $sparser;

  my $outputformat = $options{'forest'}{'value'};
  my $t0 = [gettimeofday];  
  if ($outputformat eq 'raw') {
    $self->{last}{forest} = '';
    print MYSTDOUT "TIME $time\nPARSER $parser\nSENTENCE $sentence\n";
    raw_output($handle);
  } else {
    $self->log(3,"Reading forest");
    my $forest = &{$inputs{$inputformat}}($handle,
					  { keep => $options{adj_max}{value} }
# 					  'sentence' => $sentence,
# 					  'time' => $time,
# 					  'parser' => $parser
					 );
    ## save non disambiguated forests
    $self->{last}{forest} = $forest;
    $forest->{grammar} =  $grammar if (defined $grammar);
    $forest=$self->try_disambiguate($forest,$sparser,$sentence,$time);
    $self->emit_forest($outputs{$outputformat},$forest,$sparser);
  }
  close $handle || die "cannot close: $!";
  my $elapsed = tv_interval($t0);
  $self->{last}{ftime} = $elapsed;
}

sub emit_forest {
  my $self=shift;
  my ($handler,$forest,$sparser) = @_;
  ##  $self->log(3,"Printing forest");

  ## immediate kill if forest grows too large and take too much time
  $SIG{HUP} = sub { $self->log(1,"Got signal HUP: immediate restart");
		    $self->killgroup;
		    exit;
		  };
  if ($options{mode}{value} eq 'bzip2') {
    my $buffer = capture(sub {$handler->($forest,$self,$sparser)});
    my $l = length($buffer);
    print MYSTDOUT <<EOF;
Bzip2: length=$l
$buffer
EOF
  } else {
    open(STDOUT,">&MYSTDOUT") || die "can't dup MYSTDOUT: $!";
    $handler->($forest,$self,$sparser);
    close(STDOUT);
    open(STDOUT,'>/dev/null') || die "Can't close STDOUT [$!]";
  }
  $self->log(3,"Done forest");

  ## softer kill if size grows too large outside forest handling
  $self->set_soft_restart;

}

sub try_disambiguate {
  my $self     = shift;
  my $forest   = shift;
  my $sparser  = shift;
  my $parser   = $self->{parser}{$sparser}{label} || $sparser;
  my $sentence = shift;
  my $time     = shift;

  if (($options{disambiguate}{value} eq 'yes') 
      && 
      exists $self->{parser}{$sparser}{disambiguate}
     ) {
    $self->log(3,"Disambiguate forest");
    # apply disambiguation
#    open(STDOUT,">/tmp/forest.dep.xml") || die "can open: $!";
#    $forest->xmldep;
#    close(STDOUT);
    my $cmd = $self->{parser}{$sparser}{disambiguate};
    ##    $self->log(3,"Disambiguate cmd: $cmd");
    my $pid = open2(*DISOUT,*DISIN,$cmd) || die "couldn't run disambiguate cmd";
    $self->add_child($pid);
    DISOUT->autoflush(1);
    DISIN->autoflush(1);
    open(STDOUT,">&DISIN") || die "can't dup DISIN: [$!]";
    $forest->xmldep;
    
    close STDOUT || die "can't close: [$!]";
    close DISIN || die "can't close: [$!]";
    $forest = Forest::Dependency::Reader::parse(\*DISOUT,
						'sentence' => $sentence,
						'time' => $time,
						'parser' => $parser
					       );
    close DISOUT || die "can't close: [$!]";
    $self->wait_end_child($pid);
  }
  return $forest;
}


				# Lexers
sub dummy_lexer {
    my $self = shift;
  my $writer   = shift;
  my $sentence = shift;
  return "$sentence";
}

sub lexer {

    my $self     = shift;
    my $writer   = shift;
    my $sentence = shift;
    my $i        = 0;

    my $lexinfo  = '';

    foreach ($sentence =~ /\S+/g) {
        last if (/\#/);
        if (/^(\d+)$/) {
            $lexinfo .= "'C'($i,$1,". ++$i . ").\n";
        } else {
	    $lexinfo .= "'C'($i,'$_',". ++$i .").\n" ;
        }
      }
    $lexinfo .= "'N'($i).\n";
    return $lexinfo;
}

sub remote_lexer {
    my $self     = shift;
    my $writer   = shift;
    my $sentence = shift;
    my $parser   = shift;
    my @lexer    = @{$self->{parser}{$parser}{'remote_lexer'}};
    $self->log(3,"In remote lexer sentence = $sentence parser=$parser");
    open2(*LEXOUT,*LEXIN,@lexer) || die "couldn't run lexer";
    LEXOUT->autoflush(1);
    LEXIN->autoflush(1);
    print LEXIN "$sentence";
    close LEXIN || die "can't close: $!";
    my $lexinfo = <LEXOUT>;
    close LEXOUT || die "can't close: $!";
    return $lexinfo;
}

sub persistent_lexer {
  my $self     = shift;
  my $writer   = shift;
  my $sentence = shift;
  my $parser   = shift;
  my $lexer    = $self->{parser}{$parser}{'remote_lexer'};
  my $key      = join(' ',@$lexer);
  if (!exists $self->{persistent}{lexer}{$key}) {
    ## Start a persistent lexer
    ## More than one one parser may share the same lexer
    my $plexer = $self->{persistent}{lexer}{$key} = {};
    $self->log(4,"Starting persistent lexer for parser=$parser");
    $plexer->{lexin}  = '';
    $plexer->{lexout} = '';
    $plexer->{lexerr} = '';
    $plexer->{pid}    = 
      start $lexer, 
	'<',\$plexer->{lexin},
	  '>pty>',\$plexer->{lexout},
	    '2>',\$plexer->{lexerr},
	      || die "couldn't run lexer";
  }
  ## use a persistent lexer
  my $plexer = $self->{persistent}{lexer}{$key};
  $self->log(4,"Using persistent lexer for parser=$parser");
  $plexer->{lexin} = "$sentence\n";
  if ($plexer->{pid}->pumpable) {
##    $plexer->{pid}->pump until ($plexer->{lexout} =~ /SENTENCE\s+DIV/o);
    $plexer->{pid}->pump until ($plexer->{lexout} =~ /^%%%EOF/mo);
    $plexer->{lexout} =~ s/\r\n/\n/og;
    $self->log(4,"Get from lexer\n$plexer->{lexout}");
    my $lexinfo = $plexer->{lexout};
    $self->{last}{lexer} = "\n$plexer->{lexout}";
    $plexer->{lexout} = '';
    return $lexinfo;
  } else {
    $self->log(1,"Trouble pumping from lexer: $plexer->{lexerr}\n");
    delete $self->{persistent}{lexer}{$key};
    return '';
  }
}

				# Writing forests

sub raw_output {
    my $handle = shift;
    print MYSTDOUT <$handle>;
}

sub xml_output {
    my $forest = shift;
    $forest->pretty_print(0);
}

sub html_output {
    my $forest = shift;
    $forest->html;
}

sub dot_output {
    my $forest = shift;
    $forest->dot;
}

sub dependency_output {
    my $forest = shift;
    $forest->dependency;
}

sub xmldep_output {
    my $forest = shift;
    $forest->xmldep;
}

sub lpdep_output {
    my $forest = shift;
    $forest->lpdep;
}

sub tagger_output {
    my $forest = shift;
    $forest->tagger;
}

sub yesno_output {
  my $forest = shift;
  my $status = "Failure";
  if ($forest->{nb} >= 0) {
    $status = "Success";
    $status = "Partial" if ($forest && $forest->{mode} && $forest->{mode} eq 'robust');
  }
  print MYSTDOUT "$status\n";
}

sub stats_output {
    my $forest = shift;
    $forest->stats;
}

sub depnorm_output {
    my $forest = shift;
    $forest->dependency_normalize;
}

sub line_output {
    my $forest = shift;
    $forest->line;
}

sub easy_output {
  my ($forest,$self,$parser) = @_;
  if (!exists $self->{parser}{$parser}{easy}) {
    print "EASy output format not available for this parser:\n\t$parser\n";
    return;
  }
  alarm($self->{timeout}); # reset timeout to its max if conversion
  my $easycmd = $self->{parser}{$parser}{easy};
  $easycmd .= " -e $self->{last}{sid}" if (defined $self->{last}{sid});
  my $pid = open2(*EASYOUT,*EASYIN,$easycmd) || die "couldn't run easycmd";
  $self->add_child($pid);
  EASYOUT->autoflush(1);
  EASYIN->autoflush(1);
  open(STDOUT,">&EASYIN") || die "can't dup EASYIN: $!";
  $forest->xmldep;
  close STDOUT || die "can't close: $!";
  close EASYIN || die "can't close: $!";
  open(STDOUT,">&MYSTDOUT") || die "can't dup MYSTDOUT: $!";
  while (<EASYOUT>) {
    next if /^\s*$/;
    print;
  }
  close EASYOUT || die "can't close: $!";
  $self->wait_end_child($pid);
}

sub clean_sentence {
    my $sentence=shift;
    $sentence =~ s/\s+/ /og;
    $sentence =~ s/^\s+//og;
    $sentence =~ s/\s+$//og;
    return $sentence;
}

######################################################################
# Handling configuration file

sub split_options {
  my $self    = shift;
  my $options = shift;
  return [ split(/\s+/,$options) ];
}

sub get_lexer {
  my $self  = shift;
  my $lexer = shift;
  my $info  = shift;
  my @lexer = split(/\s+/,$lexer);
  $lexer    = shift @lexer;
  if ($lexer eq 'default') {
	return \&lexer;
      } elsif ($lexer eq 'remote') {
	$info->{'remote_lexer'} = [ @lexer ] ;
	return \&remote_lexer;
      } elsif ($lexer eq 'persistent') {
	$info->{'remote_lexer'} = [ @lexer ] ;
	return \&persistent_lexer;
      } elsif ($lexer eq 'dummy') {
	return \&dummy_lexer;
      } else {
	die "bad configuration file";
      }
}

sub open_examples {
  my $self = shift;
  my $file = shift;
  open(EXAMPLES,"<$file") || return [];
  my @sentences = ();
  while (<EXAMPLES>) {
    next if (/^\d*\s*$/);
    next if (/^\#/);
    push(@sentences, clean_sentence($_));
  }
  close(EXAMPLES);
  return [@sentences];
}

sub notify_cfg {
  my $state = shift;
  my $var   = shift;
  my $val   = shift;
  
  print "$var set to $val\n";
}

######################################################################
# Start the server

my $user = $config->user;
my $group = $config->group;

print "Running the server user=$user group=$group\n";

__PACKAGE__->run(
		 port         => $config->port(),
                 log_file     => $config->log_file() ? $config->log_file : undef,
                 syslog_ident => $config->syslog_ident(),
                 log_level    => $config->log_level(),
                 user         => $config->user(),
                 group        => $config->group(),
                 pid_file     => $config->pid_file(),
                 setsid       => $config->setsid ? 1 : undef,
                 background   => $config->background ? 1 : undef,
                 maxclients   => $config->maxclients()
		 );

exit;

#use Proc::ProcessTable;
# sub killgroup {
#   my $self   = shift;
#   my $signal = shift;
#   $signal    = 9 unless (defined $signal);
#   my @pids   = keys %{$self->{child}};
#   foreach my $pid (@pids) {
#     my @xp = grep {$_->ppid == $pid} @{$self->{jobs}->table};
#     foreach my $p (@xp) {
#       kill $signal => $p->pid;
#     }
#     kill $signal =>  $pid;
#     delete $self->{child}{$pid};
#   }
# }

## Need to kill descendant because of command time
## killing 'time' do not kill descendant of it !
sub killgroup {
  my $self   = shift;
  my $signal = shift;
  $signal    = 9 if (!defined $signal);

  foreach my $pparser (values %{$self->{persistent}{parser}}) {
    $self->log(3,"Post connection: kill persistent parser $pparser->{key}");
    my $h = $pparser->{pid};
##    $h->signal("TERM");
##    $h->finish;
    kill_kill $h, grace => 1;
  }
  delete $self->{persistent}{parser};

  if (my @pids = keys %{$self->{child}}) {
    ## Then look for live descendant and kill them
    my $jobs   = `/bin/ps al`;
    my @jobs = split(/\n/,$jobs);
    
    my @descrLine = split (/\s+/,shift @jobs);
    my $cpt = 0;
    my ($ppidInd,$pidInd);
    foreach my $elem (@descrLine){
      if($elem eq 'PPID'){
	$ppidInd = $cpt;
      }elsif($elem eq 'PID'){
	$pidInd = $cpt;
      }
      $cpt++;
    }
  
    if(!defined($ppidInd) || !defined($pidInd)){
      $self->log(4,"/bin/ps doesn't answer me the data I expect, there might be some zombies descendants");
      kill $signal, @pids;
      return;
    }
    
    foreach (@jobs) {
      s/^\s*//og;
      my ($pid,$ppid) = (split(/\s+/,$_))[$pidInd,$ppidInd];
      
      foreach my $child (@pids){
	if(($child eq $pid) || ($child eq $ppid)){ 
	  print "Going to kill $pid\n";
	  kill $signal, $pid;
	}
      }
    }
  }
}

sub wait_end_child {
  my $self = shift;
  my $pid  = shift;
  waitpid($pid,0);
  delete $self->{child}{$pid};
}

sub add_child {
  my $self = shift;
  my $pid  = shift;
  $self->{child}{$pid} = 1;
}


sub capture (&;@) {
  my $code = shift;
  tie *STDOUT, 'MyCapture';
  $code->();
  close(STDOUT);
  my $x = <STDOUT>;
  untie *STDOUT;
  return $x;
}


sub run_client_connection {
  my $self = shift;
  my $xpid = fork;
  if (not defined $xpid) {
    $self->log(1,"Bad fork [$!]");
    sleep(5);
  } elsif ($xpid) {
    ## the client is forked
    ## the parent process monitors the connection socket
    ## when the socket is closed on the client side, we kill the client process on the server

    ### close the main sock, we still have
    ### the client handle, this will allow us
    ### to HUP the parent at any time
    $_ = undef foreach @{ $self->{server}->{sock} };

    ### restore sigs (for the child)
    $SIG{HUP} 
      = $SIG{INT} = $SIG{TERM} = $SIG{QUIT} = 'DEFAULT';
    $SIG{PIPE} = 'IGNORE';
    $SIG{CHLD} =  sub { while (waitpid(-1, WNOHANG) > 0) {}};
    
    delete $self->{server}->{children};
    my $max = $config->get('maxsize') * 1024;	# kill client when larger than maxsize
    while () {
      sleep(10);
      my $s = process_size($xpid);
      if ( $s > $max) {
	$self->log(1,"Warning connection $xpid: size too large $s");
	kill HUP => $xpid;
	last
      } elsif ($s == -1) {
	# the client seems to be dead !
	$self->log(1,"Killing connection $xpid: seems to be dead");
	kill 9 => $xpid;
	last;
      }
    }
    waitpid($xpid,0);
    exit;
  } else {
    ## the real client process on server side
    $self->SUPER::run_client_connection;
  }
}



sub process_size {
  ## inspired from Process::MaxSize
  ## Thanks !
  my $pid = shift;
  my @s = `/bin/ps wwaxo 'pid,rss'`;
  shift @s;
  foreach my $l (@s) {
    $l =~ s/^\s+//g;
    my ($id,$s) = split(/\s+/,$l);
    return $s  if ($id == $pid);
  }
  return -1;
}


1;

package MyCapture;

use strict;
use IO::Compress::Bzip2 qw(bzip2 $Bzip2Error) ;

sub TIEHANDLE {
  my $class = shift;
  my $buffer = '';
  my $z =new IO::Compress::Bzip2 \$buffer
    or die "bzip2 failed: $Bzip2Error\n";

  bless { buffer => \$buffer,
	  bzip2 => $z
	}, $class;
}

sub WRITE {
  shift->{bzip2}->syswrite(@_);
}

sub PRINT {
  shift->{bzip2}->print(@_);
}

sub PRINTF {
  shift->{bzip2}->printf(@_);
}

sub CLOSE {
  shift->{bzip2}->close;
}

sub READLINE {
  my $b = shift->{buffer};
  return $$b;
}

1;

__END__

=head1 NAME

parserd - A perl script to run a daemon server of parsers

=head1 SYNOPSIS

parserd [<port>];

=head1 DESCRIPTION

parserd starts a daemonized server of parsers. The default port is
8999. It is configured to access several parsers built with DyALog,
but other can easily be added.

=head1 COPYRIGHT AND LICENSE

Copyright (C) 2000-2008, INRIA.

This program is free software; you can redistribute it and/or modify it under
the same terms as Perl itself.

=head1 AUTHOR

Eric de la Clergerie, Eric.De_La_Clergerie@inria.fr

=head1 SEE ALSO

perl (1)
parser_client (1)
parser.cgi 

DyALog <http://alpage.inria.fr/~clerger>

=cut
