#!/usr/bin/perl -w

#####################################
# An HTTP Socket Server for LaTeXML #
#####################################

# Created and provided by Deyan Ginev
#   released in the Public Domain

eval 'exec /usr/bin/perl -w -S $0 ${1+"$@"}'
  if 0;    # not running under some shell

# General packages
use strict;
use warnings;
use FindBin;

our $VERSION = '1.4.2';

my $RealBin_safe;
BEGIN {
  $FindBin::RealBin =~ /^([^\0]+)\z/;    # Valid Unix path TODO: Windows, revisit regexp
  $RealBin_safe = $1;
  die 'Fatal:IO:tainted RealBin was tainted! Failing...' unless ($RealBin_safe && (-e $RealBin_safe . '/latexmls'));
}
# TODO: We probably want file cat for things like /../lib instead of spelling out a Unix path
use lib $RealBin_safe. "/../lib";
# Server-specific
use HTTP::Daemon;
use POSIX qw( setsid );
use URI::Escape qw(uri_unescape);
use Encode;
use JSON::XS qw(encode_json decode_json);
# LaTeXML packages
use LaTeXML;
use LaTeXML::Common::Config;
use LaTeXML::Util::Pathname;
#**********************************************************************

# Bookkeeping
my $ARGV_SAVED = [@ARGV],    #Save, so that we reinvoke when flushed
# Read default options
my $g_opts = LaTeXML::Common::Config->new('input_limit' => 100, expire => 600);
$g_opts->read(\@ARGV);

# Set up the server
# Global constants can be read into fixed variables:
my $address = $g_opts->get('address') || 'localhost';
my $port = $g_opts->get('port') || '3334';
my $server = setup_server($address, $port);
my $server_port = $server->sockport();
my $input_limit = $g_opts->get('input_limit');
my $input_counter = 0;
print STDERR "LaTeXML server listening on $server_port\n";
#**********************************************************************
# Daemonize,
#   if we expect to auto-reboot and want standalone robustness
daemonize($input_limit);
#**********************************************************************
# Set up the processing.

# Install signal-handlers
local $SIG{'ALRM'} = 'timeout_handler';
local $SIG{'TERM'} = 'term_handler';
local $SIG{'INT'}  = 'term_handler';

alarm($g_opts->get('expire'));    #Timeout if inactive for too long...
#**********************************************************************
# Process files on demand:

while (my $client = $server->accept()) {
  while (my $request = $client->get_request) {
    if (!$request->content) {
      $client->send_response(HTTP::Response->new(400, 'Fatal:http:request Empty request?!'));
      $client->close;
      next;
    } elsif ($request->method ne 'POST') {
      $client->send_response(HTTP::Response->new(405, 'Fatal:http:request Only POST allowed'));
      $client->close;
      next;
    }
    # Bookkeeping...
    alarm(0);    # We are processing, ignore timeout here

    # Thaw request into options:
    my $opts = LaTeXML::Common::Config->new();
    my $config_build_return = eval {
      # Split, and make sure keys with no values get an empty string stub
      my $keyvals = [
        map { $$_[0], $$_[1] ? decode('utf-8',uri_unescape($$_[1])) : '' }
        map { [split(/=/,$_,2)] } # Split key=values
        map { split(/\&/,$_)} # Split URI options
        $request->content ];

      $opts->read_keyvals($keyvals,silent=>1); };
    if (!$config_build_return || $@) {
      # Wrong options, report error.
      $@ = "See 'latexmlc --help' for the full options specification" unless $@;
      $client->send_response(HTTP::Response->new(400, "Fatal:http:request You have used illegal or ill-formed options in your request.\n Details:$@"));
      $client->close;
      next;
    }
    # Local if peerhost is localhost:
    $opts->set('local', $client->peerhost eq '127.0.0.1');
    $opts->set('source', $opts->get('tex')) if (!($opts->get('source')) && $opts->get('tex'));
    $opts->delete('tex');
    # Prepare TeX source
    my $source = $opts->get('source');
    $opts->delete('source');

    my $base = $opts->get('base');
    if ($base && !pathname_is_url($base)) {
      my $canonical_base = pathname_canonical($base);
      if ($canonical_base ne pathname_cwd()) {
        chdir $canonical_base
          or croak("Fatal:server:chdir Can't chdir to $canonical_base: $!");
        $LaTeXML::Util::Pathname::cached_cwd = $canonical_base;
        # Prevent spurious warning; we may not use the cached_cwd, but we really want to have it around
        $LaTeXML::Util::Pathname::cached_cwd if 0;
      }
    }

    # We now have a LaTeXML options object - $opts.
    # Make sure the g_opts are included as defaults
    foreach ($g_opts->keys) {
      $opts->set($_, $g_opts->get($_)) unless defined $$opts{opts}{$_};
    }
    # Print to regular STDERR if debugging is requested
    *STDERR = *STDERR_SAVED if ($LaTeXML::DEBUG);

    # Boot/find a daemon:
    my $converter = LaTeXML->get_converter($opts);
    # Initialize if needed
    $converter->initialize_session unless $$converter{ready};

    #Send a request:
    $input_counter++ if $input_limit;
    my $response = $converter->convert($source);

    # Send back the serialized result, log and status:
    # ensure these are well-typed for consumers that will be doing strict JSON
    $$response{status_code} = int($$response{status_code}); # ensure integer
    $$response{result} = "" if ! defined $$response{result}; # avoid null
    $client->send_response(HTTP::Response->new(200, 'OK', undef, encode_json($response)));

    #Flush daemon, every $input_limit files:
    if ($input_limit &&
      ($input_counter >= $input_limit)) {
      $client->close;
      exec("$RealBin_safe/latexmls", @$ARGV_SAVED)
        or croak("Fatal:server:restart Daemon autoflush Failed!");
    }

    # Terminate immediately if requested
    timeout_handler() if ($opts->get('expire') == -1);

    # Continue looking for a new request, timeout if inactive for too long...
    alarm($opts->get('expire'));
  }
  $client->close;
  undef($client);
}

sub timeout_handler {
  print STDERR "Daemon timed out after inactivity.\n";
  print STDERR "Self destructing...\n";
  term_handler();
  return;
}

sub term_handler {
  print STDERR "Exiting...\n";
  exit;
}

# TODO: Think about if the croaks can't be done better, so that the client
#       receives the error/log message? or is that overkill?
sub daemonize {
  my $restartable = shift;
  #This causes more problems than it solves (e.g. sTeX setup)
  #chdir '/'                 or croak "Can't chdir to /: $!";
  open STDIN, '<', '/dev/null'
    or croak("Fatal:server:daemonize  Can't read /dev/null: $!");
  open STDOUT, '>', '/dev/null'
    or croak("Fatal:server:daemonize  Can't write to /dev/null: $!");
  if ($restartable && $restartable > 0) {
    defined(my $pid = fork)
      or croak("Fatal:server:daemonize  Can't fork: $!");
    exit if $pid;
    setsid
      or croak("Fatal:server:daemonize  Can't start a new session: $!"); }
  *STDERR_SAVED = *STDERR;
  *STDERR       = *STDOUT
    or croak("Fatal:server:daemonize Can't dup stdout: $!");
  return; }

sub setup_server {
  my ($localaddress, $localport) = @_;
  my $http_server = HTTP::Daemon->new(
    LocalAddr => $localaddress,
    LocalPort => $localport,
    Listen    => SOMAXCONN,
    Proto     => 'tcp',
    Reuse     => 1,
  );
  croak("Fatal:server:init can't setup server on $localaddress:$localport") unless $http_server;
  return $http_server;
}

sub croak {
  print STDERR $_[0], "\n";
  exit 1;
}

#**********************************************************************
__END__

=pod

=head1 NAME

C<latexmls> - An HTTP server for daemonized LaTeXML processing.

=head1 SYNOPSYS

See the OPTIONS section in L<LaTeXML::Common::Config> for usage information.
Also consult latexmls --help

=head1 SEE ALSO

L<latexmlc>, L<LaTeXML::Common::Config>

=head1 AUTHOR

Deyan Ginev <deyan.ginev@nist.gov>

=head1 COPYRIGHT

Public domain software, produced as part of work done by the
United States Government & not subject to copyright in the US.

=cut
#**********************************************************************