#!/usr/bin/perl -w
use warnings;
use strict;

use IO::Socket;
use WordNet::QueryData;
use WordNet::Tools;
use WordNet::SenseRelate::AllWords;
use WordNet::Similarity;
use Getopt::Long;

my $wnlocation = '/usr/local/WordNet-3.0/dict';
my $localhost = '127.0.0.1';
my $localport = 32323;
my $logfile;

my $help;
my $version;
my $client;
my $text;
my @tokens;
my $line;
my @sentences;
my $sentence;
my $temp; # for accessing %options hash

my @context;
my $contextfile="./user_data/tmp_client_input.txt";
my $contextflag=0;
my $stoplistflag=0;
my $leskconfigfile="./user_data/lesk-stoplist.conf";
my $vectorconfigfile="./user_data/vector-stoplist.conf";
my $windowSize;
my $format;
my $scheme;
my $i=0;
my $j=0;


# result variables
my $status; # to store the status of system commands to create 
			#and move directories...
my $val;	# for reading each word after disambiguation


my $ok = GetOptions ('logfile=s' => \$logfile,
			 'wnlocation=s' => \$wnlocation, 	
			 'port=i' => \$localport,
		     help => \$help,
		     version => \$version,
		     );
$ok or exit 1;

if ($help) {
    showUsage ("Long");
    exit;
}

if ($version) {
    print "allwords_server.pl - WordNet::SenseRelate::AllWords web interface server\n";
    print 'Last modified by : $Id: allwords_server.pl,v 1.39 2009/05/27 19:56:57 kvarada Exp $';
    print "\n";
    exit;
}

unless (defined $logfile) {
    print STDERR "The --logfile argument is required. This is the logfile path for allwords_server.pl log\n";
    showUsage ();
    exit 1;
}

my $success = open LFH, ">>$logfile";
if(!$success)
{
	print "\nCannot open $logfile for writing: $!";
}
else
{
	print "\nWriting log in $logfile";
}
	print LFH "WordNet Location => $wnlocation\n";

#. ....................................
#
# Creating WordNet::QueryData object.
#
#......................................

my $qd = WordNet::QueryData->new($wnlocation);
$qd or die "\nCouldn't construct WordNet::QueryData object"; 

print LFH "\nWordNet::QueryData object sucessfully created";

my %options;
my $stopword;
my $stopwordflag=0;
my $istagged=0;
my $showversion=0;
my $usr_dir;
my $tracefilename;
my $resultfilename;
my $doc_base = "../../htdocs/allwords/user_data";

# This is the name of the logfile of AllWords.pm. The file will be 
# stored in directory of the webserver
#
#........................................................................
#
# Compoundifying is done using compoundify method of WordNet::Tools.
#
#........................................................................

my $wntools = WordNet::Tools->new($qd);
$wntools or die "\nCouldn't construct WordNet::Tools object"; 
print LFH "\nWordNet::SenseRelate::Tools object sucessfully created";

my $sock = IO::Socket::INET->new(
		   LocalPort => $localport,
		   Listen => SOMAXCONN,
		   Reuse => 1,
		   Type => SOCK_STREAM
) or die "Could not bind to network port: $! \n";

print LFH "\nSocket created with following details \nLocalHost => $localhost\nLocalPort => $localport\nProto => tcp";

print LFH "\n[Server $0 accepting clients]\n";
while ($client = $sock->accept()){	
   $client->autoflush(1);	
   print LFH "\nClient $client is accepted\n";	
   %options= (wordnet => $qd, wntools => $wntools);
   @sentences=();
   $sentence="";
   $text="";
   $contextflag=0;
   $stoplistflag=0;
   while(defined ($line = <$client>))
   {	
	chomp($line);
	@tokens=split(/:/,$line);
		if( $line =~ /<version information>:/) 
		{
		    # get version information
		    my $qdver = $qd->VERSION ();
			my $wnver = $wntools->hashCode ();
		    my $simver = $WordNet::Similarity::VERSION;
			my $allwordsver = $WordNet::SenseRelate::AllWords::VERSION;
			print LFH "\nv WordNet $wnver";
			print LFH "\nv WordNet::QueryData $qdver";
		    print LFH "\nv WordNet::Similarity $simver";
			print LFH "\nv WordNet::SenseRelate::AllWords $allwordsver";

			print $client "v WordNet $wnver\n";
			print $client "v WordNet::QueryData $qdver\n";
			print $client "v WordNet::Similarity $simver\n";
			print $client "v WordNet::SenseRelate::AllWords $allwordsver\n";
			$showversion=1;
			print LFH "\nShow verrion flag => $showversion";
			close($client);	
			last;
		}
		elsif($line =~ /<start-of-context>/)
		{
			$contextflag=1;
			open (CFH, '>>', "$contextfile") or die "Cannot open $contextfile : $!";				
		} 
		elsif($line =~ /<end-of-context>/)
		{
			$contextflag=0;
			close CFH;
		}
		elsif($contextflag == 1 && $line =~ /<con>/)
	    {
			print CFH $tokens[1];	
			print CFH "\n";
	    }
		elsif ($line =~  /<Document Base>:/)
	    {
			$doc_base=$tokens[1];
			print LFH "\nDocument Base => $doc_base";
	    }
		elsif ($line =~  /<User Directory>:/)
	    {
			$usr_dir="$tokens[1]"."_server";
			print LFH "\nUser Directory => $usr_dir";
			$status=system("mkdir $usr_dir");
			$status == 0 ? print LFH "\n created dir $usr_dir.":print LFH "\nDir already present or error creating dir $usr_dir"; 
			$contextfile="$usr_dir"."/context.txt";
			$tracefilename="$usr_dir"."/trace.txt";
			$resultfilename="$usr_dir"."/results.txt";
			print LFH "\nTrace file name => $tracefilename";
	    }
		elsif ($line =~ /<Contextfile>:/)
		{
			$showversion=0;
			print LFH "\nContextfile => $contextfile";
			open (CFH, '<', "$contextfile") or die "Cannot open $contextfile: $!";				
			while(<CFH>)
			{
				$text=$text.$_;
			}
			$text =~ s/\r+//g;	
			@sentences = split(/\n+/,$text);
			close CFH;
	    }
		elsif ($line =~ /<Window size>:/)
		{
			$windowSize=$tokens[1];
			print LFH "\nWindow Size => $windowSize";
	    }elsif ($line =~ /<Format>:/)
	    {
			$format=$tokens[1];
			$istagged = ($format eq 'tagged') ? 1 : 0;
			print LFH "\nformat => $format";
			$istagged eq 1 ? print LFH "\ntagged text => YES": print LFH "\ntagged text => NO" ;
			$options{wnformat} = 1 if $format eq 'wntagged';
			$options{wnformat} ? print LFH "\nwntagged text => YES": print LFH "\nwntagged text => NO" ;

	    }elsif ($line =~ /<Scheme>:/)
	    {
			$scheme=$tokens[1];
			print LFH "\nscheme => $scheme";
	    }elsif ($line =~ /<trace>:/)
	    {
			$options{trace} = $tokens[1];
	    }elsif ($line =~ /<pairScore>:/)
	    {	
			$options{pairScore} = $tokens[1];
	    }elsif ($line =~ /<forcepos>:/)
	    {	
			$options{forcepos} = 1;
	    }elsif ($line =~ /<nocompoundify>:/)
	    {	
			$options{nocompoundify} = 1;
	    }elsif ($line =~ /<usemono>:/)
	    {	
			$options{usemono} = 1;
	    }elsif ($line =~ /<backoff>:/)
	    {	
			$options{backoff} = 1;
	    }elsif ($line =~ /<measure>:/)
	    {	
			$options{measure} = "WordNet::Similarity::"."$tokens[1]";

		}elsif ($line =~ /<contextScore>:/)
	    {	
			$options{contextScore} = $tokens[1];
	    }elsif ($line =~ /<stoplist>:/)
	    {	
			$options{stoplist} = "$usr_dir/"."$tokens[1]";
	    }elsif($line =~ /<start-of-stoplist>/)
		{
			$stoplistflag=1;
			open (SFH, '>>', "$options{stoplist}") or die "Cannot open $options{stoplist} : $!";				
		} 
		elsif($line =~ /<end-of-stoplist>/)
		{
			$stoplistflag=0;
			close SFH;
		}
	    elsif($stoplistflag == 1 && $line =~ /<stp>/ && defined $tokens[1])
	    {
			print SFH $tokens[1];	
			print SFH "\n";
	    }
		elsif($line eq "<End>\0012")
		{
			last;
		}
   }
if (!$showversion) {
$options{config} = $leskconfigfile if ($options{measure} eq "WordNet::Similarity::lesk");
$options{config} = $vectorconfigfile if ($options{measure} eq "WordNet::Similarity::vector");

print LFH "\nThe options are: \n";
foreach $temp (keys(%options)) 
{ 
	print LFH "$temp=>".$options{$temp} . "\n";
} 	


   my $obj = WordNet::SenseRelate::AllWords->new(%options);
   $obj ? print LFH "\nWordNet::SenseRelate::AllWords object successfully created":print LFH "\nCouldn't construct WordNet::SenseRelate::AllWords object";

   open RFH, '>', $resultfilename or print "Cannot open $resultfilename for writing: $!";
   foreach $sentence (@sentences) {
	   chomp($sentence);
	   @context=split(/ +/,$sentence);

	#.....................................................................
	#
	# This is the call to disambigute the sentence which client has sent
	#
	#.....................................................................

		my @res = $obj->disambiguate (window => $windowSize,
					  scheme => $scheme,
					  tagged => $istagged,
					  context => [@context]);

	#........................................................................
	#
	# AllWords.pm returns words with suffixes attached to it. 
	# If #o is attached, the word is a stopword
	# If #ND is attached the word is not defined in WordNet
	# If #NR is attached no relatedness found with the surrounding words
	# If #IT is attached, the word has invalid tag
	# Otherwise, the chosen sense along with the part of speech is sent to
	# the client
	#
	#........................................................................
	
	print RFH join (' ', @context), "\n";
	print RFH join (' ', @res), "\n";

	print LFH join (' ', @context), "\n";
	print LFH join (' ', @res), "\n";

	print $client join (' ', @context), "\015\012";
	print $client join (' ', @res), "\015\012";
	for($i=0,$j=0; $i<=$#res ; $i++,$j++)
	{
		   my $val;
  		   my $tagindex=index($res[$i],"#");
		   my $tag=substr $res[$i], $tagindex;
		   
		   if($format eq 'raw')
		   {
			if($res[$i] =~ /\_/ && $context[$j] !~ /\_/){
				my $count = ($res[$i] =~ tr/\_//);
				$val=$res[$i];
				$j=$j+$count;
			 }else{
				$val=$context[$j].$tag;
			 }
		   }
		   elsif($format eq 'tagged')
		   {
			 my ($tw,$tt)= ( $context[$j] =~ /(\S+)\/(\S+)/);
			 $val=$tw.$tag;
		   }	
  		   elsif($format eq 'wntagged')
		   {
			 my ($tw,$tt)= split /\#/, $context[$j];
			 $val=$tw.$tag;
		   }	

			if($val =~ /\#o/ )
			{
				print LFH "\n$val : stopword\n";
				print RFH "\n$val : stopword\n";
				print $client "\n$val : stopword\015\012";
			}
			elsif($val =~ /\#ND/) 
			{
				print LFH "\n$val : not in WordNet\n";
				print RFH "\n$val : not in WordNet\n";
				print $client "\n$val : not in WordNet\015\012";
			}
			elsif($val =~ /\#NR/)
			{
				print LFH "\n$val: No relatedness found with the surrounding words\n";
				print RFH "\n$val: No relatedness found with the surrounding words\n";
				print $client "\n$val: No relatedness found with the surrounding words\015\012";

			}
			elsif($val =~ /\#IT/)
			{
				print LFH "\n$val: Invalid Tag\n";
				print RFH "\n$val: Invalid Tag\n";
				print $client "\n$val: Invalid Tag\015\012";

			}
			elsif($val =~ /\#NT/)
			{
				print LFH "\n$val: No Tag\n";
				print RFH "\n$val: No Tag\n";
				print $client "\n$val: No Tag\015\012";
			}

			elsif($val =~ /\#CL/)
			{
				print LFH "\n$val: Closed Class Word\n";
				print RFH "\n$val: Closed Class Word\n";
				print $client "\n$val: Closed Class Word\015\012";
			}
			elsif($val =~ /\#MW/)
			{
				print LFH "\n$val: Missing Word\n";
				print RFH "\n$val: Missing Word\n";
				print $client "\n$val: Missing Word\015\012";
			}
			else
			{
				my ($gloss) = $qd->querySense ($res[$i], "glos");
				print LFH "\n$val : $gloss\n";
				print RFH "\n$val : $gloss\n";
				print $client "\n$val : $gloss\015\012";
			}
		}
		if ($options{trace}) {
				open TFH, '>', $tracefilename or print "Cannot open $tracefilename for writing: $!";
				print TFH join (' ', @res), "\n";
				print $client "<start-of-trace>\015\012";
				print $client join (' ', @res), "\015\012";
				my $tstr = $obj->getTrace();
				print TFH "$tstr \n";
				print $client "$tstr \015\012";
				print $client "<end-of-trace>\015\012";
				print LFH "$tstr \n";
				close TFH;
		}

   	}	
		close RFH;
		close($client);	
	}
}


sub showUsage
{
    my $long = shift;
    print "Usage: allwords_server.pl --logfile FILE \n";
    print "              [--wnlocation WordNet path] [--port PORT] \n";
    print "              | {--help | --version}\n";

    if ($long) {
	print "Options:\n";
	print "\t--logfile FILE             logfile path for allwords.pl log\n";
	print "\t--wnlocation WordNet path  WordNet path\n";
	print "\t--port PORTNUMBER          Specify the port PORTNUMBER for the server to listen on \n";
	print "\t--help                     show this help message\n";
	print "\t--version                  show version information\n";
    }
}

=head1 NAME

allwords_server.pl - [Web] The server for allwords.cgi and version.cgi

=head1 DESCRIPTION

This script implements the backend of the web interface for 
WordNet::SenseRelate::AllWords

This script listens to a port waiting for a request form allwords.cgi
or version.cgi. If disambiguation request is made by allwords.cgi, the
server first gets input options from allwords.cgi. Then it creates 
AllWords object. Using AllWords object and input options disambiguate 
method is called. The result returned by disambiguate is checked and 
appropriate message is sent back to allwords.cgi client. 

Client-Server Communication
The server loads all the required modules and listens to the port 32323. 
The client sends informtation with a preamble to know the server what kind
of input data it is going to get. For example, the client reads the text 
to be disambiguated from the user and sends the context file to the server 
as below

<start-of-context>
context-line 1
context-line 2
context-line 3
.
.
.
<end-of-context>

The tags <start-of-context> and <end-of-context> are not going to conflict 
with the text to be disambiguated as we clean the text before disambiguation
and hence the characters '<' and '>' will be removed from the text.

If the version information is requested, appropriate version information
of the respective components is fetched and is passed to version.cgi client.

If the client requests for trace level, then trace output is fetched calling
getTrace() method of AllWords.pm.

Along with sending all information to the client, the server also stores all 
the input data and result files on the server machine in a unique directory 
for each client. 

=head1 AUTHORS

 Varada Kolhatkar, University of Minnesota, Duluth
 kolha002 at d.umn.edu

 Ted Pedersen, University of Minnesota, Duluth
 tpederse at d.umn.edu

This document last modified by : 
$Id: allwords_server.pl,v 1.39 2009/05/27 19:56:57 kvarada Exp $ 

=head1 SEE ALSO

allwords.cgi, version.cgi, README.web.pod

=head1 COPYRIGHT AND LICENSE

Copyright (c) 2008, Varada Kolhatkar, Ted Pedersen, Jason Michelizzi

Permission is granted to copy, distribute and/or modify this document
under the terms of the GNU Free Documentation License, Version 1.2
or any later version published by the Free Software Foundation;
with no Invariant Sections, no Front-Cover Texts, and no Back-Cover
Texts.

Note: a copy of the GNU Free Documentation License is available on
the web at L<http://www.gnu.org/copyleft/fdl.html> and is included in
this distribution as FDL.txt.

=cut