The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.

NAME

Ufal::Parsito - bindings to Parsito library http://ufal.mff.cuni.cz/parsito.

SYNOPSIS

  use Ufal::Parsito;

  # Load the parsing model; load() yields a false value when it fails.
  my $parser_file = '...';
  my $parser = Ufal::Parsito::Parser::load($parser_file);
  $parser or die "Cannot load parser from file '$parser_file'\n";

  # CoNLL-U reader and writer, plus one tree object reused for every sentence.
  my $conllu_input = Ufal::Parsito::TreeInputFormat::newInputFormat("conllu");
  my $conllu_output = Ufal::Parsito::TreeOutputFormat::newOutputFormat("conllu");
  my $tree = Ufal::Parsito::Tree->new();

  # Hand the whole input to the reader, then parse and print tree by tree.
  $conllu_input->setText(join('', <>));
  while ($conllu_input->nextTree($tree)) {
    $parser->parse($tree);

    my $conllu = $conllu_output->writeTree($tree, $conllu_input);
    print $conllu;
  }

  # A non-empty lastError() after the loop means the input could not be read.
  my $error = $conllu_input->lastError();
  length($error) and die "Cannot read input CoNLL-U: " . $error;

REQUIREMENTS

To compile the module, a C++11 compiler is needed: either g++ 4.7 or newer, or clang 3.2 or newer.

DESCRIPTION

Ufal::Parsito is a Perl binding to the Parsito library http://ufal.mff.cuni.cz/parsito.

The bindings are a straightforward conversion of the C++ bindings API. Vectors do not have a native Perl interface; see the Ufal::Parsito::Children source for reference. Static methods and enumerations are available only through the module, not through object instances.

Wrapped C++ API

The C++ API being wrapped follows. For an API reference of the original C++ API, see <http://ufal.mff.cuni.cz/parsito/api-reference>.

  Helper Structures
  -----------------
  
    // Container type of Node::children.
    typedef vector<int> Children;
  
    // A single node of a dependency tree; the string fields follow the
    // CoNLL-U columns named in the comments below.
    class Node {
     public:
      int id;         // 0 is root, >0 is sentence node, <0 is undefined
      string form;    // form
      string lemma;   // lemma
      string upostag; // universal part-of-speech tag
      string xpostag; // language-specific part-of-speech tag
      string feats;   // list of morphological features
      int head;       // head, 0 is root, <0 is without parent
      string deprel;  // dependency relation to the head
      string deps;    // secondary dependencies
      string misc;    // miscellaneous information
  
      Children children; // children of this node (presumably their ids)
  
      // By default the node has an undefined id (-1) and an empty form.
      Node(int id = -1, string form = string());
    };
    // Container type of Tree::nodes.
    typedef vector<Node> Nodes;
  
  
  Main Classes
  ------------
  
    class Tree {
     public:
      Tree();
  
      Nodes nodes;
  
      bool empty();
      void clear();
      node& addNode(string form);
      void setHead(int id, int head, string deprel);
      void unlinkAllNodes();
  
      static const std::string root_form;
    }
  
    class TreeInputFormat {
     public:
      virtual void setText(string text);
      virtual bool nextTree(tree& t) = 0;
      string lastError() const;
  
      // Static factory methods
      static TreeInputFormat* newInputFormat(string name);
      static TreeInputFormat* newConlluInputFormat();
    };
  
    // Writer which serializes a tree to a string in some output format.
    class TreeOutputFormat {
     public:
  
      // Returns the textual representation of t. The optional additional_info
      // is the input format the tree was read with (the examples pass it);
      // NOTE(review): what exactly is taken from it is not stated here.
      virtual string writeTree(const Tree& t, const TreeInputFormat* additional_info = nullptr);
  
      // Static factory methods
      static TreeOutputFormat* newOutputFormat(string name);
      static TreeOutputFormat* newConlluOutputFormat();
    };
  
    class Parser {
     public:
      virtual void parse(tree& t, unsigned beam_size = 0) const;
  
      enum { NO_CACHE = 0, FULL_CACHE = 2147483647};
      static Parser* load(string file, unsigned cache = 1000);
    };
  
    // Version information split into its components.
    class Version {
     public:
      unsigned major;    // major version number
      unsigned minor;    // minor version number
      unsigned patch;    // patch version number
      string prerelease; // prerelease tag (presumably empty for a release -- confirm)
  
      // Static method returning a Version; presumably the version of the
      // library in use -- see the API reference.
      static Version current();
    };

Examples

run_parsito

Simple example performing parsing and CoNLL-U reading and writing.

  use strict;
  use warnings;
  use open qw(:std :utf8);
  
  use Ufal::Parsito;
  
  # The first argument is the parser model; any remaining arguments are input
  # files read through <> (standard input when there are none).
  @ARGV >= 1 or die "Usage: $0 parser_file\n";
  
  print STDERR "Loading parser: ";
  my $parser = Ufal::Parsito::Parser::load($ARGV[0]);
  $parser or die "Cannot load parser from file '$ARGV[0]'\n";
  print STDERR "done\n";
  shift @ARGV; # drop the model path so that <> does not read it as input
  
  my $conllu_input = Ufal::Parsito::TreeInputFormat::newInputFormat("conllu");
  my $conllu_output = Ufal::Parsito::TreeOutputFormat::newOutputFormat("conllu");
  my $tree = Ufal::Parsito::Tree->new();
  
  # Process the input block by block instead of loading all of it at once.
  for (my $not_eof = 1; $not_eof; ) {
    my $text = '';
  
    # Read block: all lines up to and including the next empty line (or EOF)
    while (1) {
      my $line = <>;
      last unless ($not_eof = defined $line);
      $text .= $line;
      chomp($line);
      last unless length $line;
    }
  
    # Parse every tree of the block and print it back as CoNLL-U
    $conllu_input->setText($text);
    while ($conllu_input->nextTree($tree)) {
      $parser->parse($tree);
  
      my $output = $conllu_output->writeTree($tree, $conllu_input);
      print $output;
    }
    # A non-empty lastError() means the block could not be read
    length($conllu_input->lastError()) and die "Cannot read input CoNLL-U: " . $conllu_input->lastError();
  }

AUTHORS

Milan Straka <straka@ufal.mff.cuni.cz>

COPYRIGHT AND LICENCE

Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of Mathematics and Physics, Charles University in Prague, Czech Republic.

This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0. If a copy of the MPL was not distributed with this file, You can obtain one at http://mozilla.org/MPL/2.0/.