BEGIN {
our
(
$VERSION
,
@ISA
,
@EXPORT
,
@EXPORT_OK
,
%EXPORT_TAGS
);
$VERSION
= 0.01;
@ISA
=
qw(Exporter)
;
@EXPORT
=
qw(
&pos_prop
&pos_of
)
;
%EXPORT_TAGS
= ();
@EXPORT_OK
=
qw()
;
}
our
@EXPORT_OK
;
our
$data
= {};
our
%config
;
our
@functions
=
qw{
sql_get_part_of_speech_property
pos_prop
short_save_in_memory
pos_of
find_word_type
}
;
sub
sql_get_part_of_speech_property {
my
(
$word
) =
@_
;
return
{
type
=>
''
,
genus
=>
''
}
if
!
$data
->{modes}{use_sql};
eval
q{
use DBI;
my $db_string = "DBI:mysql:database=" . $config{'mysql'}
{
'database'
};
if
(
$config
{
'mysql'
}{
'host'
} ) {
$db_string
.=
';host='
.
$config
{
'mysql'
}{
'host'
};
}
my
$user
=
$config
{
'mysql'
}{
'user'
};
my
$password
=
$config
{
'mysql'
}{
'password'
};
my
$dbh
= DBI->
connect
(
$db_string
,
$user
,
$password
)
or (
say
(
"not connected to "
,
$config
{
'mysql'
}{
'database'
},
", user "
,
$config
{
'mysql'
}{
'user'
},
", password "
,
$config
{
'mysql'
}{
'user'
},
": "
)
and
return
{
type
=>
''
,
genus
=>
''
}
);
my
$sql
=
qq{SELECT type, genus FROM part_of_speech WHERE word = '$word' }
;
my
$sth
=
$dbh
->prepare(
$sql
);
$sth
->execute();
while
(
$sth
&& (
my
$data
=
$sth
->fetchrow_arrayref ) ) {
my
$return_hash
= {
type
=>
$data
->[0],
genus
=>
$data
->[1] };
print
Dumper
$return_hash
;
$sth
->finish();
$dbh
->disconnect();
return
$return_hash
;
}
$sth
->finish();
$dbh
->disconnect();
}
if
!$::batch;
return
{
type
=>
''
,
genus
=>
''
};
}
sub
pos_prop {
my
(
$CLIENT_ref
,
$word
,
$count
,
$not_guess
,
$not_ask
,
$write_it_into_database
)
=
@_
;
if
( !
$data
->{abilities}->{
'tagger'
} ) {
my
$sock
= ${ connect_to(
data
=>
*data
,
name
=>
'tagger'
) };
print
{
$sock
}
'GET<;;>genus<;;>'
,
join
(
'<;;>'
,
@_
) .
"\n"
;
my
$result
= <
$sock
>;
chomp
$result
;
close
$sock
;
eval
{
local
$SIG
{
'__DIE__'
};
$result
= thaw( r_unescape(
$result
) );
};
if
($@) {
say
'error in that(): '
, $@;
}
return
$result
;
}
AI::FreeHAL::Engine::try_use_lowlatency_mode();
my
$CLIENT
=
undef
;
eval
'$CLIENT = ${$CLIENT_ref};'
;
$count
||= 0;
$write_it_into_database
||= 0;
$word
=~ s/^ein(.?)[_\s]//igm;
$word
=~ s/[-]//gm;
$word
=~ s/^(und|or|and|oder)\s+//gm;
$word
=~ s/_/ /igm;
$word
=~ s/^\s+//igm;
$word
=~ s/\s+$//igm;
$word
=
ucfirst
lc
$word
;
if
(
$has_no_genus
{
$word
} ) {
part_of_speech_get_memory()->{
$word
}->{
'genus'
} =
q{q}
;
}
part_of_speech_get_memory()->{
$word
} = part_of_speech_get_entry(
$word
)
if
!$::batch && !part_of_speech_get_memory()->{
$word
};
my
$last_word
= (
split
/\s|[_]/,
$word
)[-1];
if
(
$last_word
&&
$word
ne
$last_word
)
{
my
$props_for_last_word
=
pos_prop(
$CLIENT_ref
,
$last_word
, 0,
$not_guess
,
$not_ask
,
$write_it_into_database
);
if
(
$props_for_last_word
->{
'genus'
} ) {
return
$props_for_last_word
;
}
}
my
$type_str
=
q{}
;
if
( !
$not_guess
&& pos_of(
$CLIENT_ref
,
$word
, 0, 1, 0,
undef
, 0, 0 ) ==
$data
->{const}{NOUN}
&& !
$has_no_genus
{
lc
$word
} )
{
if
(
(
!
defined
part_of_speech_get_memory()->{
$word
}->{
'genus'
}
|| (
part_of_speech_get_memory()->{
$word
}->{
'genus'
}
? part_of_speech_get_memory()->{
$word
}->{
'genus'
}
:
q{-}
) eq
'-'
|| (
length
( part_of_speech_get_memory()->{
$word
}->{
'genus'
} ) ==
0
&&
defined
part_of_speech_get_memory()->{
$word
}->{
'genus'
} )
|| ( LANGUAGE() eq
'de'
&& part_of_speech_get_memory()->{
$word
}->{
'genus'
} eq
'q'
)
)
&& ( !( ( sql_get_part_of_speech_property
$word
)->{genus} )
&& !( ( sql_get_part_of_speech_property
lc
$word
)->{genus} ) )
)
{
if
( !
$has_no_genus
{
lc
$word
} ) {
my
$first_word
= (
split
/\s|[_]/,
$word
)[0];
$first_word
=~ s/_$//igm;
$last_word
=~ s/_$//igm;
my
$wt
=
pos_of(
$CLIENT_ref
,
$word
, 0, 1, 0,
undef
, 0, 0 );
my
$first_wt
=
$word
eq
$last_word
?
$wt
: pos_of(
$CLIENT_ref
,
$first_word
, 0, 1, 0,
undef
, 0,
0 );
my
$last_wt
=
$word
eq
$last_word
?
$wt
: pos_of(
$CLIENT_ref
,
$last_word
, 0, 1, 0,
undef
, 0,
0 );
if
(
$count
> 5 ) {
$type_str
=
'perhaps_s'
;
}
elsif
( (
$first_wt
==
$data
->{const}{ADJ} ||
$first_wt
==
$ART
)
&&
$first_word
=~ /^[a-zA-Z]+nen$/ )
{
$type_str
=
'perhaps_m'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+ernet$/ )
{
$type_str
=
'perhaps_s'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+urm$/ )
{
$type_str
=
'perhaps_m'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+tter$/ )
{
$type_str
=
'perhaps_s'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+ek$/ )
{
$type_str
=
'perhaps_s'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+urti$/ )
{
$type_str
=
'perhaps_f'
;
}
elsif
( (
$first_wt
==
$data
->{const}{ADJ} ||
$first_wt
==
$ART
)
&&
$first_word
=~ /^[a-zA-Z]+nem$/ )
{
$type_str
=
'perhaps_m'
;
}
elsif
( (
$first_wt
==
$data
->{const}{ADJ} ||
$first_wt
==
$ART
)
&&
$first_word
=~ /^[a-zA-Z]+ner$/ )
{
$type_str
=
'perhaps_f'
;
}
elsif
(
$last_word
=~ /^[a-zA-Z]+re$/ ) {
$type_str
=
'perhaps_f'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+adt$/ )
{
$type_str
=
'perhaps_f'
;
}
elsif
(
$first_word
=~ /^[a-zA-Z]+ige$/ ) {
$type_str
=
'perhaps_f'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+in$/ )
{
$type_str
=
'perhaps_f'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+innen$/ )
{
$type_str
=
'perhaps_f'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+cken$/ )
{
$type_str
=
'perhaps_f'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+iz$/ )
{
$type_str
=
'perhaps_f'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+ahn$/ )
{
$type_str
=
'perhaps_f'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+aus$/ )
{
$type_str
=
'perhaps_f'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+rom$/ )
{
$type_str
=
'perhaps_m'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+amm$/ )
{
$type_str
=
'perhaps_m'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+uhr$/ )
{
$type_str
=
'perhaps_f'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+ann$/ )
{
$type_str
=
'perhaps_m'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+aph$/ )
{
$type_str
=
'perhaps_m'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+ext$/ )
{
$type_str
=
'perhaps_m'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+od$/ )
{
$type_str
=
'perhaps_m'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+ohn$/ )
{
$type_str
=
'perhaps_m'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+plan$/ )
{
$type_str
=
'perhaps_m'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+ahlen$/ )
{
$type_str
=
'perhaps_f'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+ua$/ )
{
$type_str
=
'perhaps_f'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+uehr$/ )
{
$type_str
=
'perhaps_f'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+ot$/ )
{
$type_str
=
'perhaps_s'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+erb$/ )
{
$type_str
=
'perhaps_m'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+ity$/ )
{
$type_str
=
'perhaps_f'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+ey$/ )
{
$type_str
=
'perhaps_m'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+weg$/ )
{
$type_str
=
'perhaps_m'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+ind$/ )
{
$type_str
=
'perhaps_m'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]*?hut$/ )
{
$type_str
=
'perhaps_m'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]*?mut$/ )
{
$type_str
=
'perhaps_m'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+ut$/ )
{
$type_str
=
'perhaps_s'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+na$/ )
{
$type_str
=
'perhaps_f'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+aetz$/ )
{
$type_str
=
'perhaps_s'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+ruck$/ )
{
$type_str
=
'perhaps_m'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+ssi$/ )
{
$type_str
=
'perhaps_f'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+ail$/ )
{
$type_str
=
'perhaps_f'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+til$/ )
{
$type_str
=
'perhaps_m'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+opf$/ )
{
$type_str
=
'perhaps_m'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]
*lauch
$/ )
{
$type_str
=
'perhaps_m'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]
*auch
$/ )
{
$type_str
=
'perhaps_f'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]
*och
$/ )
{
$type_str
=
'perhaps_s'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+ip$/ )
{
$type_str
=
'perhaps_s'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+ell$/ )
{
$type_str
=
'perhaps_s'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]
*fehl
$/ )
{
$type_str
=
'perhaps_m'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]
*ie
$/ )
{
$type_str
=
'perhaps_f'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]*?[bt]uch$/ )
{
$type_str
=
'perhaps_s'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]*?ucht$/ )
{
$type_str
=
'perhaps_f'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+mpf$/ )
{
$type_str
=
'perhaps_m'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+ift$/ )
{
$type_str
=
'perhaps_m'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+ausch$/ )
{
$type_str
=
'perhaps_m'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]*(st|r|erb)and$/ )
{
$type_str
=
'perhaps_m'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]*(l)and$/ )
{
$type_str
=
'perhaps_s'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]
*and
$/ )
{
$type_str
=
'perhaps_f'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]
*ient
$/ )
{
$type_str
=
'perhaps_m'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+urst$/ )
{
$type_str
=
'perhaps_f'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+unst$/ )
{
$type_str
=
'perhaps_f'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+eil$/ )
{
$type_str
=
'perhaps_s'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+wort$/ )
{
$type_str
=
'perhaps_s'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+
sort
$/ )
{
$type_str
=
'perhaps_s'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+to$/ )
{
$type_str
=
'perhaps_s'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]
*zug
$/ )
{
$type_str
=
'perhaps_m'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]
*ad
$/ )
{
$type_str
=
'perhaps_s'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]
*al
$/ )
{
$type_str
=
'perhaps_s'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]
*koll
$/ )
{
$type_str
=
'perhaps_s'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]
*att
$/ )
{
$type_str
=
'perhaps_s'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]
*iki
$/ )
{
$type_str
=
'perhaps_s'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]
*fest
$/ )
{
$type_str
=
'perhaps_s'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]
*schuh
$/ )
{
$type_str
=
'perhaps_m'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+all$/ )
{
$type_str
=
'perhaps_m'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+lust$/ )
{
$type_str
=
'perhaps_f'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+ust$/ )
{
$type_str
=
'perhaps_m'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+uehl$/ )
{
$type_str
=
'perhaps_s'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+rief$/ )
{
$type_str
=
'perhaps_m'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+rst$/ )
{
$type_str
=
'perhaps_f'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+epp$/ )
{
$type_str
=
'perhaps_m'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+kt$/ )
{
$type_str
=
'perhaps_m'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+nsa$/ )
{
$type_str
=
'perhaps_f'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]
*zeit
$/ )
{
$type_str
=
'perhaps_f'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]
*bahn
$/ )
{
$type_str
=
'perhaps_f'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+beit$/ )
{
$type_str
=
'perhaps_f'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+ilz$/ )
{
$type_str
=
'perhaps_m'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+eis$/ )
{
$type_str
=
'perhaps_m'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+ei[gk]$/ )
{
$type_str
=
'perhaps_m'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+(ta|um)$/ )
{
$type_str
=
'perhaps_s'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+off$/ )
{
$type_str
=
'perhaps_m'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+ohl$/ )
{
$type_str
=
'perhaps_m'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+[ae]rz$/ )
{
$type_str
=
'perhaps_m'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+a[xgst]$/ )
{
$type_str
=
'perhaps_m'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+ef$/ )
{
$type_str
=
'perhaps_m'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+
do
$/ )
{
$type_str
=
'perhaps_s'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+go$/ )
{
$type_str
=
'perhaps_s'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+iff$/ )
{
$type_str
=
'perhaps_s'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+eld$/ )
{
$type_str
=
'perhaps_s'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+ied$/ )
{
$type_str
=
'perhaps_s'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+rist$/ )
{
$type_str
=
'perhaps_m'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+ist$/ )
{
$type_str
=
'perhaps_m'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+af$/ )
{
$type_str
=
'perhaps_m'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+ock$/ )
{
$type_str
=
'perhaps_m'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+uck$/ )
{
$type_str
=
'perhaps_m'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]*?ha[n]t$/ )
{
$type_str
=
'perhaps_f'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+raut$/ )
{
$type_str
=
'perhaps_s'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]*?luft$/ )
{
$type_str
=
'perhaps_f'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+huft$/ )
{
$type_str
=
'perhaps_m'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+aut$/ )
{
$type_str
=
'perhaps_f'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+urz$/ )
{
$type_str
=
'perhaps_f'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+esen$/ )
{
$type_str
=
'perhaps_f'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+acken$/ )
{
$type_str
=
'perhaps_m'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+uhl$/ )
{
$type_str
=
'perhaps_m'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+uv$/ )
{
$type_str
=
'perhaps_s'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+orf$/ )
{
$type_str
=
'perhaps_s'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+olk$/ )
{
$type_str
=
'perhaps_s'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+raft$/ )
{
$type_str
=
'perhaps_f'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+id$/ )
{
$type_str
=
'perhaps_s'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+ack$/ )
{
$type_str
=
'perhaps_m'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+weck$/ )
{
$type_str
=
'perhaps_m'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+ueck$/ )
{
$type_str
=
'perhaps_s'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+atz$/ )
{
$type_str
=
'perhaps_m'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+ank$/ )
{
$type_str
=
'perhaps_f'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+and$/ )
{
$type_str
=
'perhaps_s'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+itz$/ )
{
$type_str
=
'perhaps_m'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+ett$/ )
{
$type_str
=
'perhaps_s'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+la$/ )
{
$type_str
=
'perhaps_f'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+ma$/ )
{
$type_str
=
'perhaps_f'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+und$/ )
{
$type_str
=
'perhaps_m'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+ereich$/ )
{
$type_str
=
'perhaps_m'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+rich$/ )
{
$type_str
=
'perhaps_m'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+eich$/ )
{
$type_str
=
'perhaps_s'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+ard$/ )
{
$type_str
=
'perhaps_m'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]
*ast
$/ )
{
$type_str
=
'perhaps_m'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+uff$/ )
{
$type_str
=
'perhaps_m'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+iet$/ )
{
$type_str
=
'perhaps_s'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]
*zeug
$/ )
{
$type_str
=
'perhaps_s'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]
*ing
$/ )
{
$type_str
=
'perhaps_s'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]
*isch
$/ )
{
$type_str
=
'perhaps_s'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]*[rw]icht$/ )
{
$type_str
=
'perhaps_f'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]
*ap
$/ )
{
$type_str
=
'perhaps_s'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]*[d]icht$/ )
{
$type_str
=
'perhaps_s'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]
*icht
$/ )
{
$type_str
=
'perhaps_f'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]
*acht
$/ )
{
$type_str
=
'perhaps_f'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]
*leid
$/ )
{
$type_str
=
'perhaps_s'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+ol$/ )
{
$type_str
=
'perhaps_s'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+nch$/ )
{
$type_str
=
'perhaps_m'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+ahl$/ )
{
$type_str
=
'perhaps_f'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+aeck$/ )
{
$type_str
=
'perhaps_s'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+net$/ )
{
$type_str
=
'perhaps_m'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+et$/ )
{
$type_str
=
'perhaps_s'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+eo$/ )
{
$type_str
=
'perhaps_s'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+ex$/ )
{
$type_str
=
'perhaps_m'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+erl$/ )
{
$type_str
=
'perhaps_s'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+nia$/ )
{
$type_str
=
'perhaps_s'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+olg$/ )
{
$type_str
=
'perhaps_m'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]
*utsch
$/ )
{
$type_str
=
'perhaps_m'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+
log
$/ )
{
$type_str
=
'perhaps_m'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+ang$/ )
{
$type_str
=
'perhaps_m'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+amt$/ )
{
$type_str
=
'perhaps_s'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+yp$/ )
{
$type_str
=
'perhaps_m'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+ko$/ )
{
$type_str
=
'perhaps_s'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+alg$/ )
{
$type_str
=
'perhaps_m'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+[nkr]itt$/ )
{
$type_str
=
'perhaps_m'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+tab$/ )
{
$type_str
=
'perhaps_m'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]*[rtmg]at$/ )
{
$type_str
=
'perhaps_m'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+aupt$/ )
{
$type_str
=
'perhaps_s'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+a[l]$/ )
{
$type_str
=
'perhaps_s'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+olt$/ )
{
$type_str
=
'perhaps_m'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+old$/ )
{
$type_str
=
'perhaps_m'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+ald$/ )
{
$type_str
=
'perhaps_m'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+el$/ )
{
$type_str
=
'perhaps_f'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+ien$/ )
{
$type_str
=
'perhaps_s'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+auf$/ )
{
$type_str
=
'perhaps_m'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+rma$/ )
{
$type_str
=
'perhaps_f'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+ra$/ )
{
$type_str
=
'perhaps_f'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+ert$/ )
{
$type_str
=
'perhaps_s'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+ngen$/ )
{
$type_str
=
'perhaps_f'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+erk$/ )
{
$type_str
=
'perhaps_s'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+gen$/ )
{
$type_str
=
'perhaps_f'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+[rhs]o$/ )
{
$type_str
=
'perhaps_f'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+ben$/ )
{
$type_str
=
'perhaps_f'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+ten$/ )
{
$type_str
=
'perhaps_f'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+olf$/ )
{
$type_str
=
'perhaps_m'
;
}
elsif
(LANGUAGE() eq
'de'
&& (
$first_wt
==
$data
->{const}{ADJ} ||
$first_wt
==
$ART
)
&&
$first_word
=~ /^[a-zA-Z]+ne$/ )
{
$type_str
=
'perhaps_f'
;
}
elsif
(LANGUAGE() eq
'de'
&&
$first_wt
==
$ART
&&
lc
$first_word
eq
'der'
)
{
$type_str
=
'perhaps_m'
;
}
elsif
(LANGUAGE() eq
'de'
&&
$first_wt
==
$ART
&&
lc
$first_word
eq
'die'
)
{
$type_str
=
'perhaps_f'
;
}
elsif
(LANGUAGE() eq
'de'
&&
$first_wt
==
$ART
&&
lc
$first_word
eq
'das'
)
{
$type_str
=
'perhaps_s'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /.+?(ismus|ling|or|ant)$/i )
{
$type_str
=
'perhaps_m'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /.+?e$/i )
{
$type_str
=
'perhaps_f'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /.+?er$/i )
{
$type_str
=
'perhaps_m'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /.+?ers$/i )
{
$type_str
=
'perhaps_m'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /.+?ens$/i )
{
$type_str
=
'perhaps_f'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /.+?(on|um)$/i )
{
$type_str
=
'perhaps_s'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /.+?(on|um)s$/i )
{
$type_str
=
'perhaps_s'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /.+?ung$/i )
{
$type_str
=
'perhaps_f'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /.+?au$/i )
{
$type_str
=
'perhaps_s'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /.+?nis$/i )
{
$type_str
=
'perhaps_s'
;
}
elsif
( LANGUAGE() eq
'de'
&&
length
(
$last_word
) == 1 )
{
$type_str
=
'perhaps_s'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~
/.+?(ung|heit|keit|schaft|ei|enz|ie|ik|ion|taet|ur)$/i )
{
$type_str
=
'perhaps_f'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+st$/ )
{
$type_str
=
'perhaps_s'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+e$/ )
{
$type_str
=
'perhaps_s'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /^[a-zA-Z]+en$/ )
{
$type_str
=
'perhaps_s'
;
}
elsif
( LANGUAGE() eq
'de'
&&
$last_word
=~ /.+?(chen|lein|um|ment)$/i )
{
$type_str
=
'perhaps_s'
;
}
elsif
( $::batch || !
$AI::SemanticNetwork::initialized
) {
$type_str
=
'perhaps_s'
;
}
elsif
(
$last_wt
==
$data
->{const}{NOUN} ) {
my
$downloaded_genus
= download_genus(
$last_word
);
if
(
$downloaded_genus
) {
$type_str
=
$downloaded_genus
;
}
elsif
( LANGUAGE() eq
'en'
&& !
$AI::SemanticNetwork::initialized
)
{
$type_str
=
'perhaps_s'
;
}
elsif
(
$last_word
eq
lc
$last_word
) {
$type_str
=
'perhaps_s'
;
}
elsif
( !
$AI::SemanticNetwork::initialized
&&
$in_cgi_mode
)
{
$type_str
=
'perhaps_s'
;
}
elsif
(
$not_ask
) {
$type_str
=
'q'
;
}
else
{
$write_it_into_database
||= 1;
my
$line
= impl_get_genus(
$CLIENT_ref
,
$last_word
);
$type_str
=
$line
== 1 ?
'm'
:
$line
== 2 ?
'f'
:
$line
== 3 ?
's'
:
''
;
}
}
else
{
$type_str
=
'perhaps_s'
;
}
}
if
( !
$type_str
&& !
$has_no_genus
{
$word
} ) {
print
"Redoing: pos_prop( $CLIENT_ref, $word )\n"
;
return
pos_prop(
$CLIENT_ref
,
$word
,
$count
+ 1 );
}
if
(
$write_it_into_database
&&
$write_it_into_database
!= 5 ) {
if
(
$use_sql
) {
eval
q{
use DBI;
my $db_string = "DBI:mysql:database=" . $config{'mysql'}
{
'database'
};
if
(
$config
{
'mysql'
}{
'host'
} ) {
$db_string
.=
';host='
.
$config
{
'mysql'
}{
'host'
};
}
my
$user
=
$config
{
'mysql'
}{
'user'
};
my
$password
=
$config
{
'mysql'
}{
'password'
};
my
$dbh
= DBI->
connect
(
$db_string
,
$user
,
$password
)
or
say
(
"not connected to "
,
$config
{
'mysql'
}{
'database'
},
", user "
,
$config
{
'mysql'
}{
'user'
},
", password "
,
$config
{
'mysql'
}{
'user'
},
": "
);
my
$sql
=
qq{SELECT word FROM part_of_speech WHERE word = '$word'}
;
my
$sth
=
$dbh
->prepare(
$sql
);
$sth
->execute();
if
( !(
$sth
&& (
my
$data
=
$sth
->fetchrow_arrayref ) ) ) {
$dbh
->
do
(
qq{INSERT INTO part_of_speech( word, type, genus ) VALUES ( '$word', '', '' )}
);
}
$sql
=
qq{UPDATE part_of_speech SET genus = '$type_str' WHERE word = '$word'}
;
my
$sth
=
$dbh
->prepare(
$sql
);
$sth
->execute();
$dbh
->disconnect();
}
if
!$::batch;
}
else
{
part_of_speech_get_memory()->{
$word
}->{
'genus'
} =
$type_str
;
delete
part_of_speech_get_memory()->{
''
};
delete
part_of_speech_get_memory()->{
' '
};
}
}
}
}
my
$return_value
= part_of_speech_get_memory()->{
$word
};
return
{
type
=> (
( part_of_speech_get_memory()->{
$word
} || {} )->{type}
|| ( ( sql_get_part_of_speech_property
$word
) || {} )->{type}
|| ( ( sql_get_part_of_speech_property
lc
$word
) || {} )->{type}
),
genus
=> (
$type_str
|| ( part_of_speech_get_memory()->{
$word
} || {} )->{genus}
|| ( ( sql_get_part_of_speech_property
$word
) || {} )->{genus}
|| ( ( sql_get_part_of_speech_property
lc
$word
) || {} )->{genus}
),
};
}
sub
short_save_in_memory {
my
(
$do_save
,
$word
,
$type
,
$always
) =
@_
;
say
"$config{'features'}{'tagger'}: "
,
$config
{
'features'
}{
'tagger'
};
if
( part_of_speech_get_memory()->{
$word
}->{
'type'
}
&& part_of_speech_get_memory()->{
$word
}->{
'type'
} ne
'q'
&& (!
$always
||!
$config
{
'features'
}{
'tagger'
}) )
{
return
$type
;
}
if
( !
$do_save
) {
return
$type
;
}
part_of_speech_get_memory()->{
$word
}->{
'new'
} = 1;
print
"short_save_in_memory\n"
;
print
join
(
', '
,
@_
),
"\n"
;
my
$val
=
$data
->{lang}{string_to_constant}{ part_of_speech_get_memory()->{
$word
}->{
'type'
} =
$data
->{lang}{constant_to_string}{
$type
} };
print
"val: "
,
$val
,
"\n"
;
return
$val
;
}
sub
pos_of {
my
(
$CLIENT_ref
,
$word
,
$at_beginning_of_sentence
,
$noun_automatism
,
$do_not_ask_user
,
$sentence
,
$do_not_guess
,
$do_save
,
$no_autoguess
) =
@_
;
my
$CLIENT
=
undef
;
eval
'$CLIENT = ${$CLIENT_ref};'
;
$do_save
= 1
if
!
defined
$do_save
;
$do_not_guess
||= 0;
return
$data
->{const}{NO_POS}
if
!
defined
$word
;
return
$data
->{const}{NO_POS}
if
!
$word
;
$word
=~
tr
/-/ /;
if
( !
$sentence
) {
$sentence
=
$word
;
}
$noun_automatism
= 1
if
!
defined
$noun_automatism
;
if
(
$word
ne
lc
$word
) {
$word
=
ucfirst
lc
$word
;
}
my
$user_defined_word_type
=
undef
;
if
(
$_
[1] =~ /[{][{][{]/ ) {
(
$_
[1],
my
$type
) =
split
/[{][{][{]/,
$_
[1];
$type
= (
split
/[}]/,
$type
)[0];
my
$type_value
=
undef
;
my
$exec_str
=
'$type_value = $'
.
uc
$type
.
';'
;
say
$exec_str
;
eval
$exec_str
;
if
(
$type_value
&& !$@ ) {
say
'ok. '
.
$_
[1] .
"->"
.
$type_value
.
"->"
.
$data
->{lang}{constant_to_string}{
$type_value
};
part_of_speech_get_memory()->{
$_
[1] }->{
'type'
} =
$data
->{lang}{constant_to_string}{
$type_value
};
say
'ok...'
;
if
(
$at_beginning_of_sentence
) {
say
'added to cache_noun_or_not'
;
$cache_noun_or_not
{
$_
[1] } =
$type_value
;
$cache_noun_or_not
{
lc
$_
[1] } =
$type_value
;
}
say
'ok... ...'
;
return
pos_of(
$CLIENT_ref
,
$_
[1],
$at_beginning_of_sentence
,
$noun_automatism
,
$do_not_ask_user
,
$sentence
,
$do_not_guess
);
}
}
if
( !
$data
->{abilities}->{
'tagger'
} ) {
my
$sock
= ${ connect_to(
data
=>
*data
,
name
=>
'tagger'
) };
print
{
$sock
}
'GET<;;>type<;;>'
.
join
(
'<;;>'
,
@_
) .
"\n"
;
my
$result
= <
$sock
>;
while
(
$result
=~ /GET_WORD_TYPE:/ ) {
print
$CLIENT
$result
;
while
(
defined
(
$result
= <
$CLIENT
> ) ) {
if
(
$result
=~ /HERE_IS/ ) {
print
$sock
$result
;
last
;
}
}
$result
= <
$sock
>;
}
chomp
$result
;
close
$sock
;
$result
||= r_escape( nfreeze( \
undef
) );
eval
{
local
$SIG
{
'__DIE__'
};
$result
= ${ thaw( r_unescape(
$result
) ) };
};
if
($@) {
say
'error in thaw(): '
, $@;
}
return
$result
;
}
AI::FreeHAL::Engine::try_use_lowlatency_mode();
$word
=~ s/^ein(.?)[_\s]//igm;
$word
=~ s/^(und|or|and|oder)\s+//gm;
$word
=~ s/[{]//gm;
$word
=~ s/[}]//gm;
$word
=~ s{/}{}gm;
$word
=~ s/|//gm;
$word
=~ s/[-]//gm;
chomp
$word
;
if
(
$word
ne
lc
$word
) {
$word
=
ucfirst
lc
$word
;
}
return
$data
->{const}{ADJ}
if
$word
=~ /^[-\s+]?\d+$/;
return
$data
->{const}{ADJ}
if
$word
=~ /^[+-]?(\d+\.\d+|\d+\.|\.\d+|\d+)([eE][+-]?\d+)?$/;
return
$data
->{const}{ADJ}
if
$word
=~ /^\d+$/;
return
$data
->{const}{ADJ}
if
$word
=~ /^\d+[_]\d+[_]\d+$/;
return
$data
->{const}{ADJ}
if
$word
=~ /_komma_/;
return
$data
->{const}{INTER}
if
$word
eq
'+'
||
$word
eq
'-'
;
return
$data
->{const}{NO_POS}
if
not
$word
;
return
$data
->{const}{NO_POS}
if
$word
=~ /^[?,;]/;
return
$data
->{const}{ADJ}
if
$word
=~ /[(]/;
return
$data
->{const}{NOUN}
if
$word
=~ /http[:]/i;
return
$data
->{const}{NOUN}
if
$word
=~ /['"]/;
return
$data
->{const}{ADJ}
if
$word
=~ /zum_/;
return
$data
->{const}{ADJ}
if
$word
=~ /zur_/;
return
$data
->{const}{NOUN}
if
$word
=~ /_/;
return
$data
->{const}{NOUN}
if
$word
=~ / /;
return
$data
->{const}{NOUN}
if
$word
=~ /^\$\$/;
return
$data
->{const}{NOUN}
if
$word
=~ /^\s/;
return
$data
->{const}{ADJ}
if
$word
=~ /[%]/;
$word
=~ s/\.|[?!,;]//gm;
my
$word_low
=
lc
$word
;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{ADJ}, 1 )
if
$word
eq
$word_low
&&
$word
=~ /los$/;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{VERB}, 1 )
if
$word
eq
$word_low
&&
$word
=~ /machen$/;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{VERB}, 1 )
if
$word
eq
$word_low
&&
$word
=~ /mache$/;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{VERB}, 1 )
if
$word
eq
$word_low
&&
$word
=~ /machst$/;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{VERB}, 1 )
if
$word
eq
$word_low
&&
$word
=~ /macht$/;
return
$ART
if
$word_low
eq
q{a}
&& LANGUAGE() eq
'en'
;
if
(
$word_low
=~ /\s/ ) {
my
$last_word
= (
split
/\s+/,
$word_low
)[-1];
return
(
pos_of(
$CLIENT_ref
,
$last_word
,
$at_beginning_of_sentence
,
$noun_automatism
,
$do_not_ask_user
,
$sentence
)
);
}
say
'.'
;
my
$builtin_table
= AI::FreeHAL::Engine::build_builtin_table( LANGUAGE() );
say
','
;
if
(
defined
$builtin_table
->{
$word_low
} ) {
return
short_save_in_memory(
$do_save
,
$word
,
$builtin_table
->{
$word_low
} );
}
{
my
$word_low_stripped
=
$word_low
;
$word_low_stripped
=~ s/.$//;
if
(
defined
$builtin_table
->{
$word_low_stripped
} ) {
return
short_save_in_memory(
$do_save
,
$word
,
$builtin_table
->{
$word_low_stripped
} );
}
$word_low_stripped
=~ s/.$//;
if
(
defined
$builtin_table
->{
$word_low_stripped
} ) {
return
short_save_in_memory(
$do_save
,
$word
,
$builtin_table
->{
$word_low_stripped
} );
}
$word_low_stripped
=~ s/.$//;
if
(
defined
$builtin_table
->{
$word_low_stripped
} ) {
return
short_save_in_memory(
$do_save
,
$word
,
$builtin_table
->{
$word_low_stripped
} );
}
}
if
( !
$no_autoguess
&&
$config
{
'features'
}{
'tagger'
} ) {
if
( LANGUAGE() eq
'de'
) {
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{NOUN}, 1 )
if
$word
=~ /kern$/;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{ADJ}, 1 )
if
$word
=~ /.farben$/;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{ADJ}, 1 )
if
$word
=~ /.quent$/;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{ADJ}, 1 )
if
(
$word
eq
lc
$word
||
$at_beginning_of_sentence
)
&&
$word
=~ /(der|al|haft|lich|ig|bar|isch|ich)$/;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{ADJ}, 1 )
if
(
$word
ne
lc
$word
||
$at_beginning_of_sentence
)
&&
$word
=~ /.(haft|lich|ig|isch|ich)$/;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{QUESTIONWORD} )
if
(
$word
eq
lc
$word
||
$at_beginning_of_sentence
)
&&
$word
=~ /^wor/;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{ADJ}, 1 )
if
(
$word
eq
lc
$word
||
$at_beginning_of_sentence
)
&&
$word
=~ /(der|al|ich|bar)e$/;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{ADJ}, 1 )
if
(
$word
eq
lc
$word
||
$at_beginning_of_sentence
)
&&
$word
=~ /(der|al|ich|bar)es$/;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{ADJ}, 1 )
if
(
$word
eq
lc
$word
||
$at_beginning_of_sentence
)
&&
$word
=~ /(der|al|ich|bar)en$/;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{ADJ}, 1 )
if
(
$word
eq
lc
$word
||
$at_beginning_of_sentence
)
&&
$word
=~ /(der|al|ich|bar)em$/;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{ADJ}, 1 )
if
(
$word
eq
lc
$word
||
$at_beginning_of_sentence
)
&&
$word
=~ /(der|al|ich|bar)er$/;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{ADJ}, 1 )
if
(
$word
eq
lc
$word
||
$at_beginning_of_sentence
)
&&
$word
=~ /(ss|mm|nn|gg)er$/;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{VERB}, 1 )
if
$word
=~ /^(ge|
$regex_str_verb_prefixes
).*?t$/
&& LANGUAGE() eq
'de'
;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{VERB} )
if
$word
=~ /^(ge|ver).*?ten$/
&& LANGUAGE() eq
'de'
;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{NOUN}, 1 )
if
$word
=~ /(heit|keit)$/;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{ADJ}, 1 )
if
$word
=~ /(haft|lich|ig|isch|dnet)e?$/;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{ADJ}, 1 )
if
$word
=~ /(haft|lich|ig|isch|dnet)es$/;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{ADJ}, 1 )
if
$word
=~ /(haft|lich|ig|isch|dnet)en$/;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{ADJ}, 1 )
if
$word
=~ /(haft|lich|ig|isch|dnet)em$/;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{ADJ}, 1 )
if
$word
=~ /(haft|lich|ig|isch|dnet)er$/;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{NOUN} )
if
$word
=~ /angst$/;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{ADJ} )
if
$word
=~ /reich$/;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{ADJ}, 1 )
if
$word
eq
lc
$word
&&
$word
=~ /reit$/;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{ADJ}, 1 )
if
$word
eq
lc
$word
&&
$word
=~ /iert$/;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{ADJ}, 1 )
if
$word
eq
lc
$word
&&
$word
=~ /ierte$/;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{ADJ}, 1 )
if
$word
eq
lc
$word
&&
$word
=~ /ierter$/;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{ADJ}, 1 )
if
$word
eq
lc
$word
&&
$word
=~ /iertes$/;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{ADJ}, 1 )
if
$word
eq
lc
$word
&&
$word
=~ /ierten$/;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{ADJ}, 1 )
if
$word
eq
lc
$word
&&
$word
=~ /iertem$/;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{ADJ}, 1 )
if
$word
eq
lc
$word
&&
$word
=~ /ene[nmsr]?$/;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{ADJ}, 1 )
if
$word
eq
lc
$word
&&
$word
=~ /on$/;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{ADJ} )
if
$word
eq
lc
$word
&&
$word
=~ /on$/;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{ADJ} )
if
$word
eq
lc
$word
&&
$word
=~ /iv$/;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{ADJ} )
if
$word
eq
lc
$word
&&
$word
=~ /iv(e|en|er|es|em|en)$/;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{ADJ} )
if
$word
eq
lc
$word
&&
$word
=~ /ig(e|es|en|em|er|)?$/;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{ADJ} )
if
$word
eq
lc
$word
&&
$word
=~ /^meist/;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{ADJ} )
if
$word
eq
lc
$word
&&
$word
=~ /h(e|es|en|em|er)$/;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{ADJ} )
if
$word
eq
lc
$word
&&
$word
=~ /ht(e|es|en|em|er)$/;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{ADJ} )
if
$word
eq
lc
$word
&&
$word
=~ /er(e|en|er|es|em|en)$/;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{ADJ} )
if
$word
=~ /lich$/;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{ADJ} )
if
$word
=~ /te(t|te|ter|te|ten|tem|tes)$/;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{ADJ} )
if
$word
eq
lc
$word
&&
$word
=~ /ens$/;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{ADJ} )
if
$word
eq
lc
$word
&&
$word
=~ /st(e|en|er|es|em|en)$/;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{ADJ} )
if
$word
=~ /lich(e|es|en|em)$/;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{ADJ} )
if
$word
eq
lc
$word
&&
$word
=~ /.det(e|es|en|em)$/;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{ADJ} )
if
$word
eq
lc
$word
&&
$word
=~ /.haft$/;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{VERB} )
if
$word
=~
/^(ge|
$regex_str_verb_prefixes
).*?nn(($)|((e|en|er|es|em|en)$))/
&& LANGUAGE() eq
'de'
;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{VERB} )
if
$word
=~
/^(ge|
$regex_str_verb_prefixes
).*?[lb]o[gt](($)|((e|en|er|es|em|en)$))/
&& LANGUAGE() eq
'de'
;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{VERB} )
if
$word
=~
/^(ge|
$regex_str_verb_prefixes
).*?b(($)|((e|en|er|es|em|en)$))/
&& LANGUAGE() eq
'de'
;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{VERB} )
if
$word
=~
/^(ge|
$regex_str_verb_prefixes
).*?s(($)|((e|en|er|es|em|en)$))/
&& LANGUAGE() eq
'de'
;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{VERB} )
if
$word
=~
/^(ge|
$regex_str_verb_prefixes
).*?mm(($)|((e|en|er|es|em|en)$))/
&& LANGUAGE() eq
'de'
;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{VERB} )
if
$word
=~ /^be.*?t$/ && LANGUAGE() eq
'de'
;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{VERB} )
if
$word
=~ /^ver.*?t$/ && LANGUAGE() eq
'de'
;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{VERB} )
if
$word
eq
lc
$word
&&
$word
=~ /ier(t|en|st|e)$/
&& LANGUAGE() eq
'de'
;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{VERB} )
if
$word
=~ /^ge.*?t$/ && LANGUAGE() eq
'de'
;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{VERB} )
if
$word
=~ /^be.*?t$/ && LANGUAGE() eq
'de'
;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{VERB} )
if
$word
=~ /^ver.*?t$/ && LANGUAGE() eq
'de'
;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{ADJ} )
if
$word
=~
/^(un|in|an|a|im|
$regex_str_verb_prefixes
)?ge.*?[tn]et(($)|((e|en|er|es|em|en)$))/
&& LANGUAGE() eq
'de'
;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{ADJ} )
if
$word
=~
/^(un|in|an|a|im|
$regex_str_verb_prefixes
)?be.*?[tn]et(($)|((e|en|er|es|em|en)$))/
&& LANGUAGE() eq
'de'
;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{ADJ} )
if
$word
=~
/^(un|in|an|a|im|
$regex_str_verb_prefixes
)?ge.*?(es|er|em|en|e)$/
&& LANGUAGE() eq
'de'
;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{ADJ}, 1 )
if
$word
=~
/^(un|in|an|a|im|
$regex_str_verb_prefixes
)?ge.*?[fsrmnlp]t$/
&& LANGUAGE() eq
'de'
;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{ADJ} )
if
$word
=~
/^(un|in|an|a|im|
$regex_str_verb_prefixes
)?ge.*?[tn]et$/
&& LANGUAGE() eq
'de'
;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{ADJ} )
if
$word
=~
/^(un|in|an|a|im|
$regex_str_verb_prefixes
)?be.*?[tn]et$/
&& LANGUAGE() eq
'de'
;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{VERB} )
if
$word
eq
lc
$word
&&
$word
=~
/.(ier|eid|ied|und|uch|ueh|eh|erb|arb|ng|eb|eis|s|ieg|bau|iess|eig|esch|isch|ruh|err|mm|mpf|ink|ohl)en$/;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{VERB}, 1 )
if
$word
eq
lc
$word
&&
$word
=~
/.(ier|eid|ied|und|uch|ueh|eh|erb|arb|ng|eb|eis|s|ieg|bau|ies|eig|esch|isch|ruh|err|mm|mpf|ink|ohl)st$/;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{VERB}, 1 )
if
$word
eq
lc
$word
&&
$word
=~
/.(ier|eid|ied|und|uch|ueh|eh|erb|arb|ng|eb|eis|s|ieg|bau|iess|eig|esch|isch|ruh|err|mm|mpf|ink|ohl)e$/;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{VERB}, 1 )
if
$word
eq
lc
$word
&&
$word
=~
/.(ier|eid|ied|und|uch|ueh|eh|erb|arb|ng|eb|eis|s|ieg|ruh|isch|err|mm|mpf|ieh|sch|ink|ohl)t$/;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{VERB}, 1 )
if
$word
eq
lc
$word
&&
$word
=~
/.(ier|eid|ied|und|uch|ueh|eh|erb|arb|ng|eb|eis|s|ieg|ruh)et$/;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{VERB} )
if
$word
eq
lc
$word
&&
$word
=~
/.(ier|eid|ied|und|uch|ueh|eh|erb|arb|ng|eb|eis|s|ieg|ruh)test$/;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{VERB} )
if
$word
eq
lc
$word
&&
$word
=~
/.(ier|eid|ied|und|uch|ueh|eh|erb|arb|ng|eb|eis|s|ieg|ruh)est$/;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{VERB} )
if
$word
eq
lc
$word
&&
$word
=~
/.(ier|eid|ied|und|uch|ueh|eh|erb|arb|ng|eb|eis|s|ieg|ruh)tst$/;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{VERB} )
if
$word
eq
lc
$word
&&
$word
=~
/.(ier|eid|ied|und|uch|ueh|eh|erb|arb|ng|eb|eis|s|ieg|ruh)te$/;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{VERB} )
if
$word
=~ /...eck(t|en|e|st)$/;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{VERB} )
if
$word
eq
lc
$word
&&
$word
=~ /.ern$/;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{NOUN} )
if
$word
=~ /.[hk]eit$/;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{VERB} )
if
$word
eq
$word_low
&&
$word
=~ /^.*?[nskz]en$/
&& LANGUAGE() eq
'de'
;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{VERB} )
if
$word
eq
$word_low
&&
$word
=~ /^.*?eln$/
&& LANGUAGE() eq
'de'
;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{VERB} )
if
$word
=~ /^(ver|be|ge|ent|er).+?et$/;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{VERB} )
if
$word
=~ /^(zer|ver|be|ge|ent|er).+?et$/;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{NOUN} )
if
$word
=~ /[ts]ion$/;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{NOUN} )
if
$word
ne
lc
$word
&&
$word
=~ /...([gftlhsbkdm]|ier)er$/;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{NOUN} )
if
$word
ne
lc
$word
&&
$word
=~ /...([gftlhsbkdm]|ier)erin$/;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{NOUN} )
if
$word
ne
lc
$word
&&
$word
=~ /...([gftlhsbkdm]|ier)erinnen$/;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{NOUN} )
if
$word
=~ /.([sk])um$/;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{NOUN} )
if
$word
ne
lc
$word
&&
$word
=~ /...([gftlhsbkdm]|ier)ero$/;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{NOUN} )
if
$word
ne
lc
$word
&&
$word
=~ /...([gftlhsbkdm]|ier)t$/;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{NOUN} )
if
$word
ne
lc
$word
&&
$word
=~ /...([gftlhsbkdm]|ier)ist/;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{ADJ} )
if
$word
eq
lc
$word
&&
$word
=~ /st(e|en|er|es|em|en)$/;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{ADJ}, 1 )
if
$word
=~ /en(e|es|en|em|er)$/;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{ADJ} )
if
$word
eq
lc
$word
&&
$word
=~ /cht$/;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{ADJ}, 1 )
if
$word
=~
/^(ge|
$regex_str_verb_prefixes
).*?(((e|en|er|es|em|en|t)$))/
&& LANGUAGE() eq
'de'
;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{VERB}, 1 )
if
$word
=~ /tzt$/
&& LANGUAGE() eq
'de'
;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{VERB}, 1 )
if
$word
=~ /pft$/
&& LANGUAGE() eq
'de'
;
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{VERB}, 1 )
if
$word
=~ /ckt$/
&& LANGUAGE() eq
'de'
;
}
}
if
(
$word
ne
$word_low
&&
$config
{
'features'
}{
'tagger'
} ) {
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{NOUN} );
}
if
(
defined
$builtin_table
->{
$word_low
} ) {
return
$builtin_table
->{
$word_low
};
}
my
$word_low_without_prefix
=
$word_low
;
foreach
my
$prefix
(
keys
%is_verb_prefix
) {
$word_low_without_prefix
=~ s/^
$prefix
//im;
}
if
( !
$no_autoguess
&&
$config
{
'features'
}{
'tagger'
} ) {
if
(
$word_low
=~ /end$/ &&
length
(
$word
) >= 6 ) {
my
$word_to_check
=
$word_low
;
$word_to_check
=~ s/[a-z]$//;
if
(
pos_of(
$CLIENT_ref
,
$word_to_check
, 0, 0, 0,
undef
,
undef
, 0 ) ==
$data
->{const}{VERB}
)
{
return
$data
->{const}{ADJ};
}
}
if
(
$word
eq
$word_low
&&
$word_low
=~ /e$/ &&
length
(
$word
) >= 4 ) {
my
$word_to_check
=
$word_low
.
'n'
;
if
(
pos_of(
$CLIENT_ref
,
$word_to_check
, 0, 0, 1,
undef
,
undef
, 0, 1 )
|| 0 ==
$data
->{const}{VERB}
)
{
return
$data
->{const}{VERB};
}
}
if
(
$word
eq
$word_low
&&
$word_low
=~ /s$/ &&
length
(
$word
) >= 4 ) {
my
$word_to_check
=
$word_low
;
$word_to_check
=~ s/[a-z]$//;
if
(
pos_of(
$CLIENT_ref
,
$word_to_check
, 0, 1, 1,
undef
,
undef
, 0, 1 )
|| 0 ==
$data
->{const}{VERB}
)
{
return
$data
->{const}{VERB};
}
}
if
(
$word_low
=~ /s$/ &&
length
(
$word
) >= 4 ) {
my
$word_to_check
=
$word
;
$word_to_check
=~ s/[a-z]$//;
if
(
pos_of(
$CLIENT_ref
,
$word_to_check
, 0, 0, 1,
undef
,
undef
, 0, 1 )
|| 0 ==
$data
->{const}{NOUN}
)
{
return
$data
->{const}{NOUN};
}
}
if
(
$word_low
=~ /e[sr]$/ &&
length
(
$word
) >= 4 ) {
my
$word_to_check
=
$word
;
$word_to_check
=~ s/[a-z][a-z]$//;
if
(
pos_of(
$CLIENT_ref
,
$word_to_check
, 0, 0, 1,
undef
,
undef
, 0, 1 )
|| 0 ==
$data
->{const}{NOUN}
)
{
return
$data
->{const}{NOUN};
}
}
if
(
$word
eq
$word_low
&&
$word_low
=~ /st$/ &&
length
(
$word
) >= 5 ) {
my
$word_to_check
=
$word_low
;
$word_to_check
=~ s/st$/en/;
if
(
pos_of(
$CLIENT_ref
,
$word_to_check
, 0, 0, 1,
undef
,
undef
, 0, 1 ) ==
$data
->{const}{VERB}
)
{
return
$data
->{const}{VERB};
}
}
if
(
$word
eq
$word_low
&&
$word_low
=~ /et$/ &&
length
(
$word
) >= 5 ) {
my
$word_to_check
=
$word_low
;
$word_to_check
=~ s/et$/en/;
if
(
pos_of(
$CLIENT_ref
,
$word_to_check
, 0, 0, 1,
undef
,
undef
, 0, 1 ) ==
$data
->{const}{VERB}
)
{
return
$data
->{const}{VERB};
}
}
if
(
$word
eq
$word_low
&&
$word_low
=~ /t$/ &&
length
(
$word
) >= 5 ) {
my
$word_to_check
=
$word_low
;
$word_to_check
=~ s/t$/en/;
if
(
pos_of(
$CLIENT_ref
,
$word_to_check
, 0, 0, 1,
undef
,
undef
, 0, 1 ) ==
$data
->{const}{VERB}
)
{
return
$data
->{const}{VERB};
}
}
if
(
$word_low
=~ /(ed|ing)$/
&&
length
(
$word
) >= 6
&& LANGUAGE() eq
'en'
)
{
my
$word_to_check
=
$word_low
;
$word_to_check
=~ s/(ed|ing)$//;
if
(
pos_of(
$CLIENT_ref
,
$word_to_check
, 0, 0, 0,
undef
,
undef
, 0, 1 ) ==
$data
->{const}{VERB}
)
{
return
$data
->{const}{ADJ};
}
if
(
pos_of(
$CLIENT_ref
,
$word_to_check
.
'e'
,
0, 0, 1,
undef
,
undef
, 0, 1 ) ==
$data
->{const}{VERB}
)
{
return
$data
->{const}{ADJ};
}
}
return
short_save_in_memory(
$do_save
,
$word
,
$data
->{const}{ADJ} )
if
$word
=~ /en(d|de|den|des|dem|der)$/;
}
if
(
$at_beginning_of_sentence
) {
if
( !
$AI::SemanticNetwork::initialized
) {
$cache_noun_or_not
{
$word_low
} =
$data
->{const}{NOUN};
}
my
$in_cache
=
$cache_noun_or_not
{
$word_low
};
if
( !
$in_cache
) {
$in_cache
=
$WORDTYPE_UNKNOWN
;
}
return
$in_cache
if
$in_cache
==
$data
->{const}{NOUN};
return
$in_cache
if
$in_cache
==
$data
->{const}{VERB};
return
$in_cache
if
$in_cache
==
$data
->{const}{ADJ};
print
"Error in Cache: $in_cache"
if
$in_cache
!=
$WORDTYPE_UNKNOWN
;
my
$wt1
=
$WORDTYPE_UNKNOWN
;
my
$wt2
=
$WORDTYPE_UNKNOWN
;
if
(
$word
ne
lc
$word
) {
$wt1
=
pos_of(
$CLIENT_ref
,
lc
$word
, 0, 0,
undef
,
$sentence
);
$wt2
= pos_of(
$CLIENT_ref
,
ucfirst
$word
, 0, 0,
undef
,
$sentence
);
if
(
$wt1
==
$data
->{const}{QUESTIONWORD} ||
$wt2
==
$data
->{const}{QUESTIONWORD} ) {
return
$data
->{const}{QUESTIONWORD};
}
}
if
(
$wt1
!=
$wt2
&&
$word
ne
lc
$word
) {
if
(
$wt1
==
$data
->{const}{VERB} ||
$wt2
==
$data
->{const}{VERB} ) {
return
$data
->{const}{VERB};
}
if
( LANGUAGE() eq
'en'
&& (
$wt1
==
$data
->{const}{NOUN} ||
$wt2
==
$data
->{const}{NOUN} )
&& (
$wt1
==
$data
->{const}{ADJ} ||
$wt2
==
$data
->{const}{ADJ} ) )
{
return
$data
->{const}{NOUN};
}
if
( LANGUAGE() eq
'en'
&& (
$wt1
==
$data
->{const}{NOUN} ||
$wt2
==
$data
->{const}{NOUN} )
&& (
$wt1
==
$data
->{const}{PREP} ||
$wt2
==
$data
->{const}{PREP} ) )
{
return
$data
->{const}{PREP};
}
if
( LANGUAGE() eq
'en'
&& (
$wt1
==
$data
->{const}{NOUN} ||
$wt2
==
$data
->{const}{NOUN} )
&& (
$wt1
==
$ART
||
$wt2
==
$ART
) )
{
return
$ART
;
}
my
$line
=
!
$AI::SemanticNetwork::initialized
? 1
: impl_get_noun_or_not(
$CLIENT_ref
,
$word
);
if
(
$line
== 1 ) {
$cache_noun_or_not
{
$word_low
} =
$data
->{const}{NOUN};
}
elsif
(
$line
== 2 ) {
$cache_noun_or_not
{
$word_low
} =
$wt1
;
}
else
{
print
"Illegal Command: HERE_IS_NOUN_OR_NOT:"
.
$line
.
"\n"
;
}
}
elsif
(
$wt1
!=
$wt2
&&
$word
eq
lc
$word
) {
return
$wt1
;
}
}
my
$word_without_underscores
=
$word
;
$word_without_underscores
=~ s/_/ /igm;
my
$type
=
q{}
;
if
(
$at_beginning_of_sentence
) {
my
$prop_ref
=
part_of_speech_get_memory()->{
lc
$word_without_underscores
};
if
(
$prop_ref
->{
'type'
} ) {
$type
=
$prop_ref
->{
'type'
};
}
}
my
$prop_ref
= part_of_speech_get_memory()->{
$word_without_underscores
};
if
(
$prop_ref
->{
'type'
} && !
$type
) {
$type
=
$prop_ref
->{
'type'
};
}
if
( !
$type
&&
$at_beginning_of_sentence
) {
my
$prop_ref
= part_of_speech_get_entry(
lc
$word_without_underscores
);
if
(
$prop_ref
->{
'type'
} ) {
$type
=
$prop_ref
->{
'type'
};
}
}
if
( !
$type
) {
$prop_ref
= part_of_speech_get_entry(
$word_without_underscores
);
if
(
$prop_ref
->{
'type'
} ) {
$type
=
$prop_ref
->{
'type'
};
}
}
if
(
$at_beginning_of_sentence
) {
my
$prop_ref
= part_of_speech_get_memory()
->{
'_nosave_'
.
lc
$word_without_underscores
};
if
(
$prop_ref
->{
'type'
} ) {
$type
=
$prop_ref
->{
'type'
};
}
}
my
$prop_ref
=
part_of_speech_get_memory()->{
'_nosave_'
.
$word_without_underscores
};
if
(
$prop_ref
->{
'type'
} && !
$type
) {
$type
=
$prop_ref
->{
'type'
};
}
if
( !
$type
&&
$at_beginning_of_sentence
) {
my
$prop_ref
=
part_of_speech_get_entry(
'_nosave_'
.
lc
$word_without_underscores
);
if
(
$prop_ref
->{
'type'
} ) {
$type
=
$prop_ref
->{
'type'
};
}
}
if
( !
$type
) {
$prop_ref
=
part_of_speech_get_entry(
'_nosave_'
.
$word_without_underscores
);
if
(
$prop_ref
->{
'type'
} ) {
$type
=
$prop_ref
->{
'type'
};
}
}
if
(
$use_sql
) {
if
( ( !
$type
||
$type
eq
'q'
) &&
$at_beginning_of_sentence
) {
$type
=
( sql_get_part_of_speech_property
lc
$word_without_underscores
)
->{type};
}
if
( ( !
$type
||
$type
eq
'q'
) ) {
$type
=
( sql_get_part_of_speech_property
$word_without_underscores
)
->{type};
}
}
if
(
$type
eq
'q'
) {
$type
=
q{}
;
}
if
(
(
$word
||
''
) ne (
$word_low
||
''
)
&& !
$at_beginning_of_sentence
&&
$noun_automatism
&& (
(
$data
->{lang}{string_to_constant}{
$type
||
''
} ||
''
) ne
$data
->{const}{NOUN}
&& (
$data
->{lang}{string_to_constant}{
$data
->{lang}{constant_to_string}{
$type
||
''
} ||
''
}
||
''
) ne
$data
->{const}{NOUN}
)
)
{
}
else
{
if
(
$data
->{lang}{string_to_constant}{
$type
} ) {
return
$data
->{lang}{string_to_constant}{
$type
};
}
if
(
$data
->{lang}{string_to_constant}{
$data
->{lang}{constant_to_string}{
$type
} ||
''
} ) {
return
$data
->{lang}{string_to_constant}{
$data
->{lang}{constant_to_string}{
$type
} };
}
}
if
(
$do_not_ask_user
) {
return
$data
->{const}{NO_POS};
}
my
$downloaded
=
LANGUAGE() eq
'de'
&&
$config
{
'features'
}{
'tagger'
}
? download_pos(
$CLIENT_ref
,
$word
,
$at_beginning_of_sentence
)
:
$data
->{const}{NO_POS};
say
"$do_not_guess || $config{'features'}{'tagger'}"
;
my
$word_type_tagged
=
$do_not_guess
|| !
$config
{
'features'
}{
'tagger'
}
?
''
:
uc
find_word_type(
$CLIENT_ref
,
$word
,
$at_beginning_of_sentence
,
$sentence
);
if
(
$word_low_without_prefix
ne
$word_low
&&
$word
eq
$word_low
&&
$config
{
'features'
}{
'tagger'
} )
{
if
(
pos_of(
$CLIENT_ref
,
$word_low_without_prefix
, 0, 0, 0,
undef
,
0 ) ==
$data
->{const}{VERB}
)
{
$downloaded
=
$data
->{const}{VERB};
}
}
if
( !
$at_beginning_of_sentence
&&
$word_low
ne
$word
&&
$noun_automatism
&&
$downloaded
==
$data
->{const}{NO_POS}
&& ( !
$word_type_tagged
|| LANGUAGE() eq
'de'
)
&&
$config
{
'features'
}{
'tagger'
} )
{
$downloaded
=
$data
->{const}{NOUN};
}
$downloaded
||= 0;
$downloaded
=
$downloaded
==
$data
->{const}{NOUN} ? 2
:
$downloaded
==
$data
->{const}{VERB} ? 1
:
$downloaded
==
$data
->{const}{PREP} ? 6
:
$downloaded
==
$data
->{const}{ADJ} ? 3
:
$downloaded
==
$data
->{const}{INTER} ? 7
:
$downloaded
==
$data
->{const}{QUESTIONWORD} ? 5
: 0;
my
$word_type_tagged_new
=
$word_type_tagged
eq
'CD'
? 3
:
$word_type_tagged
eq
'EX'
? 3
:
$word_type_tagged
eq
'IN'
? 6
:
$word_type_tagged
eq
'JJ'
? 3
:
$word_type_tagged
eq
'JJR'
? 3
:
$word_type_tagged
eq
'JJS'
? 3
:
$word_type_tagged
eq
'MD'
? 1
:
$word_type_tagged
eq
'NN'
? 2
:
$word_type_tagged
eq
'NNS'
? 2
:
$word_type_tagged
eq
'NNPS'
? 2
:
$word_type_tagged
eq
'NNP'
? 2
:
$word_type_tagged
eq
'PDT'
? 3
:
$word_type_tagged
eq
'PRP'
? 2
:
$word_type_tagged
eq
'PRPS'
? 3
:
$word_type_tagged
eq
'RB'
? 3
:
$word_type_tagged
eq
'RBR'
? 3
:
$word_type_tagged
eq
'RBS'
? 3
:
$word_type_tagged
eq
'RP'
? 3
:
$word_type_tagged
eq
'TO'
? 6
:
$word_type_tagged
eq
'UH'
? 7
:
$word_type_tagged
eq
'SYM'
? 2
:
$word_type_tagged
=~ /^VB/ ? 1
:
$word_type_tagged
=~ /^W/ ? 5
: 0;
if
( !
$word_type_tagged_new
) {
say
'Tagged Word type not known: '
,
$word_type_tagged
;
}
else
{
say
'Word type tagged successful: '
,
$word_type_tagged
,
' = '
,
$word_type_tagged_new
;
}
$word_type_tagged
=
$word_type_tagged_new
;
if
(
$do_not_ask_user
&& !
$downloaded
&& !
$word_type_tagged
) {
return
$data
->{const}{NO_POS};
}
my
$line
=
(
$downloaded
) ? (
$downloaded
)
: (
$word_type_tagged
) ? (
$word_type_tagged
)
: ( impl_get_word_type(
$CLIENT_ref
,
$word
) );
print
'$line: '
,
$line
,
"\n"
;
my
$type_str
=
$line
== 1 ?
'vt'
:
$line
== 2 ?
'n,'
:
$line
== 3 ?
'adj'
:
$line
== 4 ?
'n,'
:
$line
== 5 ?
'fw'
:
$line
== 6 ?
'prep'
:
$line
== 7 ?
'inter'
:
'nothing'
;
if
(
$word
ne
$word_low
) {
$type_str
=
'n,'
;
}
if
( !
$do_save
) {
part_of_speech_get_memory()->{
'_nosave_'
.
$word_without_underscores
}
->{
'type'
} =
$type_str
;
}
elsif
(
$use_sql
) {
eval
q{
use DBI;
my $db_string = "DBI:mysql:database=" . $config{'mysql'}
{
'database'
};
if
(
$config
{
'mysql'
}{
'host'
} ) {
$db_string
.=
';host='
.
$config
{
'mysql'
}{
'host'
};
}
my
$user
=
$config
{
'mysql'
}{
'user'
};
my
$password
=
$config
{
'mysql'
}{
'password'
};
my
$dbh
= DBI->
connect
(
$db_string
,
$user
,
$password
)
or
say
(
"not connected to "
,
$config
{
'mysql'
}{
'database'
},
", user "
,
$config
{
'mysql'
}{
'user'
},
", password "
,
$config
{
'mysql'
}{
'user'
},
": "
);
my
$sql
=
qq{SELECT word FROM part_of_speech WHERE word = '$word_without_underscores'}
;
my
$sth
=
$dbh
->prepare(
$sql
);
$sth
->execute();
say
(
$sth
->errstr );
say
(
$dbh
->errstr );
if
( !(
$sth
&& (
my
$data
=
$sth
->fetchrow_arrayref ) ) ) {
$dbh
->
do
(
qq{INSERT INTO part_of_speech( word, type, genus ) VALUES ( '$word_without_underscores', '', '' )}
);
say
(
$sth
->errstr );
say
(
$dbh
->errstr );
}
$sql
=
qq{UPDATE part_of_speech SET type = '$type_str' WHERE word = '$word_without_underscores'}
;
my
$sth
=
$dbh
->prepare(
$sql
);
$sth
->execute();
say
(
$sth
->errstr );
say
(
$dbh
->errstr );
}
if
!$::batch;
}
else
{
part_of_speech_get_memory()->{
$word_without_underscores
}->{
'type'
} =
$type_str
;
part_of_speech_get_memory()->{
$word_without_underscores
}->{
'new'
} = 1;
}
return
pos_of(
$CLIENT_ref
,
$word
,
$at_beginning_of_sentence
,
$noun_automatism
,
$sentence
,
$do_not_guess
,
$do_save
);
return
$data
->{const}{NO_POS};
}
sub
find_word_type {
my
(
$CLIENT_ref
,
$word
,
$at_beginning_of_sentence
,
$sentence
) =
@_
;
$word
=~ s/^[_]//igm;
$word
=~ s/[_]$//igm;
$sentence
=~ s/^[_]//igm;
$sentence
=~ s/[_]$//igm;
$sentence
=~ s/\s[_]/ /igm;
$sentence
=~ s/[_]\s/ /igm;
return
'JJ'
if
$word
=~ /^ge.*?tet(($)|((e|en|er|es|em|en)$))/ && LANGUAGE() eq
'de'
;
return
'JJ'
if
$word
=~ /^be.*?tet(($)|((e|en|er|es|em|en)$))/ && LANGUAGE() eq
'de'
;
return
'VB'
if
$word
=~ /^ge.*?t(($)|((e|en|er|es|em|en)$))/ && LANGUAGE() eq
'de'
;
return
'VB'
if
$word
=~ /^be.*?t(($)|((e|en|er|es|em|en)$))/ && LANGUAGE() eq
'de'
;
return
'VB'
if
$word
=~ /^ver.*?t(($)|((e|en|er|es|em|en)$))/ && LANGUAGE() eq
'de'
;
return
'JJ'
if
$word
=~ /^ge.*?(es|er|em|en|e)$/ && LANGUAGE() eq
'de'
;
return
'VB'
if
$word
eq
lc
$word
&&
$word
=~ /.[gftlrs]en$/;
my
$builtin_table
= AI::FreeHAL::Engine::build_builtin_table();
foreach
my
$key
(
keys
%$builtin_table
) {
part_of_speech_get_memory()->{
$key
}{
'type'
} =
$data
->{lang}{constant_to_string}{
$builtin_table
->{
$key
} };
part_of_speech_get_memory()->{
$key
}{rtime} =
'not_new'
;
part_of_speech_get_memory()->{
ucfirst
$key
}{
'type'
} =
$data
->{lang}{constant_to_string}{
$builtin_table
->{
$key
} };
part_of_speech_get_memory()->{
ucfirst
$key
}{rtime} =
'not_new'
;
}
if
( !
$use_sql
) {
open
my
$old_format_file
,
'>'
,
$dir
.
'lang_de/word_types.taggerinput'
;
foreach
my
$key
(
sort
keys
%{ part_of_speech_get_memory() } ) {
print
$old_format_file
$key
.
'|'
. part_of_speech_get_memory()->{
$key
}->{
'type'
} .
"\n"
if
part_of_speech_get_memory()->{
$key
}->{
'type'
}
&& part_of_speech_get_memory()->{
$key
}->{
'type'
} !~ /^n/
&&
$key
eq
lc
$key
;
}
close
$old_format_file
;
}
else
{
eval
q{
use DBI;
my $db_string = "DBI:mysql:database=" . $config{'mysql'}
{
'database'
};
if
(
$config
{
'mysql'
}{
'host'
} ) {
$db_string
.=
';host='
.
$config
{
'mysql'
}{
'host'
};
}
my
$user
=
$config
{
'mysql'
}{
'user'
};
my
$password
=
$config
{
'mysql'
}{
'password'
};
my
$dbh
= DBI->
connect
(
$db_string
,
$user
,
$password
)
or
say
(
"not connected to "
,
$config
{
'mysql'
}{
'database'
},
", user "
,
$config
{
'mysql'
}{
'user'
},
", password "
,
$config
{
'mysql'
}{
'user'
},
": "
);
my
$sql
=
qq{create table part_of_speech (word varbinary(120), type char(3), genus char(1))}
;
$dbh
->prepare(
$sql
)->execute();
$sql
=
qq{ALTER TABLE `part_of_speech` ADD PRIMARY KEY ( `word` )}
;
$dbh
->prepare(
$sql
)->execute();
$sql
=
qq{SELECT word, type FROM part_of_speech}
;
my
$sth
=
$dbh
->prepare(
$sql
);
$sth
->execute();
open
my
$old_format_file
,
'>'
,
$dir
.
'lang_de/word_types.taggerinput'
;
while
(
$sth
&& (
my
$data
=
$sth
->fetchrow_arrayref ) ) {
print
$old_format_file
$data
->[0] .
'|'
.
$data
->[1] .
"\n"
if
$data
->[1]
&&
$data
->[1] !~ /^n/
&&
$data
->[0] eq
lc
$data
->[0];
}
close
$old_format_file
;
$sth
->finish();
}
if
!$::batch;
}
require
'convert-word-types-dic.pl'
if
LANGUAGE() eq
'de'
&& !$::batch;
say
$sentence
;
my
$p
=
LANGUAGE() eq
'en'
? new Lingua::EN::Tagger
: new Lingua::DE::Tagger;
my
$readable_text
=
$p
->get_readable(
$sentence
);
$readable_text
=~ s/[<]\/(.+?)[>]/\/$1 /igm;
$readable_text
=~ s/[<].+?[>]//igm;
$readable_text
=~ s/[\-]//igm;
my
%word_list
=
map
{
lc
$_
}
map
{
split
/\//,
$_
}
split
/\s/,
$readable_text
;
foreach
my
$k
(
keys
%word_list
) {
$k
=
lc
$k
;
}
print
Dumper \
%word_list
;
my
$tagged
=
$word_list
{
lc
$word
}
||
$word_list
{
$word
}
||
$word_list
{
ucfirst
$word
};
if
(
lc
$tagged
eq
'nn'
&&
$word
=~ /(es|er|en|em)$/ ) {
$tagged
=
'JJ'
;
}
return
$tagged
;
}
1;