# [31 lines of POD documentation omitted from this listing]
# Package variables expected by Exporter. Nothing is exported by default
# or on request; the export lists are deliberately empty.
our ($VERSION, @ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS);

$VERSION     = '0.01';
@ISA         = qw(Exporter);
@EXPORT      = ();
@EXPORT_OK   = ();
%EXPORT_TAGS = ();

# Shared WordNet::QueryData handle; attach() and merge() use it to look up
# the synset offset of the target location.
$wn = WordNet::QueryData->new;

# Root of the WordNet installation and the directory holding the
# index.* / data.* files that this module reads and rewrites.
$WNHOME       = "/usr/local/WordNet-3.0";
$WNSEARCHDICT = "$WNHOME/dict";

# Number of leading lines of each index.*/data.* file that attach() and
# merge() copy through without parsing (presumably the WordNet licence
# header -- confirm against the installed dictionary files).
$wnCRLength = 29;

# Option parsing happens at load time: --help prints the help text and exits.
GetOptions('help' => \$help);

if ($help == 1) {
    printHelp();
    exit(0);
}
# [18 lines of POD documentation omitted from this listing]
# Constructor.
#
# Returns a new object blessed into the invoking class. The object starts
# with no pending error: error code 0 and an empty error string.
sub new {
    my ($class) = @_;

    my %state = (
        errorString => '',
        error       => 0,
    );

    return bless \%state, $class;
}
# [13 lines of POD documentation omitted from this listing]
# getError
#
# Returns the pending error as a two-element list (code, message) and then
# resets the object to the "no error" state (code 0, empty message).
# Leading whitespace (CR, LF, tab, space) is stripped from the message.
#
# The sub is declared without a prototype: the former empty "()" prototype
# claimed the sub takes no arguments, which makes a function-style call
# getError($obj) a compile-time error once the declaration has been seen.
sub getError {
    my $self = shift;

    my $error     = $self->{error};
    my $errString = $self->{errorString};

    # Reporting an error consumes it.
    $self->{error}       = 0;
    $self->{errorString} = "";

    # Only trim when there is a message; an undefined message is returned
    # unchanged instead of triggering an "uninitialized value" warning.
    $errString =~ s/^[\r\n\t ]+// if defined $errString;

    return ($error, $errString);
}
# [14 lines of POD documentation omitted from this listing]
# attach(\@newSyn, \@location)   or   $obj->attach(\@newSyn, \@location)
#
# Adds a new synset for a lemma and links it below an existing synset by
# rewriting index.<pos>, data.<pos> and index.sense in $WNSEARCHDICT.
#
# Arguments
#   \@newSyn    Either four or more fields, or a single tab-separated
#               string holding those fields. Fields used here:
#                 [0] lemma to insert
#                 [1] part-of-speech name; it is used as the suffix of the
#                     index./data. file names and its first letter must be
#                     "n" or "v"
#                 [3] gloss written after the "|" of the new data line
#   \@location  At least two fields; [1] is handed to
#               WordNet::QueryData->offset() and (with everything from the
#               first "#" removed) used as the lemma of the target synset.
#               Presumably of the form "lemma#pos#sense" -- confirm.
#
# Errors in the arguments are recorded on the object (error code 2 plus a
# message, readable through getError). When there is no object (function
# style call) the message is emitted with warn() instead.
#
# Returns 1 when all three rewritten files were moved into place, 0
# otherwise. Dies if a dictionary file cannot be opened or written.
#
# The first successful rewrite of a file keeps a "<file>.backup" copy of
# the original.
#
# NOTE(review): offsets after the modified synset are shifted by a fixed
# 14 bytes, and every rewritten line gains a trailing space; this mirrors
# the previous behaviour and has not been checked against the real byte
# layout of the data file.
sub attach {
    # A three-argument call is a method call. Take the invocant exactly
    # once here; shifting it later (as the error paths used to do) picked
    # up the wrong argument on function-style calls or a second failure.
    my $self = (scalar @_ == 3) ? shift : undef;
    my ($newSynRef, $locationRef) = @_;

    # Records an argument error and yields 0 so it can be used in "return".
    my $fail = sub {
        my ($message) = @_;
        if (ref $self) {
            $self->{error}       = 2;
            $self->{errorString} = $message;
        }
        else {
            warn "$message\n";
        }
        return 0;
    };

    # Left-pads a value with zeros up to the given width.
    my $zeroPad = sub {
        my ($value, $width) = @_;
        $value = "0" . $value while length($value) < $width;
        return $value;
    };

    my @newSyn = defined $newSynRef ? @{$newSynRef} : ();
    if (scalar @newSyn == 1) {
        # Single element: a tab-separated record.
        @newSyn = split("\t", $newSyn[0]);
    }
    my @location = defined $locationRef ? @{$locationRef} : ();

    my $valid = 1;
    $valid = $fail->("New synset does not contain enough elements.")
        if scalar @newSyn < 4;
    $valid = $fail->("Location does not contain enough elements.")
        if scalar @location < 2;
    return 0 unless $valid;

    # Validate the part of speech before any file is created, so that a
    # rejected request leaves no stray .new/.temp files behind.
    my $pos    = substr($newSyn[1], 0, 1);
    my $posNum = $pos eq "n" ? 1
               : $pos eq "v" ? 2
               :               0;
    return $fail->("Part of speech must be verb or noun") if $posNum == 0;

    my $newLemma      = $newSyn[0];
    my $locationPos   = $newSyn[1];
    my $locationLemma = $location[1];
    $locationLemma =~ s/#.*//;    # keep only the lemma part

    my $newOffset      = findNewOffset($newSyn[1]);
    my $locationOffset = $zeroPad->($wn->offset("$location[1]"), 8);
    my %offsetMap;                # old synset offset => new synset offset

    my $indexFile = "$WNSEARCHDICT/index.$locationPos";
    my $dataFile  = "$WNSEARCHDICT/data.$locationPos";
    my $senseFile = "$WNSEARCHDICT/index.sense";

    open(my $indexOut, '>', "$indexFile.new")
        or die "Cannot write $indexFile.new: $!";
    open(my $dataOut, '>', "$dataFile.new")
        or die "Cannot write $dataFile.new: $!";
    open(my $senseOut, '>', "$senseFile.new")
        or die "Cannot write $senseFile.new: $!";

    # isNewWord() returns 0 when the lemma has no line in the index yet.
    my $isNew   = (isNewWord($newSyn[0], $newSyn[1]) == 0);
    my %hypData = %{ getDataInfo($locationOffset, $locationPos) };
    my %hypInfo = %{ getIndexInfo($locationLemma, $locationPos) };
    my (%lemmaIndex, $newSynNum);
    unless ($isNew) {
        %lemmaIndex = %{ getIndexInfo($newSyn[0], $newSyn[1]) };
        $newSynNum  = $lemmaIndex{'synset_cnt'} + 1;
    }

    open(my $indexIn, '<', $indexFile) or die "Cannot read $indexFile: $!";
    open(my $dataIn,  '<', $dataFile)  or die "Cannot read $dataFile: $!";
    open(my $senseIn, '<', $senseFile) or die "Cannot read $senseFile: $!";
    open(my $tempOut, '>', "$dataFile.temp")
        or die "Cannot write $dataFile.temp: $!";

    # Pass 1 over the data file: add the "~" pointer to the target synset
    # and record how every synset offset moves. Result goes to the temp file.
    my $changed = 0;
    my $curLine = 1;
    while (my $line = <$dataIn>) {
        chomp $line;
        next if $line eq '';
        if ($curLine > $wnCRLength) {
            my @fields = split /\s/, $line;
            $offsetMap{ $fields[0] } = ($changed == 1)
                ? $zeroPad->($fields[0] + 14, 8)
                : $fields[0];
            if ($fields[0] == $locationOffset) {
                my $newPcnt = $zeroPad->($hypData{'p_cnt'} + 1, 3);
                $newOffset = $newOffset + 14;
                $line = "$hypData{'synset_offset'} $hypData{'lex_filenum'} $hypData{'ss_type'} $hypData{'w_cnt'} $hypData{'word_lex_id'} $newPcnt $hypData{'ptr'} ~ $newOffset $pos 0000 | $hypData{'gloss'}";
                $changed = 1;
            }
        }
        else {
            $curLine += 1;    # still inside the pass-through header
        }
        print {$tempOut} "$line\n";
    }
    close($tempOut) or die "Cannot close $dataFile.temp: $!";

    # The new lines depend on $newOffset, which pass 1 may have shifted.
    my $dataPos = "$newOffset $hypData{'lex_filenum'} $pos 01 $newSyn[0] 0 001 \@ $hypData{'synset_offset'} $pos 0000 | $newSyn[3]";
    my ($indexPos, $indexSense);
    if ($isNew) {
        $indexPos   = "$newSyn[0] $pos 1 1 \@ 1 0 $newOffset";
        $indexSense = "$newSyn[0]%$posNum:$hypData{'lex_filenum'}:00:: $newOffset 1 0";
    }
    else {
        $indexPos   = "$newSyn[0] $pos $newSynNum $lemmaIndex{'p_cnt'} $lemmaIndex{'ptr_symbol'} $newSynNum $lemmaIndex{'tagsense_cnt'} $lemmaIndex{'synset_offset'} $newOffset";
        $indexSense = "$newSyn[0]%$posNum:$hypData{'lex_filenum'}:00:: $newOffset $newSynNum 0";
    }

    # Pass 2 over the data: replace every field that is a known offset,
    # then append the data line of the new synset.
    open(my $tempIn, '<', "$dataFile.temp")
        or die "Cannot read $dataFile.temp: $!";
    while (my $line = <$tempIn>) {
        chomp $line;
        next if $line eq '';
        my @fields = split /\s/, $line;
        for my $i (0 .. $#fields) {
            $fields[$i] = "$offsetMap{$fields[$i]}"
                if exists $offsetMap{ $fields[$i] };
        }
        print {$dataOut} join(' ', @fields) . " \n";
    }
    print {$dataOut} "$dataPos \n";

    # Index file: bump the pointer count of the target lemma when it has
    # no "~" pointer yet, swap in the updated line of an existing lemma,
    # and remap the trailing synset offsets of every entry.
    $curLine = 1;
    while (my $line = <$indexIn>) {
        chomp $line;
        next if $line eq '';
        if ($curLine > $wnCRLength) {
            if ($line =~ /^\Q$locationLemma\E\b[^-]/ && $line !~ /\~/) {
                my $newPcnt = $hypInfo{'p_cnt'} + 1;
                $line = "$hypInfo{'lemma'} $hypInfo{'pos'} $hypInfo{'synset_cnt'} $newPcnt $hypInfo{'ptr_symbol'} ~ $hypInfo{'sense_cnt'} $hypInfo{'tagsense_cnt'} $hypInfo{'synset_offset'}";
            }
            if (!$isNew && $line =~ /^\Q$newLemma\E\b[^-]/) {
                $line = $indexPos;
            }
            my @fields    = split /\s/, $line;
            my $remaining = $fields[2];    # synset_cnt: offsets at line end
            my $ptr       = scalar(@fields) - 1;
            while ($remaining > 0) {
                $fields[$ptr] = "$offsetMap{$fields[$ptr]}"
                    if exists $offsetMap{ $fields[$ptr] };
                $remaining -= 1;
                $ptr       -= 1;
            }
            $line = join(' ', @fields);
        }
        else {
            $curLine += 1;
        }
        print {$indexOut} "$line \n";
    }
    # A lemma without an index line gets its line appended at the end.
    print {$indexOut} "$indexPos \n" if $isNew;

    # Sense index: remap the offset column and append the new sense.
    while (my $line = <$senseIn>) {
        chomp $line;
        next if $line eq '';
        my @fields = split /\s/, $line;
        $fields[1] = "$offsetMap{$fields[1]}"
            if exists $offsetMap{ $fields[1] };
        print {$senseOut} join(' ', @fields) . "\n";
    }
    print {$senseOut} "$indexSense\n";

    close($indexIn);
    close($dataIn);
    close($senseIn);
    close($tempIn);

    # Every output handle is closed (and therefore flushed) before the
    # files are moved; previously the index handle was never closed.
    close($indexOut) or die "Cannot close $indexFile.new: $!";
    close($dataOut)  or die "Cannot close $dataFile.new: $!";
    close($senseOut) or die "Cannot close $senseFile.new: $!";

    # Keep a one-time backup of the pristine files.
    unless (-f "$indexFile.backup") {
        copy($indexFile, "$indexFile.backup");
        copy($dataFile,  "$dataFile.backup");
    }
    unless (-f "$senseFile.backup") {
        copy($senseFile, "$senseFile.backup");
    }

    unlink "$dataFile.temp" if -f "$dataFile.temp";

    unlink $indexFile;
    unlink $dataFile;
    unlink $senseFile;
    my $movedIndex = move("$indexFile.new", $indexFile);
    my $movedData  = move("$dataFile.new",  $dataFile);
    my $movedSense = move("$senseFile.new", $senseFile);

    return ($movedIndex && $movedData && $movedSense) ? 1 : 0;
}
# [14 lines of POD documentation omitted from this listing]
# merge(\@newSyn, \@location)   or   $obj->merge(\@newSyn, \@location)
#
# Adds a lemma as an additional word of an existing synset by rewriting
# index.<pos>, data.<pos> and index.sense in $WNSEARCHDICT.
#
# Arguments
#   \@newSyn    Either four or more fields, or a single tab-separated
#               string holding those fields. Fields used here:
#                 [0] lemma to insert
#                 [1] part-of-speech name; it is used as the suffix of the
#                     index./data. file names and its first letter must be
#                     "n" or "v"
#   \@location  At least two fields; [1] is handed to
#               WordNet::QueryData->offset() and (with everything from the
#               first "#" removed) used as the lemma of the target synset.
#               Presumably of the form "lemma#pos#sense" -- confirm.
#
# Errors in the arguments are recorded on the object (error code 2 plus a
# message, readable through getError). When there is no object (function
# style call) the message is emitted with warn() instead.
#
# Returns 1 when all three rewritten files were moved into place, 0
# otherwise. Dies if a dictionary file cannot be opened or written.
#
# NOTE(review): offsets after the modified synset are shifted by
# length(lemma) + 1 bytes, and every rewritten line gains a trailing
# space; this mirrors the previous behaviour and has not been checked
# against the real byte layout of the data file.
sub merge {
    # A three-argument call is a method call. Take the invocant exactly
    # once here; shifting it later (as the error paths used to do) picked
    # up the wrong argument on function-style calls or a second failure.
    my $self = (scalar @_ == 3) ? shift : undef;
    my ($newSynRef, $locationRef) = @_;

    # Records an argument error and yields 0 so it can be used in "return".
    my $fail = sub {
        my ($message) = @_;
        if (ref $self) {
            $self->{error}       = 2;
            $self->{errorString} = $message;
        }
        else {
            warn "$message\n";
        }
        return 0;
    };

    # Left-pads a value with zeros up to the given width.
    my $zeroPad = sub {
        my ($value, $width) = @_;
        $value = "0" . $value while length($value) < $width;
        return $value;
    };

    my @newSyn = defined $newSynRef ? @{$newSynRef} : ();
    if (scalar @newSyn == 1) {
        # Single element: a tab-separated record.
        @newSyn = split("\t", $newSyn[0]);
    }
    my @location = defined $locationRef ? @{$locationRef} : ();

    my $valid = 1;
    $valid = $fail->("New synset does not contain enough elements.")
        if scalar @newSyn < 4;
    $valid = $fail->("Location does not contain enough elements.")
        if scalar @location < 2;
    return 0 unless $valid;

    # Validate the part of speech before any file is created, so that a
    # rejected request leaves no stray .new/.temp files behind.
    my $pos    = substr($newSyn[1], 0, 1);
    my $posNum = $pos eq "n" ? 1
               : $pos eq "v" ? 2
               :               0;
    return $fail->("Part of speech must be verb or noun") if $posNum == 0;

    my $newLemma      = $newSyn[0];
    my $locationPos   = $newSyn[1];
    my $locationLemma = $location[1];
    $locationLemma =~ s/#.*//;    # keep only the lemma part

    my $locationOffset = $zeroPad->($wn->offset("$location[1]"), 8);
    my %offsetMap;                # old synset offset => new synset offset

    my $indexFile = "$WNSEARCHDICT/index.$locationPos";
    my $dataFile  = "$WNSEARCHDICT/data.$locationPos";
    my $senseFile = "$WNSEARCHDICT/index.sense";

    open(my $indexOut, '>', "$indexFile.new")
        or die "Cannot write $indexFile.new: $!";
    open(my $dataOut, '>', "$dataFile.new")
        or die "Cannot write $dataFile.new: $!";
    open(my $senseOut, '>', "$senseFile.new")
        or die "Cannot write $senseFile.new: $!";

    # isNewWord() returns 0 when the lemma has no line in the index yet.
    my $isNew    = (isNewWord($newSyn[0], $newSyn[1]) == 0);
    my %synIndex = %{ getIndexInfo($locationLemma, $locationPos) };
    my %synData  = %{ getDataInfo($locationOffset, $locationPos) };
    my (%lemmaIndex, $newSynNum);
    unless ($isNew) {
        %lemmaIndex = %{ getIndexInfo($newSyn[0], $newSyn[1]) };
        $newSynNum  = $lemmaIndex{'synset_cnt'} + 1;
    }

    open(my $indexIn, '<', $indexFile) or die "Cannot read $indexFile: $!";
    open(my $dataIn,  '<', $dataFile)  or die "Cannot read $dataFile: $!";
    open(my $senseIn, '<', $senseFile) or die "Cannot read $senseFile: $!";
    open(my $tempOut, '>', "$dataFile.temp")
        or die "Cannot write $dataFile.temp: $!";

    # Pass 1 over the data file: record how every synset offset moves.
    # Synsets after the target one shift by the size of the added word.
    my $changed       = 0;
    my $curLine       = 1;
    my $newWordLength = length($newSyn[0]) + 1;
    while (my $line = <$dataIn>) {
        chomp $line;
        next if $line eq '';
        if ($curLine > $wnCRLength) {
            my @fields = split /\s/, $line;
            $offsetMap{ $fields[0] } = ($changed == 1)
                ? $zeroPad->($fields[0] + $newWordLength, 8)
                : $fields[0];
            $changed = 1 if $fields[0] == $locationOffset;
        }
        else {
            $curLine += 1;    # still inside the pass-through header
        }
        print {$tempOut} "$line\n";
    }
    close($tempOut) or die "Cannot close $dataFile.temp: $!";

    # w_cnt is a two-digit hexadecimal field in the WordNet data format,
    # so it is incremented and re-rendered as such (previously it was
    # treated as decimal and, for a brand-new lemma, not zero-padded).
    my $oldWcnt = defined $synData{'w_cnt'} ? hex($synData{'w_cnt'}) : 0;
    my $wcnt    = sprintf("%02x", $oldWcnt + 1);

    my $dataPos = "$locationOffset $synData{'lex_filenum'} $synData{'ss_type'} $wcnt $synData{'word_lex_id'} $newSyn[0] 0 $synData{'p_cnt'} $synData{'ptr'} | $synData{'gloss'}";
    my ($indexPos, $indexSense);
    if ($isNew) {
        $indexPos   = "$newSyn[0] $pos 1 $synIndex{'p_cnt'} $synIndex{'ptr_symbol'} 1 0 $locationOffset";
        $indexSense = "$newSyn[0]%$posNum:$synData{'lex_filenum'}:00:: $locationOffset 1 0";
    }
    else {
        $indexPos   = "$newSyn[0] $pos $newSynNum $lemmaIndex{'p_cnt'} $lemmaIndex{'ptr_symbol'} $newSynNum $lemmaIndex{'tagsense_cnt'} $lemmaIndex{'synset_offset'} $locationOffset";
        $indexSense = "$newSyn[0]%$posNum:$synData{'lex_filenum'}:00:: $locationOffset $newSynNum 0";
    }

    # Pass 2 over the data: swap in the extended synset line and replace
    # every field that is a known offset.
    open(my $tempIn, '<', "$dataFile.temp")
        or die "Cannot read $dataFile.temp: $!";
    while (my $line = <$tempIn>) {
        chomp $line;
        next if $line eq '';
        $line = $dataPos if $line =~ /^\Q$locationOffset\E\b/;
        my @fields = split /\s/, $line;
        for my $i (0 .. $#fields) {
            $fields[$i] = "$offsetMap{$fields[$i]}"
                if exists $offsetMap{ $fields[$i] };
        }
        print {$dataOut} join(' ', @fields) . " \n";
    }

    # Index file: swap in the updated line of an existing lemma and remap
    # the trailing synset offsets of every entry.
    $curLine = 1;
    while (my $line = <$indexIn>) {
        chomp $line;
        next if $line eq '';
        if ($curLine > $wnCRLength) {
            if (!$isNew && $line =~ /^\Q$newLemma\E\b[^-]/) {
                $line = $indexPos;
            }
            my @fields    = split /\s/, $line;
            my $remaining = $fields[2];    # synset_cnt: offsets at line end
            my $ptr       = scalar(@fields) - 1;
            while ($remaining > 0) {
                $fields[$ptr] = "$offsetMap{$fields[$ptr]}"
                    if exists $offsetMap{ $fields[$ptr] };
                $remaining -= 1;
                $ptr       -= 1;
            }
            $line = join(' ', @fields);
        }
        else {
            $curLine += 1;
        }
        print {$indexOut} "$line \n";
    }
    # A lemma without an index line gets its line appended at the end.
    print {$indexOut} "$indexPos \n" if $isNew;

    # Sense index: remap the offset column and append the new sense.
    while (my $line = <$senseIn>) {
        chomp $line;
        next if $line eq '';
        my @fields = split /\s/, $line;
        $fields[1] = "$offsetMap{$fields[1]}"
            if exists $offsetMap{ $fields[1] };
        print {$senseOut} join(' ', @fields) . "\n";
    }
    print {$senseOut} "$indexSense\n";

    close($indexIn);
    close($dataIn);
    close($senseIn);
    close($tempIn);

    # Every output handle is closed (and therefore flushed) before the
    # files are moved; previously the index handle was never closed.
    close($indexOut) or die "Cannot close $indexFile.new: $!";
    close($dataOut)  or die "Cannot close $dataFile.new: $!";
    close($senseOut) or die "Cannot close $senseFile.new: $!";

    # Keep a one-time backup of the pristine files.
    unless (-f "$indexFile.backup") {
        copy($indexFile, "$indexFile.backup");
        copy($dataFile,  "$dataFile.backup");
    }
    unless (-f "$senseFile.backup") {
        copy($senseFile, "$senseFile.backup");
    }

    unlink "$dataFile.temp" if -f "$dataFile.temp";

    unlink $indexFile;
    unlink $dataFile;
    unlink $senseFile;
    my $movedIndex = move("$indexFile.new", $indexFile);
    my $movedData  = move("$dataFile.new",  $dataFile);
    my $movedSense = move("$senseFile.new", $senseFile);

    return ($movedIndex && $movedData && $movedSense) ? 1 : 0;
}
# [12 lines of POD documentation omitted from this listing]
# restoreWordNet
#
# Puts the "*.backup" copies in $WNSEARCHDICT back in place of the live
# dictionary files. For each part of speech (noun, verb, adj, adv) whose
# index.<pos>.backup exists, index.<pos> and data.<pos> are restored; if
# at least one part of speech was restored, index.sense is restored too.
#
# A live file is only removed when its own backup is present. Previously
# data.<pos> and index.sense were deleted unconditionally, so a missing
# backup meant the live file was lost with nothing to replace it.
#
# Returns 1 if any part of speech had a backup, 0 otherwise.
sub restoreWordNet {
    my $backupFlag = 0;

    # Replaces $target with "$target.backup" when that backup exists.
    my $restore = sub {
        my ($target) = @_;
        my $backup = "$target.backup";
        return unless -f $backup;
        unlink $target;
        move($backup, $target);
        return;
    };

    for my $pos (qw(noun verb adj adv)) {
        # The index backup decides whether this part of speech was modified.
        next unless -f "$WNSEARCHDICT/index.$pos.backup";
        $backupFlag = 1;
        $restore->("$WNSEARCHDICT/index.$pos");
        $restore->("$WNSEARCHDICT/data.$pos");
    }

    # The sense index is shared by all parts of speech.
    $restore->("$WNSEARCHDICT/index.sense") if $backupFlag == 1;

    return $backupFlag;
}
# [10 lines of POD documentation omitted from this listing]
# isNewWord($lemma, $pos)   or   $obj->isNewWord($lemma, $pos)
#
# Looks for $lemma in $WNSEARCHDICT/index.$pos.
#
# Returns 1 when a line of the index starts with the lemma (followed by a
# word boundary and a character other than "-"), and 0 when no such line
# exists -- i.e. despite the name, 0 means "the word is new".
#
# Dies if the index file cannot be opened.
sub isNewWord {
    # A three-argument call is a method call; skip the invocant.
    my $base = (scalar @_ == 3) ? 1 : 0;
    my ($lemma, $pos) = @_[ $base, $base + 1 ];

    my $indexFile = "$WNSEARCHDICT/index.$pos";
    open(my $index, '<', $indexFile) or die "Cannot read $indexFile: $!";

    my $found = 0;
    while (my $line = <$index>) {
        chomp $line;
        # \Q...\E: the lemma is data, not a pattern ("a.m." must not
        # match "aXmY").
        if ($line =~ /^\Q$lemma\E\b[^-]/) {
            $found = 1;
            last;
        }
    }
    close $index;

    return $found;
}
# [11 lines of POD documentation omitted from this listing]
# getIndexInfo($lemma, $pos)   or   $obj->getIndexInfo($lemma, $pos)
#
# Reads the first line for $lemma from $WNSEARCHDICT/index.$pos and
# returns a reference to a hash of its whitespace-separated fields:
#
#   lemma, pos, synset_cnt, p_cnt   fields 0..3 as found in the file
#   ptr_symbol                      the p_cnt pointer symbols, space-joined
#   sense_cnt, tagsense_cnt         the two fields after the symbols
#   synset_offset                   the synset_cnt offsets, space-joined
#
# When the lemma is not found the scalar fields are undef and the joined
# fields are empty strings. Dies if the index file cannot be opened.
sub getIndexInfo {
    # A three-argument call is a method call; skip the invocant.
    my $base = (scalar @_ == 3) ? 1 : 0;
    my ($lemma, $pos) = @_[ $base, $base + 1 ];

    my $indexFile     = "$WNSEARCHDICT/index.$pos";
    my $indexInfoLine = "";

    open(my $fh, '<', $indexFile) or die "Cannot read $indexFile: $!";
    while (my $line = <$fh>) {
        chomp $line;
        if ($line =~ /^\Q$lemma\E\b[^-]/) {
            $indexInfoLine = $line;
            last;    # first match wins
        }
    }
    close $fh;

    my @index = split /\s/, $indexInfoLine;

    # Joins a slice of @index the way string interpolation would: missing
    # fields become empty strings, leading whitespace is dropped.
    my $joinFields = sub {
        my ($first, $count) = @_;
        return '' if $count <= 0;
        my $joined = join(' ',
            map { defined $_ ? $_ : '' } @index[ $first .. $first + $count - 1 ]);
        $joined =~ s/^\s+//;
        return $joined;
    };

    my %indexInfo;
    @indexInfo{qw(lemma pos synset_cnt p_cnt)} = @index[ 0 .. 3 ];

    my $pcnt = (defined $index[3] && $index[3] > 0) ? $index[3] + 0 : 0;
    my $scnt = (defined $index[2] && $index[2] > 0) ? $index[2] + 0 : 0;

    $indexInfo{'ptr_symbol'} = $joinFields->(4, $pcnt);

    # sense_cnt directly follows the pointer symbols. This also holds for
    # p_cnt == 0, where the field used to be read one position too far.
    my $indexPtr = 4 + $pcnt;
    $indexInfo{'sense_cnt'}     = $index[$indexPtr];
    $indexInfo{'tagsense_cnt'}  = $index[ $indexPtr + 1 ];
    $indexInfo{'synset_offset'} = $joinFields->($indexPtr + 2, $scnt);

    return \%indexInfo;
}
# [11 lines of POD documentation omitted from this listing]
# getDataInfo($synOffset, $pos)   or   $obj->getDataInfo($synOffset, $pos)
#
# Reads the first line starting with $synOffset from
# $WNSEARCHDICT/data.$pos and returns a reference to a hash of its
# whitespace-separated fields:
#
#   synset_offset, lex_filenum, ss_type, w_cnt   fields 0..3 as found
#   word_lex_id   the w_cnt "word lex_id" pairs, space-joined
#   p_cnt         pointer count as found in the file
#   ptr           the p_cnt four-field pointers, space-joined
#   gloss         everything after the field that follows the pointers
#
# w_cnt is interpreted as hexadecimal, as the WordNet data file format
# defines it; reading it as decimal mis-parsed synsets with ten or more
# words.
#
# NOTE(review): the code assumes the "|" separator comes right after the
# pointers. Verb data lines carry frame fields in between, so for verbs
# 'gloss' presumably starts inside the frame list -- confirm.
#
# When no line matches, the scalar fields are undef and the joined fields
# are empty strings. Dies if the data file cannot be opened.
sub getDataInfo {
    # A three-argument call is a method call; skip the invocant.
    my $base = (scalar @_ == 3) ? 1 : 0;
    my ($synOffset, $pos) = @_[ $base, $base + 1 ];

    my $dataFile     = "$WNSEARCHDICT/data.$pos";
    my $dataInfoLine = "";

    open(my $fh, '<', $dataFile) or die "Cannot read $dataFile: $!";
    while (my $line = <$fh>) {
        chomp $line;
        if ($line =~ /^\Q$synOffset\E\b/) {
            $dataInfoLine = $line;
            last;    # first match wins
        }
    }
    close $fh;

    my @data = split /\s/, $dataInfoLine;

    # Joins a slice of @data the way string interpolation would: missing
    # fields become empty strings, leading whitespace is dropped.
    my $joinFields = sub {
        my ($first, $count) = @_;
        return '' if $count <= 0;
        my $joined = join(' ',
            map { defined $_ ? $_ : '' } @data[ $first .. $first + $count - 1 ]);
        $joined =~ s/^\s+//;
        return $joined;
    };

    my %dataInfo;
    @dataInfo{qw(synset_offset lex_filenum ss_type w_cnt)} = @data[ 0 .. 3 ];

    # Each word occupies two fields: the word and its lex_id.
    my $wcnt = defined $data[3] ? hex($data[3]) : 0;
    $dataInfo{'word_lex_id'} = $joinFields->(4, 2 * $wcnt);

    my $dataPtr = 4 + 2 * $wcnt;
    $dataInfo{'p_cnt'} = $data[$dataPtr];

    # Each pointer occupies four fields.
    my $pcnt = (defined $data[$dataPtr] && $data[$dataPtr] > 0)
        ? $data[$dataPtr] + 0
        : 0;
    $dataInfo{'ptr'} = $joinFields->($dataPtr + 1, 4 * $pcnt);

    # Skip p_cnt, the pointers and the separator field after them.
    my $glossStart = $dataPtr + 1 + 4 * $pcnt + 1;
    $dataInfo{'gloss'} = $joinFields->($glossStart, scalar(@data) - $glossStart);

    return \%dataInfo;
}
# [11 lines of POD documentation omitted from this listing]
# getSenseInfo($synOffset)   or   $obj->getSenseInfo($synOffset)
#
# Reads the first line of $WNSEARCHDICT/index.sense that contains
# $synOffset as a whole word and returns a reference to a hash of its
# first four whitespace-separated fields:
#
#   sense_key, synset_offset, sense_number, tag_cnt
#
# All four values are undef when no line matches. Dies if the sense index
# cannot be opened.
sub getSenseInfo {
    # A two-argument call is a method call; skip the invocant.
    my $base      = (scalar @_ == 2) ? 1 : 0;
    my $synOffset = $_[$base];

    my $senseFile     = "$WNSEARCHDICT/index.sense";
    my $senseInfoLine = "";

    open(my $fh, '<', $senseFile) or die "Cannot read $senseFile: $!";
    while (my $line = <$fh>) {
        chomp $line;
        if ($line =~ /\b\Q$synOffset\E\b/) {
            $senseInfoLine = $line;
            last;    # first match wins
        }
    }
    close $fh;

    my @sense = split /\s/, $senseInfoLine;

    my %senseInfo;
    @senseInfo{qw(sense_key synset_offset sense_number tag_cnt)} = @sense[ 0 .. 3 ];

    return \%senseInfo;
}
# [10 lines of POD documentation omitted from this listing]
# findNewOffset($pos)   or   $obj->findNewOffset($pos)
#
# Computes the offset for a synset appended to $WNSEARCHDICT/data.$pos:
# the offset in the first field of the file's last non-empty line, plus
# the length of that line, plus 3.
#
# The invocant is skipped when the first argument is a reference (an
# object) or when three arguments are passed. Previously only the
# three-argument form was recognised, so $obj->findNewOffset($pos) used
# the object itself as the part of speech.
#
# Dies if the data file cannot be opened.
sub findNewOffset {
    my $base = (scalar @_ == 3 || ref $_[0]) ? 1 : 0;
    my $pos  = $_[$base];

    my $dataFile     = "$WNSEARCHDICT/data.$pos";
    my $dataLastLine = "";

    open(my $fh, '<', $dataFile) or die "Cannot read $dataFile: $!";
    while (my $line = <$fh>) {
        chomp $line;
        $dataLastLine = $line if $line ne '';
    }
    close $fh;

    my @data = split /\s/, $dataLastLine;

    # An empty file has no last offset; count from 0 in that case.
    my $lastOffset = defined $data[0] ? $data[0] : 0;

    return $lastOffset + length($dataLastLine) + 3;
}
# Prints the usage line (via printUsage) followed by a short description
# of what the program does. Returns the result of the last print.
sub printHelp()
{
    printUsage();

    my @helpLines = (
        "Takes in lemmas from file and attempts to\n",
        "insert them into WordNet by first finding\n",
        "a hypernym, then either a) merging the \n",
        "lemma with the hypernym or b) attaching \n",
        "the lemma to the hypernym.\n",
    );

    # One print call per line, exactly as many calls as there are lines.
    my $printed;
    for my $helpLine (@helpLines) {
        $printed = print $helpLine;
    }
    return $printed;
}
1;