NAME
DDC::Client - Client socket object and utilities for DDC::Concordance
SYNOPSIS
use
DDC::Client;
##---------------------------------------------------------------------
## Constructors, etc.
##---------------------------------------------------------------------
## Common Requests
$rsp
=
$dc
->request(
$request
);
##-- generic request
$rsp
=
$dc
->requestNC(
$request
);
##-- generic request, no close()
$data
=
$dc
->requestJson(
$request
);
##-- generic JSON request
$version
=
$dc
->version();
##-- get server version string
$status
=
$dc
->status();
##-- get server status HASH-ref
$vstatus
=
$dc
->vstatus();
##-- get verbose status HASH-ref
$info
=
$dc
->info();
##-- get server info HASH-ref
$nodes
=
$dc
->nodes();
##-- get server nodes ARRAY-ref
$rsp
=
$dc
->expand_terms(\
@pipeline
, \
@terms
);
##-- raw term expansion
@terms
=
$dc
->expand(\
@pipeline
, \
@terms
);
##-- parsed term expansion
$hits
=
$dc
->query(
$query_string
);
##-- fetch and parse hits
$hits
=
$dc
->queryJson(
$query_string
);
##-- fetch and parse JSON-formatted hits
$buf
=
$dc
->queryRaw(
$query_string
);
##-- fetch raw query result buffer
$buf
=
$dc
->queryRawNC(
$query_string
);
##-- fetch raw query result, no close()
@bufs
=
$dc
->queryMulti(
@query_strings
);
##-- fetch multiple request results without intervening close()
$rsp
=
$dc
->get_first_hits(
$query
);
##-- low-level request
$rsp
=
$dc
->get_hit_strings();
##-- low-level request
$rsp
=
$dc
->run_query(
$corpus
,
$query
);
##-- low-level request
##---------------------------------------------------------------------
## Low-level Communications
$connect
=
$dc
->parseAddr();
##-- parse connection parameters
$urlstr
=
$dc
->addrStr();
##-- get connection parameter string
$io_socket
=
$dc
->
open
();
##-- open the connection
undef
=
$dc
->
close
();
##-- close the connection
$dc
->
send
(
@command
);
##-- send a command (prepends size)
$dc
->sendfh(
$fh
,
@command
);
##-- ... to specified filehandle
$size
=
$dc
->readSize();
##-- get size of return message from client socket
$size
=
$dc
->readSize(
$fh
);
##-- ... or from a given filehandle
$buf
=
$dc
->readBytes(
$size
);
##-- read a sized return buffer from client socket
$buf
=
$dc
->readBytes(
$size
,
$fh
);
##-- ... or from a given filehandle
$buf
=
$dc
->readData();
##-- same as $dc->readBytes($dc->readSize())
$buf
=
$dc
->readData(
$fh
);
##-- ... same as $dc->readBytes($dc->readSize($fh),$fh)
$hits
=
$dc
->parseData(
$buf
);
##-- parse a return buffer
$hits
=
$dc
->parseJsonData(
$buf
);
##-- parse a return buffer in 'json' mode
$hits
=
$dc
->parseTextData(
$buf
);
##-- parse a return buffer in 'text' mode
$hits
=
$dc
->parseTableData(
$buf
);
##-- parse a return buffer in 'table' mode
$hits
=
$dc
->parseHtmlData(
$buf
);
##-- parse a return buffer in 'html' mode
@terms
=
$dc
->parseExpandTermsResponse(
$buf
);
##-- parse an expand_terms response buffer
DESCRIPTION
Globals
- Variable: $ifmt
-
pack()-format to use for integer sizes passed to and from a DDC server. The default value ('V') should be right for ddc-2.x (always 32-bit unsigned little endian). For ddc-1.x, the machine word size and endian-ness should match those native to the machine running the DDC server.
- Variable: $ilen
-
Length of message size integer used for DDC protocol in bytes. If you change $ifmt, you should make sure to change $ilen appropriately, e.g. by setting:
$ilen
=
length
(
pack
(
$ifmt
,0));
- Variable: $JSON_BACKEND
-
Name of the module to use for JSON response decoding via decodeJson(); defaults to JSON. Set this to JSON::PP or set the environment variable PERL_JSON_BACKEND=JSON::PP if you are using multiple DDC clients via the threads module.
Constructors etc
- new
-
$dc
=
$CLASS_OR_OBJ
->new(
%args
);
- accepted %args are keys of %$dc:
-
(
##-- connection options
connect
=>\
%connectArgs
,
##-- passed to IO::Socket::(INET|UNIX)->new(), depending on $connectArgs{Domain}
## + you can also specify connect=>{url=>$url} or connect=>$url ; see parseAddr() method
mode
=>
$mode
,
##-- query mode; one of qw(json table text html raw); default='json'
linger
=>\
@linger
,
##-- SO_LINGER socket option (default=[1,0]: immediate termination)
##-- query options (formerly only in DDC::Client::Distributed)
start
=>
$start
,
##-- index of first hit to fetch (default=0)
limit
=>
$limit
,
##-- maximum number of hits to fetch (default=10)
timeout
=>
$secs
,
##-- query timeout in seconds (lower bound, default=60)
##-- hit parsing options (mostly obsolete)
optFile
=>
$filename
,
##-- parse meta names, separators from DDC *.opt file
parseMeta
=>
$bool
,
##-- if true, hit metadata will be parsed to $hit->{_meta} (default=1)
parseContext
=>
$bool
,
##-- if true, hit context data will be parsed to $hit->{_ctx} (default=1)
keepRaw
=>
$bool
,
##-- if false, raw context buffer $hit->{_raw} will be deleted after parsing context data (default=false)
encoding
=>
$enc
,
##-- DDC server encoding (default='UTF-8')
fieldSeparator
=>
$str
,
##-- intra-token field separator (default="\x{1f}": ASCII unit separator); 'text' and 'table' modes only
tokenSeparator
=>
$str
,
##-- inter-token separator (default="\x{1e}": ASCII record separator); 'text' and 'table' modes only
metaNames
=> \
@names
,
##-- metadata names for 'text' and 'html' modes; default=none
textHighlight
=> [
$l0
,
$r0
,
$l1
,
$r1
],
##-- highlighting strings, text mode (default=[qw(&& && _& &_)])
htmlHighlight
=> [
$l0
,
$r0
,
$l1
,
$r1
],
##-- highlighting strings, html mode (default=[('<STRONG><FONT COLOR=red>','</FONT></STRONG>') x 2])
tableHighlight
=> [
$l0
,
$r0
,
$l1
,
$r1
],
##-- highlighting strings, table mode (default=[qw(&& && _& &_)])
)
- default \%connectArgs:
-
Domain
=>
'INET'
,
##-- also accepts 'UNIX'
PeerAddr
=>
'localhost'
,
PeerPort
=>50000,
Proto
=>
'tcp'
,
Type
=>SOCK_STREAM,
Blocking
=>1,
- Examples
-
#-- connect to an INET socket on C<$HOST:$PORT>:
$dc
= DDC::Client->new(
connect
=>{
Domain
=>
'INET'
,
PeerAddr
=>
$HOST
,
PeerPort
=>
$PORT
});
#
# ... syntactic sugar:
$dc
= DDC::Client->new(
connect
=>
"$HOST:$PORT"
);
#-- connect to an INET socket on localhost port C<$PORT>, setting socket timeout $TIMEOUT
$dc
= DDC::Client->new(
connect
=>{
PeerPort
=>
$PORT
,
Timeout
=>
$TIMEOUT
});
$dc
= DDC::Client->new(
connect
=>
":$PORT?Timeout=$TIMEOUT"
);
#-- connect to a UNIX socket at C<$SOCKPATH> on the local host:
$dc
= DDC::Client->new(
connect
=>{
Domain
=>
'UNIX'
,
Peer
=>
$SOCKPATH
});
#
# ... syntactic sugar (URL form; see parseAddr()):
$dc
= DDC::Client->new(
connect
=>
"unix://$SOCKPATH"
);
Querying
- queryRaw
-
$buf
=
$dc
->queryRaw(
$query_string
);
Sends a query string to the selected server and returns the raw result buffer. Implicitly close()s the connection.
- queryRawNC
-
$buf
=
$dc
->queryRawNC(
$query_string
);
Sends a query string to the selected server and returns the raw result buffer. No implicit close().
- queryMulti
-
@bufs
=
$dc
->queryMulti(
@query_strings
);
Sends a series of query strings or requests to the server, and returns a list of raw result buffers. Implicitly close()s the client after all requests have been sent, but not between individual requests.
- query
-
$hits
=
$dc
->query(
$query_string
);
Sends a query string to the selected server and parses the result into a list of hits.
- get_first_hits
-
$buf
=
$dc
->get_first_hits(
$query
,
$timeout
?,
$limit
?,
$hint
?);
Requests IDs of the first $limit hit(s) for query $query, using optional navigation hint $hint, and returns the raw DDC response buffer. The optional parameters default to the %$dc keys of the same name.
- get_hit_strings
-
$buf
=
$dc
->get_hit_strings(
$format
?,
$start
?,
$limit
?)
Requests the full strings for up to $limit hits beginning at logical offset $start formatted as $format. $format defaults to $dc->{mode}, and the remaining optional parameters default to the %$dc keys of the same name.
- run_query
-
$buf
=
$dc
->run_query(
$corpus
,
$query
,
$format
?,
$start
?,
$limit
?,
$timeout
?,
$hint
?)
Requests a complete query evaluation of up to $limit hit(s) beginning at offset $start for query $query, formatted as $format with server-side timeout lower bound $timeout and optional navigation hint $hint. If $corpus is specified as
undef
, it defaults to the string "Distributed". Optional parameters default to the %$dc keys of the same name. Note that this method returns the raw DDC response; see the query() method for a more comfortable alternative.
Common Requests
- request
-
$rsp
=
$dc
->request(
$request_string
);
Send a raw DDC request and return the server's response as a raw byte-string.
- requestJson
-
$data
=
$dc
->requestJson(
$request_string
);
Send a raw DDC request and decode the server's response as JSON data.
- version
-
$server_version
=
$dc
->version();
Request the current running version of the selected server, wraps $dc->request("version").
- status
-
$status
=
$dc
->status();
$status
=
$dc
->status(
$timeout
);
Get basic server status; wraps $dc->requestJson("status $timeout").
- vstatus
-
$vstatus
=
$dc
->vstatus();
$vstatus
=
$dc
->vstatus(
$timeout
);
Get verbose server status; wraps $dc->requestJson("vstatus $timeout").
- info
-
$info
=
$dc
->info();
$info
=
$dc
->info(
$timeout
);
Get verbose server information; wraps $dc->requestJson("info $timeout").
- nodes
-
$nodes
=
$dc
->nodes();
$nodes
=
$dc
->nodes(
$depth
);
Get ARRAY-ref of accessible server nodes suitable for use with the ':' query-operator; wraps $dc->requestJson("nodes $depth").
- expand_terms
-
$expandRaw
=
$dc
->expand_terms(
$pipeline
,
$term
);
$expandRaw
=
$dc
->expand_terms(
$pipeline
,
$term
,
$timeout
);
$expandRaw
=
$dc
->expand_terms(
$pipeline
,
$term
,
$timeout
,
$subcorpus
);
Perform server-side term-expansion for the term $term via pipeline $pipeline. Both $term and $pipeline may be specified as ARRAY-refs or bare strings. Returns the raw response data string.
- expand
-
@terms
=
$dc
->expand(
$pipeline
,
$term
);
@terms
=
$dc
->expand(
$pipeline
,
$term
,
$timeout
);
@terms
=
$dc
->expand(
$pipeline
,
$term
,
$timeout
,
$subcorpus
);
Perform server-side term-expansion for the term $term via pipeline $pipeline and parse the response with parseExpandTermsResponse(). Returns an array @terms of server expansions in list context; in scalar context returns the reference \@terms to such an array.
- query
-
$hits
=
$dc
->query(
$query_string
);
Sends a query string to the selected server and parses the result into a DDC::HitList object.
- queryRaw
-
$buf
=
$dc
->queryRaw(
$query_string
);
$buf
=
$dc
->queryRaw(\
@raw_strings
);
Sends a query string to the selected server and returns the raw result buffer. The second form is equivalent to
$dc
->queryRaw(
join
(
"\x01"
,
@raw_strings
));
Implicitly close()s the connection.
- queryRawNC
-
$buf
=
$dc
->queryRawNC(
$query_string
);
Sends a query string to the selected server and returns the raw result buffer. No implicit close().
- queryMulti
-
@bufs
=
$dc
->queryMulti(
@query_strings
);
Sends a series of query strings or requests to the server, and returns a list of raw result buffers. Implicitly close()s the client after all requests have been sent, but not between individual requests.
Low-level Communications
- parseAddr
-
\
%connect
=
$dc
->parseAddr()
\
%connect
=
$CLASS_OR_OBJECT
->parseAddr(\
%connect
,
$PEER_OR_LOCAL
,
%options
)
\
%connect
=
$CLASS_OR_OBJECT
->parseAddr({
url
=>
$url
},
$PEER_OR_LOCAL
,
%options
)
\
%connect
=
$CLASS_OR_OBJECT
->parseAddr(
$url
,
$PEER_OR_LOCAL
,
%options
)
Parses connect options into a form suitable for use as parameters to IO::Socket::INET::new() or IO::Socket::UNIX::new(), respectively. Sets $connect{Domain} to either INET or UNIX. If called as an object method, operates directly on (and updates) $dc->{connect}.
Honors bare URL-style strings $url of the form:
inet://ADDR:PORT?OPT=VAL...
# canonical INET socket URL format
unix://UNIX_PATH?OPT=VAL...
# canonical UNIX socket URL format
unix:UNIX_PATH?OPT=VAL...
# = unix://UNIX_PATH?OPT=VAL...
ADDR?OPT=VAL...
# = inet://ADDR:50000?OPT=VAL...
:PORT?OPT=VAL...
# = inet://localhost:PORT?OPT=VAL...
ADDR:PORT?OPT=VAL...
# = inet://ADDR:PORT?OPT=VAL...
/UNIX_PATH?OPT=VAL...
# = unix:///UNIX_PATH?OPT=VAL...
- addrStr
-
$urlstr
=
$dc
->addrStr();
$urlstr
=
$CLASS_OR_OBJECT
->addrStr(\
%connect
,
$PEER_OR_LOCAL
);
$urlstr
=
$CLASS_OR_OBJECT
->addrStr(
$url
,
$PEER_OR_LOCAL
);
$urlstr
=
$CLASS_OR_OBJECT
->addrStr(
$sock
,
$PEER_OR_LOCAL
);
Formats specified socket connection parameters (by default those of the calling object if called as an object method) as a URL-style string.
- open
-
$io_socket
=
$dc
->
open
();
Open the underlying INET- or UNIX-domain socket; returns undef on failure. Most users will never need to call this method, since it will be called implicitly by higher-level methods such as request(), query(), status() if required.
- close
-
undef
=
$dc
->
close
();
Closes the underlying socket if currently open. Most users will never need to call this method, since it will be called implicitly by higher-level methods such as request(), query(), status() if required.
- send
-
undef
=
$dc
->
send
(
@message_strings
);
Sends @message_strings to the underlying socket as a single message.
- sendfh
-
undef
=
$dc
->sendfh(
$fh
,
@message_strings
);
Sends @message_strings to filehandle $fh, prepending total length.
- readSize
-
$size
=
$dc
->readSize();
$size
=
$dc
->readSize(
$fh
)
Reads message size from $fh (default=underlying socket).
- readBytes
-
$data
=
$dc
->readBytes(
$nbytes
);
$data
=
$dc
->readBytes(
$nbytes
,
$fh
)
Reads fixed number of bytes from $fh (default=underlying socket).
- readData
-
$data
=
$dc
->readData();
$data
=
$dc
->readData(
$fh
)
Reads pending data from $fh (default=underlying socket); calls readSize() and readBytes().
Hit Parsing
- parseTableData
- parseTextData
- parseJsonData
-
\
@hits
=
$dc
->parseTableData(
$buf
);
\
@hits
=
$dc
->parseTextData(
$buf
);
\
@hits
=
$dc
->parseJsonData(
$buf
);
Parses raw DDC data buffer in $buf. Returns an array-ref of DDC::Hit objects representing the individual hits.
JSON parsing requires the JSON module.
- parseExpandTermsResponse
-
\
@terms
=
$dc
->parseExpandTermsResponse(
$buf
);
@terms
=
$dc
->parseExpandTermsResponse(
$buf
);
Parses a DDC server expand_terms response buffer. Returns an array @terms of server expansions in list context; in scalar context returns the reference \@terms to such an array.
AUTHOR
Bryan Jurish <moocow@cpan.org>
COPYRIGHT AND LICENSE
Copyright (C) 2006-2020 by Bryan Jurish
This package is free software; you can redistribute it and/or modify it under the same terms as Perl itself, either Perl version 5.24.1 or, at your option, any later version of Perl 5 you may have available.