use
Fcntl
qw(SEEK_SET S_IWUSR S_IWGRP S_IWOTH)
;
use
constant
WRITE_BITS
=> S_IWUSR|S_IWGRP|S_IWOTH;
our
$VERSION
= v0.02;
my
@_xattr_hashes
=
qw(sha-1-160 sha-2-256 sha-3-512)
;
my
%_valid_properties
=
map
{
$_
=> 1}
qw(size inode mediasubtype contentise)
;
my
%_exts
= (
'image/jpeg'
=>
'jpg'
,
'image/png'
=>
'png'
,
'image/gif'
=>
'gif'
,
'image/vnd.microsoft.icon'
=>
'ico'
,
'audio/flac'
=>
'flac'
,
'application/pdf'
=>
'pdf'
,
'application/zip'
=>
'zip'
,
'text/plain'
=>
'txt'
,
);
my
%_db_tags
= (
final_file_size
=> Data::Identifier->new(
uuid
=>
'1cd4a6c6-0d7c-48d1-81e7-4e8d41fdb45d'
),
final_file_encoding
=> Data::Identifier->new(
uuid
=>
'448c50a8-c847-4bc7-856e-0db5fea8f23b'
),
final_file_hash
=> Data::Identifier->new(
uuid
=>
'79385945-0963-44aa-880a-bca4a42e9002'
),
also_has_role
=> Data::Identifier->new(
uuid
=>
'd2750351-aed7-4ade-aa80-c32436cc6030'
),
also_has_state
=> Data::Identifier->new(
uuid
=>
'4c426c3c-900e-4350-8443-e2149869fbc9'
),
has_final_state
=> Data::Identifier->new(
uuid
=>
'54d30193-2000-4d8a-8c28-3fa5af4cad6b'
),
specific_proto_file_state
=> Data::Identifier->new(
uuid
=>
'63da70a8-78a4-51b0-8b87-86872b474a5d'
),
);
sub
dbname {
my
(
$self
) =
@_
;
return
$self
->{dbname} //=
do
{
my
$sth
=
$self
->_prepare(
'SELECT filename FROM file WHERE id = ?'
);
my
$res
;
$sth
->execute(
$self
->{dbid});
$res
=
$sth
->fetchall_arrayref;
$res
->[0][0] // croak
'Database error'
;
};
}
sub
filename {
my
(
$self
) =
@_
;
return
$self
->{filename} //=
do
{
$self
->store->_file(
qw(v2 store)
,
$self
->dbname);
};
}
sub
contentise {
my
(
$self
,
%opts
) =
@_
;
my
$as
=
delete
(
$opts
{as}) //
'uuid'
;
croak
'Stray options passed'
if
scalar
keys
%opts
;
$self
->{contentise} //=
eval
{Data::Identifier->new(
ise
=>
$self
->get(
properties
=>
'contentise'
))};
$self
->{contentise} //=
$self
->_calculate_contentise;
croak
'No contentise known for this file'
unless
defined
$self
->{contentise};
return
$self
->{contentise}->as(
$as
,
db
=>
$self
->db(
default
=>
undef
),
extractor
=>
$self
->extractor(
default
=>
undef
),
);
}
sub
ise {
my
(
$self
,
@args
) =
@_
;
return
$self
->contentise(
@args
);
}
sub
open
{
my
(
$self
) =
@_
;
my
$fh
=
$self
->_open;
$self
->
stat
;
$self
->_detach_fh;
return
$fh
;
}
sub
link_out {
my
(
$self
,
$filename
) =
@_
;
link
(
$self
->filename,
$filename
) or croak $!;
}
sub
symlink_out {
my
(
$self
,
$filename
) =
@_
;
symlink
(
$self
->filename,
$filename
) or croak $!;
}
sub
update {
my
(
$self
,
%opts
) =
@_
;
my
File::FStore
$store
=
$self
->store;
my
$no_digests
=
delete
(
$opts
{no_digests});
my
$inode
;
my
$properties
;
my
$digests
;
croak
'Stray options passed'
if
scalar
keys
%opts
;
$store
->in_transaction(
rw
=>
sub
{
my
$fh
=
$self
->_open;
delete
$self
->{
stat
};
$inode
=
$self
->_fii_inode;
unless
(
$no_digests
) {
my
$verify_result
=
$inode
->verify;
unless
(
$verify_result
->has_passed ||
$verify_result
->has_no_data ||
$verify_result
->has_insufficient_data) {
croak
sprintf
(
'File (%s) is in bad state: %s'
,
$self
->dbname,
$verify_result
->status);
}
}
{
my
$data
=
$self
->get;
my
(
$dev
,
$ino
,
$mode
,
$nlink
,
$uid
,
$gid
,
$rdev
,
$size
,
$atime
,
$mtime
,
$ctime
,
$blksize
,
$blocks
) =
$self
->
stat
;
$properties
=
$data
->{properties} //= {};
$digests
=
$data
->{digests} //= {};
$properties
->{size} //=
$size
;
croak
'Size missmatch'
if
$properties
->{size} !=
$size
;
$properties
->{inode} //=
$ino
;
croak
'inode missmatch'
if
$properties
->{inode} !=
$ino
;
foreach
my
$lifecycle
(
qw(final current)
) {
if
(
defined
(
my
$v
=
$inode
->get(
'size'
,
lifecycle
=>
$lifecycle
,
default
=>
undef
))) {
$properties
->{size} //=
$v
;
croak
'Size missmatch'
if
$properties
->{size} !=
$v
;
}
if
(
defined
(
my
$v
=
$inode
->get(
'mediatype'
,
lifecycle
=>
$lifecycle
,
default
=>
undef
,
as
=>
'mediatype'
))) {
$properties
->{mediasubtype} //=
$v
;
croak
sprintf
(
'Media subtype missmatch on (%s): "%s" vs. "%s"'
,
$self
->dbname,
$properties
->{mediasubtype},
$v
)
if
$properties
->{mediasubtype} ne
$v
;
}
if
(
defined
(
my
$v
=
$inode
->get(
'st_ino'
,
lifecycle
=>
$lifecycle
,
default
=>
undef
))) {
$properties
->{inode} //=
$v
;
croak
'inode missmatch'
if
$properties
->{inode} !=
$v
;
}
}
foreach
my
$digest
(@{
$self
->_used_digests}) {
if
(
defined
(
my
$v
=
$inode
->digest(
$digest
,
lifecycle
=>
'final'
,
default
=>
undef
))) {
$digests
->{
$digest
} //=
$v
;
croak
'Digest missmatch for '
.
$digest
if
$digests
->{
$digest
} ne
$v
;
}
}
unless
(
$no_digests
) {
foreach
my
$digest
(@{
$self
->_used_digests}) {
if
(
defined
(
my
$v
=
$inode
->digest(
$digest
,
default
=>
undef
))) {
$digests
->{
$digest
} //=
$v
;
croak
'Digest missmatch for '
.
$digest
if
$digests
->{
$digest
} ne
$v
;
}
}
}
if
(
defined
(
my
$contentise
=
eval
{
$self
->_calculate_contentise(
$data
)->uuid})) {
$properties
->{contentise} //=
$contentise
;
croak
'Content ISE missmatch'
if
$properties
->{contentise} ne
$contentise
;
}
$self
->set(
$data
);
}
{
my
$dbname
=
$self
->dbname;
my
$filename
= File::Spec->catfile(
'..'
,
'..'
,
'store'
,
$dbname
);
foreach
my
$fn
(
$self
->_linknames(
$digests
)) {
next
if
-l
$fn
;
symlink
(
$filename
,
$fn
) or croak $!;
}
}
if
(
defined
(
my
$handle
= File::FStore::File::_DUMMY_FOR_XATTR->new(
$fh
))) {
if
(
defined
(
$properties
->{size}) && !
defined
(
$handle
->getfattr(
'utag.final.file.size'
))) {
$handle
->setfattr(
'utag.final.file.size'
=>
$properties
->{size});
}
if
(
defined
(
$properties
->{mediasubtype}) && !
defined
(
$handle
->getfattr(
'mime_type'
))) {
$handle
->setfattr(
'mime_type'
=>
$properties
->{mediasubtype});
}
if
(
defined
(
$properties
->{mediasubtype}) && !
defined
(
$handle
->getfattr(
'utag.final.file.encoding'
))) {
my
$v
= Data::Identifier::Generate->generic(
namespace
=>
'50d7c533-2d9b-4208-b560-bcbbf75ce3f9'
,
input
=>
$properties
->{mediasubtype},
)->uuid;
$handle
->setfattr(
'utag.final.file.encoding'
=>
$v
);
}
if
(!
defined
(
$handle
->getfattr(
'utag.write-mode'
))) {
$handle
->setfattr(
'utag.write-mode'
=>
'7b177183-083c-4387-abd3-8793eb647373'
);
}
if
(!
defined
(
$handle
->getfattr(
'utag.final-mode'
))) {
$handle
->setfattr(
'utag.final-mode'
=>
'f418cdb9-64a7-4f15-9a18-63f7755c5b47'
);
}
if
(
defined
(
my
$size
=
$properties
->{size}) && !
defined
(
$handle
->getfattr(
'utag.final.file.hash'
))) {
my
@el
;
my
$v
=
''
;
foreach
my
$algo
(
@_xattr_hashes
) {
push
(
@el
,
sprintf
(
' %s bytes 0-%u/%u %s'
,
$algo
,
$size
- 1,
$size
,
$digests
->{
$algo
} //
next
));
}
for
(
my
$i
= 0;
$i
<
scalar
(
@el
);
$i
++) {
$v
.=
' '
if
$i
;
$v
.=
$i
==
$#el
?
'v0'
:
'v0m'
;
$v
.=
$el
[
$i
];
}
$handle
->setfattr(
'utag.final.file.hash'
=>
$v
)
if
length
$v
;
}
}
{
my
(
$dev
,
$ino
,
$mode
,
$nlink
,
$uid
,
$gid
,
$rdev
,
$size
,
$atime
,
$mtime
,
$ctime
,
$blksize
,
$blocks
) =
$self
->
stat
;
if
(
defined
(
$mode
) && (
$mode
& WRITE_BITS)) {
my
$n
=
$mode
& (07777 ^ WRITE_BITS);
eval
{
chmod
(
$n
,
$fh
) };
}
}
});
}
sub
stat
{
my
(
$self
) =
@_
;
$self
->{
stat
} //=
do
{
my
@s
;
if
(
defined
$self
->{fh}) {
@s
=
stat
(
$self
->{fh});
}
else
{
@s
=
stat
(
$self
->filename);
}
croak
'File missing on filesystem, store is corruped: '
.
$self
->dbname
unless
scalar
(
@s
);
\
@s
;
};
return
@{
$self
->{
stat
}};
}
sub
get {
my
(
$self
,
$domain
,
$key
) =
@_
;
if
(
defined
(
$domain
)) {
if
(
defined
(
$key
)) {
my
$sth
;
my
$res
;
if
(
$domain
eq
'properties'
) {
$sth
=
$self
->_prepare(
'SELECT value FROM file_properties WHERE file = ? AND key = ?'
);
}
elsif
(
$domain
eq
'digests'
) {
$sth
=
$self
->_prepare(
'SELECT hash FROM file_hash WHERE file = ? AND algo = ?'
);
}
croak
'Invalid domain: '
.
$domain
unless
defined
$sth
;
$sth
->execute(
$self
->{dbid},
$key
);
$res
=
$sth
->fetchall_arrayref;
return
$res
->[0][0] // croak
'No value for domain '
.
$domain
.
' key '
.
$key
;
}
else
{
my
$sth
;
my
%res
;
if
(
$domain
eq
'properties'
) {
$sth
=
$self
->_prepare(
'SELECT key,value FROM file_properties WHERE file = ?'
);
}
elsif
(
$domain
eq
'digests'
) {
$sth
=
$self
->_prepare(
'SELECT algo,hash FROM file_hash WHERE file = ?'
);
}
croak
'Invalid domain: '
.
$domain
unless
defined
$sth
;
$sth
->execute(
$self
->{dbid});
while
(
my
$row
=
$sth
->fetchrow_arrayref) {
$res
{
$row
->[0]} =
$row
->[1];
}
return
\
%res
;
}
}
else
{
return
{
map
{
$_
=>
$self
->get(
$_
)}
qw(properties digests)
};
}
}
sub
set {
my
(
$self
,
$domain
,
$key
,
$value
) =
@_
;
my
$dbid
=
$self
->{dbid};
my
$data
;
croak
'No data given'
unless
defined
$domain
;
if
(
defined
(
$key
)) {
if
(
defined
(
$value
)) {
$data
= {
$domain
=> {
$key
=>
$value
}};
}
else
{
$data
= {
$domain
=>
$key
};
}
}
else
{
$data
=
$domain
;
}
$self
->store->in_transaction(
rw
=>
sub
{
foreach
my
$cdomain
(
keys
%{
$data
}) {
my
$d
=
$data
->{
$cdomain
};
my
$sth
;
my
$valids
;
if
(
$cdomain
eq
'properties'
) {
$sth
=
$self
->_prepare(
'INSERT INTO file_properties (file,key,value) SELECT ?, ?, ? WHERE NOT EXISTS (SELECT TRUE FROM file_properties WHERE file = ? AND key = ? AND value = ?)'
);
$valids
= \
%_valid_properties
;
}
elsif
(
$cdomain
eq
'digests'
) {
$sth
=
$self
->_prepare(
'INSERT INTO file_hash (file,algo,hash) SELECT ?, ?, ? WHERE NOT EXISTS (SELECT TRUE FROM file_hash WHERE file = ? AND algo = ? AND hash = ?)'
);
$valids
= \
%File::FStore::_valid_digests
;
}
croak
'Invalid domain: '
.
$domain
unless
defined
$sth
;
foreach
my
$key
(
keys
%{
$d
}) {
croak
'Invalid key '
.
$key
.
' for domain '
.
$cdomain
unless
defined
$valids
->{
$key
};
$sth
->execute(
$dbid
,
$key
,
$d
->{
$key
},
$dbid
,
$key
,
$d
->{
$key
});
}
}
});
}
sub
delete
{
my
(
$self
) =
@_
;
my
$store
=
$self
->store;
my
$dbid
=
$self
->{dbid} or croak
'Call on invalid object'
;
$store
->in_transaction(
rw
=>
sub
{
my
$filename
=
$self
->filename;
my
@linknames
=
$self
->_linknames;
my
$sth
;
unlink
(
$filename
) or croak
'Cannot unlink file: '
.$!;
unlink
(
$_
)
foreach
@linknames
;
$sth
=
$self
->_prepare(
'DELETE FROM file_hash WHERE file = ?'
);
$sth
->execute(
$dbid
);
$sth
=
$self
->_prepare(
'DELETE FROM file_properties WHERE file = ?'
);
$sth
->execute(
$dbid
);
$sth
=
$self
->_prepare(
'DELETE FROM file WHERE id = ?'
);
$sth
->execute(
$dbid
);
});
%{
$self
} = ();
}
sub
sync_with_db {
my
(
$self
,
%opts
) =
@_
;
my
$db
=
$opts
{db} //
$self
->db;
my
$fii_inode
=
$self
->_fii_inode;
$db
->in_transaction(
rw
=>
sub
{
my
$data
=
$self
->get;
my
%ids
= (
contentise
=>
$self
->contentise(
as
=>
'Data::Identifier'
),
inodeise
=>
$fii_inode
->get(
'inodeise'
,
as
=>
'Data::Identifier'
,
default
=>
undef
),
proto
=> (
defined
(
$opts
{proto}) ?
$opts
{proto}->Data::Identifier::as(
'Data::Identifier'
) :
undef
),
encoding
=> (
defined
(
$data
->{properties}{mediasubtype}) ? Data::Identifier::Generate->generic(
namespace
=>
'50d7c533-2d9b-4208-b560-bcbbf75ce3f9'
,
input
=>
$data
->{properties}{mediasubtype},
):
undef
),
%_db_tags
,
);
my
%tags
=
map
{
$_
=>
scalar
(
eval
{
$ids
{
$_
}->as(
'Data::TagDB::Tag'
,
%opts
{autocreate},
db
=>
$db
)})}
grep
{
defined
$ids
{
$_
}}
keys
%ids
;
if
(
defined
(
my
$tag
=
$tags
{contentise})) {
my
$size
=
$data
->{properties}{size};
if
(
defined
(
$tags
{also_has_role}) &&
defined
(
$tags
{specific_proto_file_state})) {
$db
->create_relation(
tag
=>
$tag
,
relation
=>
$tags
{also_has_role},
related
=>
$tags
{specific_proto_file_state});
}
if
(
defined
(
$size
) &&
defined
(
$tags
{final_file_size})) {
$db
->create_metadata(
tag
=>
$tag
,
relation
=>
$tags
{final_file_size},
data_raw
=>
$size
);
}
if
(
defined
(
$tags
{encoding}) &&
defined
(
$tags
{final_file_encoding})) {
$db
->create_relation(
tag
=>
$tag
,
relation
=>
$tags
{final_file_encoding},
related
=>
$tags
{encoding});
}
if
(
defined
(
$tags
{final_file_hash}) &&
defined
(
$size
) &&
$size
> 0) {
foreach
my
$digest
(
keys
%{
$data
->{digests}}) {
my
$v
=
sprintf
(
'v0 %s bytes 0-%u/%u %s'
,
$digest
,
$size
- 1,
$size
,
$data
->{digests}{
$digest
} //
next
);
$db
->create_metadata(
tag
=>
$tag
,
relation
=>
$tags
{final_file_hash},
data_raw
=>
$v
);
}
}
}
if
(
defined
(
my
$proto
=
$tags
{proto})) {
if
(
defined
(
my
$tag
=
$tags
{contentise})) {
if
(
defined
(
my
$relation
=
$opts
{final_of_proto} ?
$tags
{has_final_state} :
$tags
{also_has_state})) {
$db
->create_relation(
tag
=>
$proto
,
relation
=>
$relation
,
related
=>
$tag
);
}
}
}
if
(
defined
(
my
$inode
=
$tags
{inodeise})) {
if
(
defined
(
my
$tag
=
$tags
{contentise})) {
if
(
defined
(
$tags
{has_final_state})) {
$db
->create_relation(
tag
=>
$inode
,
relation
=>
$tags
{has_final_state},
related
=>
$tag
);
}
}
}
});
}
sub
store {
my
(
$self
) =
@_
;
return
$self
->{store};
}
sub
db {
my
(
$self
,
%opts
) =
@_
;
return
$self
->{db} //=
$self
->store->db(
%opts
);
}
sub
extractor {
my
(
$self
,
%opts
) =
@_
;
return
$self
->{extractor}
if
defined
$self
->{extractor};
return
$opts
{
default
}
if
exists
$opts
{
default
};
croak
'No extractor known'
;
}
sub
fii {
my
(
$self
) =
@_
;
return
$self
->{fii} //=
$self
->store->fii;
}
sub
_new {
my
(
$pkg
,
%opts
) =
@_
;
my
$self
=
bless
\
%opts
,
$pkg
;
croak
'No store is given'
unless
defined
$self
->{store};
croak
'No dbid is given'
unless
defined
$self
->{dbid};
return
$self
;
}
sub
_prepare {
my
(
$self
,
$q
) =
@_
;
my
$dbh
=
$self
->{dbh} //=
$self
->store->{dbh};
return
$dbh
->prepare(
$q
);
}
sub
_open {
my
(
$self
,
%opts
) =
@_
;
my
$fh
;
$self
->{fh} =
$opts
{fh}
if
defined
$opts
{fh};
$fh
=
$self
->{fh} //=
do
{
CORE::
open
(
my
$in
,
'<'
,
$self
->filename) or croak $!;
$in
;
};
seek
(
$fh
, 0, SEEK_SET) or croak $!;
return
$fh
;
}
sub
_detach_fh {
my
(
$self
) =
@_
;
$self
->{fh} =
undef
;
}
sub
_fii_inode {
my
(
$self
) =
@_
;
return
$self
->{fii_inode} //=
$self
->fii->for_handle(
$self
->_open);
}
sub
_used_digests {
my
(
$self
) =
@_
;
return
$self
->{used_digests} //=
$self
->store->_used_digests;
}
sub
_calculate_contentise {
my
(
$self
,
$data
) =
@_
;
my
$sha_1_160
=
eval
{
$self
->get(
digests
=>
'sha-1-160'
) };
my
$sha_3_512
=
eval
{
$self
->get(
digests
=>
'sha-3-512'
) };
my
$size
=
eval
{
$self
->get(
properties
=>
'size'
) };
if
(
defined
$data
) {
$data
->{digests} //= {};
$data
->{properties} //= {};
$sha_1_160
//=
$data
->{digests}{
'sha-1-160'
};
$sha_3_512
//=
$data
->{digests}{
'sha-3-512'
};
$size
//=
$data
->{properties}{size};
}
if
(
defined
(
$sha_1_160
) &&
defined
(
$sha_3_512
) &&
defined
(
$size
)) {
my
$digest
=
sprintf
(
'v0m sha-1-160 bytes 0-%u/%u %s v0 sha-3-512 bytes 0-%u/%u %s'
,
$size
- 1,
$size
,
$sha_1_160
,
$size
- 1,
$size
,
$sha_3_512
,
);
return
Data::Identifier::Generate->generic(
namespace
=>
'66d488c0-3b19-4e6c-856f-79edf2484f37'
,
input
=>
$digest
,
);
}
return
undef
;
}
sub
_ext {
my
(
$self
) =
@_
;
return
$self
->{ext}
if
exists
$self
->{ext};
{
my
$mediasubtype
=
eval
{
$self
->get(
properties
=>
'mediasubtype'
)} //
'x.x/x.x'
;
return
$self
->{ext} =
$_exts
{
$mediasubtype
}
if
defined
$_exts
{
$mediasubtype
};
}
{
my
$dbname
=
$self
->dbname;
if
(
$dbname
=~ /\.([a-z0-9]{1,4})$/) {
my
$ext
= $1;
if
(
$dbname
=~ /\.(tar\.(?:gz|bz2|xz|lz|zst))$/) {
$ext
= $1;
}
return
$self
->{ext} =
$ext
;
}
}
return
$self
->{ext} =
undef
;
}
sub
_linknames {
my
(
$self
,
$digests
) =
@_
;
my
File::FStore
$store
=
$self
->store;
my
$ext
=
$self
->_ext;
my
@res
;
$ext
=
'.'
.
$ext
if
defined
$ext
;
$digests
//=
$self
->get(
'digests'
);
foreach
my
$digest
(
keys
%{
$digests
}) {
my
$v
=
$digests
->{
$digest
} //
next
;
my
$fn
;
$v
.=
$ext
if
defined
$ext
;
$fn
=
$store
->_file(
v2
=>
by
=>
$digest
=>
$v
);
push
(
@res
,
$fn
);
}
return
@res
;
}
sub
new {
my
(
$pkg
,
$fh
) =
@_
;
return
undef
unless
eval
{
require
File::ExtAttr ; File::ExtAttr->
import
; 1;};
return
bless
\
$fh
;
}
sub
isa {
my
(
$self
,
$pkg
) =
@_
;
return
1
if
$pkg
eq
'IO::Handle'
;
return
$self
->SUPER::isa(
$pkg
);
}
sub
fileno
{
my
(
$self
) =
@_
;
return
${
$self
}->
fileno
;
}
sub
getfattr {
my
(
$self
,
$key
) =
@_
;
return
eval
{
$self
->File::ExtAttr::getfattr(
$key
, {
namespace
=>
'user'
}) };
}
sub
setfattr {
my
(
$self
,
$key
,
$value
) =
@_
;
return
eval
{
$self
->File::ExtAttr::setfattr(
$key
=>
$value
, {
namespace
=>
'user'
}) };
}
}
1;
Hide Show 359 lines of Pod