#!/usr/bin/env perl
# Script entry point: run the command-line driver defined below, then exit
# so that nothing further in this file is executed at top level.
randomize_ga4gh();
exit
;
# Command-line driver.
#
# Parses the command-line options, handles --help / --man / --version, and
# then hands every setting to Randomizer->new(...)->run.
#
# Defaults: format 'bff', number 100, output 'individuals.json', and a count
# of 1 for each of the six term categories.  Note that 'ethnicity' has no
# command-line option for its count, so it always keeps the default of 1.
sub randomize_ga4gh {
    my $VERSION = '0.09';

    # Term categories handled by the randomizer.
    my @terms = qw(
      phenotypicFeatures diseases treatments
      procedures exposures ethnicity
    );

    # Number of entries requested per category (default 1 each) and the
    # optional size limit of the pool each category is drawn from.
    my %count = map { ( $_ => 1 ) } @terms;
    my %pool_limit;

    my ( $format, $number, $output ) = ( 'bff', 100, 'individuals.json' );
    my ( $random_seed, $ext_ontologies, $help, $man, $debug, $verbose );

    my $show_version = sub {
        print "$0 Version $VERSION\n";
        exit;
    };

    GetOptions(
        'format|f=s'                   => \$format,
        'number|n=i'                   => \$number,
        'output|o=s'                   => \$output,
        'diseases=i'                   => \$count{diseases},
        'exposures=i'                  => \$count{exposures},
        'phenotypicFeatures=i'         => \$count{phenotypicFeatures},
        'procedures=i'                 => \$count{procedures},
        'treatments=i'                 => \$count{treatments},
        'max-diseases-pool=i'          => \$pool_limit{diseases},
        'max-ethnicity-pool=i'         => \$pool_limit{ethnicity},
        'max-exposures-pool=i'         => \$pool_limit{exposures},
        'max-phenotypicFeatures-pool=i' => \$pool_limit{phenotypicFeatures},
        'max-treatments-pool=i'        => \$pool_limit{treatments},
        'max-procedures-pool=i'        => \$pool_limit{procedures},
        'random-seed=i'                => \$random_seed,
        'external-ontologies=s'        => \$ext_ontologies,
        'help|?'                       => \$help,
        'man'                          => \$man,
        'debug=i'                      => \$debug,
        'verbose|'                     => \$verbose,
        'version|V'                    => $show_version,
    ) or pod2usage(2);

    pod2usage(1) if $help;
    pod2usage( -verbose => 2, -exitval => 0 ) if $man;

    # Settings that are not per-category.
    my %args = (
        format         => $format,
        number         => $number,
        output         => $output,
        random_seed    => $random_seed,
        ext_ontologies => $ext_ontologies,
        debug          => $debug,
        verbose        => $verbose,
    );

    # Per-category settings: "<term>" and "max_<term>_pool".
    for my $term (@terms) {
        $args{$term} = $count{$term};
        $args{ 'max_' . $term . '_pool' } = $pool_limit{$term};
    }

    return Randomizer->new( \%args )->run;
}
# NOTE(review): as written this bare qw() list is a no-op statement. The names
# match the variables read by load_ontology_hash(), so it looks like the
# remnant of an import/declaration list whose leading keyword is missing
# from this listing -- confirm against the original file.
qw($hpo_array $omim_array $rxnorm_array $ncit_procedures_array $ncit_exposures_array $ethnicity_array)
;
# Constructor.
#
#   $class - package name to bless into
#   $self  - hash reference holding the settings; it is blessed in place
#            (not copied) and returned.
sub new {
    my ( $class, $self ) = @_;
    return bless $self, $class;
}
# Generates $self->{number} random individuals in the requested format and
# writes them to $self->{output} as a JSON array.
#
# Reads from $self:
#   number         - how many records to generate
#   format         - 'bff' or 'pxf'; selects the generator
#   output         - path handed to write_json()
#   random_seed    - optional; when defined, srand() is seeded with it
#   ext_ontologies - optional file; when true it is loaded and validated by
#                    validate_json() and stored in $self->{ontologies_data}
#
# Dies with a descriptive message when the format is missing or unknown
# (previously this surfaced as an opaque "undefined subroutine reference"
# failure inside the loop).
#
# Returns the result of write_json().
sub run {
    my ($self) = @_;
    my ( $number, $format, $output, $random_seed ) =
      @{$self}{qw(number format output random_seed)};

    my %func = (
        bff => \&bff_generator,
        pxf => \&pxf_generator,
    );

    # Fail early, before any work is done, if the format cannot be handled.
    my $generator = defined $format ? $func{$format} : undef;
    die sprintf(
        "Unsupported format '%s' (expected one of: %s)\n",
        $format // '',
        join( ', ', sort keys %func )
    ) unless $generator;

    srand($random_seed) if defined $random_seed;

    $self->{ontologies_data} =
      $self->{ext_ontologies}
      ? validate_json( $self->{ext_ontologies} )
      : undef;

    # Start from an empty array so that a request for zero records is
    # written as "[]" rather than "null".
    my $json_data = [];
    for my $i ( 1 .. ( $number // 0 ) ) {
        push @$json_data, $generator->( $i, $self );
    }

    return write_json( { filepath => $output, data => $json_data } );
}
# Serialises a data structure as pretty-printed, canonical (sorted-key) JSON
# and writes it to a file.
#
# Takes one hash reference with:
#   filepath - destination path
#   data     - structure to encode
#
# Returns 1.
sub write_json {
    my ($arg) = @_;
    my ( $file, $json_data ) = @{$arg}{qw(filepath data)};

    # ->utf8 makes encode() return UTF-8 *bytes*.
    my $json = JSON::XS->new->utf8->canonical->pretty->encode($json_data);

    # Write those bytes untouched.  Using spew_utf8 here would UTF-8 encode
    # the already-encoded bytes a second time and corrupt non-ASCII text.
    path($file)->spew_raw($json);

    return 1;
}
# Builds one random record in the 'pxf' layout.
#
#   $id   - number appended to "Phenopacket_" and "IndividualId_"
#   $self - settings object, passed on to run_functions()
#
# The record has the keys id, subject (id, age, sex), diseases,
# phenotypicFeatures and medicalActions (treatments and procedures merged by
# merge_medical_actions()).
#
# Returns whatever the generator produced by fake_hash() yields.
sub pxf_generator {
    my ( $id, $self ) = @_;

    my $terms = run_functions($self);

    # Keep this order (age first, then sex): each helper consumes random
    # numbers, so reordering would change seeded output.
    my $age_duration = fake_template( "P%dY", fake_int_mod( 1, 99 ) );
    my $sex          = fake_pick_mod( [ 'MALE', 'FEMALE' ] );

    my %subject = (
        id  => "IndividualId_" . $id,
        age => { iso8601duration => $age_duration },
        sex => $sex,
    );

    my %packet = (
        id                 => "Phenopacket_" . $id,
        subject            => \%subject,
        diseases           => $terms->{diseases},
        phenotypicFeatures => $terms->{phenotypicFeatures},
        medicalActions     => merge_medical_actions($terms),
    );

    my $generator = fake_hash( \%packet );
    return $generator->();
}
# Combines treatments and procedures into a single list.
#
#   $hash - hash reference that may contain array references under the keys
#           'treatments' and 'procedures'
#
# Each treatment is wrapped as { treatment => ... } and each procedure as
# { procedure => ... }.  Treatments come first.  A key that is not defined
# contributes nothing.
#
# Returns an array reference (empty when neither key is defined).
sub merge_medical_actions {
    my ($hash) = @_;

    my @actions;
    for my $pair ( [ treatments => 'treatment' ], [ procedures => 'procedure' ] ) {
        my ( $source_key, $wrapper_key ) = @$pair;
        next unless defined $hash->{$source_key};
        push @actions, map { +{ $wrapper_key => $_ } } @{ $hash->{$source_key} };
    }

    return \@actions;
}
# Builds one random record in the 'bff' layout.
#
#   $id   - number appended to "Beacon_"
#   $self - settings object, passed on to run_functions()
#
# The record has the keys id, ethnicity, sex, diseases, phenotypicFeatures,
# treatments, interventionsOrProcedures and exposures.  Any of the five list
# fields that run_functions() left undefined falls back to one shared empty
# array.
#
# Returns whatever the generator produced by fake_hash() yields.
sub bff_generator {
    my ( $id, $self ) = @_;

    my $empty_list = [];
    my $terms      = run_functions($self);

    my @sex_choices = (
        { id => "NCIT:C20197", label => "Male" },
        { id => "NCIT:C16576", label => "Female" },
    );

    my %individual = (
        id        => "Beacon_" . $id,
        ethnicity => $terms->{ethnicity},
        sex       => fake_pick_mod( \@sex_choices ),
    );

    # Output field => key in the run_functions() result.
    my %source_of = (
        diseases                  => 'diseases',
        phenotypicFeatures        => 'phenotypicFeatures',
        treatments                => 'treatments',
        interventionsOrProcedures => 'procedures',
        exposures                 => 'exposures',
    );
    for my $field ( keys %source_of ) {
        $individual{$field} = $terms->{ $source_of{$field} } // $empty_list;
    }

    return fake_hash( \%individual )->();
}
# Creates $n entries from a shuffled pool of ontology terms.
#
#   $params           - hash reference; $params->{entry_creator} is a code
#                       reference called as ($term, $params) for each entry
#   $ontologies_array - array reference of candidate terms
#   $n                - number of entries to create
#   $max              - optional pool size limit, passed to shuffle_slice()
#
# Returns an array reference of the created entries, or undef when no entry
# was created (the array is only brought into existence by the first push).
sub create_entries {
    my ( $params, $ontologies_array, $n, $max ) = @_;

    my $pool    = shuffle_slice( $max, $ontologies_array );
    my $creator = $params->{entry_creator};

    my $entries;
    my $index = 0;
    while ( $index < $n ) {
        push @$entries, $creator->( $pool->[$index], $params );
        $index++;
    }

    return $entries;
}
# Default entry builder used by several term categories.
#
#   $element - the ontology term to store
#   $params  - hash reference; $params->{type} names the key that holds the
#              term and $params->{onset} names the key that holds the age
#
# Returns a hash reference with those two keys; the onset value is
# { age => { iso8601duration => <"P%dY" template fed by fake_int_mod(1, 99)> } }.
sub common_entry_creator {
    my ( $element, $params ) = @_;

    my %entry;
    $entry{ $params->{type} } = $element;
    $entry{ $params->{onset} } = {
        age => {
            iso8601duration => fake_template( "P%dY", fake_int_mod( 1, 99 ) )
        }
    };

    return \%entry;
}
# Creates phenotypic feature entries.
#
#   $format           - 'bff' selects the keys featureType / ageOfOnset;
#                       anything else selects type / onset
#   $ontologies_array - array reference of candidate terms
#   $n                - number of entries
#   $max              - optional pool size limit
#
# Returns the result of create_entries().
sub phenotypicFeatures {
    my ( $format, $ontologies_array, $n, $max ) = @_;

    my $is_bff = $format eq 'bff';
    my %params = (
        entry_creator => \&common_entry_creator,
        type          => ( $is_bff ? 'featureType' : 'type' ),
        onset         => ( $is_bff ? 'ageOfOnset' : 'onset' ),
    );

    return create_entries( \%params, $ontologies_array, $n, $max );
}
# Creates disease entries.
#
#   $format           - 'bff' selects the keys diseaseCode / ageOfOnset;
#                       anything else selects term / onset
#   $ontologies_array - array reference of candidate terms
#   $n                - number of entries
#   $max              - optional pool size limit
#
# Returns the result of create_entries().
sub diseases {
    my ( $format, $ontologies_array, $n, $max ) = @_;

    my $is_bff = $format eq 'bff';
    my %params = (
        entry_creator => \&common_entry_creator,
        type          => ( $is_bff ? 'diseaseCode' : 'term' ),
        onset         => ( $is_bff ? 'ageOfOnset' : 'onset' ),
    );

    return create_entries( \%params, $ontologies_array, $n, $max );
}
# Creates treatment entries.
#
#   $format           - 'bff' stores each term under 'treatmentCode';
#                       anything else stores it under 'agent'
#   $ontologies_array - array reference of candidate terms
#   $n                - number of entries
#   $max              - optional pool size limit
#
# Returns the result of create_entries().
sub treatments {
    my ( $format, $ontologies_array, $n, $max ) = @_;

    # The key depends only on the format, so pick it once.
    my $term_key = $format eq 'bff' ? 'treatmentCode' : 'agent';

    my %params = (
        entry_creator => sub {
            my ($element) = @_;
            return +{ $term_key => $element };
        },
    );

    return create_entries( \%params, $ontologies_array, $n, $max );
}
# Creates procedure entries.
#
#   $format           - 'bff' selects the keys procedureCode / ageAtProcedure;
#                       anything else selects term / onset
#   $ontologies_array - array reference of candidate terms
#   $n                - number of entries
#   $max              - optional pool size limit
#
# Returns the result of create_entries().
sub procedures {
    my ( $format, $ontologies_array, $n, $max ) = @_;

    my $is_bff = $format eq 'bff';
    my %params = (
        entry_creator => \&common_entry_creator,
        type          => ( $is_bff ? 'procedureCode' : 'term' ),
        onset         => ( $is_bff ? 'ageAtProcedure' : 'onset' ),
    );

    return create_entries( \%params, $ontologies_array, $n, $max );
}
# Creates exposure entries.
#
#   $format           - 'bff' selects the keys exposureCode / ageAtExposure;
#                       anything else selects term / onset
#   $ontologies_array - array reference of candidate terms
#   $n                - number of entries
#   $max              - optional pool size limit
#
# Every entry additionally gets a fixed 'duration' of 'P999Y' and a 'unit'
# pointing at one shared { id => 'NCIT:C126101', label => 'Not Available' }
# hash.
#
# Returns the (augmented) result of create_entries().
sub exposures {
    my ( $format, $ontologies_array, $n, $max ) = @_;

    my $is_bff = $format eq 'bff';
    my %params = (
        entry_creator => \&common_entry_creator,
        type          => ( $is_bff ? 'exposureCode' : 'term' ),
        onset         => ( $is_bff ? 'ageAtExposure' : 'onset' ),
    );

    # Placeholder values attached to every exposure.
    my $placeholder_duration = 'P999Y';
    my $placeholder_unit     = {
        id    => 'NCIT:C126101',
        label => 'Not Available',
    };

    my $entries = create_entries( \%params, $ontologies_array, $n, $max );
    for my $entry (@$entries) {
        @{$entry}{qw(duration unit)} =
          ( $placeholder_duration, $placeholder_unit );
    }

    return $entries;
}
# Picks a single random ethnicity term.
#
# Called with the same four positional arguments as the other category
# functions, but only the second ($ontologies_array) and fourth ($max, the
# optional pool size limit) are used.
#
# Returns the first element of the shuffled pool (undef if it is empty).
sub ethnicity {
    my ( $ontologies_array, $max ) = @_[ 1, 3 ];

    my ($picked) = shuffle_slice( $max, $ontologies_array );
    return $picked;
}
# Returns a hash reference mapping each term category to its built-in
# ontology array ($omim_array, $ethnicity_array, $ncit_exposures_array,
# $hpo_array, $ncit_procedures_array, $rxnorm_array).
#
# The argument ($self) is accepted but not used.
sub load_ontology_hash {
    my ($self) = @_;

    my $ontology_for = {};
    $ontology_for->{diseases}           = $omim_array;
    $ontology_for->{ethnicity}          = $ethnicity_array;
    $ontology_for->{exposures}          = $ncit_exposures_array;
    $ontology_for->{phenotypicFeatures} = $hpo_array;
    $ontology_for->{procedures}         = $ncit_procedures_array;
    $ontology_for->{treatments}         = $rxnorm_array;

    return $ontology_for;
}
# Runs the generator for every term category and collects the results.
#
#   $self - settings object; the following keys are read:
#             format                - passed to each category function
#             ontologies_data       - optional user-supplied terms per category
#             <category>            - number of entries for that category
#             max_<category>_pool   - optional pool size limit
#
# For each category the user-supplied term list is used when the category
# exists in $self->{ontologies_data}; otherwise the built-in list from
# load_ontology_hash() is used.
#
# Categories are processed in sorted key order so that the sequence of
# random draws is the same from run to run.
#
# Returns a hash reference: category name => result of its function.
sub run_functions {
    my $self = shift;

    my $ontologies = load_ontology_hash($self);

    # Dispatch table.  The 'ethnicity' entry had been corrupted to
    # "\ðnicity" ("&eth" misread as an HTML entity); it is restored here.
    my %func = (
        diseases           => \&diseases,
        ethnicity          => \&ethnicity,
        exposures          => \&exposures,
        phenotypicFeatures => \&phenotypicFeatures,
        procedures         => \&procedures,
        treatments         => \&treatments,
    );

    my %hash;
    for my $key ( sort keys %func ) {
        my $ontologies_array =
          exists $self->{ontologies_data}{$key}
          ? $self->{ontologies_data}{$key}
          : $ontologies->{$key};

        $hash{$key} = $func{$key}->(
            $self->{format},
            $ontologies_array,
            $self->{$key},
            $self->{ 'max_' . $key . '_pool' }
        );
    }

    return \%hash;
}
# Returns a shuffled copy of (a leading slice of) an array.
#
#   $max   - when defined, only the first $max elements (as selected by
#            List::Util's head) are considered; when undef, all elements
#   $array - array reference; it is not modified
#
# In list context the shuffled elements are returned as a list; otherwise a
# reference to the shuffled array is returned.
sub shuffle_slice {
    my ( $max, $array ) = @_;

    my @pool = @$array;
    @pool = head( $max, @pool ) if defined $max;

    my @shuffled = shuffle(@pool);

    return @shuffled if wantarray;
    return \@shuffled;
}
# Returns a pseudo-random integer N with $low <= N < $high (for
# $high > $low), drawn with Perl's built-in rand().
#
#   $low  - inclusive lower bound
#   $high - exclusive upper bound
#
# The offset used to be a hard-coded 1, which silently ignored $low and
# returned values starting at 1 whatever the caller asked for.  Offsetting by
# $low fixes that while producing exactly the same values as before for a
# lower bound of 1.
sub fake_int_mod {
    my ( $low, $high ) = @_;
    my $range = $high - $low;
    return $low + int( rand($range) );
}
# Returns one element of the given array, chosen with Perl's built-in rand().
#
#   $array - array reference to pick from
sub fake_pick_mod {
    my ($array) = @_;

    my $size  = scalar @$array;
    my $index = int( rand($size) );

    return $array->[$index];
}
# Loads a user-supplied ontology file and validates it against a schema.
#
#   $file - path handed to read_yaml()
#
# The schema accepts an object whose optional properties diseases,
# phenotypicFeatures, treatments, procedures, exposures and ethnicity are
# each an array of objects with required string fields 'id' (matching the
# pattern below) and 'label'.
#
# On validation errors they are printed via say_errors() and the sub dies.
# Otherwise returns the loaded data.
sub validate_json {
    my ($file) = @_;

    my $data = read_yaml($file);

    # Every category shares the same "array of items" definition.
    my @categories = qw(
      diseases phenotypicFeatures treatments
      procedures exposures ethnicity
    );
    my %properties =
      map { ( $_ => { '$ref' => '#/$defs/array' } ) } @categories;

    my %item_def = (
        type       => "object",
        required   => [ "id", "label" ],
        properties => {
            id => {
                type    => "string",
                pattern => qq/^\\w[^:]+:.+\$/
            },
            label => { type => "string" },
        },
    );
    my %array_def = (
        type  => "array",
        items => { '$ref' => '#/$defs/item' },
    );

    my $schema = {
        type       => "object",
        properties => \%properties,
        '$defs'    => {
            array => \%array_def,
            item  => \%item_def,
        },
    };

    my $jv = JSON::Validator->new;
    $jv->schema($schema);

    my @errors = $jv->validate($data);
    if (@errors) {
        say_errors( \@errors ) and die;
    }

    return $data;
}
# Prints the given errors, one per line, when there are any.
#
#   $errors - array reference of errors
#
# Always returns 1.
sub say_errors {
    my ($errors) = @_;

    say join( "\n", @$errors ) if @$errors;

    return 1;
}
# Loads a file with YAML::XS's LoadFile and returns the result.
#
#   $file - path of the file to load
#
# LoadFile is imported at call time.
# NOTE(review): this assumes YAML::XS has already been loaded elsewhere; no
# 'use'/'require' for it is visible in this listing -- confirm.
sub read_yaml {
    my ($file) = @_;

    YAML::XS->import('LoadFile');

    return LoadFile($file);
}
1;
# [POD documentation: 100 lines, collapsed in the source listing this file was taken from]