# Copyright (c) 2024 Yuki Kimoto # MIT License class R::DataFrame { version_from R; use StringList; use List; use R::NDArray; use R::DataFrame::Column; # Fields has colobjs_list : List of R::DataFrame::Column; has colobjs_indexes_h : Hash of Int; # Class Methods static method new : R::DataFrame () { my $self = new R::DataFrame; my $colobjs_list = List->new(new R::DataFrame::Column[0]); $self->{colobjs_list} = $colobjs_list; my $colobjs_indexes_h = Hash->new; $self->{colobjs_indexes_h} = $colobjs_indexes_h; return $self; } # Instance Methods method colnames : string[] () { my $ncol = $self->ncol; my $colnames = new string[$ncol]; for (my $i = 0; $i < $ncol; $i++) { $colnames->[$i] = $self->colname($i); } return $colnames; } method exists_col : int ($colname : string) { my $colobjs_indexes_h = $self->{colobjs_indexes_h}; my $exists = $colobjs_indexes_h->exists($colname); return $exists; } method colname : string ($col : int) { my $colobjs_list = $self->{colobjs_list}; unless ($col >= 0 && $col < $colobjs_list->length) { die "The column at index $col dose not exist."; } my $colobj = (R::DataFrame::Column)$colobjs_list->get($col); my $colname = $colobj->{name}; return $colname; } method colindex : int ($colname : string) { my $ncol = $self->ncol; my $match = 0; for (my $col = 0; $col < $ncol; $col++) { my $colname_cmp = $self->colname($col); if ($colname eq $colname_cmp) { return $col; } } die "The column named \"$colname\" does not exists."; } method col_by_index : R::NDArray ($col : int) { my $colobjs_list = $self->{colobjs_list}; unless ($col >= 0 && $col < $colobjs_list->length) { die "The column at index $col does not exists."; } my $colobj = (R::DataFrame::Column)$colobjs_list->get($col); my $ndarray = $colobj->{ndarray}; return $ndarray; } method first_col : R::NDArray () { return $self->col_by_index(0); } method col : R::NDArray ($colname : string) { unless ($self->exists_col($colname)) { die "The column named \"$colname\" dose not exist."; } my $col = $self->colindex($colname); my $ndarray = $self->col_by_index($col); return $ndarray; } method set_col : void ($colname : string, $ndarray : R::NDArray) { my $exists_col = $self->exists_col($colname); if ($exists_col) { my $col = $self->colindex($colname); my $colobjs_list = $self->{colobjs_list}; my $colobj = (R::DataFrame::Column)$colobjs_list->get($col); $colobj->{ndarray} = $ndarray; $ndarray->make_dim_read_only; } else { $self->insert_col($colname, $ndarray); } } method insert_col : void ($colname : string, $ndarray : R::NDArray, $before_colname : string = undef) { unless (length $colname) { die "The column name \$colname must be a non-empty string."; } unless ($ndarray) { die "The n-dimensional array \$ndarray must be defined."; } my $exists_col = $self->exists_col($colname); if ($exists_col) { die "The column named \"$colname\" already exists."; } if ($self->ncol > 0) { unless (R::OP->equals_dim($ndarray, $self->first_col)) { die "The dimensions of the n-dimensional array \$ndarray must be equal to the dimensions of the n-dimensional array of the first column of this data frame."; } } my $colobj = new R::DataFrame::Column; $colobj->{name} = $colname; $colobj->{ndarray} = $ndarray; $ndarray->make_dim_read_only; my $col = -1; if ($before_colname) { $col = $self->colindex($before_colname); } else { $col = $self->ncol; } my $colobjs_list = $self->{colobjs_list}; $colobjs_list->insert($col, $colobj); $self->update_colobjs_indexes_h($col); } method ncol : int () { my $colobjs_list = $self->{colobjs_list}; my $ncol = $colobjs_list->length; return $ncol; } method nrow : int () { my $ncol = $self->ncol; my $nrow = 0; if ($ncol > 0) { unless ($self->first_col->is_vector) { die "The n-dimensional array of the first column of this data frame must be a vector."; } $nrow = $self->first_col->length; } return $nrow; } method remove_col : void ($colname : string) { my $col = $self->colindex($colname); my $colobjs_list = $self->{colobjs_list}; $colobjs_list->remove($col); my $colobjs_indexes_h = $self->{colobjs_indexes_h}; $colobjs_indexes_h->delete($colname); $self->update_colobjs_indexes_h($col); } method clone : R::DataFrame ($shallow : int = 0) { my $ncol = $self->ncol; my $colnames = $self->colnames; my $ret_data_frame = R::DataFrame->new; for my $colname (@$colnames) { my $ndarray = $self->col($colname); my $ret_ndarray = $ndarray->clone($shallow); $ret_data_frame->insert_col($colname, $ret_ndarray); } return $ret_data_frame; } method to_string : string () { my $ncol = $self->ncol; my $string_buffer = StringBuffer->new; if ($ncol == 0) { $string_buffer->push("Empty"); } else { my $first_ndarray = $self->first_col; my $first_dim = $first_ndarray->dim; my $first_dim_length = @$first_dim; if ($first_dim_length <= 1) { for (my $col = 0; $col < $ncol; $col++) { my $colname = $self->colname($col); $string_buffer->push("$colname"); unless ($col == $ncol - 1) { $string_buffer->push("\t"); } } $string_buffer->push("\n"); my $nrow = $self->nrow; for (my $row = 0; $row < $nrow; $row++) { for (my $col = 0; $col < $ncol; $col++) { my $ndarray = $self->col_by_index($col); my $elem_string = $ndarray->elem_to_string($ndarray->data, $row); $string_buffer->push($elem_string); unless ($col == $ncol - 1) { $string_buffer->push("\t"); } } $string_buffer->push("\n"); } } else { for (my $col = 0; $col < $ncol; $col++) { my $colname = $self->colname($col); my $ndarray = $self->col_by_index($col); my $dim = $ndarray->dim; my $dim_length = @$dim; $string_buffer->push("$colname\n"); my $ndarray_string = $ndarray->to_string; $string_buffer->push($ndarray_string); $string_buffer->push("\n\n"); } } } my $string = $string_buffer->to_string; return $string; } method slice : R::DataFrame ($colnames : string[], $axis_indexes_product : R::NDArray::Int[]) { my $ncol = $self->ncol; my $colobjs_indexes_h = $self->{colobjs_indexes_h}; unless ($colnames) { $colnames = $self->colnames; } my $ret_data_frame = R::DataFrame->new; for my $colname (@$colnames) { my $ndarray = $self->col($colname); my $ret_ndarray = $ndarray->slice($axis_indexes_product); $ret_data_frame->insert_col($colname, $ret_ndarray); } return $ret_data_frame; } method set_order : void ($data_indexes_ndarray : R::NDArray::Int) { my $ncol = $self->ncol; for (my $col = 0; $col < $ncol; $col++) { my $ndarray = $self->col_by_index($col); $ndarray->set_order($data_indexes_ndarray); } } method sort : void ($colnames_with_sort_order : string[]) { my $order_ndarray = $self->order($colnames_with_sort_order); $self->set_order($order_ndarray); } method order : R::NDArray::Int ($colnames_with_sort_order : string[]) { unless ($colnames_with_sort_order) { die "The column names \$colnames_with_sort_order must be defined."; } my $ncol = $self->ncol; unless ($ncol > 0) { die "The column numbers of this data frame must be greater than 0."; } my $first_ndarray = $self->first_col; my $data_length = $self->first_col->length; my $order_data_indexes_ndarray = R::OP::Int->seq(0, $data_length - 1); my $order_data = $order_data_indexes_ndarray->data; my $colnames_length = @$colnames_with_sort_order; my $colnames = new string[$colnames_length]; my $order_by_desc_h = Hash->new; for (my $col = 0; $col < $colnames_length; $col++) { my $colname_with_order_by = $colnames_with_sort_order->[$col]; my $ret = Fn->split(" ", $colname_with_order_by); my $colname = $ret->[0]; my $order_by = (string)undef; if (@$ret > 1) { $order_by = $ret->[1]; } else { $order_by = "asc"; } unless ($self->exists_col($colname)) { die "The column named \"$colname\" does not exist."; } unless (@$ret >= 1 && @$ret <= 2 && ($order_by eq "asc" || $order_by eq "desc")) { die "The column name with the sort order \"$colname_with_order_by\" is invalid format."; } $colnames->[$col] = $colname; if ($order_by eq "desc") { $order_by_desc_h->set($colname, 1); } else { $order_by_desc_h->set($colname, 0); } } Sort->sort_int($order_data, [has that : R::DataFrame = $self, $colnames : string[], $order_by_desc_h : Hash] method : int ($a_data_index : int, $b_data_index : int) { my $cmp = 0; for my $colname (@$colnames) { my $ndarray = $self->{that}->col($colname); my $desc = $order_by_desc_h->get_int($colname); if ($desc) { $cmp = $ndarray->elem_cmp($ndarray->data, $b_data_index, $ndarray->data, $a_data_index); } else { $cmp = $ndarray->elem_cmp($ndarray->data, $a_data_index, $ndarray->data, $b_data_index); } unless ($cmp == 0) { last; } } return $cmp; }); return $order_data_indexes_ndarray; } # Private Class Methods private static method reverse_int : void ($array : int[]) { unless ($array) { die "The array \$array must be defined."; } my $length = @$array; for (my $i = 0; $i < $length / 2; $i++) { my $temp = $array->[$i]; $array->[$i] = $array->[$length - $i - 1]; $array->[$length - $i - 1] = $temp; } } # Private Static Methods private method update_colobjs_indexes_h : void ($begin_col : int) { my $colobjs_list = $self->{colobjs_list}; my $colobjs_indexes_h = $self->{colobjs_indexes_h}; for (my $i = $begin_col; $i < $colobjs_list->length; $i++) { my $colobj = (R::DataFrame::Column)$colobjs_list->get($i); my $colname = $colobj->{name}; $colobjs_indexes_h->set($colname, $i); } } }