#define C_KINO_SORTEXTERNAL
#include "KinoSearch/Util/ToolSet.h"
#include "KinoSearch/Util/SortExternal.h"
static
void
S_refill_cache(SortExternal *self);
static
void
S_absorb_slices(SortExternal *self, uint8_t *endpost);
static
uint8_t*
S_find_endpost(SortExternal *self);
static
uint32_t
S_find_slice_size(SortExternal *self, uint8_t *endpost);
SortExternal*
SortEx_init(SortExternal *self,
size_t
width)
{
self->width = width;
self->mem_thresh = U32_MAX;
self->cache = NULL;
self->cache_cap = 0;
self->cache_max = 0;
self->cache_tick = 0;
self->scratch = NULL;
self->scratch_cap = 0;
self->runs = VA_new(0);
self->slice_sizes = NULL;
self->slice_starts = NULL;
self->num_slices = 0;
self->flipped =
false
;
ABSTRACT_CLASS_CHECK(self, SORTEXTERNAL);
return
self;
}
void
SortEx_destroy(SortExternal *self)
{
FREEMEM(self->scratch);
FREEMEM(self->slice_sizes);
FREEMEM(self->slice_starts);
if
(self->cache) {
SortEx_Clear_Cache(self);
FREEMEM(self->cache);
}
DECREF(self->runs);
SUPER_DESTROY(self, SORTEXTERNAL);
}
void
SortEx_clear_cache(SortExternal *self)
{
self->cache_max = 0;
self->cache_tick = 0;
}
void
SortEx_feed(SortExternal *self,
void
*data)
{
const
size_t
width = self->width;
if
(self->cache_max == self->cache_cap) {
size_t
amount = Memory_oversize(self->cache_max + 1, width);
SortEx_Grow_Cache(self, amount);
}
uint8_t *target = self->cache + self->cache_max * width;
memcpy
(target, data, width);
self->cache_max++;
}
static
INLINE
void
*
SI_peek(SortExternal *self)
{
if
(self->cache_tick >= self->cache_max) {
S_refill_cache(self);
}
if
(self->cache_max > 0) {
return
self->cache + self->cache_tick * self->width;
}
else
{
return
NULL;
}
}
void
*
SortEx_fetch(SortExternal *self)
{
void
*address = SI_peek(self);
self->cache_tick++;
return
address;
}
void
*
SortEx_peek(SortExternal *self)
{
return
SI_peek(self);
}
void
SortEx_sort_cache(SortExternal *self)
{
if
(self->cache_tick != 0) {
THROW(ERR,
"Cant Sort_Cache() after fetching %u32 items"
, self->cache_tick);
}
if
(self->cache_max != 0) {
VTable *vtable = SortEx_Get_VTable(self);
kino_Sort_compare_t compare
= (kino_Sort_compare_t)METHOD(vtable, SortEx, Compare);
if
(self->scratch_cap < self->cache_cap) {
self->scratch_cap = self->cache_cap;
self->scratch = (uint8_t*)REALLOCATE(self->scratch,
self->scratch_cap * self->width);
}
Sort_mergesort(self->cache, self->scratch, self->cache_max,
self->width, compare, self);
}
}
void
SortEx_flip(SortExternal *self)
{
SortEx_Flush(self);
self->flipped =
true
;
}
void
SortEx_add_run(SortExternal *self, SortExternal *run)
{
VA_Push(self->runs, (Obj*)run);
uint32_t num_runs = VA_Get_Size(self->runs);
self->slice_sizes = (uint32_t*)REALLOCATE(self->slice_sizes,
num_runs *
sizeof
(uint32_t));
self->slice_starts = (uint8_t**)REALLOCATE(self->slice_starts,
num_runs *
sizeof
(uint8_t*));
}
static
void
S_refill_cache(SortExternal *self)
{
SortEx_Clear_Cache(self);
uint32_t i = 0;
while
(i < VA_Get_Size(self->runs)) {
SortExternal *
const
run = (SortExternal*)VA_Fetch(self->runs, i);
if
(SortEx_Cache_Count(run) > 0 || SortEx_Refill(run) > 0) {
i++;
}
else
{
VA_Excise(self->runs, i, 1);
}
}
if
(VA_Get_Size(self->runs)) {
uint8_t *endpost = S_find_endpost(self);
S_absorb_slices(self, endpost);
}
}
static
uint8_t*
S_find_endpost(SortExternal *self)
{
uint8_t *endpost = NULL;
const
size_t
width = self->width;
for
( uint32_t i = 0, max = VA_Get_Size(self->runs); i < max; i++) {
SortExternal *
const
run = (SortExternal*)VA_Fetch(self->runs, i);
const
uint32_t tick = run->cache_max - 1;
if
(tick >= run->cache_cap || run->cache_max < 1) {
THROW(ERR,
"Invalid SortExternal cache access: %u32 %u32 %u32"
, tick,
run->cache_max, run->cache_cap);
}
else
{
uint8_t *candidate = run->cache + tick * width;
if
(i == 0) {
endpost = candidate;
}
else
if
(SortEx_Compare(self, candidate, endpost) < 0) {
endpost = candidate;
}
}
}
return
endpost;
}
static
void
S_absorb_slices(SortExternal *self, uint8_t *endpost)
{
size_t
width = self->width;
uint32_t num_runs = VA_Get_Size(self->runs);
uint8_t **slice_starts = self->slice_starts;
uint32_t *slice_sizes = self->slice_sizes;
VTable *vtable = SortEx_Get_VTable(self);
kino_Sort_compare_t compare
= (kino_Sort_compare_t)METHOD(vtable, SortEx, Compare);
if
(self->cache_max != 0) { THROW(ERR,
"Can't refill unless empty"
); }
for
(uint32_t i = 0; i < num_runs; i++) {
SortExternal *
const
run = (SortExternal*)VA_Fetch(self->runs, i);
uint32_t slice_size = S_find_slice_size(run, endpost);
if
(slice_size) {
if
(self->cache_max + slice_size > self->cache_cap) {
size_t
cap = Memory_oversize(self->cache_max + slice_size,
width);
SortEx_Grow_Cache(self, cap);
}
memcpy
(self->cache + self->cache_max * width,
run->cache + run->cache_tick * width,
slice_size * width);
run->cache_tick += slice_size;
self->cache_max += slice_size;
slice_sizes[self->num_slices++] = slice_size;
}
}
uint32_t total = 0;
for
(uint32_t i = 0; i < self->num_slices; i++) {
slice_starts[i] = self->cache + total * width;
total += slice_sizes[i];
}
if
(self->scratch_cap < self->cache_cap) {
self->scratch_cap = self->cache_cap;
self->scratch = (uint8_t*)REALLOCATE(self->scratch,
self->scratch_cap * width);
}
while
(self->num_slices > 1) {
uint32_t i = 0;
uint32_t j = 0;
while
(i < self->num_slices) {
if
(self->num_slices - i >= 2) {
const
uint32_t merged_size = slice_sizes[i] + slice_sizes[i+1];
Sort_merge(slice_starts[i], slice_sizes[i],
slice_starts[i+1], slice_sizes[i+1], self->scratch,
self->width, compare, self);
slice_sizes[j] = merged_size;
slice_starts[j] = slice_starts[i];
memcpy
(slice_starts[j], self->scratch, merged_size * width);
i += 2;
j += 1;
}
else
if
(self->num_slices - i >= 1) {
slice_sizes[j] = slice_sizes[i];
slice_starts[j] = slice_starts[i];
i += 1;
j += 1;
}
}
self->num_slices = j;
}
self->num_slices = 0;
}
void
SortEx_grow_cache(SortExternal *self, uint32_t size)
{
if
(size > self->cache_cap) {
self->cache = (uint8_t*)REALLOCATE(self->cache, size * self->width);
self->cache_cap = size;
}
}
static
uint32_t
S_find_slice_size(SortExternal *self, uint8_t *endpost)
{
int32_t lo = self->cache_tick - 1;
int32_t hi = self->cache_max;
uint8_t *
const
cache = self->cache;
const
size_t
width = self->width;
SortEx_compare_t compare = (SortEx_compare_t)METHOD(
SortEx_Get_VTable(self), SortEx, Compare);
while
(hi - lo > 1) {
const
int32_t mid = lo + ((hi - lo) / 2);
const
int32_t delta = compare(self, cache + mid * width, endpost);
if
(delta > 0) { hi = mid; }
else
{ lo = mid; }
}
return
lo == -1
? 0
: (lo - self->cache_tick) + 1;
}
void
SortEx_set_mem_thresh(SortExternal *self, uint32_t mem_thresh)
{
self->mem_thresh = mem_thresh;
}
uint32_t
SortEx_cache_count(SortExternal *self)
{
return
self->cache_max - self->cache_tick;
}