#include "datagen.h"
#include "platform.h" /* SET_BINARY_MODE */
#include <stdlib.h> /* malloc, free */
#include <stdio.h> /* FILE, fwrite, fprintf */
#include <string.h> /* memcpy */
#include "../lib/common/mem.h" /* U32 */
/* Size suffix: `128 KB` expands to `128 *(1 <<10)`, i.e. 128 * 1024. */
#define KB *(1 <<10)
/* Smaller of two values. Each argument may be evaluated twice:
 * do not pass expressions with side effects. */
#define MIN(a,b)  ( (a) < (b) ? (a) : (b) )

/* Set to a non-zero value to enable TRACE() output on stderr. */
#define RDG_DEBUG 0
/* Wrapped in do { } while (0) so that `TRACE(...);` is exactly one statement
 * and cannot capture an `else` that follows it. */
#define TRACE(...)   do { if (RDG_DEBUG) fprintf(stderr, __VA_ARGS__ ); } while (0)

/* Literal distribution table: LTSIZE entries, indexed with (value & LTMASK). */
#define LTLOG 13
#define LTSIZE (1<<LTLOG)
#define LTMASK (LTSIZE-1)

/* Rotate the 32-bit unsigned value x left by r bits (0 < r < 32).
 * Arguments are fully parenthesized so that expressions such as `a | b`
 * are rotated as a whole. x is evaluated twice. */
#define RDG_rotl32(x,r) (((x) << (r)) | ((x) >> (32 - (r))))
/* RDG_rand() :
 * Steps the 32-bit generator state stored at *src :
 * multiply by a constant, xor with a second constant, rotate left by 13.
 * The new state is written back to *src.
 * @return : the new state shifted right by 5 bits. */
static U32 RDG_rand(U32* src)
{
    static const U32 multiplier = 2654435761U;
    static const U32 xorMask    = 2246822519U;
    U32 state = (*src * multiplier) ^ xorMask;
    state = RDG_rotl32(state, 13);
    *src = state;
    return state >> 5;
}
/* Unsigned fixed-point quantity carrying 8 fractional bits :
 * callers build it as (real value * 256), and the literal-table code
 * shifts products right by 8 to drop the fraction again. */
typedef
U32 fixedPoint_24_8;
/* RDG_fillLiteralDistrib() :
 * Fills the LTSIZE-entry table `ldt` with runs of identical byte values.
 * Each run covers (remaining_entries * ld / 256) + 1 entries, so runs shrink
 * as the table fills up; the byte value advances by one after every run.
 * - ld == 0 : every run has length 1 and values cycle over 0..255, starting at 0.
 * - ld  > 0 : values start at '0' and wrap from '}' back to '('.
 * `ld` is unsigned, hence "no skew requested" is exactly the value zero. */
static void RDG_fillLiteralDistrib(BYTE* ldt, fixedPoint_24_8 ld)
{
    int const uniform = (ld == 0);
    BYTE const lowest  = uniform ? 0   : '(';
    BYTE const highest = uniform ? 255 : '}';
    BYTE symbol        = uniform ? 0   : '0';
    U32 pos = 0;

    while (pos < LTSIZE) {
        U32 const runLength = (((LTSIZE - pos) * ld) >> 8) + 1;
        U32 const stop = MIN(pos + runLength, LTSIZE);
        for ( ; pos < stop; pos++) {
            ldt[pos] = symbol;
        }
        symbol++;   /* a BYTE : 255 wraps to 0 */
        if (symbol > highest) symbol = lowest;
    }
}
/* RDG_genChar() :
 * Draws one random value (advancing *seed) and uses its low LTLOG bits
 * as an index into the literal table `ldt`.
 * @return : the table entry selected by that index. */
static BYTE RDG_genChar(U32* seed, const BYTE* ldt)
{
    return ldt[RDG_rand(seed) & LTMASK];
}
/* RDG_rand15Bits() :
 * Draws one random value (advancing *seedPtr).
 * @return : its low 15 bits, i.e. a value in [0, 0x7FFF]. */
static U32 RDG_rand15Bits(U32* seedPtr)
{
    U32 const low15Mask = 0x7FFF;
    U32 const draw = RDG_rand(seedPtr);
    return draw & low15Mask;
}
/* RDG_randLength() :
 * Always consumes two random draws from *seedPtr.
 * The first draw selects the range : when its low 3 bits are non-zero
 * the result is short, in [0, 15]; otherwise it is long, in [15, 526].
 * The second draw supplies the value inside the selected range. */
static U32 RDG_randLength(U32* seedPtr)
{
    U32 const selector = RDG_rand(seedPtr) & 7;
    U32 const draw = RDG_rand(seedPtr);
    if (selector != 0) {
        return draw & 0xF;
    }
    return (draw & 0x1FF) + 0xF;
}
/* RDG_genBlock() :
 * Fills buffer[prefixSize .. buffSize) with generated content; bytes in
 * buffer[0 .. prefixSize) are left untouched and may be referenced by matches.
 *
 * @buffer      : destination, at least buffSize bytes.
 * @buffSize    : total size of the buffer, prefix included.
 * @prefixSize  : number of already-valid bytes at the start of the buffer.
 *                NOTE(review): code assumes prefixSize <= buffSize; not checked here.
 * @matchProba  : compared (scaled by 32768) against a 15-bit random draw to choose
 *                between copying earlier bytes and emitting literals.
 *                Values >= 1.0 select the sparse mode below.
 * @ldt         : literal table (LTSIZE entries) that RDG_genChar() samples from.
 * @seedPtr     : generator state, advanced by every random draw.
 *
 * Fixes relative to the previous version :
 * - segment end positions are kept in size_t; truncating them to U32 made the
 *   end fall below `pos` once pos exceeded 4 GiB, so the loop stopped progressing.
 * - the initial byte is only written when the buffer is not empty. */
static void RDG_genBlock(void* buffer, size_t buffSize, size_t prefixSize,
                         double matchProba, const BYTE* ldt, U32* seedPtr)
{
    BYTE* const buffPtr = (BYTE*)buffer;
    U32 const matchProba32 = (U32)(32768 * matchProba);
    size_t pos = prefixSize;
    U32 prevOffset = 1;

    /* sparse mode : runs of zeroes, each terminated by one literal.
     * The loop condition never changes; the loop exits through the return. */
    while (matchProba >= 1.0) {
        size_t size0 = RDG_rand(seedPtr) & 3;
        size0  = (size_t)1 << (16 + size0 * 2);      /* 64 KB, 256 KB, 1 MB or 4 MB */
        size0 += RDG_rand(seedPtr) & (size0-1);      /* plus a random amount below that */
        if (buffSize < pos + size0) {
            /* run does not fit : zero the remainder and stop */
            memset(buffPtr+pos, 0, buffSize-pos);
            return;
        }
        memset(buffPtr+pos, 0, size0);
        pos += size0;
        buffPtr[pos-1] = RDG_genChar(seedPtr, ldt);
    }

    /* matches need at least one earlier byte to copy from */
    if (pos == 0 && buffSize > 0) {
        buffPtr[0] = RDG_genChar(seedPtr, ldt);
        pos = 1;
    }

    while (pos < buffSize) {
        if (RDG_rand15Bits(seedPtr) < matchProba32) {
            /* match : copy `length` bytes from `offset` bytes back.
             * The order of random draws below is part of the generated sequence. */
            U32 const length = RDG_randLength(seedPtr) + 4;
            size_t const d = MIN(pos + length, buffSize);
            U32 const repeatOffset = (RDG_rand(seedPtr) & 15) == 2;
            U32 const randOffset = RDG_rand15Bits(seedPtr) + 1;
            U32 const offset = repeatOffset ? prevOffset : (U32) MIN(randOffset , pos);
            size_t match = pos - offset;
            /* byte-by-byte on purpose : source and destination may overlap,
             * in which case the copied pattern repeats */
            while (pos < d) { buffPtr[pos++] = buffPtr[match++]; }
            prevOffset = offset;
        } else {
            /* literals : `length` bytes sampled from the literal table */
            U32 const length = RDG_randLength(seedPtr);
            size_t const d = MIN(pos + length, buffSize);
            while (pos < d) { buffPtr[pos++] = RDG_genChar(seedPtr, ldt); }
        }
    }
}
/* RDG_genBuffer() :
 * Fills `buffer` with `size` bytes of generated content, starting from `seed`.
 * @matchProba : forwarded to the block generator.
 * @litProba   : literal distribution parameter; when <= 0.0 it is replaced
 *               by matchProba / 4.5. */
void RDG_genBuffer(void* buffer, size_t size, double matchProba, double litProba, unsigned seed)
{
    BYTE literalTable[LTSIZE];
    U32 rngState = seed;
    double const effectiveLitProba = (litProba <= 0.0) ? matchProba / 4.5 : litProba;

    memset(literalTable, '0', sizeof(literalTable));
    /* convert to 8-bit fixed point before building the table */
    RDG_fillLiteralDistrib(literalTable, (fixedPoint_24_8)(effectiveLitProba * 256 + 0.001));
    RDG_genBlock(buffer, size, 0, matchProba, literalTable, &rngState);
}
void
RDG_genStdout(unsigned
long
long
size,
double
matchProba,
double
litProba, unsigned seed)
{
U32 seed32 = seed;
size_t
const
stdBlockSize = 128 KB;
size_t
const
stdDictSize = 32 KB;
BYTE
*
const
buff = (
BYTE
*)
malloc
(stdDictSize + stdBlockSize);
U64 total = 0;
BYTE
ldt[LTSIZE];
if
(buff==NULL) {
perror
(
"datagen"
);
exit
(1); }
if
(litProba<=0.0) litProba = matchProba / 4.5;
memset
(ldt,
'0'
,
sizeof
(ldt));
RDG_fillLiteralDistrib(ldt, (fixedPoint_24_8)(litProba * 256 + 0.001));
SET_BINARY_MODE(stdout);
RDG_genBlock(buff, stdDictSize, 0, matchProba, ldt, &seed32);
while
(total < size) {
size_t
const
genBlockSize = (
size_t
) (MIN (stdBlockSize, size-total));
RDG_genBlock(buff, stdDictSize+stdBlockSize, stdDictSize, matchProba, ldt, &seed32);
total += genBlockSize;
{
size_t
const
unused =
fwrite
(buff, 1, genBlockSize, stdout); (
void
)unused; }
memcpy
(buff, buff + stdBlockSize, stdDictSize);
}
free
(buff);
}