#include "EXTERN.h"
#include "perl.h"
#include "XSUB.h"
/*
 * Pin the version string of this extension: any XS_VERSION that is already
 * defined when this point is reached is discarded and replaced by "1.100".
 */
#ifdef XS_VERSION
#undef XS_VERSION
#endif
#define XS_VERSION "1.100"
/* Punycode parameter values (see RFC 3492, section 5). */
#define BASE 36			/* number of distinct digits (a-z, 0-9) */
#define TMIN 1			/* lower clamp for the per-digit threshold */
#define TMAX 26			/* upper clamp for the per-digit threshold */
#define SKEW 38			/* used by adapt() */
#define DAMP 700		/* divisor applied by adapt() to the very first delta */
#define INITIAL_BIAS 72
#define INITIAL_N 128		/* first code point that is not copied literally */

/*
 * True if the byte x is a "basic" code point, i.e. one that
 * UTF8_IS_INVARIANT accepts.  The argument is parenthesised so that the
 * cast applies to the whole expression, not just its first operand.
 */
#define isBASE(x) UTF8_IS_INVARIANT((unsigned char)(x))

/* Separates the literal (basic) part from the encoded part. */
#define DELIM '-'

/* Clamp t into the range TMIN..TMAX.  Evaluates t more than once. */
#define TMIN_MAX(t) (((t) < TMIN) ? (TMIN) : ((t) > TMAX) ? (TMAX) : (t))
/*
 * Maps a digit value 0..BASE-1 to its output character:
 * 0..25 -> 'a'..'z', 26..35 -> '0'..'9'.  Read-only lookup table.
 */
static const char enc_digit[BASE] = {
  'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
  'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
  '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
};
/*
 * Maps a 7-bit character code to its digit value, or -1 if the character
 * is not a digit.  Letters are accepted in both cases ('A'/'a' -> 0 ..
 * 'Z'/'z' -> 25); '0'..'9' map to 26..35.  Read-only lookup table; callers
 * must only index it with values 0x00..0x7F.
 */
static const IV dec_digit[0x80] = {
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 00..0F */
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 10..1F */
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 20..2F */
  26, 27, 28, 29, 30, 31, 32, 33, 34, 35, -1, -1, -1, -1, -1, -1, /* 30..3F */
  -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, /* 40..4F */
  15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, /* 50..5F */
  -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, /* 60..6F */
  15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, /* 70..7F */
};
/*
 * Bias adaptation (RFC 3492, section 6.1).
 *
 *   delta     - the delta that was just encoded/decoded
 *   numpoints - number of code points handled so far, including the current
 *               one; must be non-zero (it is used as a divisor)
 *   first     - non-zero for the very first delta, which is damped by DAMP
 *               instead of being halved
 *
 * Returns the new bias to use for the next delta.
 */
static int adapt(int delta, int numpoints, int first) {
  int k = 0;

  delta /= first ? DAMP : 2;
  delta += delta / numpoints;

  /* scale delta down until it fits the threshold range, counting in k */
  while(delta > ((BASE - TMIN) * TMAX) / 2) {
    delta /= BASE - TMIN;
    k += BASE;
  }

  return k + (((BASE - TMIN + 1) * delta) / (delta + SKEW));
}
MODULE = Net::IDN::Punycode PACKAGE = Net::IDN::Punycode

SV*
encode_punycode(input)
        SV * input
    PREINIT:
        /*
         * Returns a new mortal SV containing the Punycode encoding of
         * input.  The input is read through SvPVutf8(); the result is
         * built from basic characters only and never gets the UTF-8
         * flag.  Croaks with "input exceeds punycode limit" when delta
         * would no longer fit into an int.
         */
        UV c, m, n = INITIAL_N;
        int k, q, t;
        int bias = INITIAL_BIAS;
        int delta = 0, skip_delta;
        const char *in_s, *in_p, *in_e, *skip_p;
        char *re_s, *re_p, *re_e;
        int first = 1;
        STRLEN length_guess, len, h, u8, used;
    PPCODE:
        in_s = in_p = SvPVutf8(input, len);
        in_e = in_s + len;
        length_guess = len;
        if(length_guess < 64) length_guess = 64;  /* optimise for maximum length of domain names */
        length_guess += 2;                        /* plus DELIM + '\0' */
        RETVAL = NEWSV('P',length_guess);
        sv_2mortal(RETVAL);                       /* so we can use croak w/o memory leaks */
        SvPOK_only(RETVAL);                       /* UTF8 is off (BASE chars only) */
        re_s = re_p = SvPV_nolen(RETVAL);
        re_e = re_s + SvLEN(RETVAL);
        /* copy basic code points; at most len bytes, which always fit */
        while(in_p < in_e) {
            if( isBASE(*in_p) )
                *re_p++ = *in_p;
            in_p++;
        }
        h = re_p - re_s;
        /* add DELIM if needed */
        if(h) *re_p++ = DELIM;
        for(;;) {
            /* find smallest code point not yet handled; remember where
               it first occurs (skip_p) and how many already-handled
               code points precede that position (skip_delta) */
            m = UV_MAX;
            q = skip_delta = 0;
            for(in_p = skip_p = in_s; in_p < in_e;) {
                c = utf8_to_uvuni((U8*)in_p, &u8);
                if(c >= n && c < m) {
                    m = c;
                    skip_p = in_p;
                    skip_delta = q;
                }
                if(c < n)
                    ++q;
                in_p += u8;
            }
            if(m == UV_MAX)
                break;
            /* increase delta to the state corresponding to
               the m code point at the beginning of the string;
               fail instead of silently wrapping the int */
            if(delta < 0 || (m - n) > (UV)(PERL_INT_MAX - delta) / (h + 1))
                croak("input exceeds punycode limit");
            delta += (int)((m - n) * (h + 1));
            n = m;
            /* now find the chars to be encoded in this round */
            delta += skip_delta;
            for(in_p = skip_p; in_p < in_e;) {
                c = utf8_to_uvuni((U8*)in_p, &u8);
                if(c < n) {
                    ++delta;
                } else if( c == n ) {
                    q = delta;
                    for(k = BASE;; k += BASE) {
                        /* always keep room for the digit written next
                           plus the terminating '\0' written at the end */
                        if(re_e - re_p < 2) {
                            used = re_p - re_s;   /* offset survives the realloc */
                            length_guess = SvLEN(RETVAL) + 16;
                            re_s = SvGROW(RETVAL, length_guess);
                            re_p = re_s + used;
                            re_e = re_s + SvLEN(RETVAL);
                        }
                        t = TMIN_MAX(k - bias);
                        if(q < t) break;
                        *re_p++ = enc_digit[t + ((q-t) % (BASE-t))];
                        q = (q-t) / (BASE-t);
                    }
                    /* q indexes enc_digit[], which has BASE entries */
                    if(q < 0 || q >= BASE) croak("input exceeds punycode limit");
                    *re_p++ = enc_digit[q];
                    bias = adapt(delta, h+1, first);
                    delta = first = 0;
                    ++h;
                }
                in_p += u8;
            }
            ++delta;
            ++n;
        }
        *re_p = 0;
        SvCUR_set(RETVAL, re_p - re_s);
        ST(0) = RETVAL;
        XSRETURN(1);
SV*
decode_punycode(input)
SV * input
PREINIT:
UV c, n = INITIAL_N;
IV dc;
int i = 0, oldi, j, k, t, w;
int bias = INITIAL_BIAS;
int delta = 0, skip_delta;
const char *in_s, *in_p, *in_e, *skip_p;
char *re_s, *re_p, *re_e;
int first = 1;
STRLEN length_guess, len, h, u8;
PPCODE:
in_s = in_p = SvPV_nolen(input);
in_e = SvEND(input);
length_guess = SvCUR(input) * 2;
if(length_guess < 256) length_guess = 256;
RETVAL = NEWSV('D',length_guess);
sv_2mortal(RETVAL); /* so we can use croak w/o memory leaks */
SvPOK_only(RETVAL);
re_s = re_p = SvPV_nolen(RETVAL);
re_e = re_s + SvLEN(RETVAL);
skip_p = NULL;
for(in_p = in_s; in_p < in_e; in_p++) {
c = *in_p; /* we don't care whether it's UTF-8 */
if(!isBASE(c)) croak("non-base character in input for decode_punycode");
if(c == DELIM) skip_p = in_p;
*re_p++ = c; /* copy it */
}
if(skip_p) {
h = skip_p - in_s; /* base chars handled */
re_p = re_s + h; /* points to end of base chars */
skip_p++; /* skip over DELIM */
} else {
h = 0; /* no base chars */
re_p = re_s;
skip_p = in_s; /* read everything */
}
for(in_p = skip_p; in_p < in_e; i++) {
oldi = i;
w = 1;
for(k = BASE;; k+= BASE) {
if(!(in_p < in_e)) croak("incomplete encoded code point in decode_punycode");
dc = dec_digit[*in_p++]; /* we already know it's in 0..127 */
if(dc < 0) croak("invalid digit in input for decode_punycode");
c = (UV)dc;
i += c * w;
t = TMIN_MAX(k - bias);
if(c < t) break;
w *= BASE-t;
}
h++;
bias = adapt(i-oldi, h, first);
first = 0;
n += i / h; /* code point n to insert */
i = i % h; /* at position i */
u8 = UNISKIP(n); /* how many bytes we need */
if(re_p + u8 >= re_e) {
length_guess = re_e - re_p + u8 + 16;
re_e = SvGROW(RETVAL, length_guess);
re_p = re_e + (re_p - re_s);
re_s = re_e;
re_e = re_s + SvLEN(RETVAL);
}
j = i;
for(skip_p = re_s; j > 0; j--) /* find position in UTF-8 */
skip_p+=UTF8SKIP(skip_p);
if(skip_p < re_p) /* move succeeding chars */
Move(skip_p, skip_p + u8, re_p - skip_p, char);
re_p += u8;
uvuni_to_utf8_flags((U8*)skip_p, n, UNICODE_ALLOW_ANY);
}
if(!first) SvUTF8_on(RETVAL); /* UTF-8 chars have been inserted */
*re_p = 0;
SvCUR_set(RETVAL, re_p - re_s);
ST(0) = RETVAL;
XSRETURN(1);