Removed the --utf8 and --utf16 command-line options.

Added the --base-charset, --input-charset, and --log-charset command-line options.
This commit is contained in:
hyunghwan.chung 2019-05-18 18:01:02 +00:00
parent 8e4b175579
commit 96790ee42c
3 changed files with 80 additions and 30 deletions

View File

@ -87,15 +87,16 @@ int main (int argc, char* argv[])
moo_bci_t c; moo_bci_t c;
static moo_bopt_lng_t lopt[] = static moo_bopt_lng_t lopt[] =
{ {
{ ":log", 'l' }, { ":log", 'l' },
{ ":memsize", 'm' }, { ":memsize", 'm' },
{ "large-pages", '\0' }, { "large-pages", '\0' },
{ "utf8", '\0' }, { ":base-charset", '\0' },
{ "utf16", '\0' }, { ":input-charset", '\0' },
{ ":log-charset", '\0' },
#if defined(MOO_BUILD_DEBUG) #if defined(MOO_BUILD_DEBUG)
{ ":debug", '\0' }, /* NOTE: there is no short option for --debug */ { ":debug", '\0' }, /* NOTE: there is no short option for --debug */
#endif #endif
{ MOO_NULL, '\0' } { MOO_NULL, '\0' }
}; };
static moo_bopt_t opt = static moo_bopt_t opt =
{ {
@ -113,8 +114,9 @@ int main (int argc, char* argv[])
fprintf (stderr, " --log filename[,logopts]\n"); fprintf (stderr, " --log filename[,logopts]\n");
fprintf (stderr, " --memsize number\n"); fprintf (stderr, " --memsize number\n");
fprintf (stderr, " --large-pages\n"); fprintf (stderr, " --large-pages\n");
fprintf (stderr, " --utf16\n"); fprintf (stderr, " --base-charset=name\n");
fprintf (stderr, " --utf8\n"); fprintf (stderr, " --input-charset=name\n");
fprintf (stderr, " --log-charset=name\n");
#if defined(MOO_BUILD_DEBUG) #if defined(MOO_BUILD_DEBUG)
fprintf (stderr, " --debug dbgopts\n"); fprintf (stderr, " --debug dbgopts\n");
#endif #endif
@ -124,6 +126,8 @@ int main (int argc, char* argv[])
memset (&cfg, 0, MOO_SIZEOF(cfg)); memset (&cfg, 0, MOO_SIZEOF(cfg));
cfg.type = MOO_CFGSTD_OPTB; cfg.type = MOO_CFGSTD_OPTB;
cfg.cmgr = moo_get_utf8_cmgr(); cfg.cmgr = moo_get_utf8_cmgr();
cfg.input_cmgr = cfg.cmgr;
cfg.log_cmgr = cfg.cmgr;
memsize = MIN_MEMSIZE; memsize = MIN_MEMSIZE;
@ -146,14 +150,34 @@ int main (int argc, char* argv[])
cfg.large_pages = 1; cfg.large_pages = 1;
break; break;
} }
else if (moo_comp_bcstr(opt.lngopt, "utf8") == 0) else if (moo_comp_bcstr(opt.lngopt, "base-charset") == 0)
{ {
cfg.cmgr = moo_get_utf8_cmgr(); cfg.cmgr = moo_get_cmgr_by_bcstr(opt.arg);
if (!cfg.cmgr)
{
fprintf (stderr, "unknown base-charset name - %s\n", opt.arg);
return -1;
}
break; break;
} }
else if (moo_comp_bcstr(opt.lngopt, "utf16") == 0) else if (moo_comp_bcstr(opt.lngopt, "input-charset") == 0)
{ {
cfg.cmgr = moo_get_utf16_cmgr(); cfg.input_cmgr = moo_get_cmgr_by_bcstr(opt.arg);
if (!cfg.input_cmgr)
{
fprintf (stderr, "unknown input-charset name - %s\n", opt.arg);
return -1;
}
break;
}
else if (moo_comp_bcstr(opt.lngopt, "log-charset") == 0)
{
cfg.log_cmgr = moo_get_cmgr_by_bcstr(opt.arg);
if (!cfg.log_cmgr)
{
fprintf (stderr, "unknown log-charset name - %s\n", opt.arg);
return -1;
}
break; break;
} }
#if defined(MOO_BUILD_DEBUG) #if defined(MOO_BUILD_DEBUG)

View File

@ -17,6 +17,8 @@ struct moo_cfgstd_t
int large_pages; int large_pages;
moo_cmgr_t* cmgr; moo_cmgr_t* cmgr;
moo_cmgr_t* input_cmgr;
moo_cmgr_t* log_cmgr;
union union
{ {
@ -70,6 +72,7 @@ struct moo_iostd_t
const moo_uch_t* path; const moo_uch_t* path;
} fileu; } fileu;
} u; } u;
moo_cmgr_t* cmgr;
}; };
typedef struct moo_iostd_t moo_iostd_t; typedef struct moo_iostd_t moo_iostd_t;

View File

@ -284,6 +284,9 @@ struct xtn_t
int vm_running; int vm_running;
int rcv_tick; int rcv_tick;
moo_cmgr_t* input_cmgr;
moo_cmgr_t* log_cmgr;
struct struct
{ {
int fd; int fd;
@ -442,8 +445,7 @@ static MOO_INLINE moo_ooi_t open_input (moo_t* moo, moo_ioarg_t* arg)
const moo_bch_t* fn, * fb; const moo_bch_t* fn, * fb;
#if defined(MOO_OOCH_IS_UCH) #if defined(MOO_OOCH_IS_UCH)
/*if (moo_convootobcstr(moo, arg->name, &ucslen, MOO_NULL, &bcslen) <= -1) goto oops;*/ if (moo_convootobcstr(moo, arg->name, &ucslen, MOO_NULL, &bcslen) <= -1) goto oops;
if (moo_conv_uchars_to_bchars_with_cmgr(arg->name, &ucslen, MOO_NULL, &bcslen, moo_get_cmgr_by_id(MOO_CMGR_UTF8)) <= -1) goto oops;
#else #else
bcslen = moo_count_bcstr(arg->name); bcslen = moo_count_bcstr(arg->name);
#endif #endif
@ -459,11 +461,7 @@ static MOO_INLINE moo_ooi_t open_input (moo_t* moo, moo_ioarg_t* arg)
bb->fn = (moo_bch_t*)(bb + 1); bb->fn = (moo_bch_t*)(bb + 1);
moo_copy_bchars (bb->fn, fn, parlen); moo_copy_bchars (bb->fn, fn, parlen);
#if defined(MOO_OOCH_IS_UCH) #if defined(MOO_OOCH_IS_UCH)
/* [NOTE] as i convert a unicode string for fopen() below, the conversion moo_convootobcstr (moo, arg->name, &ucslen, &bb->fn[parlen], &bcslen);
* should use the system locale instead of moo's cmgr configuration.
* but let's not use the system locale for now. */
/*moo_convootobcstr (moo, arg->name, &ucslen, &bb->fn[parlen], &bcslen);*/
moo_conv_uchars_to_bchars_with_cmgr (arg->name, &ucslen, &bb->fn[parlen], &bcslen, moo_get_cmgr_by_id(MOO_CMGR_UTF8));
#else #else
moo_copy_bcstr (&bb->fn[parlen], bcslen + 1, arg->name); moo_copy_bcstr (&bb->fn[parlen], bcslen + 1, arg->name);
#endif #endif
@ -522,15 +520,16 @@ static MOO_INLINE moo_ooi_t open_input (moo_t* moo, moo_ioarg_t* arg)
{ {
*at = '\0'; *at = '\0';
bb->cmgr = moo_get_cmgr_by_bcstr(at + 1); bb->cmgr = moo_get_cmgr_by_bcstr(at + 1);
if (!bb->cmgr) bb->cmgr = moo_getcmgr(moo); if (!bb->cmgr)
} {
else if (arg->includer) moo_seterrbfmt (moo, MOO_EINVAL, "unsupported charset - %hs", at + 1);
{ goto oops;
bb->cmgr = ((bb_t*)arg->includer->handle)->cmgr; }
} }
else else
{ {
bb->cmgr = moo_getcmgr(moo); bb->cmgr = arg->includer? ((bb_t*)arg->includer->handle)->cmgr: xtn->in->cmgr;
if (!bb->cmgr) bb->cmgr = xtn->input_cmgr;
} }
/* TODO: support _wfopen or the like */ /* TODO: support _wfopen or the like */
@ -599,8 +598,6 @@ static MOO_INLINE moo_ooi_t read_input (moo_t* moo, moo_ioarg_t* arg)
#if defined(MOO_OOCH_IS_UCH) #if defined(MOO_OOCH_IS_UCH)
bcslen = bb->len; bcslen = bb->len;
ucslen = MOO_COUNTOF(arg->buf); ucslen = MOO_COUNTOF(arg->buf);
/* TODO: use the default cmgr first.
* then fallback to utf8, mb8, etc */
/*x = moo_convbtooochars(moo, bb->buf, &bcslen, arg->buf, &ucslen);*/ /*x = moo_convbtooochars(moo, bb->buf, &bcslen, arg->buf, &ucslen);*/
x = moo_conv_bchars_to_uchars_with_cmgr(bb->buf, &bcslen, arg->buf, &ucslen, bb->cmgr, 0); x = moo_conv_bchars_to_uchars_with_cmgr(bb->buf, &bcslen, arg->buf, &ucslen, bb->cmgr, 0);
if (x <= -1 /*&& ucslen <= 0 */) if (x <= -1 /*&& ucslen <= 0 */)
@ -896,7 +893,11 @@ static void log_write (moo_t* moo, moo_bitmask_t mask, const moo_ooch_t* msg, mo
if (!(mask & (MOO_LOG_STDOUT | MOO_LOG_STDERR))) if (!(mask & (MOO_LOG_STDOUT | MOO_LOG_STDERR)))
{ {
time_t now; time_t now;
#if defined(MOO_OOCH_IS_UCH)
char ts[32 * MOO_BCSIZE_MAX];
#else
char ts[32]; char ts[32];
#endif
size_t tslen; size_t tslen;
struct tm tm, *tmp; struct tm tm, *tmp;
@ -947,6 +948,23 @@ static void log_write (moo_t* moo, moo_bitmask_t mask, const moo_ooch_t* msg, mo
tslen = sprintf(ts, "%04d-%02d-%02d %02d:%02d:%02d ", tmp->tm_year + 1900, tmp->tm_mon + 1, tmp->tm_mday, tmp->tm_hour, tmp->tm_min, tmp->tm_sec); tslen = sprintf(ts, "%04d-%02d-%02d %02d:%02d:%02d ", tmp->tm_year + 1900, tmp->tm_mon + 1, tmp->tm_mday, tmp->tm_hour, tmp->tm_min, tmp->tm_sec);
} }
#endif #endif
#if defined(MOO_OOCH_IS_UCH)
if (moo_getcmgr(moo) != xtn->log_cmgr)
{
moo_uch_t tsu[32];
moo_oow_t tsulen;
/* the timestamp is likely to contain simple ascii characters only.
* conversion is not likely to fail regardless of encodings.
* so i don't check errors here */
tsulen = MOO_COUNTOF(tsu);
moo_convbtooochars (moo, ts, &tslen, tsu, &tsulen);
tslen = MOO_COUNTOF(ts);
moo_conv_uchars_to_bchars_with_cmgr (tsu, &tsulen, ts, &tslen, xtn->log_cmgr);
}
#endif
write_log (moo, logfd, ts, tslen); write_log (moo, logfd, ts, tslen);
} }
@ -964,7 +982,8 @@ static void log_write (moo_t* moo, moo_bitmask_t mask, const moo_ooch_t* msg, mo
ucslen = len; ucslen = len;
bcslen = MOO_COUNTOF(buf); bcslen = MOO_COUNTOF(buf);
n = moo_convootobchars(moo, &msg[msgidx], &ucslen, buf, &bcslen); /*n = moo_convootobchars(moo, &msg[msgidx], &ucslen, buf, &bcslen);*/
n = moo_conv_uchars_to_bchars_with_cmgr(&msg[msgidx], &ucslen, buf, &bcslen, xtn->log_cmgr);
if (n == 0 || n == -2) if (n == 0 || n == -2)
{ {
/* n = 0: /* n = 0:
@ -987,7 +1006,7 @@ static void log_write (moo_t* moo, moo_bitmask_t mask, const moo_ooch_t* msg, mo
} }
else if (n <= -1) else if (n <= -1)
{ {
/* conversion error */ /* conversion error but i just stop here but don't treat it as a hard error. */
break; break;
} }
} }
@ -3631,6 +3650,10 @@ static void fini_moo (moo_t* moo)
if (!moo) return MOO_NULL; if (!moo) return MOO_NULL;
xtn = GET_XTN(moo); xtn = GET_XTN(moo);
xtn->input_cmgr = cfg->input_cmgr;
if (!xtn->input_cmgr) xtn->input_cmgr = moo_getcmgr(moo);
xtn->log_cmgr = cfg->log_cmgr;
if (!xtn->log_cmgr) xtn->log_cmgr = moo_getcmgr(moo);
chain (moo); /* call chain() before moo_regevtcb() as fini_moo() calls unchain() */ chain (moo); /* call chain() before moo_regevtcb() as fini_moo() calls unchain() */
reset_log_to_default (xtn); reset_log_to_default (xtn);