diff --git a/lib/logfmt.c b/lib/logfmt.c index 0f8df7a..a820e4f 100644 --- a/lib/logfmt.c +++ b/lib/logfmt.c @@ -803,7 +803,8 @@ static HCL_INLINE int print_formatted (hcl_t* hcl, hcl_ooi_t nargs, hcl_fmtout_t if (ch == HCL_OOCI_EOF) { - /* fmt is not advanced when it is length-bounded. so not fmt - checkpoint - 1 */ + /* fmt is not advanced when it is length-bounded. + * so not fmt - checkpoint - 1 */ PRINT_OOCS (checkpoint, fmt - checkpoint); goto done; } @@ -818,7 +819,7 @@ static HCL_INLINE int print_formatted (hcl_t* hcl, hcl_ooi_t nargs, hcl_fmtout_t flagc = 0; -reswitch: + reswitch: GET_NEXT_CHAR_TO (hcl, fmt, fmtend, ch); switch (ch) { @@ -954,16 +955,15 @@ reswitch: case 'c': case 'C': - print_char: - - /* zeropad must not take effect for 'c' */ - if (flagc & FLAGC_ZEROPAD) padc = ' '; - GET_NEXT_ARG_TO (hcl, nargs, &arg_state, arg); if (HCL_OOP_IS_SMOOI(arg)) arg = HCL_CHAR_TO_OOP(HCL_OOP_TO_SMOOI(arg)); if (!HCL_OOP_IS_CHAR(arg)) goto invalid_format; ooch = HCL_OOP_TO_CHAR(arg); + print_char: + /* zeropad must not take effect for 'c' */ + if (flagc & FLAGC_ZEROPAD) padc = ' '; + /* precision 0 doesn't kill the letter */ width--; if (!(flagc & FLAGC_LEFTADJ) && width > 0) PRINT_OOCH (padc, width); @@ -1024,11 +1024,9 @@ reswitch: extra = nslen; if (sign && ((HCL_OOP_IS_SMOOI(arg) && HCL_OOP_TO_SMOOI(arg) < 0) || HCL_IS_NBIGINT(hcl,arg))) neg = 1; - if ((flagc & FLAGC_SHARP) && arg != HCL_SMOOI_TO_OOP(0)) { - if (base == 8) extra++; - else if (base == 16 || base == -16) extra += 2; + if (base == 2 || base == 8 || base == 16 || base == -16) extra += 2; } if (neg) extra++; else if (flagc & FLAGC_SIGN) extra++; @@ -1052,19 +1050,19 @@ reswitch: if ((flagc & FLAGC_SHARP) && arg != HCL_SMOOI_TO_OOP(0)) { - if (base == 2) { - PRINT_OOCH ('0', 1); + PRINT_OOCH ('#', 1); PRINT_OOCH ('b', 1); } if (base == 8) { - PRINT_OOCH ('0', 1); + PRINT_OOCH ('#', 1); + PRINT_OOCH ('o', 1); } else if (base == 16 || base == -16) { - PRINT_OOCH ('0', 1); + PRINT_OOCH ('#', 1); PRINT_OOCH ('x', 1); } } diff --git a/lib/logfmtv.h b/lib/logfmtv.h index 06bea0e..d7ac3e2 100644 --- a/lib/logfmtv.h +++ b/lib/logfmtv.h @@ -179,7 +179,7 @@ static int logfmtv (hcl_t* hcl, const fmtchar_t* fmt, hcl_fmtout_t* data, va_lis lm_flag = 0; lm_dflag = 0; flagc = 0; sprintn = sprintn_lower; -reswitch: + reswitch: switch (ch = *fmt++) { case '%': /* %% */ @@ -800,7 +800,7 @@ reswitch: #endif -handle_nosign: + handle_nosign: sign = 0; if (lm_flag & LF_J) { @@ -845,7 +845,7 @@ handle_nosign: num = va_arg (ap, unsigned int); goto number; -handle_sign: + handle_sign: if (lm_flag & LF_J) { #if defined(__GNUC__) && \ @@ -889,7 +889,7 @@ handle_sign: else num = va_arg (ap, int); -number: + number: if (sign && (hcl_intmax_t)num < 0) { neg = 1; @@ -899,8 +899,7 @@ number: nbufp = sprintn (nbuf, num, base, &tmp); if ((flagc & FLAGC_SHARP) && num != 0) { - if (base == 8) tmp++; - else if (base == 16) tmp += 2; + if (base == 2 || base == 8 || base == 16) tmp += 2; } if (neg) tmp++; else if (flagc & FLAGC_SIGN) tmp++; @@ -925,18 +924,20 @@ number: if ((flagc & FLAGC_SHARP) && num != 0) { + /* it follows the HCL's number notation, not C's */ if (base == 2) { - PUT_OOCH ('0', 1); + PUT_OOCH ('#', 1); PUT_OOCH ('b', 1); } if (base == 8) { - PUT_OOCH ('0', 1); + PUT_OOCH ('#', 1); + PUT_OOCH ('o', 1); } else if (base == 16) { - PUT_OOCH ('0', 1); + PUT_OOCH ('#', 1); PUT_OOCH ('x', 1); } } @@ -960,7 +961,7 @@ number: } break; -invalid_format: + invalid_format: #if defined(FMTCHAR_IS_OOCH) PUT_OOCS (percent, fmt - percent); #else diff --git a/lib/print.c b/lib/print.c index 10119ef..43bbffc 100644 --- a/lib/print.c +++ b/lib/print.c @@ -122,7 +122,7 @@ static HCL_INLINE int print_single_char (hcl_t* hcl, int mask, hcl_ooch_t ch, hc if (chu < ' ' || chu >= 0x80) #endif { - hcl_ooch_t escaped; + hcl_oochu_t escaped; switch (chu) { @@ -151,13 +151,31 @@ static HCL_INLINE int print_single_char (hcl_t* hcl, int mask, hcl_ooch_t ch, hc escaped = 'a'; break; default: - escaped = ch; + escaped = chu; break; } - if (escaped == ch) + if (escaped == chu) { - if (outbfmt(hcl, mask, "\\x%X", chu) <= -1) return -1; + #if (HCL_SIZEOF_OOCH_T >= 4) + if (chu >= 0x10000u) + { + if (outbfmt(hcl, mask, "\\U%X", chu) <= -1) return -1; + } + else + #endif + { + #if (HCL_SIZEOF_OOCH_T >= 2) + if (chu >= 0x100u) + { + if (outbfmt(hcl, mask, "\\u%X", chu) <= -1) return -1; + } + else + #endif + { + if (outbfmt(hcl, mask, "\\x%X", chu) <= -1) return -1; + } + } } else { diff --git a/lib/read.c b/lib/read.c index ee61c1d..085a0aa 100644 --- a/lib/read.c +++ b/lib/read.c @@ -421,7 +421,7 @@ static int get_char (hcl_t* hcl) if (n == 0) { return_eof: - hcl->c->curinp->lxc.c = HCL_UCI_EOF; + hcl->c->curinp->lxc.c = HCL_OOCI_EOF; hcl->c->curinp->lxc.l.line = hcl->c->curinp->line; hcl->c->curinp->lxc.l.colm = hcl->c->curinp->colm; hcl->c->curinp->lxc.l.file = hcl->c->curinp->name; @@ -490,7 +490,7 @@ static int skip_comment (hcl_t* hcl) do { GET_CHAR_TO (hcl, c); - if (c == HCL_UCI_EOF) + if (c == HCL_OOCI_EOF) { break; } @@ -526,7 +526,7 @@ static int get_string (hcl_t* hcl, hcl_ooch_t end_char, hcl_ooch_t esc_char, int { GET_CHAR_TO (hcl, c); - if (c == HCL_UCI_EOF) + if (c == HCL_OOCI_EOF) { hcl_setsynerr (hcl, HCL_SYNERR_STRCHRNC, TOKEN_LOC(hcl) /*LEXER_LOC(hcl)*/, HCL_NULL); return -1; @@ -536,6 +536,7 @@ static int get_string (hcl_t* hcl, hcl_ooch_t end_char, hcl_ooch_t esc_char, int { if (c >= '0' && c <= '7') { + /* more octal digits */ c_acc = c_acc * 8 + c - '0'; digit_count++; if (digit_count >= escaped) @@ -640,20 +641,24 @@ static int get_string (hcl_t* hcl, hcl_ooch_t end_char, hcl_ooch_t esc_char, int c_acc = 0; continue; } - else if (c == 'u' && HCL_SIZEOF(hcl_ooch_t) >= 2) + #if (HCL_SIZEOF_OOCH_T >= 2) + else if (c == 'u') { escaped = 4; digit_count = 0; c_acc = 0; continue; } - else if (c == 'U' && HCL_SIZEOF(hcl_ooch_t) >= 4) + #endif + #if (HCL_SIZEOF_OOCH_T >= 4) + else if (c == 'U') { escaped = 8; digit_count = 0; c_acc = 0; continue; } + #endif else if (regex) { /* if the following character doesn't compose a proper @@ -734,7 +739,6 @@ static int get_sharp_token (hcl_t* hcl) * #false * #include * #\C character - * #\XHHHH unicode character * #\xHHHH unicode character * #\UHHHH unicode character * #\uHHHH unicode character @@ -783,17 +787,20 @@ static int get_sharp_token (hcl_t* hcl) if (TOKEN_NAME_LEN(hcl) >= 4) { - if (TOKEN_NAME_CHAR(hcl, 2) == 'P' || TOKEN_NAME_CHAR(hcl, 2) == 'p') - { - SET_TOKEN_TYPE (hcl, HCL_IOTOK_SMPTRLIT); - goto hexcharlit; - } - else if (TOKEN_NAME_CHAR(hcl, 2) == 'X' || TOKEN_NAME_CHAR(hcl, 2) == 'x' || - TOKEN_NAME_CHAR(hcl, 2) == 'U' || TOKEN_NAME_CHAR(hcl, 2) == 'u') + int max_digit_count = 0; + + if (TOKEN_NAME_CHAR(hcl, 2) == 'x') { hcl_oow_t i; + max_digit_count = 2; hexcharlit: + if (TOKEN_NAME_LEN(hcl) - 3 > max_digit_count) + { + hcl_setsynerrbfmt (hcl, HCL_SYNERR_CHARLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl), + "invalid hexadecimal character in %.*js", TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl)); + return -1; + } c = 0; for (i = 3; i < TOKEN_NAME_LEN(hcl); i++) { @@ -803,9 +810,29 @@ static int get_sharp_token (hcl_t* hcl) "invalid hexadecimal character in %.*js", TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl)); return -1; } -/* TODO: check for the max charcter value and raise an error... */ + c = c * 16 + CHAR_TO_NUM(hcl->c->tok.name.ptr[i], 16); /* don't care if it is for 'p' */ } + + } + #if (HCL_SIZEOF_OOCH_T >= 2) + else if (TOKEN_NAME_CHAR(hcl, 2) == 'u') + { + max_digit_count = 4; + goto hexcharlit; + } + #endif + #if (HCL_SIZEOF_OOCH_T >= 4) + else if (TOKEN_NAME_CHAR(hcl, 2) == 'U') + { + max_digit_count = 8; + goto hexcharlit; + } + #endif + else if (TOKEN_NAME_CHAR(hcl, 2) == 'P' || TOKEN_NAME_CHAR(hcl, 2) == 'p') + { + SET_TOKEN_TYPE (hcl, HCL_IOTOK_SMPTRLIT); + goto hexcharlit; } else if (TOKEN_NAME_CHAR(hcl, 2) == 'E' || TOKEN_NAME_CHAR(hcl, 2) == 'e') { @@ -976,7 +1003,7 @@ retry: switch (c) { - case HCL_UCI_EOF: + case HCL_OOCI_EOF: { int n;