changed the number of maximum digits after \x in an awk string

This commit is contained in:
hyung-hwan 2013-01-19 16:21:32 +00:00
parent 4191538b24
commit 164b85a6f9
2 changed files with 50 additions and 26 deletions

View File

@ -13,9 +13,9 @@ tranforms them to an internal form for execution.
A QSEAWK program can be composed of the following elements at the top level. A QSEAWK program can be composed of the following elements at the top level.
- pattern-action blocks
- *BEGIN* blocks - *BEGIN* blocks
- *END* blocks - *END* blocks
- pattern-action blocks
- user-defined functions - user-defined functions
- comments - comments
- \@global variables - \@global variables
@ -107,17 +107,17 @@ point with a preceeding number.
34.56e # 34.56 34.56e # 34.56
34.56E3 34.56E3
An integer can be prefixed with 0x, 0, 0b for a hexa-decimal number, an octal number, An integer can be prefixed with 0x, 0, 0b for a hexa-decimal number, an octal
and a binary number respectively. For a hexa-decimal number, letters from A to F number, and a binary number respectively. For a hexa-decimal number, letters
can form a number case-insensitively in addition to numeric digits. from A to F can form a number case-insensitively in addition to numeric digits.
0xA1 # 161 0xA1 # 161
0xB0b0 # 45232 0xB0b0 # 45232
020 # 16 020 # 16
0b101 # 5 0b101 # 5
If the prefix is not followed by any numeric digits, it is still a valid token and If the prefix is not followed by any numeric digits, it is still a valid token
represents the value of 0. and represents the value of 0.
0x # 0x0 but not desirable. 0x # 0x0 but not desirable.
0b # 0b0 but not desirable. 0b # 0b0 but not desirable.
@ -129,19 +129,42 @@ A string is enclosed in a pair of double quotes or single quotes.
A character in a string enclosed in the double-quotes, when preceded with A character in a string enclosed in the double-quotes, when preceded with
a back-slash, changes the meaning. a back-slash, changes the meaning.
- \\ - \\a - alert
- \a - \\b - backspace
- \b - \\f - formfeed
- \uXXXX - \\n - newline
- \UXXXXXXXX - \\r - carriage return
- \\t - horizontal tab
- \\v - vertical tab
- \\\\ - backslash
- \\" - double quote
You can use \\u and \\U in a string to specify a character by unicode if You can specify a character with an octal number or a hexadecimal number.
[Character Type](@ref installation) chosen for building is the wide character The actual value can range between 0 and 255 inclusive.
type.
BEGIN { - \\OOO - O is an octal digit.
print "\uC720\uB2C8\uCF54\uB4DC \U00007D71\U00004E00\U000078BC"; - \\xXX - X is a hexadecimal digit.
}
In the octal sequence, you can specify up to 3 octal digits after \\; in the
hexadecimal sequence, you can specify as many hexadecimal digits as you like
after \\x.
If the number doesn't fit in the range that the default character type
can represent, the character generated from the sequence is undefined.
You can use \\u and \\U in a string to specify a character by a Unicode code
point if [Character Type](@ref installation) chosen for building is the
wide character type.
- \\uXXXX - X is a hexadecimal digit.
- \\UXXXXXXXX - X is a hexadecimal digit.
~~~~~{.awk}
BEGIN {
print "\uC720\uB2C8\uCF54\uB4DC \U00007D71\U00004E00\U000078BC";
}
~~~~~
This program should print 유니코드 統一碼. This program should print 유니코드 統一碼.

View File

@ -5415,11 +5415,11 @@ static int get_number (qse_awk_t* awk, qse_awk_tok_t* tok)
static int get_string ( static int get_string (
qse_awk_t* awk, qse_char_t end_char, qse_awk_t* awk, qse_char_t end_char,
qse_char_t esc_char, int keep_esc_char, qse_char_t esc_char, int keep_esc_char,
int preescaped, qse_awk_tok_t* tok) qse_size_t preescaped, qse_awk_tok_t* tok)
{ {
qse_cint_t c; qse_cint_t c;
int escaped = preescaped; qse_size_t escaped = preescaped;
int digit_count = 0; qse_size_t digit_count = 0;
qse_cint_t c_acc = 0; qse_cint_t c_acc = 0;
while (1) while (1)
@ -5440,6 +5440,8 @@ static int get_string (
digit_count++; digit_count++;
if (digit_count >= escaped) if (digit_count >= escaped)
{ {
/* should i limit the max to 0xFF/0377?
* if (c_acc > 0377) c_acc = 0377;*/
ADD_TOKEN_CHAR (awk, tok, c_acc); ADD_TOKEN_CHAR (awk, tok, c_acc);
escaped = 0; escaped = 0;
} }
@ -5451,7 +5453,7 @@ static int get_string (
escaped = 0; escaped = 0;
} }
} }
else if (escaped == 2 || escaped == 4 || escaped == 8) else if (escaped == QSE_TYPE_MAX(qse_size_t) || escaped == 4 || escaped == 8)
{ {
if (c >= QSE_T('0') && c <= QSE_T('9')) if (c >= QSE_T('0') && c <= QSE_T('9'))
{ {
@ -5490,9 +5492,8 @@ static int get_string (
{ {
qse_char_t rc; qse_char_t rc;
rc = (escaped == 2)? QSE_T('x'): rc = (escaped == QSE_TYPE_MAX(qse_size_t))? QSE_T('x'):
(escaped == 4)? QSE_T('u'): QSE_T('U'); (escaped == 4)? QSE_T('u'): QSE_T('U');
if (digit_count == 0) if (digit_count == 0)
ADD_TOKEN_CHAR (awk, tok, rc); ADD_TOKEN_CHAR (awk, tok, rc);
else ADD_TOKEN_CHAR (awk, tok, c_acc); else ADD_TOKEN_CHAR (awk, tok, c_acc);
@ -5533,12 +5534,12 @@ static int get_string (
} }
else if (c == QSE_T('x')) else if (c == QSE_T('x'))
{ {
escaped = 2; escaped = QSE_TYPE_MAX(qse_size_t);
digit_count = 0; digit_count = 0;
c_acc = 0; c_acc = 0;
continue; continue;
} }
#ifdef QSE_CHAR_IS_WCHAR #if defined(QSE_CHAR_IS_WCHAR)
else if (c == QSE_T('u') && QSE_SIZEOF(qse_char_t) >= 2) else if (c == QSE_T('u') && QSE_SIZEOF(qse_char_t) >= 2)
{ {
escaped = 4; escaped = 4;