changed the number of maximum digits after \x in an awk string
This commit is contained in:
parent
4191538b24
commit
164b85a6f9
@ -13,9 +13,9 @@ transforms them to an internal form for execution.
|
||||
|
||||
A QSEAWK program can be composed of the following elements at the top level.
|
||||
|
||||
- pattern-action blocks
|
||||
- *BEGIN* blocks
|
||||
- *END* blocks
|
||||
- pattern-action blocks
|
||||
- user-defined functions
|
||||
- comments
|
||||
- \@global variables
|
||||
@ -107,17 +107,17 @@ point with a preceding number.
|
||||
34.56e # 34.56
|
||||
34.56E3
|
||||
|
||||
An integer can be prefixed with 0x, 0, 0b for a hexa-decimal number, an octal number,
|
||||
and a binary number respectively. For a hexa-decimal number, letters from A to F
|
||||
can form a number case-insensitively in addition to numeric digits.
|
||||
An integer can be prefixed with 0x, 0, 0b for a hexa-decimal number, an octal
|
||||
number, and a binary number respectively. For a hexa-decimal number, letters
|
||||
from A to F can form a number case-insensitively in addition to numeric digits.
|
||||
|
||||
0xA1 # 161
|
||||
0xB0b0 # 45232
|
||||
020 # 16
|
||||
0b101 # 5
|
||||
|
||||
If the prefix is not followed by any numeric digits, it is still a valid token and
|
||||
represents the value of 0.
|
||||
If the prefix is not followed by any numeric digits, it is still a valid token
|
||||
and represents the value of 0.
|
||||
|
||||
0x # 0x0 but not desirable.
|
||||
0b # 0b0 but not desirable.
|
||||
@ -127,21 +127,44 @@ represents the value of 0.
|
||||
A string is enclosed in a pair of double quotes or single quotes.
|
||||
|
||||
A character in a string enclosed in double quotes, when preceded with
|
||||
a back-slash, changes the meaning.
|
||||
a back-slash, changes the meaning.
|
||||
|
||||
- \\
|
||||
- \a
|
||||
- \b
|
||||
- \uXXXX
|
||||
- \UXXXXXXXX
|
||||
- \\a - alert
|
||||
- \\b - backspace
|
||||
- \\f - formfeed
|
||||
- \\n - newline
|
||||
- \\r - carriage return
|
||||
- \\t - horizontal tab
|
||||
- \\v - vertical tab
|
||||
- \\\\ - backslash
|
||||
- \\" - double quote
|
||||
|
||||
You can use \\u and \\U in a string to specify a character by unicode if
|
||||
[Character Type](@ref installation) chosen for building is the wide character
|
||||
type.
|
||||
You can specify a character with an octal number or a hexadecimal number.
|
||||
The actual value can range between 0 and 255 inclusive.
|
||||
|
||||
BEGIN {
|
||||
print "\uC720\uB2C8\uCF54\uB4DC \U00007D71\U00004E00\U000078BC";
|
||||
}
|
||||
- \\OOO - O is an octal digit.
|
||||
- \\xXX - X is a hexadecimal digit.
|
||||
|
||||
In the octal sequence, you can specify up to 3 octal digits after \\. In the
|
||||
hexadecimal sequence, you can specify any number of hexadecimal digits
|
||||
after \\x.
|
||||
|
||||
If the number doesn't fit in the range that the default character type
|
||||
can represent, the character generated from the sequence is undefined.
|
||||
|
||||
You can use \\u and \\U in a string to specify a character by a Unicode code
|
||||
point if [Character Type](@ref installation) chosen for building is the
|
||||
wide character type.
|
||||
|
||||
- \\uXXXX - X is a hexadecimal digit.
|
||||
- \\UXXXXXXXX - X is a hexadecimal digit.
|
||||
|
||||
|
||||
~~~~~{.awk}
|
||||
BEGIN {
|
||||
print "\uC720\uB2C8\uCF54\uB4DC \U00007D71\U00004E00\U000078BC";
|
||||
}
|
||||
~~~~~
|
||||
|
||||
This program should print 유니코드 統一碼.
|
||||
|
||||
|
@ -5415,11 +5415,11 @@ static int get_number (qse_awk_t* awk, qse_awk_tok_t* tok)
|
||||
static int get_string (
|
||||
qse_awk_t* awk, qse_char_t end_char,
|
||||
qse_char_t esc_char, int keep_esc_char,
|
||||
int preescaped, qse_awk_tok_t* tok)
|
||||
qse_size_t preescaped, qse_awk_tok_t* tok)
|
||||
{
|
||||
qse_cint_t c;
|
||||
int escaped = preescaped;
|
||||
int digit_count = 0;
|
||||
qse_size_t escaped = preescaped;
|
||||
qse_size_t digit_count = 0;
|
||||
qse_cint_t c_acc = 0;
|
||||
|
||||
while (1)
|
||||
@ -5440,6 +5440,8 @@ static int get_string (
|
||||
digit_count++;
|
||||
if (digit_count >= escaped)
|
||||
{
|
||||
/* should i limit the max to 0xFF/0377?
|
||||
* if (c_acc > 0377) c_acc = 0377;*/
|
||||
ADD_TOKEN_CHAR (awk, tok, c_acc);
|
||||
escaped = 0;
|
||||
}
|
||||
@ -5451,7 +5453,7 @@ static int get_string (
|
||||
escaped = 0;
|
||||
}
|
||||
}
|
||||
else if (escaped == 2 || escaped == 4 || escaped == 8)
|
||||
else if (escaped == QSE_TYPE_MAX(qse_size_t) || escaped == 4 || escaped == 8)
|
||||
{
|
||||
if (c >= QSE_T('0') && c <= QSE_T('9'))
|
||||
{
|
||||
@ -5490,9 +5492,8 @@ static int get_string (
|
||||
{
|
||||
qse_char_t rc;
|
||||
|
||||
rc = (escaped == 2)? QSE_T('x'):
|
||||
rc = (escaped == QSE_TYPE_MAX(qse_size_t))? QSE_T('x'):
|
||||
(escaped == 4)? QSE_T('u'): QSE_T('U');
|
||||
|
||||
if (digit_count == 0)
|
||||
ADD_TOKEN_CHAR (awk, tok, rc);
|
||||
else ADD_TOKEN_CHAR (awk, tok, c_acc);
|
||||
@ -5533,12 +5534,12 @@ static int get_string (
|
||||
}
|
||||
else if (c == QSE_T('x'))
|
||||
{
|
||||
escaped = 2;
|
||||
escaped = QSE_TYPE_MAX(qse_size_t);
|
||||
digit_count = 0;
|
||||
c_acc = 0;
|
||||
continue;
|
||||
}
|
||||
#ifdef QSE_CHAR_IS_WCHAR
|
||||
#if defined(QSE_CHAR_IS_WCHAR)
|
||||
else if (c == QSE_T('u') && QSE_SIZEOF(qse_char_t) >= 2)
|
||||
{
|
||||
escaped = 4;
|
||||
|
Loading…
Reference in New Issue
Block a user