Changed the maximum number of digits allowed after \x in an awk string
This commit is contained in:
parent
4191538b24
commit
164b85a6f9
@ -13,9 +13,9 @@ transforms them to an internal form for execution.
|
|||||||
|
|
||||||
A QSEAWK program can be composed of the following elements at the top level.
|
A QSEAWK program can be composed of the following elements at the top level.
|
||||||
|
|
||||||
- pattern-action blocks
|
|
||||||
- *BEGIN* blocks
|
- *BEGIN* blocks
|
||||||
- *END* blocks
|
- *END* blocks
|
||||||
|
- pattern-action blocks
|
||||||
- user-defined functions
|
- user-defined functions
|
||||||
- comments
|
- comments
|
||||||
- \@global variables
|
- \@global variables
|
||||||
@ -107,17 +107,17 @@ point with a preceding number.
|
|||||||
34.56e # 34.56
|
34.56e # 34.56
|
||||||
34.56E3
|
34.56E3
|
||||||
|
|
||||||
An integer can be prefixed with 0x, 0, 0b for a hexa-decimal number, an octal number,
|
An integer can be prefixed with 0x, 0, 0b for a hexa-decimal number, an octal
|
||||||
and a binary number respectively. For a hexa-decimal number, letters from A to F
|
number, and a binary number respectively. For a hexa-decimal number, letters
|
||||||
can form a number case-insensitively in addition to numeric digits.
|
from A to F can form a number case-insensitively in addition to numeric digits.
|
||||||
|
|
||||||
0xA1 # 161
|
0xA1 # 161
|
||||||
0xB0b0 # 45232
|
0xB0b0 # 45232
|
||||||
020 # 16
|
020 # 16
|
||||||
0b101 # 5
|
0b101 # 5
|
||||||
|
|
||||||
If the prefix is not followed by any numeric digits, it is still a valid token and
|
If the prefix is not followed by any numeric digits, it is still a valid token
|
||||||
represents the value of 0.
|
and represents the value of 0.
|
||||||
|
|
||||||
0x # 0x0 but not desirable.
|
0x # 0x0 but not desirable.
|
||||||
0b # 0b0 but not desirable.
|
0b # 0b0 but not desirable.
|
||||||
@ -129,19 +129,42 @@ A string is enclosed in a pair of double quotes or single quotes.
|
|||||||
A character in a string enclosed in double quotes, when preceded with
|
A character in a string enclosed in double quotes, when preceded with
|
||||||
a back-slash, changes the meaning.
|
a back-slash, changes the meaning.
|
||||||
|
|
||||||
- \\
|
- \\a - alert
|
||||||
- \a
|
- \\b - backspace
|
||||||
- \b
|
- \\f - formfeed
|
||||||
- \uXXXX
|
- \\n - newline
|
||||||
- \UXXXXXXXX
|
- \\r - carriage return
|
||||||
|
- \\t - horizontal tab
|
||||||
|
- \\v - vertical tab
|
||||||
|
- \\\\ - backslash
|
||||||
|
- \\" - double quote
|
||||||
|
|
||||||
You can use \\u and \\U in a string to specify a character by unicode if
|
You can specify a character with an octal number or a hexadecimal number.
|
||||||
[Character Type](@ref installation) chosen for building is the wide character
|
The actual value can range between 0 and 255 inclusive.
|
||||||
type.
|
|
||||||
|
|
||||||
|
- \\OOO - O is an octal digit.
|
||||||
|
- \\xXX - X is a hexadecimal digit.
|
||||||
|
|
||||||
|
In the octal sequence, you can specify up to 3 octal digits after \\; in the
|
||||||
|
hexadecimal sequence, you can specify any number of hexadecimal digits
|
||||||
|
after \\x.
|
||||||
|
|
||||||
|
If the number doesn't fit in the range that the default character type
|
||||||
|
can represent, the character generated from the sequence is undefined.
|
||||||
|
|
||||||
|
You can use \\u and \\U in a string to specify a character by a Unicode code
|
||||||
|
point if [Character Type](@ref installation) chosen for building is the
|
||||||
|
wide character type.
|
||||||
|
|
||||||
|
- \\uXXXX - X is a hexadecimal digit.
|
||||||
|
- \\UXXXXXXXX - X is a hexadecimal digit.
|
||||||
|
|
||||||
|
|
||||||
|
~~~~~{.awk}
|
||||||
BEGIN {
|
BEGIN {
|
||||||
print "\uC720\uB2C8\uCF54\uB4DC \U00007D71\U00004E00\U000078BC";
|
print "\uC720\uB2C8\uCF54\uB4DC \U00007D71\U00004E00\U000078BC";
|
||||||
}
|
}
|
||||||
|
~~~~~
|
||||||
|
|
||||||
This program should print 유니코드 統一碼.
|
This program should print 유니코드 統一碼.
|
||||||
|
|
||||||
|
@ -5415,11 +5415,11 @@ static int get_number (qse_awk_t* awk, qse_awk_tok_t* tok)
|
|||||||
static int get_string (
|
static int get_string (
|
||||||
qse_awk_t* awk, qse_char_t end_char,
|
qse_awk_t* awk, qse_char_t end_char,
|
||||||
qse_char_t esc_char, int keep_esc_char,
|
qse_char_t esc_char, int keep_esc_char,
|
||||||
int preescaped, qse_awk_tok_t* tok)
|
qse_size_t preescaped, qse_awk_tok_t* tok)
|
||||||
{
|
{
|
||||||
qse_cint_t c;
|
qse_cint_t c;
|
||||||
int escaped = preescaped;
|
qse_size_t escaped = preescaped;
|
||||||
int digit_count = 0;
|
qse_size_t digit_count = 0;
|
||||||
qse_cint_t c_acc = 0;
|
qse_cint_t c_acc = 0;
|
||||||
|
|
||||||
while (1)
|
while (1)
|
||||||
@ -5440,6 +5440,8 @@ static int get_string (
|
|||||||
digit_count++;
|
digit_count++;
|
||||||
if (digit_count >= escaped)
|
if (digit_count >= escaped)
|
||||||
{
|
{
|
||||||
|
/* should i limit the max to 0xFF/0377?
|
||||||
|
* if (c_acc > 0377) c_acc = 0377;*/
|
||||||
ADD_TOKEN_CHAR (awk, tok, c_acc);
|
ADD_TOKEN_CHAR (awk, tok, c_acc);
|
||||||
escaped = 0;
|
escaped = 0;
|
||||||
}
|
}
|
||||||
@ -5451,7 +5453,7 @@ static int get_string (
|
|||||||
escaped = 0;
|
escaped = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (escaped == 2 || escaped == 4 || escaped == 8)
|
else if (escaped == QSE_TYPE_MAX(qse_size_t) || escaped == 4 || escaped == 8)
|
||||||
{
|
{
|
||||||
if (c >= QSE_T('0') && c <= QSE_T('9'))
|
if (c >= QSE_T('0') && c <= QSE_T('9'))
|
||||||
{
|
{
|
||||||
@ -5490,9 +5492,8 @@ static int get_string (
|
|||||||
{
|
{
|
||||||
qse_char_t rc;
|
qse_char_t rc;
|
||||||
|
|
||||||
rc = (escaped == 2)? QSE_T('x'):
|
rc = (escaped == QSE_TYPE_MAX(qse_size_t))? QSE_T('x'):
|
||||||
(escaped == 4)? QSE_T('u'): QSE_T('U');
|
(escaped == 4)? QSE_T('u'): QSE_T('U');
|
||||||
|
|
||||||
if (digit_count == 0)
|
if (digit_count == 0)
|
||||||
ADD_TOKEN_CHAR (awk, tok, rc);
|
ADD_TOKEN_CHAR (awk, tok, rc);
|
||||||
else ADD_TOKEN_CHAR (awk, tok, c_acc);
|
else ADD_TOKEN_CHAR (awk, tok, c_acc);
|
||||||
@ -5533,12 +5534,12 @@ static int get_string (
|
|||||||
}
|
}
|
||||||
else if (c == QSE_T('x'))
|
else if (c == QSE_T('x'))
|
||||||
{
|
{
|
||||||
escaped = 2;
|
escaped = QSE_TYPE_MAX(qse_size_t);
|
||||||
digit_count = 0;
|
digit_count = 0;
|
||||||
c_acc = 0;
|
c_acc = 0;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
#ifdef QSE_CHAR_IS_WCHAR
|
#if defined(QSE_CHAR_IS_WCHAR)
|
||||||
else if (c == QSE_T('u') && QSE_SIZEOF(qse_char_t) >= 2)
|
else if (c == QSE_T('u') && QSE_SIZEOF(qse_char_t) >= 2)
|
||||||
{
|
{
|
||||||
escaped = 4;
|
escaped = 4;
|
||||||
|
Loading…
Reference in New Issue
Block a user