changed the maximum number of digits after \x in an awk string
This commit is contained in:
		| @ -13,9 +13,9 @@ transforms them to an internal form for execution. | |||||||
|  |  | ||||||
| A QSEAWK program can be composed of the following elements at the top level. | A QSEAWK program can be composed of the following elements at the top level. | ||||||
|  |  | ||||||
|  - pattern-action blocks |  | ||||||
|  - *BEGIN* blocks |  - *BEGIN* blocks | ||||||
|  - *END* blocks |  - *END* blocks | ||||||
|  |  - pattern-action blocks | ||||||
|  - user-defined functions |  - user-defined functions | ||||||
|  - comments |  - comments | ||||||
|  - \@global variables |  - \@global variables | ||||||
| @ -107,17 +107,17 @@ point with a preceding number. | |||||||
|     34.56e # 34.56  |     34.56e # 34.56  | ||||||
|     34.56E3  |     34.56E3  | ||||||
|  |  | ||||||
| An integer can be prefixed with 0x, 0, 0b for a hexa-decimal number, an octal number, | An integer can be prefixed with 0x, 0, 0b for a hexa-decimal number, an octal  | ||||||
| and a binary number respectively. For a hexa-decimal number, letters from A to F | number, and a binary number respectively. For a hexa-decimal number, letters  | ||||||
| can form a number case-insensitively in addition to numeric digits. | from A to F can form a number case-insensitively in addition to numeric digits. | ||||||
|  |  | ||||||
|     0xA1   # 161 |     0xA1   # 161 | ||||||
|     0xB0b0 # 45232 |     0xB0b0 # 45232 | ||||||
|     020    # 16 |     020    # 16 | ||||||
|     0b101  # 5 |     0b101  # 5 | ||||||
|  |  | ||||||
| If the prefix is not followed by any numeric digits, it is still a valid token and | If the prefix is not followed by any numeric digits, it is still a valid token  | ||||||
| represents the value of 0. | and represents the value of 0. | ||||||
|  |  | ||||||
|     0x # 0x0 but not desirable. |     0x # 0x0 but not desirable. | ||||||
|     0b # 0b0 but not desirable. |     0b # 0b0 but not desirable. | ||||||
| @ -129,19 +129,42 @@ A string is enclosed in a pair of double quotes or single quotes. | |||||||
| A character in a string enclosed in double quotes, when preceded with  | A character in a string enclosed in double quotes, when preceded with  | ||||||
| a back-slash, changes the meaning.  | a back-slash, changes the meaning.  | ||||||
|  |  | ||||||
|  - \\ |  - \\a - alert | ||||||
|  - \a |  - \\b - backspace | ||||||
|  - \b |  - \\f - formfeed | ||||||
|  - \uXXXX |  - \\n - newline | ||||||
|  - \UXXXXXXXX |  - \\r - carriage return | ||||||
|  |  - \\t - horizontal tab | ||||||
|  |  - \\v - vertical tab | ||||||
|  |  - \\\\ - backslash | ||||||
|  |  - \\" - double quote | ||||||
|  |  | ||||||
| You can use \\u and \\U in a string to specify a character by unicode if   | You can specify a character with an octal number or a hexadecimal number. | ||||||
| [Character Type](@ref installation) chosen for building is the wide character | The actual value can range between 0 and 255 inclusive. | ||||||
| type. |  | ||||||
|  |  | ||||||
|     BEGIN {  |  - \\OOO - O is an octal digit.   | ||||||
|         print "\uC720\uB2C8\uCF54\uB4DC \U00007D71\U00004E00\U000078BC";  |  - \\xXX - X is a hexadecimal digit.  | ||||||
|     } |  | ||||||
|  | In the octal sequence, you can specify up to 3 octal digits after \\; in the  | ||||||
|  | hexadecimal sequence, you can specify any number of hexadecimal digits  | ||||||
|  | after \\x.   | ||||||
|  |  | ||||||
|  | If the number doesn't fit in the range that the default character type  | ||||||
|  | can represent, the character generated from the sequence is undefined. | ||||||
|  |  | ||||||
|  | You can use \\u and \\U in a string to specify a character by a Unicode code  | ||||||
|  | point if  [Character Type](@ref installation) chosen for building is the  | ||||||
|  | wide character type. | ||||||
|  |  | ||||||
|  |  - \\uXXXX - X is a hexadecimal digit. | ||||||
|  |  - \\UXXXXXXXX - X is a hexadecimal digit. | ||||||
|  |  | ||||||
|  |  | ||||||
|  | ~~~~~{.awk} | ||||||
|  |  BEGIN {  | ||||||
|  |    print "\uC720\uB2C8\uCF54\uB4DC \U00007D71\U00004E00\U000078BC";  | ||||||
|  |  } | ||||||
|  | ~~~~~ | ||||||
|  |  | ||||||
| This program should print 유니코드 統一碼. | This program should print 유니코드 統一碼. | ||||||
|  |  | ||||||
|  | |||||||
| @ -5415,11 +5415,11 @@ static int get_number (qse_awk_t* awk, qse_awk_tok_t* tok) | |||||||
| static int get_string ( | static int get_string ( | ||||||
| 	qse_awk_t* awk, qse_char_t end_char,  | 	qse_awk_t* awk, qse_char_t end_char,  | ||||||
| 	qse_char_t esc_char, int keep_esc_char, | 	qse_char_t esc_char, int keep_esc_char, | ||||||
| 	int preescaped, qse_awk_tok_t* tok) | 	qse_size_t preescaped, qse_awk_tok_t* tok) | ||||||
| { | { | ||||||
| 	qse_cint_t c; | 	qse_cint_t c; | ||||||
| 	int escaped = preescaped; | 	qse_size_t escaped = preescaped; | ||||||
| 	int digit_count = 0; | 	qse_size_t digit_count = 0; | ||||||
| 	qse_cint_t c_acc = 0; | 	qse_cint_t c_acc = 0; | ||||||
|  |  | ||||||
| 	while (1) | 	while (1) | ||||||
| @ -5440,6 +5440,8 @@ static int get_string ( | |||||||
| 				digit_count++; | 				digit_count++; | ||||||
| 				if (digit_count >= escaped)  | 				if (digit_count >= escaped)  | ||||||
| 				{ | 				{ | ||||||
|  | 					/* should i limit the max to 0xFF/0377?  | ||||||
|  | 					 * if (c_acc > 0377) c_acc = 0377;*/ | ||||||
| 					ADD_TOKEN_CHAR (awk, tok, c_acc); | 					ADD_TOKEN_CHAR (awk, tok, c_acc); | ||||||
| 					escaped = 0; | 					escaped = 0; | ||||||
| 				} | 				} | ||||||
| @ -5451,7 +5453,7 @@ static int get_string ( | |||||||
| 				escaped = 0; | 				escaped = 0; | ||||||
| 			} | 			} | ||||||
| 		} | 		} | ||||||
| 		else if (escaped == 2 || escaped == 4 || escaped == 8) | 		else if (escaped == QSE_TYPE_MAX(qse_size_t) || escaped == 4 || escaped == 8) | ||||||
| 		{ | 		{ | ||||||
| 			if (c >= QSE_T('0') && c <= QSE_T('9')) | 			if (c >= QSE_T('0') && c <= QSE_T('9')) | ||||||
| 			{ | 			{ | ||||||
| @ -5490,9 +5492,8 @@ static int get_string ( | |||||||
| 			{ | 			{ | ||||||
| 				qse_char_t rc; | 				qse_char_t rc; | ||||||
|  |  | ||||||
| 				rc = (escaped == 2)? QSE_T('x'): | 				rc = (escaped == QSE_TYPE_MAX(qse_size_t))? QSE_T('x'): | ||||||
| 				     (escaped == 4)? QSE_T('u'): QSE_T('U'); | 				     (escaped == 4)? QSE_T('u'): QSE_T('U'); | ||||||
|  |  | ||||||
| 				if (digit_count == 0)  | 				if (digit_count == 0)  | ||||||
| 					ADD_TOKEN_CHAR (awk, tok, rc); | 					ADD_TOKEN_CHAR (awk, tok, rc); | ||||||
| 				else ADD_TOKEN_CHAR (awk, tok, c_acc); | 				else ADD_TOKEN_CHAR (awk, tok, c_acc); | ||||||
| @ -5533,12 +5534,12 @@ static int get_string ( | |||||||
| 			} | 			} | ||||||
| 			else if (c == QSE_T('x'))  | 			else if (c == QSE_T('x'))  | ||||||
| 			{ | 			{ | ||||||
| 				escaped = 2; | 				escaped = QSE_TYPE_MAX(qse_size_t); | ||||||
| 				digit_count = 0; | 				digit_count = 0; | ||||||
| 				c_acc = 0; | 				c_acc = 0; | ||||||
| 				continue; | 				continue; | ||||||
| 			} | 			} | ||||||
| 		#ifdef QSE_CHAR_IS_WCHAR | 		#if defined(QSE_CHAR_IS_WCHAR) | ||||||
| 			else if (c == QSE_T('u') && QSE_SIZEOF(qse_char_t) >= 2)  | 			else if (c == QSE_T('u') && QSE_SIZEOF(qse_char_t) >= 2)  | ||||||
| 			{ | 			{ | ||||||
| 				escaped = 4; | 				escaped = 4; | ||||||
|  | |||||||
		Reference in New Issue
	
	Block a user