fixed a bug in handling r and a command

This commit is contained in:
2011-09-11 10:14:38 +00:00
parent 3db2c566a2
commit 00e15a42e9
6 changed files with 226 additions and 118 deletions

View File

@ -129,31 +129,26 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
/* Wide character support, no multibyte support. */
#define GET_NEXT_WCHAR() \
do { \
prev_c = next_c; \
if (type == STR_BYTE) \
{ \
pos++; \
if (len >= 0 && pos >= len) \
next_c = '\0'; \
else \
next_c = (unsigned char)(*str_byte++); \
do { \
prev_c = next_c; \
if (type == STR_BYTE) \
{ \
pos++; \
if (len >= 0 && pos >= len) next_c = QSE_MT('\0'); \
else next_c = (unsigned char)(*str_byte++); \
} \
else if (type == STR_WIDE) \
{ \
pos++; \
if (len >= 0 && pos >= len) \
next_c = QSE_T('\0'); \
else \
next_c = *str_wide++; \
else if (type == STR_WIDE) \
{ \
pos++; \
if (len >= 0 && pos >= len) next_c = QSE_T('\0'); \
else next_c = *str_wide++; \
} \
else if (type == STR_USER) \
{ \
pos += pos_add_next; \
str_user_end = str_source->get_next_char(&next_c, &pos_add_next, \
str_source->context); \
} \
} while(/*CONSTCOND*/0)
else if (type == STR_USER) \
{ \
pos += pos_add_next; \
str_user_end = str_source->get_next_char(&next_c, &pos_add_next, str_source->context); \
} \
} while(/*CONSTCOND*/0)
#endif /* !TRE_MULTIBYTE */

View File

@ -280,40 +280,40 @@ tre_parse_bracket_items(tre_parse_ctx_t *ctx, int negate,
else if (re + 1 < ctx->re_end
&& *re == CHAR_LBRACKET && *(re + 1) == CHAR_COLON)
{
#if 0
char tmp_str[64];
#endif
const tre_char_t *endptr = re + 2;
int len;
DPRINT(("tre_parse_bracket: class: '%.*" STRF "'\n", REST(re)));
while (endptr < ctx->re_end && *endptr != CHAR_COLON)
endptr++;
while (endptr < ctx->re_end && *endptr != CHAR_COLON) endptr++;
if (endptr != ctx->re_end)
{
len = MIN(endptr - re - 2, 63);
if (qse_getctypebyxname (re + 2, len, &class) <= -1) status = REG_ECTYPE;
/* Optimize character classes for 8 bit character sets. */
if (status == REG_OK && TRE_MB_CUR_MAX == 1)
/* QSE: bug fix of not checking ending ] */
if (*(endptr + 1) != CHAR_RBRACKET) status = REG_ECTYPE;
else
{
status = tre_expand_ctype(ctx->mem, class, items,
/* END QSE */
len = MIN(endptr - re - 2, 63);
if (qse_getctypebyxname (re + 2, len, &class) <= -1) status = REG_ECTYPE;
/* Optimize character classes for 8 bit character sets. */
if (status == REG_OK && TRE_MB_CUR_MAX == 1)
{
status = tre_expand_ctype(ctx->mem, class, items,
&i, &max_i, ctx->cflags);
class = (tre_ctype_t)0;
skip = 1;
class = (tre_ctype_t)0;
skip = 1;
}
re = endptr + 2;
}
re = endptr + 2;
}
else
status = REG_ECTYPE;
else status = REG_ECTYPE;
min = 0;
max = TRE_CHAR_MAX;
}
else
{
DPRINT(("tre_parse_bracket: char: '%.*" STRF "'\n", REST(re)));
if (*re == CHAR_MINUS && *(re + 1) != CHAR_RBRACKET
&& ctx->re != re)
if (*re == CHAR_MINUS && *(re + 1) != CHAR_RBRACKET && ctx->re != re)
/* Two ranges are not allowed to share and endpoint. */
status = REG_ERANGE;
min = max = *re++;

View File

@ -55,6 +55,82 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef _QSE_LIB_CMN_TRE_H_
#define _QSE_LIB_CMN_TRE_H_
/* TODO: MAKE TRE WORK LIKE GNU
PATTERN: \(.\{0,1\}\)\(~[^,]*\)\([0-9]\)\(\.*\),\([^;]*\)\(;\([^;]*\(\3[^;]*\)\).*X*\1\(.*\)\)
INPUT: ~02.,3~3;0123456789;9876543210
------------------------------------------------------
samples/cmn/tre01 gives the following output. this does not seem wrong, though.
SUBMATCH[7],[8],[9].
SUBMATCH[0] = [~02.,3~3;0123456789;9876543210]
SUBMATCH[1] = []
SUBMATCH[2] = [~0]
SUBMATCH[3] = [2]
SUBMATCH[4] = [.]
SUBMATCH[5] = [3~3]
SUBMATCH[6] = [;0123456789;9876543210]
SUBMATCH[7] = [012]
SUBMATCH[8] = [2]
SUBMATCH[9] = [3456789;9876543210
------------------------------------------------------
Using the GNU regcomp(),regexec(), the following
is printed.
#include <sys/types.h>
#include <regex.h>
#include <stdio.h>
int main (int argc, char* argv[])
{
regex_t tre;
regmatch_t mat[10];
int i;
regcomp (&tre, argv[1], 0);
regexec (&tre, argv[2], 10, mat, 0);
for (i = 0; i < 10; i++)
{
if (mat[i].rm_so == -1) break;
printf ("SUBMATCH[%u] = [%.*s]\n", i,
(int)(mat[i].rm_eo - mat[i].rm_so), &argv[2][mat[i].rm_so]);
}
regfree (&tre);
return 0;
}
SUBMATCH[0] = [~02.,3~3;0123456789;9876543210]
SUBMATCH[1] = []
SUBMATCH[2] = [~0]
SUBMATCH[3] = [2]
SUBMATCH[4] = [.]
SUBMATCH[5] = [3~3]
SUBMATCH[6] = [;0123456789;9876543210]
SUBMATCH[7] = [0123456789]
SUBMATCH[8] = [23456789]
SUBMATCH[9] = []
------------------------------------------------------
One more example here:
$ ./tre01 "\(x*\)ab\(\(c*\1\)\(.*\)\)" "abcdefg"
Match: YES
SUBMATCH[0] = [abcdefg]
SUBMATCH[1] = []
SUBMATCH[2] = [cdefg]
SUBMATCH[3] = []
SUBMATCH[4] = [cdefg]
$ ./reg "\(x*\)ab\(\(c*\1\)\(.*\)\)" "abcdefg"
SUBMATCH[0] = [abcdefg]
SUBMATCH[1] = []
SUBMATCH[2] = [cdefg]
SUBMATCH[3] = [c]
SUBMATCH[4] = [defg]
*/
#include <qse/cmn/tre.h>
#ifdef QSE_CHAR_IS_WCHAR