fixed a bug in handling the 'r' and 'a' commands

2011-09-11 10:14:38 +00:00
parent 3db2c566a2
commit 00e15a42e9
6 changed files with 226 additions and 118 deletions
--- a/qse/lib/cmn/tre-match-utils.h
+++ b/qse/lib/cmn/tre-match-utils.h
@ -129,31 +129,26 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 /* Wide character support, no multibyte support. */

 #define GET_NEXT_WCHAR()						      \
-  do {									      \
-    prev_c = next_c;							      \
-    if (type == STR_BYTE)						      \
-      {									      \
-	pos++;								      \
-	if (len >= 0 && pos >= len)					      \
-	  next_c = '\0';						      \
-	else								      \
-	  next_c = (unsigned char)(*str_byte++);			      \
+do {									      \
+	prev_c = next_c;							      \
+	if (type == STR_BYTE)						      \
+	{									      \
+		pos++;								      \
+		if (len >= 0 && pos >= len) next_c = QSE_MT('\0'); \
+		else	next_c = (unsigned char)(*str_byte++);		  \
      }									      \
-    else if (type == STR_WIDE)						      \
-      {									      \
-	pos++;								      \
-	if (len >= 0 && pos >= len)					      \
-	  next_c = QSE_T('\0');						      \
-	else								      \
-	  next_c = *str_wide++;						      \
+	else if (type == STR_WIDE)						      \
+	{									      \
+		pos++;								      \
+		if (len >= 0 && pos >= len) next_c = QSE_T('\0');	\
+		else next_c = *str_wide++;					\
      }									      \
-    else if (type == STR_USER)						      \
-      {									      \
-        pos += pos_add_next;					      	      \
-	str_user_end = str_source->get_next_char(&next_c, &pos_add_next,      \
-                                                 str_source->context);	      \
-      }									      \
-  } while(/*CONSTCOND*/0)
+	else if (type == STR_USER)						      \
+	{									      \
+		pos += pos_add_next;					      	      \
+		str_user_end = str_source->get_next_char(&next_c, &pos_add_next, str_source->context);	      \
+	}	\
+} while(/*CONSTCOND*/0)

 #endif /* !TRE_MULTIBYTE */

--- a/qse/lib/cmn/tre-parse.c
+++ b/qse/lib/cmn/tre-parse.c
@ -280,40 +280,40 @@ tre_parse_bracket_items(tre_parse_ctx_t *ctx, int negate,
 			else if (re + 1 < ctx->re_end
 			         && *re == CHAR_LBRACKET && *(re + 1) == CHAR_COLON)
 			{
-#if 0
-				char tmp_str[64];
-#endif
 				const tre_char_t *endptr = re + 2;
 				int len;
 				DPRINT(("tre_parse_bracket:  class: '%.*" STRF "'\n", REST(re)));
-				while (endptr < ctx->re_end && *endptr != CHAR_COLON)
-					endptr++;
+				while (endptr < ctx->re_end && *endptr != CHAR_COLON) endptr++;
 				if (endptr != ctx->re_end)
 				{
-					len = MIN(endptr - re - 2, 63);
-
-					if (qse_getctypebyxname (re + 2, len, &class) <= -1) status = REG_ECTYPE;
-
-					/* Optimize character classes for 8 bit character sets. */
-					if (status == REG_OK && TRE_MB_CUR_MAX == 1)
+					/* QSE: bug fix - reject a class name whose closing ':' is not followed by ']' */
+					if (*(endptr + 1) != CHAR_RBRACKET) status = REG_ECTYPE;
+					else
 					{
-						status = tre_expand_ctype(ctx->mem, class, items,
+					/* END QSE */
+						len = MIN(endptr - re - 2, 63);
+
+						if (qse_getctypebyxname (re + 2, len, &class) <= -1) status = REG_ECTYPE;
+
+						/* Optimize character classes for 8 bit character sets. */
+						if (status == REG_OK && TRE_MB_CUR_MAX == 1)
+						{
+							status = tre_expand_ctype(ctx->mem, class, items,
 						                          &i, &max_i, ctx->cflags);
-						class = (tre_ctype_t)0;
-						skip = 1;
+							class = (tre_ctype_t)0;
+							skip = 1;
+						}
+						re = endptr + 2;
 					}
-					re = endptr + 2;
 				}
-				else
-					status = REG_ECTYPE;
+				else status = REG_ECTYPE;
 				min = 0;
 				max = TRE_CHAR_MAX;
 			}
 			else
 			{
 				DPRINT(("tre_parse_bracket:   char: '%.*" STRF "'\n", REST(re)));
-				if (*re == CHAR_MINUS && *(re + 1) != CHAR_RBRACKET
-				        && ctx->re != re)
+				if (*re == CHAR_MINUS && *(re + 1) != CHAR_RBRACKET && ctx->re != re)
 					/* Two ranges are not allowed to share and endpoint. */
 					status = REG_ERANGE;
 				min = max = *re++;
--- a/qse/lib/cmn/tre.h
+++ b/qse/lib/cmn/tre.h
@ -55,6 +55,82 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #ifndef _QSE_LIB_CMN_TRE_H_
 #define _QSE_LIB_CMN_TRE_H_

+/* TODO: MAKE TRE WORK LIKE GNU
+
+PATTERN: \(.\{0,1\}\)\(~[^,]*\)\([0-9]\)\(\.*\),\([^;]*\)\(;\([^;]*\(\3[^;]*\)\).*X*\1\(.*\)\)
+INPUT: ~02.,3~3;0123456789;9876543210
+
+------------------------------------------------------
+samples/cmn/tre01 gives the following output. This does not seem wrong, though;
+it differs from the GNU result shown further below only in
+SUBMATCH[7], [8] and [9].
+
+SUBMATCH[0] = [~02.,3~3;0123456789;9876543210]
+SUBMATCH[1] = []
+SUBMATCH[2] = [~0]
+SUBMATCH[3] = [2]
+SUBMATCH[4] = [.]
+SUBMATCH[5] = [3~3]
+SUBMATCH[6] = [;0123456789;9876543210]
+SUBMATCH[7] = [012]
+SUBMATCH[8] = [2]
+SUBMATCH[9] = [3456789;9876543210]
+
+------------------------------------------------------
+
+Using the GNU regcomp() and regexec() via the small program below,
+the following is printed instead.
+
+#include <sys/types.h>
+#include <regex.h>
+#include <stdio.h>
+int main (int argc, char* argv[])
+{
+     regex_t tre;
+     regmatch_t mat[10];
+     int i;
+     regcomp (&tre, argv[1], 0);
+     regexec (&tre, argv[2], 10, mat, 0);
+     for (i = 0; i < 10; i++)
+     {
+          if (mat[i].rm_so == -1) break;
+          printf ("SUBMATCH[%u] = [%.*s]\n", i,
+               (int)(mat[i].rm_eo - mat[i].rm_so), &argv[2][mat[i].rm_so]);
+     }
+     regfree (&tre);
+     return 0;
+}
+
+SUBMATCH[0] = [~02.,3~3;0123456789;9876543210]
+SUBMATCH[1] = []
+SUBMATCH[2] = [~0]
+SUBMATCH[3] = [2]
+SUBMATCH[4] = [.]
+SUBMATCH[5] = [3~3]
+SUBMATCH[6] = [;0123456789;9876543210]
+SUBMATCH[7] = [0123456789]
+SUBMATCH[8] = [23456789]
+SUBMATCH[9] = []
+
+
+------------------------------------------------------
+One more example here:
+$ ./tre01 "\(x*\)ab\(\(c*\1\)\(.*\)\)" "abcdefg"
+Match: YES
+SUBMATCH[0] = [abcdefg]
+SUBMATCH[1] = []
+SUBMATCH[2] = [cdefg]
+SUBMATCH[3] = []
+SUBMATCH[4] = [cdefg]
+
+$ ./reg "\(x*\)ab\(\(c*\1\)\(.*\)\)" "abcdefg"
+SUBMATCH[0] = [abcdefg]
+SUBMATCH[1] = []
+SUBMATCH[2] = [cdefg]
+SUBMATCH[3] = [c]
+SUBMATCH[4] = [defg]
+*/
+
 #include <qse/cmn/tre.h>

 #ifdef QSE_CHAR_IS_WCHAR