fixed a bug in handling a regular expression starting with a backslash.
- a regular expression like /\// could not be handled properly without this fix
This commit is contained in:
		| @ -1,5 +1,5 @@ | ||||
| /* | ||||
|  * $Id: err.c 75 2009-02-22 14:10:34Z hyunghwan.chung $ | ||||
|  * $Id: err.c 113 2009-03-25 14:53:10Z hyunghwan.chung $ | ||||
|  * | ||||
|    Copyright 2006-2009 Chung, Hyung-Hwan. | ||||
|  | ||||
| @ -65,7 +65,7 @@ static const qse_char_t* __geterrstr (int errnum) | ||||
|  | ||||
| 		QSE_T("unexpected end of source"), | ||||
| 		QSE_T("a comment not closed properly"), | ||||
| 		QSE_T("a string not closed with a quote"), | ||||
| 		QSE_T("a string or a regular expression not closed"), | ||||
| 		QSE_T("unexpected end of a regular expression"), | ||||
| 		QSE_T("a left brace expected in place of '${0}'"), | ||||
| 		QSE_T("a left parenthesis expected in place of '${0}'"), | ||||
|  | ||||
| @ -1,5 +1,5 @@ | ||||
| /* | ||||
|  * $Id: parse.c 85 2009-02-26 10:56:12Z hyunghwan.chung $ | ||||
|  * $Id: parse.c 113 2009-03-25 14:53:10Z hyunghwan.chung $ | ||||
|  * | ||||
|    Copyright 2006-2009 Chung, Hyung-Hwan. | ||||
|  | ||||
| @ -220,7 +220,7 @@ static int get_charstr (qse_awk_t* awk); | ||||
| static int get_rexstr (qse_awk_t* awk); | ||||
| static int get_string ( | ||||
| 	qse_awk_t* awk, qse_char_t end_char, | ||||
| 	qse_char_t esc_char, qse_bool_t keep_esc_char); | ||||
| 	qse_char_t esc_char, qse_bool_t keep_esc_char, int preescaped); | ||||
| static int get_char (qse_awk_t* awk); | ||||
| static int unget_char (qse_awk_t* awk, qse_cint_t c); | ||||
| static int skip_spaces (qse_awk_t* awk); | ||||
| @ -2930,7 +2930,9 @@ static qse_awk_nde_t* parse_primary (qse_awk_t* awk, qse_size_t line) | ||||
| 		int errnum; | ||||
|  | ||||
| 		/* the regular expression is tokenized here because  | ||||
| 		 * of the context-sensitivity of the slash symbol */ | ||||
| 		 * of the context-sensitivity of the slash symbol. | ||||
| 		 * if TOKEN_DIV is seen as a primary, it tries to compile | ||||
| 		 * it as a regular expression */ | ||||
| 		SET_TOKEN_TYPE (awk, TOKEN_REX); | ||||
|  | ||||
| 		qse_str_clear (awk->token.name); | ||||
| @ -4567,7 +4569,6 @@ static qse_awk_nde_t* parse_print (qse_awk_t* awk, qse_size_t line, int type) | ||||
| 	return (qse_awk_nde_t*)nde; | ||||
| } | ||||
|  | ||||
|  | ||||
| static int get_token (qse_awk_t* awk) | ||||
| { | ||||
| 	qse_cint_t c; | ||||
| @ -5091,7 +5092,7 @@ static int get_charstr (qse_awk_t* awk) | ||||
| 		 * has been called */ | ||||
| 		ADD_TOKEN_CHAR (awk, awk->src.lex.curc); | ||||
| 	} | ||||
| 	return get_string (awk, QSE_T('\"'), QSE_T('\\'), QSE_FALSE); | ||||
| 	return get_string (awk, QSE_T('\"'), QSE_T('\\'), QSE_FALSE, 0); | ||||
| } | ||||
|  | ||||
| static int get_rexstr (qse_awk_t* awk) | ||||
| @ -5099,23 +5100,44 @@ static int get_rexstr (qse_awk_t* awk) | ||||
| 	if (awk->src.lex.curc == QSE_T('/'))  | ||||
| 	{ | ||||
| 		/* this part of the function is different from get_charstr | ||||
| 		 * because of the way this function is called */ | ||||
| 		 * because of the way this function is called.  | ||||
| 		 * this condition is met when the input is //. | ||||
| 		 * the first / has been tokenized to TOKEN_DIV already. | ||||
| 		 * if TOKEN_DIV is seen as a primary, this function is called. | ||||
| 		 * as the token buffer has been cleared by the caller and | ||||
| 		 * the token type is set to TOKEN_REX, this function can | ||||
| 		 * just return after reading the next character */ | ||||
| 		GET_CHAR (awk); | ||||
| 		return 0; | ||||
| 	} | ||||
| 	else  | ||||
| 	{ | ||||
| 		ADD_TOKEN_CHAR (awk, awk->src.lex.curc); | ||||
| 		return get_string (awk, QSE_T('/'), QSE_T('\\'), QSE_TRUE); | ||||
| 		int escaped = 0; | ||||
| 		if (awk->src.lex.curc == QSE_T('\\'))  | ||||
| 		{		 | ||||
| 			/* for input like /\//, this condition is met.  | ||||
| 			 * the initial escape character is added when the | ||||
| 			 * second character is handled in get_string() */ | ||||
| 			escaped = 1; | ||||
| 		} | ||||
| 		else  | ||||
| 		{ | ||||
| 			/* add other initial characters here as get_string() | ||||
| 			 * begins with reading the next character */ | ||||
| 			ADD_TOKEN_CHAR (awk, awk->src.lex.curc); | ||||
| 		} | ||||
| 		return get_string (awk,  | ||||
| 			QSE_T('/'), QSE_T('\\'), QSE_TRUE, escaped); | ||||
| 	} | ||||
| } | ||||
|  | ||||
| static int get_string ( | ||||
| 	qse_awk_t* awk, qse_char_t end_char,  | ||||
| 	qse_char_t esc_char, qse_bool_t keep_esc_char) | ||||
| 	qse_char_t esc_char, qse_bool_t keep_esc_char, | ||||
| 	int preescaped) | ||||
| { | ||||
| 	qse_cint_t c; | ||||
| 	int escaped = 0; | ||||
| 	int escaped = preescaped; | ||||
| 	int digit_count = 0; | ||||
| 	qse_cint_t c_acc = 0; | ||||
|  | ||||
|  | ||||
| @ -152,6 +152,7 @@ const qse_char_t* qse_sed_geterrmsg (qse_sed_t* sed) | ||||
| 		QSE_T("address 2 prohibited"), | ||||
| 		QSE_T("a new line expected"), | ||||
| 		QSE_T("a backslash expected"), | ||||
| 		QSE_T("a backslash used as a delimiter"), | ||||
| 		QSE_T("garbage after a backslash"), | ||||
| 		QSE_T("a semicolon expected"), | ||||
| 		QSE_T("label name too long"), | ||||
| @ -159,7 +160,7 @@ const qse_char_t* qse_sed_geterrmsg (qse_sed_t* sed) | ||||
| 		QSE_T("duplicate label name"), | ||||
| 		QSE_T("empty file name"), | ||||
| 		QSE_T("illegal file name"), | ||||
| 		QSE_T("translation set not terminated"), | ||||
| 		QSE_T("command not terminated properly"), | ||||
| 		QSE_T("strings in translation set not the same length"), | ||||
| 		QSE_T("group brackets not balanced"), | ||||
| 		QSE_T("group nesting too deep") | ||||
| @ -227,7 +228,6 @@ static void* compile_regex (qse_sed_t* sed, qse_char_t rxend) | ||||
| 			} | ||||
|  | ||||
| 			if (c == QSE_T('n')) c = QSE_T('\n'); | ||||
| 			else if (c == QSE_T('r')) c = QSE_T('\r'); | ||||
| 			// TODO: support more escaped characters?? | ||||
| 		} | ||||
|  | ||||
| @ -576,7 +576,6 @@ static int get_file_name (qse_sed_t* sed, qse_sed_cmd_t* cmd) | ||||
| 			} | ||||
|  | ||||
| 			if (c == QSE_T('n')) c = QSE_T('\n'); | ||||
| 			else if (c == QSE_T('r')) c = QSE_T('\r'); | ||||
| 		} | ||||
|  | ||||
| 		if (qse_str_ccat (t, c) == (qse_size_t)-1)  | ||||
| @ -613,15 +612,16 @@ static int get_subst (qse_sed_t* sed, qse_sed_cmd_t* cmd) | ||||
| 	c = CURSC (sed); | ||||
| 	if (c == QSE_CHAR_EOF || IS_LINTERM(c)) | ||||
| 	{ | ||||
| 		//sed->errnum = QSE_SED_ESUNTR; | ||||
| 		/* not terminated properly */ | ||||
| 		sed->errnum = QSE_SED_ENOTRM; | ||||
| 		goto oops; | ||||
| 	} | ||||
|  | ||||
| 	delim = c;	 | ||||
| 	if (delim == QSE_T('\\')) | ||||
| 	{ | ||||
| 		/* illegal delimiter */ | ||||
| 		//sed->errnum = QSE_SED_ESUILD; | ||||
| 		/* backslash is an illegal delimiter */ | ||||
| 		sed->errnum = QSE_SED_EBSDEL; | ||||
| 		goto oops; | ||||
| 	} | ||||
|  | ||||
| @ -635,7 +635,32 @@ static int get_subst (qse_sed_t* sed, qse_sed_cmd_t* cmd) | ||||
| 	c = NXTSC (sed); | ||||
| 	while (c != delim) | ||||
| 	{ | ||||
| 	} | ||||
| 		if (c == QSE_CHAR_EOF || IS_LINTERM(c)) | ||||
| 		{ | ||||
| 			sed->errnum = QSE_SED_ENOTRM; | ||||
| 			goto oops; | ||||
| 		} | ||||
|  | ||||
| 		if (c == QSE_T('\\')) | ||||
| 		{ | ||||
| 			c = NXTSC (sed); | ||||
| 			if (c == QSE_CHAR_EOF || IS_LINTERM(c)) | ||||
| 			{ | ||||
| 				sed->errnum = QSE_SED_ENOTRM; | ||||
| 				goto oops; | ||||
| 			} | ||||
|  | ||||
| 			if (c == QSE_T('n')) c = QSE_T('\n'); | ||||
| 		} | ||||
|  | ||||
| 		if (qse_str_ccat (t, c) == (qse_size_t)-1) | ||||
| 		{ | ||||
| 			sed->errnum = QSE_SED_ENOMEM; | ||||
| 			goto oops; | ||||
| 		} | ||||
|  | ||||
| 		c = NXTSC (sed); | ||||
| 	}	 | ||||
|  | ||||
| oops: | ||||
| 	if (t != QSE_NULL) qse_str_close (t); | ||||
| @ -652,11 +677,17 @@ static int get_transet (qse_sed_t* sed, qse_sed_cmd_t* cmd) | ||||
| 	if (c == QSE_CHAR_EOF || IS_LINTERM(c)) | ||||
| 	{ | ||||
| 		/* translation set terminated prematurely */ | ||||
| 		sed->errnum = QSE_SED_ETSNTR; | ||||
| 		sed->errnum = QSE_SED_ENOTRM; | ||||
| 		goto oops; | ||||
| 	} | ||||
|  | ||||
| 	delim = c;	 | ||||
| 	if (delim == QSE_T('\\')) | ||||
| 	{ | ||||
| 		/* backslash is an illegal delimiter */ | ||||
| 		sed->errnum = QSE_SED_EBSDEL; | ||||
| 		goto oops; | ||||
| 	} | ||||
|  | ||||
| 	t = qse_str_open (sed->mmgr, 0, 32); | ||||
| 	if (t == QSE_NULL)  | ||||
| @ -672,7 +703,7 @@ static int get_transet (qse_sed_t* sed, qse_sed_cmd_t* cmd) | ||||
|  | ||||
| 		if (c == QSE_CHAR_EOF || IS_LINTERM(c)) | ||||
| 		{ | ||||
| 			sed->errnum = QSE_SED_ETSNTR; | ||||
| 			sed->errnum = QSE_SED_ENOTRM; | ||||
| 			goto oops; | ||||
| 		} | ||||
|  | ||||
| @ -681,12 +712,11 @@ static int get_transet (qse_sed_t* sed, qse_sed_cmd_t* cmd) | ||||
| 			c = NXTSC (sed); | ||||
| 			if (c == QSE_CHAR_EOF || IS_LINTERM(c)) | ||||
| 			{ | ||||
| 				sed->errnum = QSE_SED_ETSNTR; | ||||
| 				sed->errnum = QSE_SED_ENOTRM; | ||||
| 				goto oops; | ||||
| 			} | ||||
|  | ||||
| 			if (c == QSE_T('n')) c = QSE_T('\n'); | ||||
| 			else if (c == QSE_T('r')) c = QSE_T('\r'); | ||||
| 		} | ||||
|  | ||||
| 		b[0] = c; | ||||
| @ -704,7 +734,7 @@ static int get_transet (qse_sed_t* sed, qse_sed_cmd_t* cmd) | ||||
| 	{ | ||||
| 		if (c == QSE_CHAR_EOF || IS_LINTERM(c)) | ||||
| 		{ | ||||
| 			sed->errnum = QSE_SED_ETSNTR; | ||||
| 			sed->errnum = QSE_SED_ENOTRM; | ||||
| 			goto oops; | ||||
| 		} | ||||
|  | ||||
| @ -713,12 +743,11 @@ static int get_transet (qse_sed_t* sed, qse_sed_cmd_t* cmd) | ||||
| 			c = NXTSC (sed); | ||||
| 			if (c == QSE_CHAR_EOF || IS_LINTERM(c)) | ||||
| 			{ | ||||
| 				sed->errnum = QSE_SED_ETSNTR; | ||||
| 				sed->errnum = QSE_SED_ENOTRM; | ||||
| 				goto oops; | ||||
| 			} | ||||
|  | ||||
| 			if (c == QSE_T('n')) c = QSE_T('\n'); | ||||
| 			else if (c == QSE_T('r')) c = QSE_T('\r'); | ||||
| 		} | ||||
|  | ||||
| 		if (pos >= QSE_STR_LEN(t)) | ||||
|  | ||||
		Reference in New Issue
	
	Block a user