fixed a bug of wrong field buffer management in sed
This commit is contained in:
		| @ -421,7 +421,7 @@ struct opttab_t | ||||
| 	{ QSE_T("ncmponstr"),    QSE_AWK_NCMPONSTR,      QSE_T("perform numeric comparsion on numeric strings") }, | ||||
| 	{ QSE_T("strictnaming"), QSE_AWK_STRICTNAMING,   QSE_T("enable the strict naming rule") }, | ||||
| 	{ QSE_T("include"),      QSE_AWK_INCLUDE,        QSE_T("enable '@include'") }, | ||||
| 	{ QSE_T("tolerant"),     QSE_AWK_TOLERANT,       QSE_T("make more I/O fault-tolerant") }, | ||||
| 	{ QSE_T("tolerant"),     QSE_AWK_TOLERANT,       QSE_T("make more fault-tolerant") }, | ||||
| 	{ QSE_T("abort"),        QSE_AWK_ABORT,          QSE_T("enable 'abort'") }, | ||||
| 	{ QSE_NULL,              0,                      QSE_NULL } | ||||
| }; | ||||
|  | ||||
| @ -3,7 +3,6 @@ | ||||
| @section sed_contents CONTENTS | ||||
| - \ref sed_intro | ||||
| - \ref sed_command | ||||
| - \ref sed_embed | ||||
|  | ||||
| @section sed_intro INTRODUCTION | ||||
|  | ||||
| @ -261,8 +260,7 @@ Replaces all occurrences of characters in @b src with characters in @b dst. | ||||
| - <b>c/selector/opts</b> | ||||
| Selects characters or fields from the pattern space as specified by the | ||||
| @b selector and updates the pattern space with the selected text. A selector | ||||
| is a comma-separated list of selector atoms. A selector atom is one of | ||||
| the following: | ||||
| is a comma-separated list of specifiers. A specifier is one of the following: | ||||
| <ul> | ||||
|  <li>@b d specifies the input field delimiter with the next character. e.g. d: | ||||
|  <li>@b D specifies the output field delimiter with the next character. e.g. D; | ||||
| @ -291,16 +289,29 @@ Prints the last line. If #QSE_SED_QUIET is on, try <b>$p</b>. | ||||
| - <b>1!G;h;$!d</b> | ||||
| Prints input lines in the reverse order. That is, it prints the last line  | ||||
| first and the first line last. | ||||
| @code | ||||
| $ echo -e "a\nb\nc" | qsesed '1!G;h;$!d' | ||||
| c | ||||
| b | ||||
| a | ||||
| @endcode | ||||
|  | ||||
| - <b>s/[[:space:]]{2,}/ /g</b> | ||||
| Collapses each run of two or more whitespace characters into a single space if #QSE_SED_REXBOUND is on. | ||||
|  | ||||
| @section sed_embed HOW TO EMBED | ||||
|  | ||||
| In the simplest form,  | ||||
| - Create a stream editor - qse_sed_open() | ||||
| - Compile editing commands - qse_sed_comp() | ||||
| - Executes compiled commands - qse_sed_exec() | ||||
| - Destroy the stream editor - qse_sed_close() | ||||
| - <b>C/d:,f3,1/</b> | ||||
| Prints the third field and the first field from colon-separated text. | ||||
| @code | ||||
| $ head -5 /etc/passwd | ||||
| root:x:0:0:root:/root:/bin/bash | ||||
| daemon:x:1:1:daemon:/usr/sbin:/bin/sh | ||||
| bin:x:2:2:bin:/bin:/bin/sh | ||||
| sys:x:3:3:sys:/dev:/bin/sh | ||||
| sync:x:4:65534:sync:/bin:/bin/sync | ||||
| $ qsesed '1,3C/d:,f3,1/;4,$d' /etc/passwd  | ||||
| 0 root | ||||
| 1 daemon | ||||
| 2 bin | ||||
| @endcode | ||||
|  | ||||
| */ | ||||
|  | ||||
| @ -2981,12 +2981,6 @@ static int do_subst (qse_sed_t* sed, qse_sed_cmd_t* cmd) | ||||
| 	return 0; | ||||
| } | ||||
|  | ||||
| static QSE_INLINE int isdelim (qse_sed_cmd_t* cmd, qse_char_t c) | ||||
| { | ||||
| 	return (cmd->u.cut.w && QSE_ISSPACE(c)) || | ||||
| 	       (!cmd->u.cut.w && c == cmd->u.cut.delim[0]); | ||||
| } | ||||
|  | ||||
| static int split_into_fields_for_cut ( | ||||
| 	qse_sed_t* sed, qse_sed_cmd_t* cmd, const qse_cstr_t* str) | ||||
| { | ||||
| @ -2997,15 +2991,34 @@ static int split_into_fields_for_cut ( | ||||
|  | ||||
| 	for (i = 0; i < str->len; ) | ||||
| 	{ | ||||
| 		int isdelim = 0; | ||||
| 		qse_char_t c = str->ptr[i++]; | ||||
|  | ||||
| 		if (isdelim(cmd,c)) | ||||
| 		if (cmd->u.cut.w) | ||||
| 		{  | ||||
| 			/* the w option ignores the d specifier */ | ||||
| 			if (QSE_ISSPACE(c)) | ||||
| 			{ | ||||
| 				/* the w option assumes the f option */ | ||||
| 				while (i < str->len && QSE_ISSPACE(str->ptr[i])) i++; | ||||
| 				isdelim = 1; | ||||
| 			} | ||||
| 		} | ||||
| 		else | ||||
| 		{ | ||||
| 			if (c == cmd->u.cut.delim[0]) | ||||
| 			{ | ||||
| 				if (cmd->u.cut.f) | ||||
| 				{ | ||||
| 				while (i < str->len && isdelim(cmd,str->ptr[i])) i++; | ||||
| 					/* fold consecutive delimiters */ | ||||
| 					while (i < str->len && str->ptr[i] == cmd->u.cut.delim[0]) i++; | ||||
| 				} | ||||
| 				isdelim = 1; | ||||
| 			} | ||||
| 		} | ||||
|  | ||||
| 		if (isdelim) | ||||
| 		{ | ||||
| 			sed->e.cutf.flds[x++].len = xl; | ||||
|  | ||||
| 			if (x >= sed->e.cutf.cflds) | ||||
| @ -3014,10 +3027,10 @@ static int split_into_fields_for_cut ( | ||||
| 				qse_size_t nsz; | ||||
|  | ||||
| 				nsz = sed->e.cutf.cflds; | ||||
| 				if (nsz > 100000) nsz += 100000; | ||||
| 				if (nsz > 50000) nsz += 50000; | ||||
| 				else nsz *= 2; | ||||
| 				 | ||||
| 				if (sed->e.cutf.flds != sed->e.cutf.sflds) | ||||
| 				if (sed->e.cutf.flds == sed->e.cutf.sflds) | ||||
| 				{ | ||||
| 					tmp = QSE_MMGR_ALLOC (sed->mmgr, QSE_SIZEOF(*tmp) * nsz); | ||||
| 					if (tmp == QSE_NULL)  | ||||
| @ -3025,12 +3038,7 @@ static int split_into_fields_for_cut ( | ||||
| 						SETERR0 (sed, QSE_SED_ENOMEM, QSE_NULL); | ||||
| 						return -1; | ||||
| 					} | ||||
|  | ||||
| 					QSE_MEMCPY (tmp, sed->e.cutf.flds, QSE_SIZEOF(*tmp) * sed->e.cutf.cflds); | ||||
| 					QSE_MMGR_FREE (sed->mmgr, sed->e.cutf.flds); | ||||
|  | ||||
| 					if (sed->e.cutf.flds != sed->e.cutf.sflds) | ||||
| 						QSE_MMGR_FREE (sed->mmgr, sed->e.cutf.flds); | ||||
| 				} | ||||
| 				else | ||||
| 				{ | ||||
| @ -3048,6 +3056,8 @@ static int split_into_fields_for_cut ( | ||||
|  | ||||
| 			xl = 0; | ||||
| 			sed->e.cutf.flds[x].ptr = &str->ptr[i]; | ||||
|  | ||||
| 			/* mark that this line is delimited at least once */ | ||||
| 			sed->e.cutf.delimited = 1;  | ||||
| 		} | ||||
| 		else xl++; | ||||
|  | ||||
		Reference in New Issue
	
	Block a user