From 84376d6d92607432d8d4c786359cb825e061dd27 Mon Sep 17 00:00:00 2001 From: hyung-hwan Date: Fri, 26 Nov 2010 08:58:36 +0000 Subject: [PATCH] fixed bugs in processing chunked request --- qse/include/qse/utl/http.h | 27 +++--- qse/lib/utl/http.c | 173 ++++++++++++++++++++++--------------- 2 files changed, 119 insertions(+), 81 deletions(-) diff --git a/qse/include/qse/utl/http.h b/qse/include/qse/utl/http.h index 570c06d2..59f0558d 100644 --- a/qse/include/qse/utl/http.h +++ b/qse/include/qse/utl/http.h @@ -38,22 +38,23 @@ struct qse_http_t QSE_DEFINE_COMMON_FIELDS (http) qse_http_errnum_t errnum; + struct { - int crlf; /* crlf status */ - qse_size_t plen; /* raw request length excluding crlf */ - qse_size_t need; /* number of octets needed for contents */ - struct { - qse_size_t len; - qse_size_t count; - int phase; - } chunk; - } state; + int crlf; /* crlf status */ + qse_size_t plen; /* raw request length excluding crlf */ + qse_size_t need; /* number of octets needed for contents */ + + struct + { + qse_size_t len; + qse_size_t count; + int phase; + } chunk; + } state; - struct - { qse_http_octb_t raw; qse_http_octb_t con; @@ -170,6 +171,10 @@ void qse_http_fini ( qse_http_t* http ); +void qse_http_clear ( + qse_http_t* http +); + #ifdef __cplusplus } #endif diff --git a/qse/lib/utl/http.c b/qse/lib/utl/http.c index 76e814a4..948c6670 100644 --- a/qse/lib/utl/http.c +++ b/qse/lib/utl/http.c @@ -352,6 +352,7 @@ static QSE_INLINE void clear_request (qse_http_t* http) { /* clear necessary part of the request before * reading the next request */ + QSE_MEMSET (&http->req.state, 0, QSE_SIZEOF(http->req.state)); QSE_MEMSET (&http->req.attr, 0, QSE_SIZEOF(http->req.attr)); qse_htb_clear (&http->req.hdr.tab); clear_combined_headers (http); @@ -404,9 +405,6 @@ qse_http_t* qse_http_init (qse_http_t* http, qse_mmgr_t* mmgr) QSE_MEMSET (http, 0, QSE_SIZEOF(*http)); http->mmgr = mmgr; - /*http->state.pending = 0;*/ - http->state.crlf = 0; - http->state.plen = 0; init_buffer (http, &http->req.raw); init_buffer (http, &http->req.con); @@ -564,6 +562,11 @@ qse_printf (QSE_T("BADREQ\n")); return QSE_NULL; } +void qse_http_clear (qse_http_t* http) +{ + clear_request (http); +} + #define octet_tolower(c) (((c) >= 'A' && (c) <= 'Z') ? ((c) | 0x20) : (c)) #define octet_toupper(c) (((c) >= 'a' && (c) <= 'z') ? ((c) & ~0x20) : (c)) @@ -983,12 +986,25 @@ static QSE_INLINE int parse_request ( return 0; } +/* chunk parsing phases */ +#define GET_CHUNK_DONE 0 +#define GET_CHUNK_LEN 1 +#define GET_CHUNK_DATA 2 +#define GET_CHUNK_CRLF 3 +#define GET_CHUNK_TRAILERS 4 + static const qse_byte_t* getchunklen (qse_http_t* http, const qse_byte_t* ptr, qse_size_t len) { const qse_byte_t* end = ptr + len; - if (http->state.chunk.count == 0) + /* this function must be called in the GET_CHUNK_LEN context */ + QSE_ASSERT (http->req.state.chunk.phase == GET_CHUNK_LEN); + +//qse_printf (QSE_T("CALLING getchunklen [%d]\n"), *ptr); + if (http->req.state.chunk.count <= 0) { + /* skip leading spaces if the first character of + * the chunk length has not been read yet */ while (ptr < end && is_space_octet(*ptr)) ptr++; } @@ -997,25 +1013,46 @@ static const qse_byte_t* getchunklen (qse_http_t* http, const qse_byte_t* ptr, q int n = xdigit_to_num (*ptr); if (n <= -1) break; - http->state.chunk.len = http->state.chunk.len * 16 + n; - http->state.chunk.count++; + http->req.state.chunk.len = http->req.state.chunk.len * 16 + n; + http->req.state.chunk.count++; ptr++; } - if (http->state.chunk.count > 0) - { - while (ptr < end && is_space_octet(*ptr)) ptr++; - } + /* skip trailing spaces if the length has been read */ + while (ptr < end && is_space_octet(*ptr)) ptr++; if (ptr < end) { if (*ptr == '\n') { - http->state.need = http->state.chunk.len; + /* the chunk length line ended properly */ + + if (http->req.state.chunk.count <= 0) + { + /* empty line - no more chunk */ +//qse_printf (QSE_T("empty line chunk done....\n")); + http->req.state.chunk.phase = GET_CHUNK_DONE; + } + else if (http->req.state.chunk.len <= 0) + { + /* length explicity specified to 0 + get trailing headers .... */ + /*TODO: => http->req.state.chunk.phase = GET_CHUNK_TRAILERS;*/ + http->req.state.chunk.phase = GET_CHUNK_DATA; + } + else + { + /* ready to read the chunk data... */ + http->req.state.chunk.phase = GET_CHUNK_DATA; +//qse_printf (QSE_T("SWITCH TO GET_CHUNK_DATA....\n")); + } + + http->req.state.need = http->req.state.chunk.len; ptr++; } else { +//qse_printf (QSE_T("XXXXXXXXXXXXXXXXXxxx [%c]\n"), *ptr); http->errnum = QSE_HTTP_EBADREQ; return QSE_NULL; } @@ -1024,13 +1061,6 @@ static const qse_byte_t* getchunklen (qse_http_t* http, const qse_byte_t* ptr, q return ptr; } -/* chunk parsing phases */ -#define GET_CHUNK_DONE 0 -#define GET_CHUNK_LEN 1 -#define GET_CHUNK_DATA 2 -#define GET_CHUNK_CRLF 3 -#define GET_CHUNK_TRAILERS 4 - /* feed the percent encoded string */ int qse_http_feed (qse_http_t* http, const qse_byte_t* ptr, qse_size_t len) { @@ -1038,14 +1068,14 @@ int qse_http_feed (qse_http_t* http, const qse_byte_t* ptr, qse_size_t len) const qse_byte_t* req = ptr; /* does this goto drop code maintainability? */ - if (http->state.need > 0) goto content_resume; - switch (http->state.chunk.phase) + if (http->req.state.need > 0) goto content_resume; + switch (http->req.state.chunk.phase) { case GET_CHUNK_LEN: goto dechunk_resume; case GET_CHUNK_DATA: - /* this won't be reached as http->state.need + /* this won't be reached as http->req.state.need * is greater than 0 if GET_CHUNK_DATA is true */ goto content_resume; @@ -1060,9 +1090,9 @@ int qse_http_feed (qse_http_t* http, const qse_byte_t* ptr, qse_size_t len) while (ptr < end) { - qse_byte_t b = *ptr++; + register qse_byte_t b = *ptr++; - if (http->state.plen <= 0 && is_whspace_octet(b)) + if (http->req.state.plen <= 0 && is_whspace_octet(b)) { /* let's drop leading whitespaces across multiple * lines */ @@ -1072,28 +1102,28 @@ int qse_http_feed (qse_http_t* http, const qse_byte_t* ptr, qse_size_t len) if (b == '\n') { - if (http->state.crlf <= 1) + if (http->req.state.crlf <= 1) { - /* http->state.crlf == 0, CR was not seen - * http->state.crlf == 1, CR was seen + /* http->req.state.crlf == 0, CR was not seen + * http->req.state.crlf == 1, CR was seen * whatever the current case is, mark the * first LF is seen here. */ - http->state.crlf = 2; + http->req.state.crlf = 2; } else { - /* http->state.crlf == 2, no 2nd CR before LF - * http->state.crlf == 3, 2nd CR before LF + /* http->req.state.crlf == 2, no 2nd CR before LF + * http->req.state.crlf == 3, 2nd CR before LF */ /* we got a complete request. */ - QSE_ASSERT (http->state.crlf <= 3); + QSE_ASSERT (http->req.state.crlf <= 3); /* reset the crlf state */ - http->state.crlf = 0; + http->req.state.crlf = 0; /* reset the raw request length */ - http->state.plen = 0; + http->req.state.plen = 0; if (parse_request (http, req, ptr - req) <= -1) return -1; @@ -1104,38 +1134,28 @@ int qse_http_feed (qse_http_t* http, const qse_byte_t* ptr, qse_size_t len) QSE_ASSERT (http->req.attr.content_length <= 0); dechunk_start: - http->state.chunk.phase = GET_CHUNK_LEN; - http->state.chunk.len = 0; - http->state.chunk.count = 0; + http->req.state.chunk.phase = GET_CHUNK_LEN; + http->req.state.chunk.len = 0; + http->req.state.chunk.count = 0; dechunk_resume: ptr = getchunklen (http, ptr, end - ptr); if (ptr == QSE_NULL) return -1; - if (http->state.chunk.count <= 0) + if (http->req.state.chunk.phase == GET_CHUNK_LEN) { - /* empty line - end of the chunk */ - http->state.chunk.phase = GET_CHUNK_DONE; - } - else if (http->state.need <= 0) - { - /* length explicity specified to 0 - get trailing headers .... */ - /*http->state.chunk.phase = GET_CHUNK_TRAILERS;*/ - http->state.chunk.phase = GET_CHUNK_DATA; - } - else - { - /* let's piggy back on the normal content-length data acquisition */ - http->state.chunk.phase = GET_CHUNK_DATA; + /* still in the GET_CHUNK_LEN state. + * the length has been partially read. */ + goto feedme_more; } } else { - http->state.need = http->req.attr.content_length; + /* we need to read as many octets as Content-Length */ + http->req.state.need = http->req.attr.content_length; } - if (http->state.need > 0) + if (http->req.state.need > 0) { /* content-length or chunked data length specified */ @@ -1144,27 +1164,27 @@ int qse_http_feed (qse_http_t* http, const qse_byte_t* ptr, qse_size_t len) content_resume: avail = end - ptr; - if (avail < http->state.need) + if (avail < http->req.state.need) { /* the data is not as large as needed */ if (push_to_buffer (http, &http->req.con, ptr, avail) <= -1) return -1; - http->state.need -= avail; + http->req.state.need -= avail; /* we didn't get a complete content yet */ - goto abort; + goto feedme_more; } else { /* we are given all needed or more than needed */ - if (push_to_buffer (http, &http->req.con, ptr, http->state.need) <= -1) return -1; - ptr += http->state.need; - http->state.need = 0; + if (push_to_buffer (http, &http->req.con, ptr, http->req.state.need) <= -1) return -1; + ptr += http->req.state.need; + http->req.state.need = 0; } } - if (http->state.chunk.phase == GET_CHUNK_DATA) + if (http->req.state.chunk.phase == GET_CHUNK_DATA) { - QSE_ASSERT (http->state.need == 0); - http->state.chunk.phase = GET_CHUNK_CRLF; + QSE_ASSERT (http->req.state.need == 0); + http->req.state.chunk.phase = GET_CHUNK_CRLF; dechunk_crlf: while (ptr < end && is_space_octet(*ptr)) ptr++; @@ -1172,9 +1192,22 @@ int qse_http_feed (qse_http_t* http, const qse_byte_t* ptr, qse_size_t len) { if (*ptr == '\n') { - /* end of chunk data. let's decode the next chunk */ + /* end of chunk data. */ ptr++; - goto dechunk_start; + + /* more octets still available. + * let it decode the next chunk */ + if (ptr < end) goto dechunk_start; + + /* no more octets available after chunk data. + * the chunk state variables need to be + * reset when a jump is made to dechunk_resume + * upon the next call */ + http->req.state.chunk.phase = GET_CHUNK_LEN; + http->req.state.chunk.len = 0; + http->req.state.chunk.count = 0; + + goto feedme_more; } else { @@ -1186,7 +1219,7 @@ int qse_http_feed (qse_http_t* http, const qse_byte_t* ptr, qse_size_t len) else { /* data not enough */ - goto abort; + goto feedme_more; } } @@ -1205,9 +1238,9 @@ if (http->req.con.size > 0) } else if (b == '\r') { - if (http->state.crlf == 0 || http->state.crlf == 2) - http->state.crlf++; - else http->state.crlf = 1; + if (http->req.state.crlf == 0 || http->req.state.crlf == 2) + http->req.state.crlf++; + else http->req.state.crlf = 1; } else if (b == '\0') { @@ -1220,9 +1253,9 @@ if (http->req.con.size > 0) { /* increment length of a request in raw * excluding crlf */ - http->state.plen++; + http->req.state.plen++; /* mark that neither CR nor LF was seen */ - http->state.crlf = 0; + http->req.state.crlf = 0; } } @@ -1232,7 +1265,7 @@ if (http->req.con.size > 0) if (push_to_buffer (http, &http->req.raw, req, ptr - req) <= -1) return -1; } -abort: +feedme_more: return 0; }