From 45cb237aa6a5e06886b1fea0efb709b7c58173c9 Mon Sep 17 00:00:00 2001
From: hyung-hwan
Date: Fri, 14 Nov 2014 16:56:51 +0000
Subject: [PATCH] added fields to keep an original query path in qse_htre_t
 changed proxying routines to use the original query path instead of
 percent-encoding back

---
 qse/cmd/http/httpd.c        | 82 +++++++++++++++++++++++--------------
 qse/include/qse/http/htre.h | 32 +++++++--------
 qse/include/qse/http/http.h | 37 ++++++++++++-----
 qse/lib/awk/std.c           |  4 +-
 qse/lib/http/htrd.c         | 44 ++++++++++++++------
 qse/lib/http/htre.c         | 68 +++++++++++++++++++++++-------
 qse/lib/http/http.c         | 50 ++++++++++++++++------
 qse/lib/http/httpd-proxy.c  |  8 +++-
 8 files changed, 225 insertions(+), 100 deletions(-)

diff --git a/qse/cmd/http/httpd.c b/qse/cmd/http/httpd.c
index 7244c5de..d55e5d63 100644
--- a/qse/cmd/http/httpd.c
+++ b/qse/cmd/http/httpd.c
@@ -497,18 +497,15 @@ static int get_server_root (
 
 	if (qinfo->client->status & QSE_HTTPD_CLIENT_INTERCEPTED)
 	{
-printf ("intercepted....\n");
 		/* transparent proxying */
 		if (loccfg->proxy.allow_intercept <= 0)
 		{
-printf ("intercepted. not allowed...\n");
 			root->type = QSE_HTTPD_SERVERSTD_ROOT_ERROR;
 			root->u.error.code = 403; /* forbidden */
 			return 0;
 		}
 		else if (loccfg->proxy.allow_intercept <= 1)
 		{
-printf ("intercepted. not allowed to go thru...\n");
 			root->type = QSE_HTTPD_SERVERSTD_ROOT_PROXY;
 			root->u.proxy.dst.nwad = qinfo->client->orgdst_addr;
 			/* if TPROXY is used, set the source to the original source.
@@ -522,7 +519,6 @@ printf ("intercepted. not allowed to go thru...\n");
 
 			goto proxy_ok;
 		}
-printf ("intercepted. to be handled locally ...\n");
 	}
 
 	if (mth == QSE_HTTP_CONNECT)
@@ -568,39 +564,63 @@ printf ("intercepted. to be handled locally ...\n");
 
 			if (slash && slash - host > 0)
 			{
+				qse_size_t len_before_slash;
+				qse_mchar_t* org_qpath = QSE_NULL;
+
+				len_before_slash = slash - qpath;
+
+				if (!(qinfo->req->flags & QSE_HTRE_QPATH_PERDEC) ||
+				    qse_mbszcmp (qpath, (org_qpath = qse_htre_getorgqpath(qinfo->req)), len_before_slash) == 0)
+				{
+					/* this block ensures to proxy a request whose protocol and
+					 * host name part were not percent-encoded in the original
+					 * request */
+
+					/* e.g. proto://hostname/XXXX
+					 * slash should point to the slash before XXXX.
+					 * if hostname is empty, this 'if' block is skipped. */
+
+					root->type = QSE_HTTPD_SERVERSTD_ROOT_PROXY;
+
+					if (loccfg->proxy.pseudonym[0])
+						root->u.proxy.pseudonym = loccfg->proxy.pseudonym;
+
 /* TODO: refrain from manipulating the request like this */
 
-				root->type = QSE_HTTPD_SERVERSTD_ROOT_PROXY;
+					/* move the host name part backward by 1 byte to make a room for
+					 * terminating null. An original input of http://www.yahoo.com/ab/cd
+					 * becomes http:/www.yahoo.com\0ab/cd. host gets to point to
+					 * www.yahoo.com. qpath(qinfo->req.u.q.path) is updated to ab/cd. */
+					qse_memmove (host - 1, host, slash - host);
+					slash[-1] = QSE_MT('\0');
+					host = host - 1;
+					root->u.proxy.host = host;
 
-				if (loccfg->proxy.pseudonym[0])
-					root->u.proxy.pseudonym = loccfg->proxy.pseudonym;
+					if (proto_len == 8) root->u.proxy.flags |= QSE_HTTPD_RSRC_PROXY_DST_SECURE;
+					if (qse_mbstonwad (host, &root->u.proxy.dst.nwad) <= -1)
+					{
+						root->u.proxy.flags |= QSE_HTTPD_RSRC_PROXY_DST_STR;
+						root->u.proxy.dst.str = host;
+					}
+					else
+					{
+						/* make the source binding type the same as destination */
+						if (qse_getnwadport(&root->u.proxy.dst.nwad) == 0)
+							qse_setnwadport (&root->u.proxy.dst.nwad, qse_hton16(80));
+						root->u.proxy.src.nwad.type = root->u.proxy.dst.nwad.type;
+					}
 
-				/* move the host name part backward by 1 byte to make a room for
-				 * terminating null. An orginal input of http://www.yahoo.com/ab/cd
-				 * becomes http:/www.yahoo.com\0ab/cd. host gets to point to the
-				 * www.yahoo.com. qpath(qinfo->req.u.q.path) is updated to ab/cd. */
-				qse_memmove (host - 1, host, slash - host);
-				slash[-1] = QSE_MT('\0');
-				host = host - 1;
-				root->u.proxy.host = host;
+					/* TODO: refrain from manipulating the request like this */
+					qinfo->req->u.q.path.len -= len_before_slash;
+					qinfo->req->u.q.path.ptr = slash; /* TODO: use setqpath or something... */
+					if (org_qpath)
+					{
+						qinfo->req->orgqpath.len -= len_before_slash;
+						qinfo->req->orgqpath.ptr += len_before_slash;
+					}
 
-				if (proto_len == 8) root->u.proxy.flags |= QSE_HTTPD_RSRC_PROXY_DST_SECURE;
-				if (qse_mbstonwad (host, &root->u.proxy.dst.nwad) <= -1)
-				{
-					root->u.proxy.flags |= QSE_HTTPD_RSRC_PROXY_DST_STR;
-					root->u.proxy.dst.str = host;
+					goto proxy_ok;
 				}
-				else
-				{
-					/* make the source binding type the same as destination */
-					if (qse_getnwadport(&root->u.proxy.dst.nwad) == 0)
-						qse_setnwadport (&root->u.proxy.dst.nwad, qse_hton16(80));
-					root->u.proxy.src.nwad.type = root->u.proxy.dst.nwad.type;
-				}
-
-/* TODO: refrain from manipulating the request like this */
-				qinfo->req->u.q.path = slash; /* TODO: use setqpath or something... */
-				goto proxy_ok;
 			}
 			else
 			{
diff --git a/qse/include/qse/http/htre.h b/qse/include/qse/http/htre.h
index 430ec597..b069732a 100644
--- a/qse/include/qse/http/htre.h
+++ b/qse/include/qse/http/htre.h
@@ -79,8 +79,8 @@ struct qse_htre_t
 				qse_http_method_t type;
 				const qse_mchar_t* name;
 			} method;
-			qse_mchar_t* path;
-			qse_mchar_t* param;
+			qse_mcstr_t path;
+			qse_mcstr_t param;
 		} q;
 		struct
 		{
@@ -102,6 +102,17 @@ struct qse_htre_t
 #define QSE_HTRE_QPATH_PERDEC (1 << 6) /* the qpath has been percent-decoded */
 	int flags;
 
+	/* original query path for a request.
+	 * meaningful if QSE_HTRE_QPATH_PERDEC is set in the flags */
+	struct
+	{
+		qse_mchar_t* buf; /* buffer pointer */
+		qse_size_t capa; /* buffer capacity */
+
+		qse_mchar_t* ptr;
+		qse_size_t len;
+	} orgqpath;
+
 	/* special attributes derived from the header */
 	struct
 	{
@@ -132,8 +143,9 @@ struct qse_htre_t
 
 #define qse_htre_getqmethodtype(re) ((re)->u.q.method.type)
 #define qse_htre_getqmethodname(re) ((re)->u.q.method.name)
-#define qse_htre_getqpath(re) ((re)->u.q.path)
-#define qse_htre_getqparam(re) ((re)->u.q.param)
+#define qse_htre_getqpath(re) ((re)->u.q.path.ptr)
+#define qse_htre_getqparam(re) ((re)->u.q.param.ptr)
+#define qse_htre_getorgqpath(re) ((re)->orgqpath.ptr)
 
 #define qse_htre_getscodeval(re) ((re)->u.s.code.val)
 #define qse_htre_getscodestr(re) ((re)->u.s.code.str)
@@ -169,18 +181,6 @@ QSE_EXPORT void qse_htre_clear (
 	qse_htre_t* re
 );
 
-QSE_EXPORT int qse_htre_setstrfromcstr (
-	qse_htre_t* re,
-	qse_mbs_t* str,
-	const qse_mcstr_t* cstr
-);
-
-QSE_EXPORT int qse_htre_setstrfromxstr (
-	qse_htre_t* re,
-	qse_mbs_t* str,
-	const qse_mcstr_t* xstr
-);
-
 QSE_EXPORT const qse_htre_hdrval_t* qse_htre_getheaderval (
 	const qse_htre_t* re,
 	const qse_mchar_t* key
diff --git a/qse/include/qse/http/http.h b/qse/include/qse/http/http.h
index 8e751986..5e5bf6ca 100644
--- a/qse/include/qse/http/http.h
+++ b/qse/include/qse/http/http.h
@@ -202,22 +202,39 @@ QSE_EXPORT qse_mchar_t* qse_fmthttptime (
 	qse_size_t bufsz
 );
 
-/* percent-decode a string.
- * returns the number of affected characters.
- * for example, 0 means that no characters in the input required decoding. */
-QSE_EXPORT qse_size_t qse_perdechttpstr (
-	const qse_mchar_t* str,
-	qse_mchar_t* buf
+/**
+ * The qse_isperencedhttpstr() function determines if the given string
+ * contains a valid percent-encoded sequence. It returns 1 if so, 0 if not.
+ */
+QSE_EXPORT int qse_isperencedhttpstr (
+	const qse_mchar_t* str
 );
 
+/**
+ * The qse_perdechttpstr() function performs percent-decoding over a string.
+ * The caller must ensure that the output buffer \a buf is large enough.
+ * If \a ndecs is not #QSE_NULL, it is set to the number of characters
+ * decoded. 0 means no characters in the input string required decoding.
+ * \return the length of the output string.
+ */
+QSE_EXPORT qse_size_t qse_perdechttpstr (
+	const qse_mchar_t* str,
+	qse_mchar_t* buf,
+	qse_size_t* ndecs
+);
 
-/* percent-encode a string.
- * returns the number of affected characters.
- * for example, 0 means that no characters in the input required encoding. */
+/**
+ * The qse_perenchttpstr() function performs percent-encoding over a string.
+ * The caller must ensure that the output buffer \a buf is large enough.
+ * If \a nencs is not #QSE_NULL, it is set to the number of characters
+ * encoded. 0 means no characters in the input string required encoding.
+ * \return the length of the output string.
+ */
 QSE_EXPORT qse_size_t qse_perenchttpstr (
 	int opt, /**< 0 or bitwise-OR'ed of #qse_perenchttpstr_opt_t */
 	const qse_mchar_t* str,
-	qse_mchar_t* buf
+	qse_mchar_t* buf,
+	qse_size_t* nencs
 );
 
 QSE_EXPORT qse_mchar_t* qse_perenchttpstrdup (
diff --git a/qse/lib/awk/std.c b/qse/lib/awk/std.c
index 9eee379c..fbcde74b 100644
--- a/qse/lib/awk/std.c
+++ b/qse/lib/awk/std.c
@@ -80,8 +80,8 @@ typedef struct xtn_t
 
 			struct
 			{
-				const qse_char_t* ptr; 
-				const qse_char_t* end; 
+				const qse_char_t* ptr;
+				const qse_char_t* end;
 			} str;
 		} u;
 	} in;
diff --git a/qse/lib/http/htrd.c b/qse/lib/http/htrd.c
index 82a35001..0df689c6 100644
--- a/qse/lib/http/htrd.c
+++ b/qse/lib/http/htrd.c
@@ -318,7 +318,7 @@ static qse_mchar_t* parse_initial_line (qse_htrd_t* htrd, qse_mchar_t* line)
 
 				int q = xdigit_to_num(*(p+1));
 				int w = xdigit_to_num(*(p+2));
-	
+
 				if (q >= 0 && w >= 0)
 				{
 					int t = (q << 4) + w;
@@ -330,6 +330,8 @@ static qse_mchar_t* parse_initial_line (qse_htrd_t* htrd, qse_mchar_t* line)
 
 					*out++ = t;
 					p += 3;
+
+					htrd->re.flags |= QSE_HTRE_QPATH_PERDEC;
 				}
 				else *out++ = *p++;
 			}
@@ -355,23 +357,32 @@ static qse_mchar_t* parse_initial_line (qse_htrd_t* htrd, qse_mchar_t* line)
 		/* null-terminate the url part though we know the length */
 		*out = QSE_MT('\0');
 
+		if (htrd->re.flags & QSE_HTRE_QPATH_PERDEC)
+		{
+			/* TODO: build the original qpath */
+			htrd->re.orgpqath.ptr = XXX;
+			htrd->re.orgpath.len = XXXX;
+		}
+
 		if (param.ptr)
 		{
 			param.len = out - param.ptr;
-			htrd->re.u.q.path = tmp.ptr;
-			htrd->re.u.q.param = param.ptr;
+			htrd->re.u.q.path = tmp;
+			htrd->re.u.q.param = param;
 		}
 		else
 		{
 			tmp.len = out - tmp.ptr;
-			htrd->re.u.q.path = tmp.ptr;
-			htrd->re.u.q.param = QSE_NULL;
+			htrd->re.u.q.path = tmp;
+			htrd->re.u.q.param.ptr = QSE_NULL;
+			htrd->re.u.q.param.len = 0;
 		}
 #else
 		while (*p != QSE_MT('\0') && !is_space_octet(*p))
 		{
 			if (*p == QSE_MT('?') && param.ptr == QSE_NULL)
 			{
+				tmp.len = p - tmp.ptr; /* length of the path part */
 				*p++ = QSE_MT('\0'); /* null-terminate the path part */
 				param.ptr = p;
 			}
@@ -380,33 +391,40 @@ static qse_mchar_t* parse_initial_line (qse_htrd_t* htrd, qse_mchar_t* line)
 		/* the url must be followed by a space */
 		if (!is_space_octet(*p)) goto badre;
 
+		if (param.ptr) param.len = p - param.ptr; else tmp.len = p - tmp.ptr; /* length of the param part, or of the path part when there is no param */
 		*p = QSE_MT('\0'); /* null-terminate the path or param part */
 
 		if (param.ptr)
 		{
-			htrd->re.u.q.path = tmp.ptr;
-			htrd->re.u.q.param = param.ptr;
+			htrd->re.u.q.path = tmp;
+			htrd->re.u.q.param = param;
 		}
 		else
 		{
-			htrd->re.u.q.path = tmp.ptr;
-			htrd->re.u.q.param = QSE_NULL;
+			htrd->re.u.q.path = tmp;
+			htrd->re.u.q.param.ptr = QSE_NULL;
+			htrd->re.u.q.param.len = 0;
 		}
 #endif
 
 		if (htrd->option & QSE_HTRD_CANONQPATH)
 		{
-			qse_mchar_t* qpath = htrd->re.u.q.path;
+			qse_mchar_t* qpath = htrd->re.u.q.path.ptr;
 
 			/* if the url begins with xxx://,
 			 * skip xxx:/ and canonicalize from the second slash */
 			while (is_alpha_octet(*qpath)) qpath++;
 			if (qse_mbszcmp (qpath, QSE_MT("://"), 3) == 0)
+			{
 				qpath = qpath + 2; /* set the position to the second / in :// */
+				htrd->re.u.q.path.len = qse_canonmbspath (qpath, qpath, 0);
+				htrd->re.u.q.path.len += qpath - htrd->re.u.q.path.ptr;
+			}
 			else
-				qpath = htrd->re.u.q.path;
-
-			qse_canonmbspath (qpath, qpath, 0);
+			{
+				qpath = htrd->re.u.q.path.ptr;
+				htrd->re.u.q.path.len = qse_canonmbspath (qpath, qpath, 0);
+			}
 		}
 
 		/* skip spaces after the url part */
diff --git a/qse/lib/http/htre.c b/qse/lib/http/htre.c
index 9628e75c..c80c1107 100644
--- a/qse/lib/http/htre.c
+++ b/qse/lib/http/htre.c
@@ -78,6 +78,9 @@ void qse_htre_fini (qse_htre_t* re)
 	qse_mbs_fini (&re->content);
 	qse_htb_fini (&re->trailers);
 	qse_htb_fini (&re->hdrtab);
+
+	if (re->orgqpath.buf)
+		QSE_MMGR_FREE (re->mmgr, re->orgqpath.buf);
 }
 
 void qse_htre_clear (qse_htre_t* re)
@@ -95,6 +98,9 @@ void qse_htre_clear (qse_htre_t* re)
 	re->state = 0;
 	re->flags = 0;
 
+	re->orgqpath.ptr = QSE_NULL;
+	re->orgqpath.len = 0;
+
 	QSE_MEMSET (&re->version, 0, QSE_SIZEOF(re->version));
 	QSE_MEMSET (&re->attr, 0, QSE_SIZEOF(re->attr));
 
@@ -107,18 +113,6 @@ void qse_htre_clear (qse_htre_t* re)
 #endif
 }
 
-int qse_htre_setstrfromcstr (
-	qse_htre_t* re, qse_mbs_t* str, const qse_mcstr_t* cstr)
-{
-	return (qse_mbs_ncpy (str, cstr->ptr, cstr->len) == (qse_size_t)-1)? -1: 0;
-}
-
-int qse_htre_setstrfromxstr (
-	qse_htre_t* re, qse_mbs_t* str, const qse_mcstr_t* xstr)
-{
-	return (qse_mbs_ncpy (str, xstr->ptr, xstr->len) == (qse_size_t)-1)? -1: 0;
-}
-
 const qse_htre_hdrval_t* qse_htre_getheaderval (
 	const qse_htre_t* re, const qse_mchar_t* name)
 {
@@ -180,7 +174,7 @@ int qse_htre_walktrailers (
 	qse_htb_walk (&re->trailers, walk_headers, &hwctx);
 	return hwctx.ret;
 }
-	
+
 int qse_htre_addcontent (
 	qse_htre_t* re, const qse_mchar_t* ptr, qse_size_t len)
 {
@@ -266,11 +260,55 @@ void qse_htre_setconcb (qse_htre_t* re, qse_htre_concb_t concb, void* ctx)
 
 int qse_htre_perdecqpath (qse_htre_t* re)
 {
-	/* percent decode the query path */
+	qse_size_t dec_count;
+
+	/* percent decode the query path*/
+
 	if (re->type != QSE_HTRE_Q || (re->flags & QSE_HTRE_QPATH_PERDEC)) return -1;
-	if (qse_perdechttpstr (re->u.q.path, re->u.q.path) > 0)
+
+	QSE_ASSERT (re->orgqpath.len <= 0);
+	QSE_ASSERT (re->orgqpath.ptr == QSE_NULL);
+
+	if (qse_isperencedhttpstr(re->u.q.path.ptr))
 	{
+		/* the string is percent-encoded. keep the original request
+		 * in a separately allocated buffer */
+
+		if (re->orgqpath.buf && re->u.q.path.len <= re->orgqpath.capa)
+		{
+			re->orgqpath.len = qse_mbscpy (re->orgqpath.buf, re->u.q.path.ptr);
+			re->orgqpath.ptr = re->orgqpath.buf;
+		}
+		else
+		{
+			if (re->orgqpath.buf)
+			{
+				QSE_MMGR_FREE (re->mmgr, re->orgqpath.buf);
+				re->orgqpath.capa = 0;
+			}
+
+			re->orgqpath.buf = qse_mbsxdup (re->u.q.path.ptr, re->u.q.path.len, re->mmgr);
+			if (!re->orgqpath.buf) return -1;
+			re->orgqpath.capa = re->u.q.path.len;
+
+			re->orgqpath.ptr = re->orgqpath.buf;
+			re->orgqpath.len = re->orgqpath.capa;
+
+			/* orgqpath.buf and orgqpath.ptr are the same here. the caller
+			 * is free to change orgqpath.ptr to point to a different position
+			 * in the buffer. */
+		}
+	}
+
+	re->u.q.path.len = qse_perdechttpstr (re->u.q.path.ptr, re->u.q.path.ptr, &dec_count);
+	if (dec_count > 0)
+	{
+		/* this assertion is to ensure that qse_isperencedhttpstr()
+		 * returned true when dec_count is greater than 0 */
+		QSE_ASSERT (re->orgqpath.buf != QSE_NULL);
+		QSE_ASSERT (re->orgqpath.ptr != QSE_NULL);
 		re->flags |= QSE_HTRE_QPATH_PERDEC;
 	}
+
 	return 0;
 }
diff --git a/qse/lib/http/http.c b/qse/lib/http/http.c
index 5ff260ba..c23a48d8 100644
--- a/qse/lib/http/http.c
+++ b/qse/lib/http/http.c
@@ -387,7 +387,30 @@ qse_mchar_t* qse_fmthttptime (
 	return buf;
 }
 
-qse_size_t qse_perdechttpstr (const qse_mchar_t* str, qse_mchar_t* buf)
+int qse_isperencedhttpstr (const qse_mchar_t* str)
+{
+	const qse_mchar_t* p = str;
+
+	while (*p != QSE_T('\0'))
+	{
+		if (*p == QSE_MT('%') && *(p + 1) != QSE_MT('\0') && *(p + 2) != QSE_MT('\0'))
+		{
+			int q = QSE_MXDIGITTONUM (*(p + 1));
+			if (q >= 0)
+			{
+				/* return true if the first valid percent-encoded sequence is found */
+				int w = QSE_MXDIGITTONUM (*(p + 2));
+				if (w >= 0) return 1;
+			}
+		}
+
+		p++;
+	}
+
+	return 0; /* no valid percent-encoded sequence found */
+}
+
+qse_size_t qse_perdechttpstr (const qse_mchar_t* str, qse_mchar_t* buf, qse_size_t* ndecs)
 {
 	const qse_mchar_t* p = str;
 	qse_mchar_t* out = buf;
@@ -395,16 +418,15 @@ qse_size_t qse_perdechttpstr (const qse_mchar_t* str, qse_mchar_t* buf)
 
 	while (*p != QSE_T('\0'))
 	{
-		if (*p == QSE_MT('%') && *(p+1) != QSE_MT('\0') && *(p+2) != QSE_MT('\0'))
+		if (*p == QSE_MT('%') && *(p + 1) != QSE_MT('\0') && *(p + 2) != QSE_MT('\0'))
 		{
-			int q = QSE_MXDIGITTONUM (*(p+1));
+			int q = QSE_MXDIGITTONUM (*(p + 1));
 			if (q >= 0)
 			{
-				int w = QSE_MXDIGITTONUM (*(p+2));
+				int w = QSE_MXDIGITTONUM (*(p + 2));
 				if (w >= 0)
 				{
-					/* unlike the path part, we don't care if it
-					 * contains a null character */
+					/* we don't care if it contains a null character */
 					*out++ = ((q << 4) + w);
 					p += 3;
 					dec_count++;
@@ -412,12 +434,13 @@ qse_size_t qse_perdechttpstr (const qse_mchar_t* str, qse_mchar_t* buf)
 				}
 			}
 		}
+
 		*out++ = *p++;
 	}
 
 	*out = QSE_MT('\0');
-	/*return out - buf;*/
-	return dec_count;
+	if (ndecs) *ndecs = dec_count;
+	return out - buf;
 }
 
 #define IS_UNRESERVED(c) \
@@ -429,12 +452,15 @@ qse_size_t qse_perdechttpstr (const qse_mchar_t* str, qse_mchar_t* buf)
 
 #define TO_HEX(v) (QSE_MT("0123456789ABCDEF")[(v) & 15])
 
-qse_size_t qse_perenchttpstr (int opt, const qse_mchar_t* str, qse_mchar_t* buf)
+qse_size_t qse_perenchttpstr (int opt, const qse_mchar_t* str, qse_mchar_t* buf, qse_size_t* nencs)
 {
 	const qse_mchar_t* p = str;
 	qse_mchar_t* out = buf;
 	qse_size_t enc_count = 0;
 
+	/* this function doesn't accept the size of the buffer. the caller must
+	 * ensure that the buffer is large enough */
+
 	if (opt & QSE_PERENCHTTPSTR_KEEP_SLASH)
 	{
 		while (*p != QSE_T('\0'))
@@ -466,8 +492,8 @@ qse_size_t qse_perenchttpstr (int opt, const qse_mchar_t* str, qse_mchar_t* buf)
 		}
 	}
 	*out = QSE_MT('\0');
-	/*return out - buf;*/
-	return enc_count;
+	if (nencs) *nencs = enc_count;
+	return out - buf;
 }
 
 qse_mchar_t* qse_perenchttpstrdup (int opt, const qse_mchar_t* str, qse_mmgr_t* mmgr)
@@ -500,7 +526,7 @@ qse_mchar_t* qse_perenchttpstrdup (int opt, const qse_mchar_t* str, qse_mmgr_t*
 	if (buf == QSE_NULL) return QSE_NULL;
 
 	/* perform actual escaping */
-	qse_perenchttpstr (opt, str, buf);
+	qse_perenchttpstr (opt, str, buf, QSE_NULL);
 
 	return buf;
 }
diff --git a/qse/lib/http/httpd-proxy.c b/qse/lib/http/httpd-proxy.c
index 7c3953f1..0585d4f7 100644
--- a/qse/lib/http/httpd-proxy.c
+++ b/qse/lib/http/httpd-proxy.c
@@ -1051,18 +1051,24 @@ static int task_init_proxy (
 	proxy->qpath_pos_in_reqfwdbuf = QSE_STR_LEN(proxy->reqfwdbuf);
 	if (arg->req->flags & QSE_HTRE_QPATH_PERDEC)
 	{
-		/* the query path has been percent-decoded. so encode it back */
+		/* the query path has been percent-decoded. get the original qpath*/
+
+		/*
 		qse_mchar_t* qpath, * qpath_enc;
 		qse_size_t x;
 
 		qpath = qse_htre_getqpath(arg->req);
 
 		qpath_enc = qse_perenchttpstrdup (QSE_PERENCHTTPSTR_KEEP_SLASH, qpath, httpd->mmgr);
 		if (qpath_enc == QSE_NULL) goto nomem_oops;
+
 		x = qse_mbs_cat (proxy->reqfwdbuf, qpath_enc);
 		if (qpath != qpath_enc) QSE_MMGR_FREE (httpd->mmgr, qpath_enc);
 
 		if (x == (qse_size_t)-1) goto nomem_oops;
+		*/
+
+		if (qse_mbs_cat (proxy->reqfwdbuf, qse_htre_getorgqpath(arg->req)) == (qse_size_t)-1) goto nomem_oops;
 	}
 	else
 	{