added http request parsing functions

This commit is contained in:
hyung-hwan 2010-10-26 07:04:11 +00:00
parent 299852beaa
commit f23fd6a4f6
2 changed files with 405 additions and 69 deletions

View File

@ -9,20 +9,22 @@
#include <qse/macros.h> #include <qse/macros.h>
typedef struct qse_http_buf_t qse_http_buf_t; typedef struct qse_http_octb_t qse_http_octb_t;
struct qse_http_buf_t struct qse_http_octb_t
{ {
qse_size_t capa; qse_size_t capa;
qse_size_t size; qse_size_t size;
qse_char_t* data; qse_byte_t* data;
}; };
enum qse_http_errnum_t enum qse_http_errnum_t
{ {
QSE_HTTP_ENOERR, QSE_HTTP_ENOERR,
QSE_HTTP_ENOMEM QSE_HTTP_ENOMEM,
QSE_HTTP_EBADREQ,
QSE_HTTP_EBADHDR
}; };
typedef enum qse_http_errnum_t qse_http_errnum_t; typedef enum qse_http_errnum_t qse_http_errnum_t;
@ -50,10 +52,32 @@ struct qse_http_t
struct struct
{ {
qse_http_buf_t buf; //qse_size_t pending;
int no;
int crlf; /* crlf status */
qse_size_t plen; /* raw request length excluding crlf */
} state; } state;
struct
{
qse_http_octb_t raw;
enum
{
QSE_HTTP_REQ_GET,
QSE_HTTP_REQ_HEAD,
QSE_HTTP_REQ_POST
} method;
const qse_byte_t* path;
const qse_byte_t* args;
struct
{
short major;
short minor;
} version;
} req;
}; };
@ -111,7 +135,7 @@ qse_http_t* qse_http_open (
* The qse_http_close() function destroys a http processor. * The qse_http_close() function destroys a http processor.
*/ */
void qse_http_close ( void qse_http_close (
qse_http_t* http qse_http_t* http
); );
qse_http_t* qse_http_init ( qse_http_t* qse_http_init (

View File

@ -24,14 +24,14 @@
QSE_IMPLEMENT_COMMON_FUNCTIONS (http) QSE_IMPLEMENT_COMMON_FUNCTIONS (http)
static int is_http_space (qse_char_t c) static QSE_INLINE int is_http_space (qse_char_t c)
{ {
return QSE_ISSPACE(c) && c != QSE_T('\r') && c != QSE_T('\n'); return QSE_ISSPACE(c) && c != QSE_T('\r') && c != QSE_T('\n');
} }
#define is_http_ctl(c) QSE_ISCNTRL(c) #define is_http_ctl(c) QSE_ISCNTRL(c)
static int is_http_separator (qse_char_t c) static QSE_INLINE int is_http_separator (qse_char_t c)
{ {
return c == QSE_T('(') || return c == QSE_T('(') ||
c == QSE_T(')') || c == QSE_T(')') ||
@ -54,12 +54,12 @@ static int is_http_separator (qse_char_t c)
c == QSE_T(' '); c == QSE_T(' ');
} }
static int is_http_token (qse_char_t c) static QSE_INLINE int is_http_token (qse_char_t c)
{ {
return QSE_ISPRINT(c) && !is_http_ctl(c) && !is_http_separator(c); return QSE_ISPRINT(c) && !is_http_ctl(c) && !is_http_separator(c);
} }
static int digit_to_num (qse_char_t c) static QSE_INLINE int dig_to_num (qse_char_t c)
{ {
if (c >= QSE_T('0') && c <= QSE_T('9')) return c - QSE_T('0'); if (c >= QSE_T('0') && c <= QSE_T('9')) return c - QSE_T('0');
if (c >= QSE_T('A') && c <= QSE_T('Z')) return c - QSE_T('A') + 10; if (c >= QSE_T('A') && c <= QSE_T('Z')) return c - QSE_T('A') + 10;
@ -67,9 +67,9 @@ static int digit_to_num (qse_char_t c)
return -1; return -1;
} }
qse_char_t* qse_parsehttpreq (qse_char_t* buf, qse_http_req_t* req) qse_char_t* qse_parsehttpreq (qse_char_t* octb, qse_http_req_t* req)
{ {
qse_char_t* p = buf, * x; qse_char_t* p = octb, * x;
/* ignore leading spaces */ /* ignore leading spaces */
while (is_http_space(*p)) p++; while (is_http_space(*p)) p++;
@ -98,7 +98,7 @@ qse_char_t* qse_parsehttpreq (qse_char_t* buf, qse_http_req_t* req)
{ {
if (*p == QSE_T('%') && QSE_ISXDIGIT(*(p+1)) && QSE_ISXDIGIT(*(p+2))) if (*p == QSE_T('%') && QSE_ISXDIGIT(*(p+1)) && QSE_ISXDIGIT(*(p+2)))
{ {
*x++ = (digit_to_num(*(p+1)) << 4) + digit_to_num(*(p+2)); *x++ = (dig_to_num(*(p+1)) << 4) + dig_to_num(*(p+2));
p += 3; p += 3;
} }
else if (*p == QSE_T('?') && req->args.ptr == QSE_NULL) else if (*p == QSE_T('?') && req->args.ptr == QSE_NULL)
@ -158,9 +158,9 @@ ok:
return p; return p;
} }
qse_char_t* qse_parsehttphdr (qse_char_t* buf, qse_http_hdr_t* hdr) qse_char_t* qse_parsehttphdr (qse_char_t* octb, qse_http_hdr_t* hdr)
{ {
qse_char_t* p = buf, * last; qse_char_t* p = octb, * last;
/* ignore leading spaces including CR and NL */ /* ignore leading spaces including CR and NL */
while (QSE_ISSPACE(*p)) p++; while (QSE_ISSPACE(*p)) p++;
@ -208,47 +208,99 @@ ok:
return p; return p;
} }
static QSE_INLINE void init_buffer (qse_http_t* http, qse_http_buf_t* buf) static QSE_INLINE int is_whspace_octet (qse_byte_t c)
{ {
buf->size = 0; return c == ' ' || c == '\t' || c == '\r' || c == '\n';
buf->capa = 0;
buf->data = QSE_NULL;
} }
static QSE_INLINE void fini_buffer (qse_http_t* http, qse_http_buf_t* buf) static QSE_INLINE int is_space_octet (qse_byte_t c)
{ {
if (buf->data) return c == ' ' || c == '\t' || c == '\r';
}
/* Tells whether the octet lies in the range 'A'..'Z'.
 * Returns 1 when it does, 0 otherwise. */
static QSE_INLINE int is_upalpha_octet (qse_byte_t c)
{
	return !(c < 'A' || c > 'Z');
}
/* Tells whether the octet lies in the range 'a'..'z'.
 * Returns 1 when it does, 0 otherwise. */
static QSE_INLINE int is_loalpha_octet (qse_byte_t c)
{
	return !(c < 'a' || c > 'z');
}
/* Tells whether the octet is a letter, i.e. lies in 'A'..'Z' or 'a'..'z'.
 * Returns 1 when it does, 0 otherwise. */
static QSE_INLINE int is_alpha_octet (qse_byte_t c)
{
	if (c >= 'A' && c <= 'Z') return 1;
	return c >= 'a' && c <= 'z';
}
/* Tells whether the octet is a decimal digit ('0'..'9').
 * Returns 1 when it is, 0 otherwise. */
static QSE_INLINE int is_digit_octet (qse_byte_t c)
{
	return !(c < '0' || c > '9');
}
/* Tells whether the octet is a hexadecimal digit:
 * '0'..'9', 'A'..'F' or 'a'..'f'.
 * Returns 1 when it is, 0 otherwise. */
static QSE_INLINE int is_xdigit_octet (qse_byte_t c)
{
	if (c >= '0' && c <= '9') return 1;
	if (c >= 'A' && c <= 'F') return 1;
	return c >= 'a' && c <= 'f';
}
/* Converts a decimal digit octet to its numeric value.
 * Returns 0..9 for '0'..'9', or -1 when the octet is not a decimal digit. */
static QSE_INLINE int digit_to_num (qse_byte_t c)
{
	return (c < '0' || c > '9')? -1: (c - '0');
}
/* Converts a hexadecimal digit octet to its numeric value.
 *
 * Returns 0..15 for '0'..'9', 'A'..'F' and 'a'..'f', or -1 when the octet
 * is not a hexadecimal digit. Letters beyond 'F'/'f' must be rejected:
 * callers rely on the -1 result to decide whether a "%XY" sequence is a
 * valid percent escape, and a value above 15 would corrupt the decoded
 * octet. */
static QSE_INLINE int xdigit_to_num (qse_byte_t c)
{
	if (c >= '0' && c <= '9') return c - '0';
	if (c >= 'A' && c <= 'F') return c - 'A' + 10;
	if (c >= 'a' && c <= 'f') return c - 'a' + 10;
	return -1;
}
/* Puts an octet buffer into the empty state: no storage attached,
 * zero capacity and zero length. The http argument is not consulted. */
static QSE_INLINE void init_buffer (qse_http_t* http, qse_http_octb_t* octb)
{
	octb->data = QSE_NULL;
	octb->capa = 0;
	octb->size = 0;
}
static QSE_INLINE void fini_buffer (qse_http_t* http, qse_http_octb_t* octb)
{
if (octb->data)
{ {
QSE_MMGR_FREE (http->mmgr, buf->data); QSE_MMGR_FREE (http->mmgr, octb->data);
buf->capa = 0; octb->capa = 0;
buf->size = 0; octb->size = 0;
buf->data = QSE_NULL; octb->data = QSE_NULL;
} }
} }
static QSE_INLINE_ALWAYS void clear_buffer (qse_http_t* http, qse_http_buf_t* buf) static QSE_INLINE_ALWAYS void clear_buffer (qse_http_t* http, qse_http_octb_t* octb)
{ {
buf->size = 0; octb->size = 0;
} }
static QSE_INLINE int push_to_buffer ( static QSE_INLINE int push_to_buffer (
qse_http_t* http, qse_http_buf_t* buf, qse_http_t* http, qse_http_octb_t* octb,
const qse_char_t* ptr, qse_size_t len) const qse_byte_t* ptr, qse_size_t len)
{ {
qse_size_t nsize = (buf)->size + len; qse_size_t nsize = (octb)->size + len;
const qse_char_t* end = ptr + len; const qse_byte_t* end = ptr + len;
if (nsize > (buf)->capa) if (nsize > (octb)->capa)
{ {
qse_size_t ncapa = (nsize > (buf)->capa * 2)? nsize: ((buf)->capa * 2); qse_size_t ncapa = (nsize > (octb)->capa * 2)? nsize: ((octb)->capa * 2);
do do
{ {
void* tmp = QSE_MMGR_REALLOC ((http)->mmgr, (buf)->data, ncapa * QSE_SIZEOF(*ptr)); void* tmp = QSE_MMGR_REALLOC ((http)->mmgr, (octb)->data, ncapa * QSE_SIZEOF(*ptr));
if (tmp) if (tmp)
{ {
(buf)->capa = ncapa; (octb)->capa = ncapa;
(buf)->data = tmp; (octb)->data = tmp;
break; break;
} }
@ -264,7 +316,7 @@ static QSE_INLINE int push_to_buffer (
while (1); while (1);
} }
while (ptr < end) (buf)->data[(buf)->size++] = *ptr++; while (ptr < end) (octb)->data[(octb)->size++] = *ptr++;
return 0; return 0;
} }
@ -314,58 +366,318 @@ qse_http_t* qse_http_init (qse_http_t* http, qse_mmgr_t* mmgr)
QSE_MEMSET (http, 0, QSE_SIZEOF(*http)); QSE_MEMSET (http, 0, QSE_SIZEOF(*http));
http->mmgr = mmgr; http->mmgr = mmgr;
init_buffer (http, &http->state.buf); /*http->state.pending = 0;*/
http->state.no = QSE_HTTP_STATE_REQ; http->state.crlf = 0;
http->state.plen = 0;
init_buffer (http, &http->req.raw);
return http; return http;
} }
void qse_http_fini (qse_http_t* http) void qse_http_fini (qse_http_t* http)
{ {
fini_buffer (http, &http->state.buf); fini_buffer (http, &http->req.raw);
} }
/* feed the percent encoded string */ static qse_byte_t* parse_http_req (qse_http_t* http, qse_byte_t* line)
int qse_http_feed (qse_http_t* http, const qse_char_t* ptr, qse_size_t len)
{ {
const qse_char_t* end = ptr + len; qse_byte_t* p = line;
const qse_char_t* blk = ptr; qse_byte_t* tmp;
qse_size_t tmplen;
#if 0
/* ignore leading spaces excluding crlf */
while (is_space_octet(*p)) p++;
#endif
/* the method should start with an alphabet */
if (!is_upalpha_octet(*p)) goto badreq;
/* get the method name */
tmp = p;
do { p++; } while (is_upalpha_octet(*p));
tmplen = p - tmp;
/* test the method name */
if (tmplen == 3)
{
/* GET */
if (tmp[0] == 'G' && tmp[1] == 'E' && tmp[2] == 'T')
http->req.method = QSE_HTTP_REQ_GET;
else goto badreq;
}
else if (tmplen == 4)
{
/* POST, HEAD */
if (tmp[0] == 'P' && tmp[1] == 'O' && tmp[2] == 'S' && tmp[3] == 'T')
http->req.method = QSE_HTTP_REQ_POST;
else if (tmp[0] == 'H' && tmp[1] == 'E' && tmp[2] == 'A' && tmp[3] == 'D')
http->req.method = QSE_HTTP_REQ_HEAD;
/* TODO: more methods */
else goto badreq;
}
else goto badreq;
/* skip spaces */
while (is_space_octet(*p)) p++;
/* process the url part */
http->req.path = p;
http->req.args = QSE_NULL;
tmp = p;
while (!is_space_octet(*p))
{
if (*p == '%')
{
int q = xdigit_to_num(*(p+1));
int w = xdigit_to_num(*(p+2));
if (q >= 0 && w >= 0)
{
int t = (q << 4) + w;
if (t == 0)
{
/* percent enconding contains a null character */
goto badreq;
}
*tmp++ = t;
p += 3;
}
else *tmp++ = *p++;
}
else if (*p == '?')
{
if (!http->req.args)
{
/* ? must be explicit to be a argument instroducer.
* %3f is just a literal. */
*tmp++ = '\0';
http->req.args = tmp;
p++;
}
else *tmp++ = *p++;
}
else *tmp++ = *p++;
}
/* the url must be followed by a space */
if (!is_space_octet(*p)) goto badreq;
/* null-terminate the url part */
*tmp = '\0';
/* skip spaces after the url part */
do { p++; } while (is_space_octet(*p));
/* check http version */
if ((p[0] == 'H' || p[0] == 'h') &&
(p[1] == 'T' || p[1] == 't') &&
(p[2] == 'T' || p[2] == 't') &&
(p[3] == 'P' || p[3] == 'p') &&
p[4] == '/' && p[6] == '.')
{
int q = digit_to_num(p[5]);
int w = digit_to_num(p[7]);
if (q >= 0 && w >= 0)
{
http->req.version.major = q;
http->req.version.minor = w;
p += 8;
}
else goto badreq;
}
else goto badreq;
/* skip trailing spaces on the line */
while (is_space_octet(*p)) p++;
/* if the line does not end with a new line, it is a bad request */
if (*p != QSE_T('\n')) goto badreq;
qse_printf (QSE_T("parse_http_req ....\n"));
return ++p;
badreq:
http->errnum = QSE_HTTP_EBADREQ;
return QSE_NULL;
}
/* Parses one "name: value" header line in place.
 *
 * http - parser object; errnum is set to QSE_HTTP_EBADHDR on failure.
 * line - start of a null-terminated, writable buffer positioned at the
 *        first octet of the field name.
 *
 * The field name ends at the first whitespace octet or colon, and must be
 * non-empty. The value is everything after the colon up to the end of the
 * line, with surrounding spaces, tabs and carriage returns removed; it may
 * be empty and may contain interior spaces. Both the name and the value
 * are null-terminated inside the buffer. Continuation (folded) lines are
 * not handled.
 *
 * Returns a pointer to the octet following the terminating new line, or
 * QSE_NULL if the line is not a well-formed header. */
qse_byte_t* parse_http_header (qse_http_t* http, qse_byte_t* line)
{
	qse_byte_t* p = line, * mark;
	struct
	{
		qse_byte_t* ptr;
		qse_size_t  len;
	} name, value;

	/* collect the field name. stop at the terminating null as well so
	 * that a malformed line can never drive the scan past the buffer */
	name.ptr = p;
	while (*p != '\0' && !is_whspace_octet(*p) && *p != ':') p++;
	name.len = p - name.ptr;
	if (name.len <= 0) goto badhdr; /* a header needs a name */

	/* spaces are tolerated between the name and the colon */
	mark = p; while (is_space_octet(*p)) p++;
	if (*p != ':') goto badhdr;
	*mark = '\0';

	/* skip the colon and spaces after it */
	do { p++; } while (is_space_octet(*p));

	/* the value runs to the end of the line. it may be empty, in which
	 * case p already sits on the new line and must not be advanced */
	value.ptr = p;
	while (*p != '\0' && *p != '\n') p++;
	if (*p != '\n') goto badhdr; /* not ending with a new line */

	/* strip trailing spaces (including a carriage return) off the value */
	mark = p;
	while (mark > value.ptr && is_space_octet(*(mark - 1))) mark--;
	value.len = mark - value.ptr;
	*mark = '\0';

qse_printf (QSE_T("<<%S>> => <<%S>>\n"), name.ptr, value.ptr);

	/* step over the new line. when the value has no trailing space,
	 * the new line itself has just been overwritten with a null, so
	 * returning p would make the caller believe the request ended here */
	return p + 1;

badhdr:
qse_printf (QSE_T("BADHDR\n"));
	http->errnum = QSE_HTTP_EBADHDR;
	return QSE_NULL;
}
/* feed the percent encoded string */
int qse_http_feed (qse_http_t* http, const qse_byte_t* ptr, qse_size_t len)
{
const qse_byte_t* end = ptr + len;
const qse_byte_t* req = ptr;
static const qse_byte_t nul = '\0';
while (ptr < end)
{
qse_byte_t b = *ptr++;
if (http->state.plen <= 0 && is_whspace_octet(b))
{
/* let's drop leading whitespaces across multiple
* lines */
req++;
continue;
}
if (b == '\n')
{
if (http->state.crlf <= 1) http->state.crlf = 2;
else
{
qse_byte_t* p;
QSE_ASSERT (http->state.crlf <= 3);
/* got a complete request */
http->state.crlf = 0;
http->state.plen = 0;
/* add the actual request */
if (push_to_buffer (http, &http->req.raw, req, ptr-req) <= -1) return -1;
/* add the terminating null for easier parsing */
if (push_to_buffer (http, &http->req.raw, &nul, 1) <= -1) return -1;
p = http->req.raw.data;
while (is_whspace_octet(*p)) p++;
QSE_ASSERT (*p != '\0');
p = parse_http_req (http, p);
if (p == QSE_NULL) return -1;
do
{
while (is_whspace_octet(*p)) p++;
if (*p == '\0') break;
p = parse_http_header (http, p);
if (p == QSE_NULL) return -1;
}
while (1);
clear_buffer (http, &http->req.raw);
req = ptr; /* let ptr point to the next character to '\n' */
}
}
else if (b == '\r')
{
if (http->state.crlf == 0 || http->state.crlf == 2)
http->state.crlf++;
else http->state.crlf = 1;
}
else if (b == '\0')
{
/* guarantee that the request does not contain a null character */
http->errnum = QSE_HTTP_EBADREQ;
return -1;
}
else
{
http->state.plen++;
http->state.crlf = 0;
}
}
if (ptr > req)
{
/* enbuffer the incomplete request */
if (push_to_buffer (http, &http->req.raw, req, ptr - req) <= -1) return -1;
}
#if 0
while (ptr < end) while (ptr < end)
{ {
if (*ptr++ == '\n') if (*ptr++ == '\n')
{ {
if (push_to_buffer (http, &http->state.buf, blk, ptr - blk) <= -1) return -1; qse_size_t reqlen = ptr - req;
int blank;
blk = ptr; /* let ptr point to the next character to '\n' */ if (http->state.pending > 0)
if (http->state.no == QSE_HTTP_STATE_REQ)
{ {
/* QSE_ASSERT (http->req.raw.size > 0);
if (parse_http_req (http, &http->state.buf) <= -1) blank = (reqlen + http->state.pending == 2 &&
{ http->req.raw.data[http->req.raw.size-1] == '\r');
return -1; http->state.pending = 0;
}
*/
} }
else else
{ {
/* blank = (reqlen == 1 || (reqlen == 2 && req[0] == '\r'));
if (parse_http_hdr (http, &http->state.buf) <= -1)
{
return -1;
}
*/
} }
qse_printf (QSE_T("[%.*s]\n"), (int)http->state.buf.size, http->state.buf.data); if (push_to_buffer (
clear_buffer (http, &http->state.buf); http, &http->req.raw, req, reqlen) <= -1) return -1;
if (blank)
{
/* blank line - we got a complete request.
* we didn't process the optinal message body yet, though */
/* DO SOMETHIGN ... */
qse_printf (QSE_T("[[[%.*S]]]]\n"), (int)http->req.raw.size, http->req.raw.data),
clear_buffer (http, &http->req.raw);
}
req = ptr; /* let ptr point to the next character to '\n' */
} }
} }
if (ptr > req)
/* enbuffer the unfinished data */ {
if (push_to_buffer (http, &http->state.buf, blk, ptr - blk) <= -1) return -1; /* enbuffer the unfinished data */
qse_printf (QSE_T("UNFINISHED [%.*s]\n"), (int)http->state.buf.size, http->state.buf.data); if (push_to_buffer (http, &http->req.raw, req, ptr - req) <= -1) return -1;
http->state.pending = ptr - req;
}
#endif
return 0; return 0;
} }