123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688 |
- /*
- * Copyright (c) 2009-2014 Kazuho Oku, Tokuhiro Matsuno, Daisuke Murase,
- * Shigeo Mitsunari
- *
- * The software is licensed under either the MIT License (below) or the Perl
- * license.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
- #include <assert.h> // for assert
- #include <stddef.h> // for NULL, size_t
- #include <string.h> // for memmove
- #include <sys/types.h> // for ssize_t
- #ifdef __SSE4_2__
- #ifdef _MSC_VER
- #include <nmmintrin.h>
- #else
- #include <x86intrin.h>
- #endif
- #endif
- #include "picohttpparser.h" // for phr_chunked_decoder, phr_header, phr_dec...
/* Branch-prediction hints.  GCC-compatible compilers (major version 3 or
 * later) get __builtin_expect; everything else gets a plain pass-through. */
#if defined(__GNUC__) && __GNUC__ >= 3
#define likely(x) __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)
#else
#define likely(x) (x)
#define unlikely(x) (x)
#endif

/* ALIGNED(n): alignment request for the declaration it decorates, spelled the
 * MSVC way under _MSC_VER and the GNU way otherwise. */
#ifndef _MSC_VER
#define ALIGNED(n) __attribute__((aligned(n)))
#else
#define ALIGNED(n) _declspec(align(n))
#endif

/* Non-zero for bytes 0x20 (SP) through 0x7e ('~').  The unsigned subtraction
 * wraps for bytes below 0x20, so a single comparison covers both bounds. */
#define IS_PRINTABLE_ASCII(c) ((unsigned char)(c) - 0x20u < 0x5fu)
/*
 * Early-exit helpers for the parsing routines.
 *
 * Each macro expects the expanding function to have these names in scope:
 *   buf      - read cursor (const char *)
 *   buf_end  - one past the last readable byte
 *   ret      - int * that receives the status code on failure
 * and to return a pointer, because the failure paths execute `return NULL`.
 *
 * Every macro is wrapped in do { ... } while (0) so that it expands to exactly
 * one statement and stays correct inside an unbraced if/else.  All uses must
 * therefore be followed by a semicolon.
 */

/* Fail with *ret = -2 when the cursor has reached buf_end. */
#define CHECK_EOF()                                                            \
    do {                                                                       \
        if (buf == buf_end) {                                                  \
            *ret = -2;                                                         \
            return NULL;                                                       \
        }                                                                      \
    } while (0)

/* Consume one byte and fail with *ret = -1 unless it equals `ch`.  Performs no
 * bounds check: the caller must already know that a byte is available. */
#define EXPECT_CHAR_NO_CHECK(ch)                                               \
    do {                                                                       \
        if (*buf++ != (ch)) {                                                  \
            *ret = -1;                                                         \
            return NULL;                                                       \
        }                                                                      \
    } while (0)

/* Bounds-checked variant: CHECK_EOF() followed by EXPECT_CHAR_NO_CHECK(ch). */
#define EXPECT_CHAR(ch)                                                        \
    do {                                                                       \
        CHECK_EOF();                                                           \
        EXPECT_CHAR_NO_CHECK(ch);                                              \
    } while (0)
/*
 * ADVANCE_TOKEN(tok, toklen): move `buf` forward to the next SP and store the
 * start and length of the bytes skipped into `tok` / `toklen`.
 *
 * Uses `buf`, `buf_end` and `ret` from the expanding function.  A control
 * byte (below 0x20) or DEL (0x7f) before the SP fails with *ret = -1; running
 * out of input fails with *ret = -2.  Bytes with the high bit set are skipped
 * like ordinary token bytes.
 */
#define ADVANCE_TOKEN(tok, toklen)                                             \
    do {                                                                       \
        /* byte ranges handed to findchar_fast: 0x00-0x20 and 0x7f-0x7f */    \
        static const char ALIGNED(16) stop_ranges_[16] = "\000\040\177\177";   \
        const char *token_begin_ = buf;                                        \
        int hit_;                                                              \
        buf = findchar_fast(buf, buf_end, stop_ranges_, 4, &hit_);             \
        if (!hit_) {                                                           \
            CHECK_EOF();                                                       \
        }                                                                      \
        while (*buf != ' ') {                                                  \
            if (unlikely(!IS_PRINTABLE_ASCII(*buf)) &&                         \
                ((unsigned char)*buf < '\040' || *buf == '\177')) {            \
                *ret = -1;                                                     \
                return NULL;                                                   \
            }                                                                  \
            ++buf;                                                             \
            CHECK_EOF();                                                       \
        }                                                                      \
        tok = token_begin_;                                                    \
        toklen = buf - token_begin_;                                           \
    } while (0)
/*
 * Lookup table indexed by byte value: non-zero means the byte may appear in a
 * token (see parse_token).  Each string below covers 32 consecutive byte
 * values.  The entries set to 1 are the ASCII letters and digits plus
 *   ! # $ % & ' * + - . ^ _ ` | ~
 * Every control character, SP, DEL, the remaining punctuation and all bytes
 * from 0x80 up are 0.
 */
static const char* token_char_map =
    "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
    "\0\1\0\1\1\1\1\1\0\0\1\1\0\1\1\0\1\1\1\1\1\1\1\1\1\1\0\0\0\0\0\0"
    "\0\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\0\0\1\1"
    "\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\1\0\1\0"
    "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
    "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
    "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
    "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0";
/*
 * Skip forward over bytes that do NOT fall into any of the given byte ranges.
 *
 * buf, buf_end  input window [buf, buf_end)
 * ranges        pairs of (low, high) bytes; the SSE4.2 path loads 16 bytes
 *               from this pointer, so 16 bytes must be readable
 * ranges_size   number of bytes of `ranges` that are in use
 * found         set to 1 when a byte inside a range was located, else 0
 *
 * Returns the position of the located byte when *found is 1.  When *found is
 * 0 the return value is merely where scanning stopped: only whole 16-byte
 * groups are examined, so up to 15 trailing bytes are left for the caller,
 * and in builds without SSE4.2 nothing is scanned and `buf` comes back
 * unchanged.
 */
static const char *findchar_fast(const char *buf, const char *buf_end,
                                 const char *ranges, size_t ranges_size,
                                 int *found)
{
    *found = 0;
    /* #ifdef (not #if) to match the feature test guarding the intrinsics
     * include and the one in get_token_to_eol */
#ifdef __SSE4_2__
    if (likely(buf_end - buf >= 16)) {
        __m128i ranges16 = _mm_loadu_si128((const __m128i *)ranges);
        /* number of bytes covered by whole 16-byte groups */
        size_t left = (buf_end - buf) & ~15;
        do {
            __m128i b16 = _mm_loadu_si128((const __m128i *)buf);
            int r = _mm_cmpestri(
                ranges16, ranges_size, b16, 16,
                _SIDD_LEAST_SIGNIFICANT | _SIDD_CMP_RANGES | _SIDD_UBYTE_OPS);
            /* r is the index of the first matching byte, or 16 for none */
            if (unlikely(r != 16)) {
                buf += r;
                *found = 1;
                break;
            }
            buf += 16;
            left -= 16;
        } while (likely(left != 0));
    }
#else
    /* suppress unused parameter warning */
    (void)buf_end;
    (void)ranges;
    (void)ranges_size;
#endif
    return buf;
}
/*
 * Consume the remainder of the current line.
 *
 * Scans from `buf` for the first control character other than HT, or DEL;
 * printable ASCII, HT and bytes with the high bit set are skipped.  The
 * terminator must be LF or CR LF.  On success *token is the scan start,
 * *token_len the number of bytes before the terminator, and the return value
 * points just past the LF.
 *
 * On failure NULL is returned with *ret = -2 when the input ran out, or
 * *ret = -1 when the stopping byte is neither CR nor LF or a CR is not
 * followed by LF.
 */
static const char* get_token_to_eol(const char* buf, const char* buf_end,
                                    const char** token, size_t* token_len,
                                    int* ret) {
    const char* token_start = buf;
#ifdef __SSE4_2__
    /* byte ranges that stop the vectorised scan: 0x00-0x08, 0x0a-0x1f, 0x7f */
    static const char ALIGNED(16) ranges1[16] =
        "\0\010"    /* allow HT */
        "\012\037"  /* allow SP and up to but not including DEL */
        "\177\177"; /* allow chars w. MSB set */
    int found;
    buf = findchar_fast(buf, buf_end, ranges1, 6, &found);
    if (found)
        goto FOUND_CTL;
#else
    /* find non-printable char within the next 8 bytes, this is the hottest code; manually inlined */
    while (likely(buf_end - buf >= 8)) {
#define DOIT() \
    do { \
        if (unlikely(!IS_PRINTABLE_ASCII(*buf))) \
            goto NonPrintable; \
        ++buf; \
    } while (0)
        DOIT();
        DOIT();
        DOIT();
        DOIT();
        DOIT();
        DOIT();
        DOIT();
        DOIT();
#undef DOIT
        continue;
    NonPrintable:
        /* a control char other than HT, or DEL, ends the scan; anything else
         * non-printable (HT, high-bit bytes) is skipped */
        if ((likely((unsigned char)*buf < '\040') && likely(*buf != '\011')) ||
            unlikely(*buf == '\177')) {
            goto FOUND_CTL;
        }
        ++buf;
    }
#endif
    /* byte-at-a-time scan of whatever the fast path above left over */
    for (;; ++buf) {
        CHECK_EOF();
        if (unlikely(!IS_PRINTABLE_ASCII(*buf))) {
            if ((likely((unsigned char)*buf < '\040') && likely(*buf != '\011')) ||
                unlikely(*buf == '\177')) {
                goto FOUND_CTL;
            }
        }
    }
FOUND_CTL:
    if (likely(*buf == '\015')) {
        ++buf;
        EXPECT_CHAR('\012');
        /* buf is now past CR LF, so back off two bytes for the length */
        *token_len = buf - 2 - token_start;
    } else if (*buf == '\012') {
        *token_len = buf - token_start;
        ++buf;
    } else {
        *ret = -1;
        return NULL;
    }
    *token = token_start;
    return buf;
}
/*
 * Check whether [buf, buf_end) already contains two consecutive line
 * terminators (each LF or CR LF).
 *
 * When `last_len` is 3 or more the scan starts at buf + last_len - 3 instead
 * of at buf.  Returns the position just past the second terminator.  Returns
 * NULL with *ret = -2 when the input ends first, or with *ret = -1 when a CR
 * is not followed by LF.
 */
static const char *is_complete(const char *buf, const char *buf_end,
                               size_t last_len, int *ret)
{
    int eol_run = 0; /* consecutive line terminators seen so far */

    if (last_len >= 3) {
        buf += last_len - 3;
    }

    while (eol_run != 2) {
        CHECK_EOF();
        if (*buf == '\012') {
            ++buf;
            ++eol_run;
        } else if (*buf == '\015') {
            ++buf;
            EXPECT_CHAR('\012');
            ++eol_run;
        } else {
            ++buf;
            eol_run = 0;
        }
    }
    return buf;
}
/*
 * PARSE_INT(valp_, mul_): read one decimal digit at `buf`, store
 * digit * mul_ through valp_ and advance `buf` by one byte.  A non-digit
 * fails with *ret = -1 and `return NULL`.
 *
 * No bounds check is made: the caller must have verified that the byte is
 * readable.  Uses `buf` and `ret` from the expanding function.  Wrapped in
 * do { ... } while (0) so it is a single statement; follow it with a semicolon.
 */
#define PARSE_INT(valp_, mul_)                                                 \
    do {                                                                       \
        if (*buf < '0' || '9' < *buf) {                                        \
            *ret = -1;                                                         \
            return NULL;                                                       \
        }                                                                      \
        *(valp_) = (mul_) * (*buf++ - '0');                                    \
    } while (0)

/*
 * PARSE_INT_3(valp_): read exactly three decimal digits at `buf` and store
 * their value (0..999) through valp_.  Same preconditions and failure
 * behaviour as PARSE_INT, applied to three bytes.
 */
#define PARSE_INT_3(valp_)                                                     \
    do {                                                                       \
        int res_ = 0;                                                          \
        PARSE_INT(&res_, 100);                                                 \
        *(valp_) = res_;                                                       \
        PARSE_INT(&res_, 10);                                                  \
        *(valp_) += res_;                                                      \
        PARSE_INT(&res_, 1);                                                   \
        *(valp_) += res_;                                                      \
    } while (0)
- /* returned pointer is always within [buf, buf_end), or null */
- static const char* parse_token(const char* buf, const char* buf_end,
- const char** token, size_t* token_len,
- char next_char, int* ret) {
- /* We use pcmpestri to detect non-token characters. This instruction can take no more than eight character ranges (8*2*8=128
- * bits that is the size of a SSE register). Due to this restriction, characters `|` and `~` are handled in the slow loop. */
- static const char ALIGNED(16) ranges[] =
- "\x00 " /* control chars and up to SP */
- "\"\"" /* 0x22 */
- "()" /* 0x28,0x29 */
- ",," /* 0x2c */
- "//" /* 0x2f */
- ":@" /* 0x3a-0x40 */
- "[]" /* 0x5b-0x5d */
- "{\xff"; /* 0x7b-0xff */
- const char* buf_start = buf;
- int found;
- buf = findchar_fast(buf, buf_end, ranges, sizeof(ranges) - 1, &found);
- if (!found) {
- CHECK_EOF();
- }
- while (1) {
- if (*buf == next_char) {
- break;
- } else if (!token_char_map[(unsigned char)*buf]) {
- *ret = -1;
- return NULL;
- }
- ++buf;
- CHECK_EOF();
- }
- *token = buf_start;
- *token_len = buf - buf_start;
- return buf;
- }
/*
 * Parse "HTTP/1.<digit>" and store the digit in *minor_version.
 *
 * At least nine bytes must be available (the eight-byte version string plus
 * one following byte), otherwise the call fails with *ret = -2.  Any mismatch
 * fails with *ret = -1.  On success the return value points at the byte right
 * after the digit.
 *
 * returned pointer is always within [buf, buf_end), or null
 */
static const char *parse_http_version(const char *buf, const char *buf_end,
                                      int *minor_version, int *ret)
{
    static const char version_prefix[] = "HTTP/1.";
    const size_t prefix_len = sizeof(version_prefix) - 1;

    /* we want at least [HTTP/1.<two chars>] to try to parse */
    if (buf_end - buf < 9) {
        *ret = -2;
        return NULL;
    }
    if (memcmp(buf, version_prefix, prefix_len) != 0) {
        *ret = -1;
        return NULL;
    }
    buf += prefix_len;

    /* single decimal digit for the minor version */
    if (*buf < '0' || '9' < *buf) {
        *ret = -1;
        return NULL;
    }
    *minor_version = *buf - '0';
    return buf + 1;
}
- static const char* parse_headers(const char* buf, const char* buf_end,
- struct phr_header* headers,
- size_t* num_headers, size_t max_headers,
- int* ret) {
- for (;; ++*num_headers) {
- CHECK_EOF();
- if (*buf == '\015') {
- ++buf;
- EXPECT_CHAR('\012');
- break;
- } else if (*buf == '\012') {
- ++buf;
- break;
- }
- if (*num_headers == max_headers) {
- *ret = -1;
- return NULL;
- }
- if (!(*num_headers != 0 && (*buf == ' ' || *buf == '\t'))) {
- /* parsing name, but do not discard SP before colon, see
- * http://www.mozilla.org/security/announce/2006/mfsa2006-33.html */
- if ((buf = parse_token(buf, buf_end, &headers[*num_headers].name,
- &headers[*num_headers].name_len, ':', ret)) ==
- NULL) {
- return NULL;
- }
- if (headers[*num_headers].name_len == 0) {
- *ret = -1;
- return NULL;
- }
- ++buf;
- for (;; ++buf) {
- CHECK_EOF();
- if (!(*buf == ' ' || *buf == '\t')) {
- break;
- }
- }
- } else {
- headers[*num_headers].name = NULL;
- headers[*num_headers].name_len = 0;
- }
- const char* value;
- size_t value_len;
- if ((buf = get_token_to_eol(buf, buf_end, &value, &value_len, ret)) ==
- NULL) {
- return NULL;
- }
- /* remove trailing SPs and HTABs */
- const char* value_end = value + value_len;
- for (; value_end != value; --value_end) {
- const char c = *(value_end - 1);
- if (!(c == ' ' || c == '\t')) {
- break;
- }
- }
- headers[*num_headers].value = value;
- headers[*num_headers].value_len = value_end - value;
- }
- return buf;
- }
/*
 * Parse a request line followed by the header block.
 *
 * Accepts one optional leading empty line, then METHOD, one or more SP, the
 * request target, one or more SP, "HTTP/1.<digit>", and LF or CR LF.  The
 * headers are handed to parse_headers.  Returns its result, or NULL with *ret
 * set to -2 (input incomplete) or -1 (malformed, including an empty method or
 * path).
 */
static const char *parse_request(const char *buf, const char *buf_end,
                                 const char **method, size_t *method_len,
                                 const char **path, size_t *path_len,
                                 int *minor_version, struct phr_header *headers,
                                 size_t *num_headers, size_t max_headers,
                                 int *ret)
{
    /* skip first empty line (some clients add CRLF after POST content) */
    CHECK_EOF();
    if (*buf == '\012') {
        ++buf;
    } else if (*buf == '\015') {
        ++buf;
        EXPECT_CHAR('\012');
    }

    /* method, ended by SP */
    buf = parse_token(buf, buf_end, method, method_len, ' ', ret);
    if (buf == NULL) {
        return NULL;
    }
    /* step over the run of SP after the method */
    for (++buf;; ++buf) {
        CHECK_EOF();
        if (*buf != ' ') {
            break;
        }
    }

    /* request target, ended by SP */
    ADVANCE_TOKEN(*path, *path_len);
    for (++buf;; ++buf) {
        CHECK_EOF();
        if (*buf != ' ') {
            break;
        }
    }

    if (*method_len == 0 || *path_len == 0) {
        *ret = -1;
        return NULL;
    }

    buf = parse_http_version(buf, buf_end, minor_version, ret);
    if (buf == NULL) {
        return NULL;
    }

    /* the request line must end right after the version */
    if (*buf == '\012') {
        ++buf;
    } else if (*buf == '\015') {
        ++buf;
        EXPECT_CHAR('\012');
    } else {
        *ret = -1;
        return NULL;
    }

    return parse_headers(buf, buf_end, headers, num_headers, max_headers, ret);
}
/*
 * Parse an HTTP request held in buf_start[0 .. len).
 *
 * On entry *num_headers is the capacity of `headers`; on return it is the
 * number of headers stored.  All output parameters are reset before parsing.
 * `last_len` non-zero enables a completeness pre-check (see below).
 *
 * Returns the number of bytes consumed on success, or the negative status
 * produced by the parsing helpers (-1 or -2).
 */
int phr_parse_request(const char *buf_start, size_t len, const char **method,
                      size_t *method_len, const char **path, size_t *path_len,
                      int *minor_version, struct phr_header *headers,
                      size_t *num_headers, size_t last_len)
{
    const char *const buf_end = buf_start + len;
    const size_t max_headers = *num_headers;
    const char *cursor;
    int r;

    *method = NULL;
    *method_len = 0;
    *path = NULL;
    *path_len = 0;
    *minor_version = -1;
    *num_headers = 0;

    /* if last_len != 0, check if the request is complete (a fast
     * countermeasure against slowloris) */
    if (last_len != 0 && is_complete(buf_start, buf_end, last_len, &r) == NULL) {
        return r;
    }

    cursor = parse_request(buf_start, buf_end, method, method_len, path,
                           path_len, minor_version, headers, num_headers,
                           max_headers, &r);
    if (cursor == NULL) {
        return r;
    }
    return (int)(cursor - buf_start);
}
/*
 * Parse a status line followed by the header block.
 *
 * Expects "HTTP/1.<digit>", one or more SP, a three-digit status code, then
 * either the end of the line or SP followed by the reason phrase.  *msg and
 * *msg_len describe the reason phrase without its leading SP.  The headers
 * are handed to parse_headers.  Returns its result, or NULL with *ret set to
 * -2 (input incomplete) or -1 (malformed).
 */
static const char *parse_response(const char *buf, const char *buf_end,
                                  int *minor_version, int *status,
                                  const char **msg, size_t *msg_len,
                                  struct phr_header *headers,
                                  size_t *num_headers, size_t max_headers,
                                  int *ret)
{
    /* parse "HTTP/1.x" */
    buf = parse_http_version(buf, buf_end, minor_version, ret);
    if (buf == NULL) {
        return NULL;
    }

    /* at least one SP must follow the version; skip the whole run */
    if (*buf != ' ') {
        *ret = -1;
        return NULL;
    }
    for (++buf;; ++buf) {
        CHECK_EOF();
        if (*buf != ' ') {
            break;
        }
    }

    /* parse status code, we want at least [:digit:][:digit:][:digit:]<other char> to try to parse */
    if (buf_end - buf < 4) {
        *ret = -2;
        return NULL;
    }
    PARSE_INT_3(status);

    /* get message including preceding space */
    buf = get_token_to_eol(buf, buf_end, msg, msg_len, ret);
    if (buf == NULL) {
        return NULL;
    }
    if (*msg_len != 0) {
        if (**msg != ' ') {
            /* garbage found after status code */
            *ret = -1;
            return NULL;
        }
        /* Remove preceding space. Successful return from `get_token_to_eol` guarantees that we would hit something other than SP
         * before running past the end of the given buffer. */
        do {
            ++*msg;
            --*msg_len;
        } while (**msg == ' ');
    }

    return parse_headers(buf, buf_end, headers, num_headers, max_headers, ret);
}
/*
 * Parse an HTTP response held in buf_start[0 .. len).
 *
 * On entry *num_headers is the capacity of `headers`; on return it is the
 * number of headers stored.  All output parameters are reset before parsing.
 * `last_len` non-zero enables a completeness pre-check (see below).
 *
 * Returns the number of bytes consumed on success, or the negative status
 * produced by the parsing helpers (-1 or -2).
 */
int phr_parse_response(const char *buf_start, size_t len, int *minor_version,
                       int *status, const char **msg, size_t *msg_len,
                       struct phr_header *headers, size_t *num_headers,
                       size_t last_len)
{
    const char *const buf_end = buf_start + len;
    const size_t max_headers = *num_headers;
    const char *cursor;
    int r;

    *minor_version = -1;
    *status = 0;
    *msg = NULL;
    *msg_len = 0;
    *num_headers = 0;

    /* if last_len != 0, check if the response is complete (a fast
     * countermeasure against slowloris) */
    if (last_len != 0 && is_complete(buf_start, buf_end, last_len, &r) == NULL) {
        return r;
    }

    cursor = parse_response(buf_start, buf_end, minor_version, status, msg,
                            msg_len, headers, num_headers, max_headers, &r);
    if (cursor == NULL) {
        return r;
    }
    return (int)(cursor - buf_start);
}
/*
 * Parse a bare header block held in buf_start[0 .. len).
 *
 * On entry *num_headers is the capacity of `headers`; on return it is the
 * number of headers stored.  `last_len` non-zero enables a completeness
 * pre-check (see below).
 *
 * Returns the number of bytes consumed on success, or the negative status
 * produced by the parsing helpers (-1 or -2).
 */
int phr_parse_headers(const char *buf_start, size_t len,
                      struct phr_header *headers, size_t *num_headers,
                      size_t last_len)
{
    const char *const buf_end = buf_start + len;
    const size_t max_headers = *num_headers;
    const char *cursor;
    int r;

    *num_headers = 0;

    /* if last_len != 0, check if the header block is complete (a fast
     * countermeasure against slowloris) */
    if (last_len != 0 && is_complete(buf_start, buf_end, last_len, &r) == NULL) {
        return r;
    }

    cursor = parse_headers(buf_start, buf_end, headers, num_headers,
                           max_headers, &r);
    if (cursor == NULL) {
        return r;
    }
    return (int)(cursor - buf_start);
}
/* States of the chunked decoder, stored in phr_chunked_decoder::_state and
 * driven by phr_decode_chunked. */
enum {
    /* accumulating the hexadecimal digits of a chunk size */
    CHUNKED_IN_CHUNK_SIZE,
    /* skipping the rest of the chunk-size line, up to and including LF */
    CHUNKED_IN_CHUNK_EXT,
    /* copying chunk payload bytes */
    CHUNKED_IN_CHUNK_DATA,
    /* expecting the line terminator that follows the chunk payload */
    CHUNKED_IN_CHUNK_CRLF,
    /* at the start of a trailer line; an empty line ends the body */
    CHUNKED_IN_TRAILERS_LINE_HEAD,
    /* inside a trailer line, skipping up to and including LF */
    CHUNKED_IN_TRAILERS_LINE_MIDDLE
};
/* Value (0..15) of a hexadecimal digit character in either letter case, or -1
 * when `ch` is not a hexadecimal digit. */
static int decode_hex(int ch)
{
    if (ch >= '0' && ch <= '9') {
        return ch - '0';
    }
    if (ch >= 'a' && ch <= 'f') {
        return 0xa + (ch - 'a');
    }
    if (ch >= 'A' && ch <= 'F') {
        return 0xa + (ch - 'A');
    }
    return -1;
}
- ssize_t phr_decode_chunked(struct phr_chunked_decoder* decoder, char* buf,
- size_t* _bufsz) {
- size_t dst = 0, src = 0, bufsz = *_bufsz;
- ssize_t ret = -2; /* incomplete */
- while (1) {
- switch (decoder->_state) {
- case CHUNKED_IN_CHUNK_SIZE:
- for (;; ++src) {
- int v;
- if (src == bufsz)
- goto Exit;
- if ((v = decode_hex(buf[src])) == -1) {
- if (decoder->_hex_count == 0) {
- ret = -1;
- goto Exit;
- }
- break;
- }
- if (decoder->_hex_count == sizeof(size_t) * 2) {
- ret = -1;
- goto Exit;
- }
- decoder->bytes_left_in_chunk = decoder->bytes_left_in_chunk * 16 + v;
- ++decoder->_hex_count;
- }
- decoder->_hex_count = 0;
- decoder->_state = CHUNKED_IN_CHUNK_EXT;
- /* fallthru */
- case CHUNKED_IN_CHUNK_EXT:
- /* RFC 7230 A.2 "Line folding in chunk extensions is disallowed" */
- for (;; ++src) {
- if (src == bufsz)
- goto Exit;
- if (buf[src] == '\012')
- break;
- }
- ++src;
- if (decoder->bytes_left_in_chunk == 0) {
- if (decoder->consume_trailer) {
- decoder->_state = CHUNKED_IN_TRAILERS_LINE_HEAD;
- break;
- } else {
- goto Complete;
- }
- }
- decoder->_state = CHUNKED_IN_CHUNK_DATA;
- /* fallthru */
- case CHUNKED_IN_CHUNK_DATA: {
- size_t avail = bufsz - src;
- if (avail < decoder->bytes_left_in_chunk) {
- if (dst != src)
- memmove(buf + dst, buf + src, avail);
- src += avail;
- dst += avail;
- decoder->bytes_left_in_chunk -= avail;
- goto Exit;
- }
- if (dst != src)
- memmove(buf + dst, buf + src, decoder->bytes_left_in_chunk);
- src += decoder->bytes_left_in_chunk;
- dst += decoder->bytes_left_in_chunk;
- decoder->bytes_left_in_chunk = 0;
- decoder->_state = CHUNKED_IN_CHUNK_CRLF;
- }
- /* fallthru */
- case CHUNKED_IN_CHUNK_CRLF:
- for (;; ++src) {
- if (src == bufsz)
- goto Exit;
- if (buf[src] != '\015')
- break;
- }
- if (buf[src] != '\012') {
- ret = -1;
- goto Exit;
- }
- ++src;
- decoder->_state = CHUNKED_IN_CHUNK_SIZE;
- break;
- case CHUNKED_IN_TRAILERS_LINE_HEAD:
- for (;; ++src) {
- if (src == bufsz)
- goto Exit;
- if (buf[src] != '\015')
- break;
- }
- if (buf[src++] == '\012')
- goto Complete;
- decoder->_state = CHUNKED_IN_TRAILERS_LINE_MIDDLE;
- /* fallthru */
- case CHUNKED_IN_TRAILERS_LINE_MIDDLE:
- for (;; ++src) {
- if (src == bufsz)
- goto Exit;
- if (buf[src] == '\012')
- break;
- }
- ++src;
- decoder->_state = CHUNKED_IN_TRAILERS_LINE_HEAD;
- break;
- default:
- assert(!"decoder is corrupt");
- }
- }
- Complete:
- ret = bufsz - src;
- Exit:
- if (dst != src)
- memmove(buf + dst, buf + src, bufsz - src);
- *_bufsz = dst;
- return ret;
- }
- int phr_decode_chunked_is_in_data(struct phr_chunked_decoder* decoder) {
- return decoder->_state == CHUNKED_IN_CHUNK_DATA;
- }
/* Remove the parsing helper macros that rely on local names (buf, buf_end,
 * ret) of the functions above. */
#undef CHECK_EOF
#undef EXPECT_CHAR
#undef ADVANCE_TOKEN
|