| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688 | /* * Copyright (c) 2009-2014 Kazuho Oku, Tokuhiro Matsuno, Daisuke Murase, *                         Shigeo Mitsunari * * The software is licensed under either the MIT License (below) or the Perl * license. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. */#include <assert.h>     // for assert#include <stddef.h>     // for NULL, size_t#include <string.h>     // for memmove#include <sys/types.h>  // for ssize_t#ifdef __SSE4_2__#ifdef _MSC_VER#include <nmmintrin.h>#else#include <x86intrin.h>#endif#endif#include "picohttpparser.h"  // for phr_chunked_decoder, phr_header, phr_dec...#if __GNUC__ >= 3#define likely(x) __builtin_expect(!!(x), 1)#define unlikely(x) __builtin_expect(!!(x), 0)#else#define likely(x) (x)#define unlikely(x) (x)#endif#ifdef _MSC_VER#define ALIGNED(n) _declspec(align(n))#else#define ALIGNED(n) __attribute__((aligned(n)))#endif#define IS_PRINTABLE_ASCII(c) ((unsigned char)(c)-040u < 0137u)#define CHECK_EOF()     \  if (buf == buf_end) { \    *ret = -2;          \    return NULL;        \  }#define EXPECT_CHAR_NO_CHECK(ch) \  if (*buf++ != ch) {            \    *ret = -1;                   \    return NULL;                 \  }#define EXPECT_CHAR(ch) \  CHECK_EOF();          \  EXPECT_CHAR_NO_CHECK(ch);#define ADVANCE_TOKEN(tok, toklen)                                  \  do {                                                              \    const char* tok_start = buf;                                    \    static const char ALIGNED(16) ranges2[16] = "\000\040\177\177"; \    int found2;                                                     \    buf = findchar_fast(buf, buf_end, ranges2, 4, &found2);         \    if (!found2) {                                                  \      CHECK_EOF();                                                  \    }                                                               \    while (1) {                                                     \      if (*buf == ' ') {                                            \        break;                                                      \      } else if (unlikely(!IS_PRINTABLE_ASCII(*buf))) {             \        if ((unsigned char)*buf < '\040' || *buf == '\177') {       \          *ret = -1;                                                \          return NULL;                                              \        }                                                           \      }                                                             \      ++buf;                                                        \      CHECK_EOF();                                                  \    }                                                               \    tok = tok_start;                                                \    toklen = buf - tok_start;                                       \  } while (0)static const char* token_char_map =    "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"    "\0\1\0\1\1\1\1\1\0\0\1\1\0\1\1\0\1\1\1\1\1\1\1\1\1\1\0\0\0\0\0\0"    "\0\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\0\0\1\1"    "\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\1\0\1\0"    "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"    "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"    "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"    "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0";static const char* findchar_fast(const char* buf, const char* buf_end,                                 const char* ranges, size_t ranges_size,                                 int* found) {  *found = 0;#if __SSE4_2__  if (likely(buf_end - buf >= 16)) {    __m128i ranges16 = _mm_loadu_si128((const __m128i*)ranges);    size_t left = (buf_end - buf) & ~15;    do {      __m128i b16 = _mm_loadu_si128((const __m128i*)buf);      int r = _mm_cmpestri(          ranges16, ranges_size, b16, 16,          _SIDD_LEAST_SIGNIFICANT | _SIDD_CMP_RANGES | _SIDD_UBYTE_OPS);      if (unlikely(r != 16)) {        buf += r;        *found = 1;        break;      }      buf += 16;      left -= 16;    } while (likely(left != 0));  }#else  /* suppress unused parameter warning */  (void)buf_end;  (void)ranges;  (void)ranges_size;#endif  return buf;}static const char* get_token_to_eol(const char* buf, const char* buf_end,                                    const char** token, size_t* token_len,                                    int* ret) {  const char* token_start = buf;#ifdef __SSE4_2__  static const char ALIGNED(16) ranges1[16] =      "\0\010"    /* allow HT */      "\012\037"  /* allow SP and up to but not including DEL */      "\177\177"; /* allow chars w. MSB set */  int found;  buf = findchar_fast(buf, buf_end, ranges1, 6, &found);  if (found)    goto FOUND_CTL;#else  /* find non-printable char within the next 8 bytes, this is the hottest code; manually inlined */  while (likely(buf_end - buf >= 8)) {#define DOIT()                               \  do {                                       \    if (unlikely(!IS_PRINTABLE_ASCII(*buf))) \      goto NonPrintable;                     \    ++buf;                                   \  } while (0)    DOIT();    DOIT();    DOIT();    DOIT();    DOIT();    DOIT();    DOIT();    DOIT();#undef DOIT    continue;  NonPrintable:    if ((likely((unsigned char)*buf < '\040') && likely(*buf != '\011')) ||        unlikely(*buf == '\177')) {      goto FOUND_CTL;    }    ++buf;  }#endif  for (;; ++buf) {    CHECK_EOF();    if (unlikely(!IS_PRINTABLE_ASCII(*buf))) {      if ((likely((unsigned char)*buf < '\040') && likely(*buf != '\011')) ||          unlikely(*buf == '\177')) {        goto FOUND_CTL;      }    }  }FOUND_CTL:  if (likely(*buf == '\015')) {    ++buf;    EXPECT_CHAR('\012');    *token_len = buf - 2 - token_start;  } else if (*buf == '\012') {    *token_len = buf - token_start;    ++buf;  } else {    *ret = -1;    return NULL;  }  *token = token_start;  return buf;}static const char* is_complete(const char* buf, const char* buf_end,                               size_t last_len, int* ret) {  int ret_cnt = 0;  buf = last_len < 3 ? buf : buf + last_len - 3;  while (1) {    CHECK_EOF();    if (*buf == '\015') {      ++buf;      CHECK_EOF();      EXPECT_CHAR('\012');      ++ret_cnt;    } else if (*buf == '\012') {      ++buf;      ++ret_cnt;    } else {      ++buf;      ret_cnt = 0;    }    if (ret_cnt == 2) {      return buf;    }  }  *ret = -2;  return NULL;}#define PARSE_INT(valp_, mul_)    \  if (*buf < '0' || '9' < *buf) { \    buf++;                        \    *ret = -1;                    \    return NULL;                  \  }                               \  *(valp_) = (mul_) * (*buf++ - '0');#define PARSE_INT_3(valp_) \  do {                     \    int res_ = 0;          \    PARSE_INT(&res_, 100)  \    *valp_ = res_;         \    PARSE_INT(&res_, 10)   \    *valp_ += res_;        \    PARSE_INT(&res_, 1)    \    *valp_ += res_;        \  } while (0)/* returned pointer is always within [buf, buf_end), or null */static const char* parse_token(const char* buf, const char* buf_end,                               const char** token, size_t* token_len,                               char next_char, int* ret) {  /* We use pcmpestri to detect non-token characters. This instruction can take no more than eight character ranges (8*2*8=128     * bits that is the size of a SSE register). Due to this restriction, characters `|` and `~` are handled in the slow loop. */  static const char ALIGNED(16) ranges[] =      "\x00 "  /* control chars and up to SP */      "\"\""   /* 0x22 */      "()"     /* 0x28,0x29 */      ",,"     /* 0x2c */      "//"     /* 0x2f */      ":@"     /* 0x3a-0x40 */      "[]"     /* 0x5b-0x5d */      "{\xff"; /* 0x7b-0xff */  const char* buf_start = buf;  int found;  buf = findchar_fast(buf, buf_end, ranges, sizeof(ranges) - 1, &found);  if (!found) {    CHECK_EOF();  }  while (1) {    if (*buf == next_char) {      break;    } else if (!token_char_map[(unsigned char)*buf]) {      *ret = -1;      return NULL;    }    ++buf;    CHECK_EOF();  }  *token = buf_start;  *token_len = buf - buf_start;  return buf;}/* returned pointer is always within [buf, buf_end), or null */static const char* parse_http_version(const char* buf, const char* buf_end,                                      int* minor_version, int* ret) {  /* we want at least [HTTP/1.<two chars>] to try to parse */  if (buf_end - buf < 9) {    *ret = -2;    return NULL;  }  EXPECT_CHAR_NO_CHECK('H');  EXPECT_CHAR_NO_CHECK('T');  EXPECT_CHAR_NO_CHECK('T');  EXPECT_CHAR_NO_CHECK('P');  EXPECT_CHAR_NO_CHECK('/');  EXPECT_CHAR_NO_CHECK('1');  EXPECT_CHAR_NO_CHECK('.');  PARSE_INT(minor_version, 1);  return buf;}static const char* parse_headers(const char* buf, const char* buf_end,                                 struct phr_header* headers,                                 size_t* num_headers, size_t max_headers,                                 int* ret) {  for (;; ++*num_headers) {    CHECK_EOF();    if (*buf == '\015') {      ++buf;      EXPECT_CHAR('\012');      break;    } else if (*buf == '\012') {      ++buf;      break;    }    if (*num_headers == max_headers) {      *ret = -1;      return NULL;    }    if (!(*num_headers != 0 && (*buf == ' ' || *buf == '\t'))) {      /* parsing name, but do not discard SP before colon, see             * http://www.mozilla.org/security/announce/2006/mfsa2006-33.html */      if ((buf = parse_token(buf, buf_end, &headers[*num_headers].name,                             &headers[*num_headers].name_len, ':', ret)) ==          NULL) {        return NULL;      }      if (headers[*num_headers].name_len == 0) {        *ret = -1;        return NULL;      }      ++buf;      for (;; ++buf) {        CHECK_EOF();        if (!(*buf == ' ' || *buf == '\t')) {          break;        }      }    } else {      headers[*num_headers].name = NULL;      headers[*num_headers].name_len = 0;    }    const char* value;    size_t value_len;    if ((buf = get_token_to_eol(buf, buf_end, &value, &value_len, ret)) ==        NULL) {      return NULL;    }    /* remove trailing SPs and HTABs */    const char* value_end = value + value_len;    for (; value_end != value; --value_end) {      const char c = *(value_end - 1);      if (!(c == ' ' || c == '\t')) {        break;      }    }    headers[*num_headers].value = value;    headers[*num_headers].value_len = value_end - value;  }  return buf;}static const char* parse_request(const char* buf, const char* buf_end,                                 const char** method, size_t* method_len,                                 const char** path, size_t* path_len,                                 int* minor_version, struct phr_header* headers,                                 size_t* num_headers, size_t max_headers,                                 int* ret) {  /* skip first empty line (some clients add CRLF after POST content) */  CHECK_EOF();  if (*buf == '\015') {    ++buf;    EXPECT_CHAR('\012');  } else if (*buf == '\012') {    ++buf;  }  /* parse request line */  if ((buf = parse_token(buf, buf_end, method, method_len, ' ', ret)) == NULL) {    return NULL;  }  do {    ++buf;    CHECK_EOF();  } while (*buf == ' ');  ADVANCE_TOKEN(*path, *path_len);  do {    ++buf;    CHECK_EOF();  } while (*buf == ' ');  if (*method_len == 0 || *path_len == 0) {    *ret = -1;    return NULL;  }  if ((buf = parse_http_version(buf, buf_end, minor_version, ret)) == NULL) {    return NULL;  }  if (*buf == '\015') {    ++buf;    EXPECT_CHAR('\012');  } else if (*buf == '\012') {    ++buf;  } else {    *ret = -1;    return NULL;  }  return parse_headers(buf, buf_end, headers, num_headers, max_headers, ret);}int phr_parse_request(const char* buf_start, size_t len, const char** method,                      size_t* method_len, const char** path, size_t* path_len,                      int* minor_version, struct phr_header* headers,                      size_t* num_headers, size_t last_len) {  const char *buf = buf_start, *buf_end = buf_start + len;  size_t max_headers = *num_headers;  int r;  *method = NULL;  *method_len = 0;  *path = NULL;  *path_len = 0;  *minor_version = -1;  *num_headers = 0;  /* if last_len != 0, check if the request is complete (a fast countermeasure       againt slowloris */  if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) {    return r;  }  if ((buf = parse_request(buf, buf_end, method, method_len, path, path_len,                           minor_version, headers, num_headers, max_headers,                           &r)) == NULL) {    return r;  }  return (int)(buf - buf_start);}static const char* parse_response(const char* buf, const char* buf_end,                                  int* minor_version, int* status,                                  const char** msg, size_t* msg_len,                                  struct phr_header* headers,                                  size_t* num_headers, size_t max_headers,                                  int* ret) {  /* parse "HTTP/1.x" */  if ((buf = parse_http_version(buf, buf_end, minor_version, ret)) == NULL) {    return NULL;  }  /* skip space */  if (*buf != ' ') {    *ret = -1;    return NULL;  }  do {    ++buf;    CHECK_EOF();  } while (*buf == ' ');  /* parse status code, we want at least [:digit:][:digit:][:digit:]<other char> to try to parse */  if (buf_end - buf < 4) {    *ret = -2;    return NULL;  }  PARSE_INT_3(status);  /* get message including preceding space */  if ((buf = get_token_to_eol(buf, buf_end, msg, msg_len, ret)) == NULL) {    return NULL;  }  if (*msg_len == 0) {    /* ok */  } else if (**msg == ' ') {    /* Remove preceding space. Successful return from `get_token_to_eol` guarantees that we would hit something other than SP         * before running past the end of the given buffer. */    do {      ++*msg;      --*msg_len;    } while (**msg == ' ');  } else {    /* garbage found after status code */    *ret = -1;    return NULL;  }  return parse_headers(buf, buf_end, headers, num_headers, max_headers, ret);}int phr_parse_response(const char* buf_start, size_t len, int* minor_version,                       int* status, const char** msg, size_t* msg_len,                       struct phr_header* headers, size_t* num_headers,                       size_t last_len) {  const char *buf = buf_start, *buf_end = buf + len;  size_t max_headers = *num_headers;  int r;  *minor_version = -1;  *status = 0;  *msg = NULL;  *msg_len = 0;  *num_headers = 0;  /* if last_len != 0, check if the response is complete (a fast countermeasure       against slowloris */  if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) {    return r;  }  if ((buf = parse_response(buf, buf_end, minor_version, status, msg, msg_len,                            headers, num_headers, max_headers, &r)) == NULL) {    return r;  }  return (int)(buf - buf_start);}int phr_parse_headers(const char* buf_start, size_t len,                      struct phr_header* headers, size_t* num_headers,                      size_t last_len) {  const char *buf = buf_start, *buf_end = buf + len;  size_t max_headers = *num_headers;  int r;  *num_headers = 0;  /* if last_len != 0, check if the response is complete (a fast countermeasure       against slowloris */  if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) {    return r;  }  if ((buf = parse_headers(buf, buf_end, headers, num_headers, max_headers,                           &r)) == NULL) {    return r;  }  return (int)(buf - buf_start);}enum {  CHUNKED_IN_CHUNK_SIZE,  CHUNKED_IN_CHUNK_EXT,  CHUNKED_IN_CHUNK_DATA,  CHUNKED_IN_CHUNK_CRLF,  CHUNKED_IN_TRAILERS_LINE_HEAD,  CHUNKED_IN_TRAILERS_LINE_MIDDLE};static int decode_hex(int ch) {  if ('0' <= ch && ch <= '9') {    return ch - '0';  } else if ('A' <= ch && ch <= 'F') {    return ch - 'A' + 0xa;  } else if ('a' <= ch && ch <= 'f') {    return ch - 'a' + 0xa;  } else {    return -1;  }}ssize_t phr_decode_chunked(struct phr_chunked_decoder* decoder, char* buf,                           size_t* _bufsz) {  size_t dst = 0, src = 0, bufsz = *_bufsz;  ssize_t ret = -2; /* incomplete */  while (1) {    switch (decoder->_state) {      case CHUNKED_IN_CHUNK_SIZE:        for (;; ++src) {          int v;          if (src == bufsz)            goto Exit;          if ((v = decode_hex(buf[src])) == -1) {            if (decoder->_hex_count == 0) {              ret = -1;              goto Exit;            }            break;          }          if (decoder->_hex_count == sizeof(size_t) * 2) {            ret = -1;            goto Exit;          }          decoder->bytes_left_in_chunk = decoder->bytes_left_in_chunk * 16 + v;          ++decoder->_hex_count;        }        decoder->_hex_count = 0;        decoder->_state = CHUNKED_IN_CHUNK_EXT;      /* fallthru */      case CHUNKED_IN_CHUNK_EXT:        /* RFC 7230 A.2 "Line folding in chunk extensions is disallowed" */        for (;; ++src) {          if (src == bufsz)            goto Exit;          if (buf[src] == '\012')            break;        }        ++src;        if (decoder->bytes_left_in_chunk == 0) {          if (decoder->consume_trailer) {            decoder->_state = CHUNKED_IN_TRAILERS_LINE_HEAD;            break;          } else {            goto Complete;          }        }        decoder->_state = CHUNKED_IN_CHUNK_DATA;      /* fallthru */      case CHUNKED_IN_CHUNK_DATA: {        size_t avail = bufsz - src;        if (avail < decoder->bytes_left_in_chunk) {          if (dst != src)            memmove(buf + dst, buf + src, avail);          src += avail;          dst += avail;          decoder->bytes_left_in_chunk -= avail;          goto Exit;        }        if (dst != src)          memmove(buf + dst, buf + src, decoder->bytes_left_in_chunk);        src += decoder->bytes_left_in_chunk;        dst += decoder->bytes_left_in_chunk;        decoder->bytes_left_in_chunk = 0;        decoder->_state = CHUNKED_IN_CHUNK_CRLF;      }      /* fallthru */      case CHUNKED_IN_CHUNK_CRLF:        for (;; ++src) {          if (src == bufsz)            goto Exit;          if (buf[src] != '\015')            break;        }        if (buf[src] != '\012') {          ret = -1;          goto Exit;        }        ++src;        decoder->_state = CHUNKED_IN_CHUNK_SIZE;        break;      case CHUNKED_IN_TRAILERS_LINE_HEAD:        for (;; ++src) {          if (src == bufsz)            goto Exit;          if (buf[src] != '\015')            break;        }        if (buf[src++] == '\012')          goto Complete;        decoder->_state = CHUNKED_IN_TRAILERS_LINE_MIDDLE;      /* fallthru */      case CHUNKED_IN_TRAILERS_LINE_MIDDLE:        for (;; ++src) {          if (src == bufsz)            goto Exit;          if (buf[src] == '\012')            break;        }        ++src;        decoder->_state = CHUNKED_IN_TRAILERS_LINE_HEAD;        break;      default:        assert(!"decoder is corrupt");    }  }Complete:  ret = bufsz - src;Exit:  if (dst != src)    memmove(buf + dst, buf + src, bufsz - src);  *_bufsz = dst;  return ret;}int phr_decode_chunked_is_in_data(struct phr_chunked_decoder* decoder) {  return decoder->_state == CHUNKED_IN_CHUNK_DATA;}#undef CHECK_EOF#undef EXPECT_CHAR#undef ADVANCE_TOKEN
 |