match.inl 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262
  1. /* Reimplementation of pattern matching */
  2. /* This file is part of the CivetWeb web server.
  3. * See https://github.com/civetweb/civetweb/
  4. */
  5. /* Initialize structure with 0 matches */
  6. static void
  7. match_context_reset(struct mg_match_context *mcx)
  8. {
  9. mcx->num_matches = 0;
  10. memset(mcx->match, 0, sizeof(mcx->match));
  11. }
  12. /* Add a new match to the list of matches */
  13. static void
  14. match_context_push(const char *str, size_t len, struct mg_match_context *mcx)
  15. {
  16. if (mcx->num_matches < MG_MATCH_CONTEXT_MAX_MATCHES) {
  17. mcx->match[mcx->num_matches].str = str;
  18. mcx->match[mcx->num_matches].len = len;
  19. mcx->num_matches++;
  20. }
  21. }
  22. static ptrdiff_t
  23. mg_match_impl(const char *pat,
  24. size_t pat_len,
  25. const char *str,
  26. struct mg_match_context *mcx)
  27. {
  28. /* Parse string */
  29. size_t i_pat = 0; /* Pattern index */
  30. size_t i_str = 0; /* Pattern index */
  31. uint8_t case_sensitive = ((mcx != NULL) ? mcx->case_sensitive : 0);
  32. while (i_pat < pat_len) {
  33. /* Pattern ? matches one character, except / and NULL character */
  34. if ((pat[i_pat] == '?') && (str[i_str] != '\0')
  35. && (str[i_str] != '/')) {
  36. size_t i_str_start = i_str;
  37. do {
  38. /* Advance as long as there are ? */
  39. i_pat++;
  40. i_str++;
  41. } while ((pat[i_pat] == '?') && (str[i_str] != '\0')
  42. && (str[i_str] != '/') && (i_pat < pat_len));
  43. /* If we have a match context, add the substring we just found */
  44. if (mcx) {
  45. match_context_push(str + i_str_start, i_str - i_str_start, mcx);
  46. }
  47. /* Reached end of pattern ? */
  48. if (i_pat == pat_len) {
  49. return (ptrdiff_t)i_str;
  50. }
  51. }
  52. /* Pattern $ matches end of string */
  53. if (pat[i_pat] == '$') {
  54. return (str[i_str] == '\0') ? (ptrdiff_t)i_str : -1;
  55. }
  56. /* Pattern * or ** matches multiple characters */
  57. if (pat[i_pat] == '*') {
  58. size_t len; /* lenght matched by "*" or "**" */
  59. ptrdiff_t ret;
  60. i_pat++;
  61. if ((pat[i_pat] == '*') && (i_pat < pat_len)) {
  62. /* Pattern ** matches all */
  63. i_pat++;
  64. len = strlen(str + i_str);
  65. } else {
  66. /* Pattern * matches all except / character */
  67. len = strcspn(str + i_str, "/");
  68. }
  69. if (i_pat == pat_len) {
  70. /* End of pattern reached. Add all to match context. */
  71. if (mcx) {
  72. match_context_push(str + i_str, len, mcx);
  73. }
  74. return (i_str + len);
  75. }
  76. /* This loop searches for the longest possible match */
  77. do {
  78. ret = mg_match_impl(pat + i_pat,
  79. (pat_len - (size_t)i_pat),
  80. str + i_str + len,
  81. mcx);
  82. } while ((ret == -1) && (len-- > 0));
  83. /* If we have a match context, add the substring we just found */
  84. if (ret >= 0) {
  85. if (mcx) {
  86. match_context_push(str + i_str, len, mcx);
  87. }
  88. return (i_str + ret + len);
  89. }
  90. return -1;
  91. }
  92. /* Single character compare */
  93. if (case_sensitive) {
  94. if (pat[i_pat] != str[i_str]) {
  95. /* case sensitive compare: mismatch */
  96. return -1;
  97. }
  98. } else if (lowercase(&pat[i_pat]) != lowercase(&str[i_str])) {
  99. /* case insensitive compare: mismatch */
  100. return -1;
  101. }
  102. i_pat++;
  103. i_str++;
  104. }
  105. return (ptrdiff_t)i_str;
  106. }
  107. static ptrdiff_t
  108. mg_match_alternatives(const char *pat,
  109. size_t pat_len,
  110. const char *str,
  111. struct mg_match_context *mcx)
  112. {
  113. const char *match_alternative = (const char *)memchr(pat, '|', pat_len);
  114. if (mcx != NULL) {
  115. match_context_reset(mcx);
  116. }
  117. while (match_alternative != NULL) {
  118. /* Split at | for alternative match */
  119. size_t left_size = (size_t)(match_alternative - pat);
  120. /* Try left string first */
  121. ptrdiff_t ret = mg_match_impl(pat, left_size, str, mcx);
  122. if (ret >= 0) {
  123. /* A 0-byte match is also valid */
  124. return ret;
  125. }
  126. /* Reset possible incomplete match data */
  127. if (mcx != NULL) {
  128. match_context_reset(mcx);
  129. }
  130. /* If no match: try right side */
  131. pat += left_size + 1;
  132. pat_len -= left_size + 1;
  133. match_alternative = (const char *)memchr(pat, '|', pat_len);
  134. }
  135. /* Handled all | operators. This is the final string. */
  136. return mg_match_impl(pat, pat_len, str, mcx);
  137. }
  138. static int
  139. match_compare(const void *p1, const void *p2, void *user)
  140. {
  141. const struct mg_match_element *e1 = (const struct mg_match_element *)p1;
  142. const struct mg_match_element *e2 = (const struct mg_match_element *)p2;
  143. /* unused */
  144. (void)user;
  145. if (e1->str > e2->str) {
  146. return +1;
  147. }
  148. if (e1->str < e2->str) {
  149. return -1;
  150. }
  151. return 0;
  152. }
  153. #if defined(MG_EXPERIMENTAL_INTERFACES)
  154. CIVETWEB_API
  155. #else
  156. static
  157. #endif
  158. ptrdiff_t
  159. mg_match(const char *pat, const char *str, struct mg_match_context *mcx)
  160. {
  161. size_t pat_len = strlen(pat);
  162. ptrdiff_t ret = mg_match_alternatives(pat, pat_len, str, mcx);
  163. if (mcx != NULL) {
  164. if (ret < 0) {
  165. /* Remove possible incomplete data */
  166. match_context_reset(mcx);
  167. } else {
  168. /* Join "?*" to one pattern. */
  169. size_t i, j;
  170. /* Use difference of two array elements instead of sizeof, since
  171. * there may be some additional padding bytes. */
  172. size_t elmsize =
  173. (size_t)(&mcx->match[1]) - (size_t)(&mcx->match[0]);
  174. /* First sort the matches by address ("str" begin to end) */
  175. mg_sort(mcx->match, mcx->num_matches, elmsize, match_compare, NULL);
  176. /* Join consecutive matches */
  177. i = 1;
  178. while (i < mcx->num_matches) {
  179. if ((mcx->match[i - 1].str + mcx->match[i - 1].len)
  180. == mcx->match[i].str) {
  181. /* Two matches are consecutive. Join length. */
  182. mcx->match[i - 1].len += mcx->match[i].len;
  183. /* Shift all list elements. */
  184. for (j = i + 1; j < mcx->num_matches; j++) {
  185. mcx->match[j - 1].len = mcx->match[j].len;
  186. mcx->match[j - 1].str = mcx->match[j].str;
  187. }
  188. /* Remove/blank last list element. */
  189. mcx->num_matches--;
  190. mcx->match[mcx->num_matches].str = NULL;
  191. mcx->match[mcx->num_matches].len = 0;
  192. } else {
  193. i++;
  194. }
  195. }
  196. }
  197. }
  198. return ret;
  199. }
  200. static ptrdiff_t
  201. match_prefix(const char *pattern, size_t pattern_len, const char *str)
  202. {
  203. if (pattern == NULL) {
  204. return -1;
  205. }
  206. return mg_match_alternatives(pattern, pattern_len, str, NULL);
  207. }
  208. static ptrdiff_t
  209. match_prefix_strlen(const char *pattern, const char *str)
  210. {
  211. if (pattern == NULL) {
  212. return -1;
  213. }
  214. return mg_match_alternatives(pattern, strlen(pattern), str, NULL);
  215. }
  216. /* End of match.inl */