pb_common.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388
/* pb_common.c: Common support functions for pb_encode.c and pb_decode.c.
 *
 * 2014 Petteri Aimonen <jpa@kapsi.fi>
 */
  5. #include "pb_common.h"
  6. static bool load_descriptor_values(pb_field_iter_t *iter)
  7. {
  8. uint32_t word0;
  9. uint32_t data_offset;
  10. int_least8_t size_offset;
  11. if (iter->index >= iter->descriptor->field_count)
  12. return false;
  13. word0 = PB_PROGMEM_READU32(iter->descriptor->field_info[iter->field_info_index]);
  14. iter->type = (pb_type_t)((word0 >> 8) & 0xFF);
  15. switch(word0 & 3)
  16. {
  17. case 0: {
  18. /* 1-word format */
  19. iter->array_size = 1;
  20. iter->tag = (pb_size_t)((word0 >> 2) & 0x3F);
  21. size_offset = (int_least8_t)((word0 >> 24) & 0x0F);
  22. data_offset = (word0 >> 16) & 0xFF;
  23. iter->data_size = (pb_size_t)((word0 >> 28) & 0x0F);
  24. break;
  25. }
  26. case 1: {
  27. /* 2-word format */
  28. uint32_t word1 = PB_PROGMEM_READU32(iter->descriptor->field_info[iter->field_info_index + 1]);
  29. iter->array_size = (pb_size_t)((word0 >> 16) & 0x0FFF);
  30. iter->tag = (pb_size_t)(((word0 >> 2) & 0x3F) | ((word1 >> 28) << 6));
  31. size_offset = (int_least8_t)((word0 >> 28) & 0x0F);
  32. data_offset = word1 & 0xFFFF;
  33. iter->data_size = (pb_size_t)((word1 >> 16) & 0x0FFF);
  34. break;
  35. }
  36. case 2: {
  37. /* 4-word format */
  38. uint32_t word1 = PB_PROGMEM_READU32(iter->descriptor->field_info[iter->field_info_index + 1]);
  39. uint32_t word2 = PB_PROGMEM_READU32(iter->descriptor->field_info[iter->field_info_index + 2]);
  40. uint32_t word3 = PB_PROGMEM_READU32(iter->descriptor->field_info[iter->field_info_index + 3]);
  41. iter->array_size = (pb_size_t)(word0 >> 16);
  42. iter->tag = (pb_size_t)(((word0 >> 2) & 0x3F) | ((word1 >> 8) << 6));
  43. size_offset = (int_least8_t)(word1 & 0xFF);
  44. data_offset = word2;
  45. iter->data_size = (pb_size_t)word3;
  46. break;
  47. }
  48. default: {
  49. /* 8-word format */
  50. uint32_t word1 = PB_PROGMEM_READU32(iter->descriptor->field_info[iter->field_info_index + 1]);
  51. uint32_t word2 = PB_PROGMEM_READU32(iter->descriptor->field_info[iter->field_info_index + 2]);
  52. uint32_t word3 = PB_PROGMEM_READU32(iter->descriptor->field_info[iter->field_info_index + 3]);
  53. uint32_t word4 = PB_PROGMEM_READU32(iter->descriptor->field_info[iter->field_info_index + 4]);
  54. iter->array_size = (pb_size_t)word4;
  55. iter->tag = (pb_size_t)(((word0 >> 2) & 0x3F) | ((word1 >> 8) << 6));
  56. size_offset = (int_least8_t)(word1 & 0xFF);
  57. data_offset = word2;
  58. iter->data_size = (pb_size_t)word3;
  59. break;
  60. }
  61. }
  62. if (!iter->message)
  63. {
  64. /* Avoid doing arithmetic on null pointers, it is undefined */
  65. iter->pField = NULL;
  66. iter->pSize = NULL;
  67. }
  68. else
  69. {
  70. iter->pField = (char*)iter->message + data_offset;
  71. if (size_offset)
  72. {
  73. iter->pSize = (char*)iter->pField - size_offset;
  74. }
  75. else if (PB_HTYPE(iter->type) == PB_HTYPE_REPEATED &&
  76. (PB_ATYPE(iter->type) == PB_ATYPE_STATIC ||
  77. PB_ATYPE(iter->type) == PB_ATYPE_POINTER))
  78. {
  79. /* Fixed count array */
  80. iter->pSize = &iter->array_size;
  81. }
  82. else
  83. {
  84. iter->pSize = NULL;
  85. }
  86. if (PB_ATYPE(iter->type) == PB_ATYPE_POINTER && iter->pField != NULL)
  87. {
  88. iter->pData = *(void**)iter->pField;
  89. }
  90. else
  91. {
  92. iter->pData = iter->pField;
  93. }
  94. }
  95. if (PB_LTYPE_IS_SUBMSG(iter->type))
  96. {
  97. iter->submsg_desc = iter->descriptor->submsg_info[iter->submessage_index];
  98. }
  99. else
  100. {
  101. iter->submsg_desc = NULL;
  102. }
  103. return true;
  104. }
  105. static void advance_iterator(pb_field_iter_t *iter)
  106. {
  107. iter->index++;
  108. if (iter->index >= iter->descriptor->field_count)
  109. {
  110. /* Restart */
  111. iter->index = 0;
  112. iter->field_info_index = 0;
  113. iter->submessage_index = 0;
  114. iter->required_field_index = 0;
  115. }
  116. else
  117. {
  118. /* Increment indexes based on previous field type.
  119. * All field info formats have the following fields:
  120. * - lowest 2 bits tell the amount of words in the descriptor (2^n words)
  121. * - bits 2..7 give the lowest bits of tag number.
  122. * - bits 8..15 give the field type.
  123. */
  124. uint32_t prev_descriptor = PB_PROGMEM_READU32(iter->descriptor->field_info[iter->field_info_index]);
  125. pb_type_t prev_type = (prev_descriptor >> 8) & 0xFF;
  126. pb_size_t descriptor_len = (pb_size_t)(1 << (prev_descriptor & 3));
  127. /* Add to fields.
  128. * The cast to pb_size_t is needed to avoid -Wconversion warning.
  129. * Because the data is is constants from generator, there is no danger of overflow.
  130. */
  131. iter->field_info_index = (pb_size_t)(iter->field_info_index + descriptor_len);
  132. iter->required_field_index = (pb_size_t)(iter->required_field_index + (PB_HTYPE(prev_type) == PB_HTYPE_REQUIRED));
  133. iter->submessage_index = (pb_size_t)(iter->submessage_index + PB_LTYPE_IS_SUBMSG(prev_type));
  134. }
  135. }
  136. bool pb_field_iter_begin(pb_field_iter_t *iter, const pb_msgdesc_t *desc, void *message)
  137. {
  138. memset(iter, 0, sizeof(*iter));
  139. iter->descriptor = desc;
  140. iter->message = message;
  141. return load_descriptor_values(iter);
  142. }
  143. bool pb_field_iter_begin_extension(pb_field_iter_t *iter, pb_extension_t *extension)
  144. {
  145. const pb_msgdesc_t *msg = (const pb_msgdesc_t*)extension->type->arg;
  146. bool status;
  147. uint32_t word0 = PB_PROGMEM_READU32(msg->field_info[0]);
  148. if (PB_ATYPE(word0 >> 8) == PB_ATYPE_POINTER)
  149. {
  150. /* For pointer extensions, the pointer is stored directly
  151. * in the extension structure. This avoids having an extra
  152. * indirection. */
  153. status = pb_field_iter_begin(iter, msg, &extension->dest);
  154. }
  155. else
  156. {
  157. status = pb_field_iter_begin(iter, msg, extension->dest);
  158. }
  159. iter->pSize = &extension->found;
  160. return status;
  161. }
  162. bool pb_field_iter_next(pb_field_iter_t *iter)
  163. {
  164. advance_iterator(iter);
  165. (void)load_descriptor_values(iter);
  166. return iter->index != 0;
  167. }
  168. bool pb_field_iter_find(pb_field_iter_t *iter, uint32_t tag)
  169. {
  170. if (iter->tag == tag)
  171. {
  172. return true; /* Nothing to do, correct field already. */
  173. }
  174. else if (tag > iter->descriptor->largest_tag)
  175. {
  176. return false;
  177. }
  178. else
  179. {
  180. pb_size_t start = iter->index;
  181. uint32_t fieldinfo;
  182. if (tag < iter->tag)
  183. {
  184. /* Fields are in tag number order, so we know that tag is between
  185. * 0 and our start position. Setting index to end forces
  186. * advance_iterator() call below to restart from beginning. */
  187. iter->index = iter->descriptor->field_count;
  188. }
  189. do
  190. {
  191. /* Advance iterator but don't load values yet */
  192. advance_iterator(iter);
  193. /* Do fast check for tag number match */
  194. fieldinfo = PB_PROGMEM_READU32(iter->descriptor->field_info[iter->field_info_index]);
  195. if (((fieldinfo >> 2) & 0x3F) == (tag & 0x3F))
  196. {
  197. /* Good candidate, check further */
  198. (void)load_descriptor_values(iter);
  199. if (iter->tag == tag &&
  200. PB_LTYPE(iter->type) != PB_LTYPE_EXTENSION)
  201. {
  202. /* Found it */
  203. return true;
  204. }
  205. }
  206. } while (iter->index != start);
  207. /* Searched all the way back to start, and found nothing. */
  208. (void)load_descriptor_values(iter);
  209. return false;
  210. }
  211. }
  212. bool pb_field_iter_find_extension(pb_field_iter_t *iter)
  213. {
  214. if (PB_LTYPE(iter->type) == PB_LTYPE_EXTENSION)
  215. {
  216. return true;
  217. }
  218. else
  219. {
  220. pb_size_t start = iter->index;
  221. uint32_t fieldinfo;
  222. do
  223. {
  224. /* Advance iterator but don't load values yet */
  225. advance_iterator(iter);
  226. /* Do fast check for field type */
  227. fieldinfo = PB_PROGMEM_READU32(iter->descriptor->field_info[iter->field_info_index]);
  228. if (PB_LTYPE((fieldinfo >> 8) & 0xFF) == PB_LTYPE_EXTENSION)
  229. {
  230. return load_descriptor_values(iter);
  231. }
  232. } while (iter->index != start);
  233. /* Searched all the way back to start, and found nothing. */
  234. (void)load_descriptor_values(iter);
  235. return false;
  236. }
  237. }
  238. static void *pb_const_cast(const void *p)
  239. {
  240. /* Note: this casts away const, in order to use the common field iterator
  241. * logic for both encoding and decoding. The cast is done using union
  242. * to avoid spurious compiler warnings. */
  243. union {
  244. void *p1;
  245. const void *p2;
  246. } t;
  247. t.p2 = p;
  248. return t.p1;
  249. }
  250. bool pb_field_iter_begin_const(pb_field_iter_t *iter, const pb_msgdesc_t *desc, const void *message)
  251. {
  252. return pb_field_iter_begin(iter, desc, pb_const_cast(message));
  253. }
  254. bool pb_field_iter_begin_extension_const(pb_field_iter_t *iter, const pb_extension_t *extension)
  255. {
  256. return pb_field_iter_begin_extension(iter, (pb_extension_t*)pb_const_cast(extension));
  257. }
  258. bool pb_default_field_callback(pb_istream_t *istream, pb_ostream_t *ostream, const pb_field_t *field)
  259. {
  260. if (field->data_size == sizeof(pb_callback_t))
  261. {
  262. pb_callback_t *pCallback = (pb_callback_t*)field->pData;
  263. if (pCallback != NULL)
  264. {
  265. if (istream != NULL && pCallback->funcs.decode != NULL)
  266. {
  267. return pCallback->funcs.decode(istream, field, &pCallback->arg);
  268. }
  269. if (ostream != NULL && pCallback->funcs.encode != NULL)
  270. {
  271. return pCallback->funcs.encode(ostream, field, &pCallback->arg);
  272. }
  273. }
  274. }
  275. return true; /* Success, but didn't do anything */
  276. }
  277. #ifdef PB_VALIDATE_UTF8
  278. /* This function checks whether a string is valid UTF-8 text.
  279. *
  280. * Algorithm is adapted from https://www.cl.cam.ac.uk/~mgk25/ucs/utf8_check.c
  281. * Original copyright: Markus Kuhn <http://www.cl.cam.ac.uk/~mgk25/> 2005-03-30
  282. * Licensed under "Short code license", which allows use under MIT license or
  283. * any compatible with it.
  284. */
  285. bool pb_validate_utf8(const char *str)
  286. {
  287. const pb_byte_t *s = (const pb_byte_t*)str;
  288. while (*s)
  289. {
  290. if (*s < 0x80)
  291. {
  292. /* 0xxxxxxx */
  293. s++;
  294. }
  295. else if ((s[0] & 0xe0) == 0xc0)
  296. {
  297. /* 110XXXXx 10xxxxxx */
  298. if ((s[1] & 0xc0) != 0x80 ||
  299. (s[0] & 0xfe) == 0xc0) /* overlong? */
  300. return false;
  301. else
  302. s += 2;
  303. }
  304. else if ((s[0] & 0xf0) == 0xe0)
  305. {
  306. /* 1110XXXX 10Xxxxxx 10xxxxxx */
  307. if ((s[1] & 0xc0) != 0x80 ||
  308. (s[2] & 0xc0) != 0x80 ||
  309. (s[0] == 0xe0 && (s[1] & 0xe0) == 0x80) || /* overlong? */
  310. (s[0] == 0xed && (s[1] & 0xe0) == 0xa0) || /* surrogate? */
  311. (s[0] == 0xef && s[1] == 0xbf &&
  312. (s[2] & 0xfe) == 0xbe)) /* U+FFFE or U+FFFF? */
  313. return false;
  314. else
  315. s += 3;
  316. }
  317. else if ((s[0] & 0xf8) == 0xf0)
  318. {
  319. /* 11110XXX 10XXxxxx 10xxxxxx 10xxxxxx */
  320. if ((s[1] & 0xc0) != 0x80 ||
  321. (s[2] & 0xc0) != 0x80 ||
  322. (s[3] & 0xc0) != 0x80 ||
  323. (s[0] == 0xf0 && (s[1] & 0xf0) == 0x80) || /* overlong? */
  324. (s[0] == 0xf4 && s[1] > 0x8f) || s[0] > 0xf4) /* > U+10FFFF? */
  325. return false;
  326. else
  327. s += 4;
  328. }
  329. else
  330. {
  331. return false;
  332. }
  333. }
  334. return true;
  335. }
  336. #endif