regextest.cpp 11 KB


  1. ///////////////////////////////////////////////////////////////////////////////
  2. // Name: tests/regex/regex.cpp
  3. // Purpose: Test the built-in regex lib and wxRegEx
  4. // Author: Mike Wetherell
  5. // Copyright: (c) 2004 Mike Wetherell
  6. // Licence: wxWindows licence
  7. ///////////////////////////////////////////////////////////////////////////////
  8. //
  9. // Notes:
  10. //
  11. // To run just one section, say wx_1, do this:
  12. // test regex.wx_1
  13. //
  14. // To run all the regex tests:
  15. // test regex
  16. //
  17. // Some tests must be skipped since they use features which we do not make
  18. // available through wxRegEx. To see the list of tests that have been skipped
  19. // turn on verbose logging, e.g.:
  20. // test --verbose regex
  21. //
  22. // The tests here are for the builtin library, tests for wxRegEx in general
  23. // should go in wxregex.cpp
  24. //
  25. // The tests are generated from Henry Spencer's reg.test, additional tests
  26. // can be added in wxreg.test. These test files are then turned into a C++
  27. // include file 'regex.inc' (included below) using a script 'regex.pl'.
  28. //
  29. // For compilers that support precompilation, includes "wx/wx.h".
  30. #include "testprec.h"
  31. #ifdef __BORLANDC__
  32. #pragma hdrstop
  33. #endif
  34. #if wxUSE_REGEX
  35. // for all others, include the necessary headers
  36. #ifndef WX_PRECOMP
  37. #include "wx/wx.h"
  38. #endif
  39. // many of the tests are specific to the builtin regex lib, so only attempt
  40. // them when using the builtin regex lib.
  41. //
  42. #ifdef wxHAS_REGEX_ADVANCED
  43. #include "wx/regex.h"
  44. #include <string>
  45. #include <vector>
  46. using CppUnit::Test;
  47. using CppUnit::TestCase;
  48. using CppUnit::TestSuite;
  49. using CppUnit::Exception;
  50. using std::string;
  51. using std::vector;
  52. ///////////////////////////////////////////////////////////////////////////////
  53. // The test case - an instance represents a single test
  54. class RegExTestCase : public TestCase
  55. {
  56. public:
  57. // constructor - create a single testcase
  58. RegExTestCase(
  59. const string& name,
  60. const char *mode,
  61. const char *id,
  62. const char *flags,
  63. const char *pattern,
  64. const char *data,
  65. const vector<const char *>& expected);
  66. protected:
  67. // run this testcase
  68. void runTest();
  69. private:
  70. // workers
  71. wxString Conv(const char *str);
  72. void parseFlags(const wxString& flags);
  73. void doTest(int flavor);
  74. static wxString quote(const wxString& arg);
  75. const wxChar *convError() const { return wxT("<cannot convert>"); }
  76. // assertions - adds some information about the test that failed
  77. void fail(const wxString& msg) const;
  78. void failIf(bool condition, const wxString& msg) const
  79. { if (condition) fail(msg); }
  80. // mode, id, flags, pattern, test data, expected results...
  81. int m_mode;
  82. wxString m_id;
  83. wxString m_flags;
  84. wxString m_pattern;
  85. wxString m_data;
  86. wxArrayString m_expected;
  87. // the flag decoded
  88. int m_compileFlags;
  89. int m_matchFlags;
  90. bool m_basic;
  91. bool m_extended;
  92. bool m_advanced;
  93. };
  94. // constructor - throws Exception on failure
  95. //
  96. RegExTestCase::RegExTestCase(
  97. const string& name,
  98. const char *mode,
  99. const char *id,
  100. const char *flags,
  101. const char *pattern,
  102. const char *data,
  103. const vector<const char *>& expected)
  104. :
  105. TestCase(name),
  106. m_mode(mode[0]),
  107. m_id(Conv(id)),
  108. m_flags(Conv(flags)),
  109. m_pattern(Conv(pattern)),
  110. m_data(Conv(data)),
  111. m_compileFlags(0),
  112. m_matchFlags(0),
  113. m_basic(false),
  114. m_extended(false),
  115. m_advanced(false)
  116. {
  117. bool badconv = m_pattern == convError() || m_data == convError();
  118. //RN: Removing the std:: here will break MSVC6 compilation
  119. std::vector<const char *>::const_iterator it;
  120. for (it = expected.begin(); it != expected.end(); ++it) {
  121. m_expected.push_back(Conv(*it));
  122. badconv = badconv || *m_expected.rbegin() == convError();
  123. }
  124. failIf(badconv, wxT("cannot convert to default character encoding"));
  125. // the flags need further parsing...
  126. parseFlags(m_flags);
  127. #ifndef wxHAS_REGEX_ADVANCED
  128. failIf(!m_basic && !m_extended, wxT("advanced regexs not available"));
  129. #endif
  130. }
  131. int wxWcscmp(const wchar_t* s1, const wchar_t* s2)
  132. {
  133. size_t nLen1 = wxWcslen(s1);
  134. size_t nLen2 = wxWcslen(s2);
  135. if (nLen1 != nLen2)
  136. return nLen1 - nLen2;
  137. return memcmp(s1, s2, nLen1*sizeof(wchar_t));
  138. }
  139. // convert a string from UTF8 to the internal encoding
  140. //
  141. wxString RegExTestCase::Conv(const char *str)
  142. {
  143. const wxWCharBuffer wstr = wxConvUTF8.cMB2WC(str);
  144. const wxWC2WXbuf buf = wxConvCurrent->cWC2WX(wstr);
  145. if (!buf || wxWcscmp(wxConvCurrent->cWX2WC(buf), wstr) != 0)
  146. return convError();
  147. return buf;
  148. }
  149. // Parse flags
  150. //
  151. void RegExTestCase::parseFlags(const wxString& flags)
  152. {
  153. for ( wxString::const_iterator p = flags.begin(); p != flags.end(); ++p )
  154. {
  155. switch ( (*p).GetValue() ) {
  156. // noop
  157. case '-': break;
  158. // we don't fully support these flags, but they don't stop us
  159. // checking for success of failure of the match, so treat as noop
  160. case 'A': case 'B': case 'E': case 'H':
  161. case 'I': case 'L': case 'M': case 'N':
  162. case 'P': case 'Q': case 'R': case 'S':
  163. case 'T': case 'U': case '%':
  164. break;
  165. // match options
  166. case '^': m_matchFlags |= wxRE_NOTBOL; break;
  167. case '$': m_matchFlags |= wxRE_NOTEOL; break;
  168. #if wxUSE_UNICODE
  169. case '*': break;
  170. #endif
  171. // compile options
  172. case '&': m_advanced = m_basic = true; break;
  173. case 'b': m_basic = true; break;
  174. case 'e': m_extended = true; break;
  175. case 'i': m_compileFlags |= wxRE_ICASE; break;
  176. case 'o': m_compileFlags |= wxRE_NOSUB; break;
  177. case 'n': m_compileFlags |= wxRE_NEWLINE; break;
  178. case 't': if (strchr("ep", m_mode)) break; // else fall through...
  179. // anything else we must skip the test
  180. default:
  181. fail(wxString::Format(
  182. wxT("requires unsupported flag '%c'"), *p));
  183. }
  184. }
  185. }
  186. // Try test for all flavours of expression specified
  187. //
  188. void RegExTestCase::runTest()
  189. {
  190. if (m_basic)
  191. doTest(wxRE_BASIC);
  192. if (m_extended)
  193. doTest(wxRE_EXTENDED);
  194. #ifdef wxHAS_REGEX_ADVANCED
  195. if (m_advanced || (!m_basic && !m_extended))
  196. doTest(wxRE_ADVANCED);
  197. #endif
  198. }
  199. // Try the test for a single flavour of expression
  200. //
  201. void RegExTestCase::doTest(int flavor)
  202. {
  203. wxRegEx re(m_pattern, m_compileFlags | flavor);
  204. // 'e' - test that the pattern fails to compile
  205. if (m_mode == 'e') {
  206. failIf(re.IsValid(), wxT("compile succeeded (should fail)"));
  207. return;
  208. }
  209. failIf(!re.IsValid(), wxT("compile failed"));
  210. bool matches = re.Matches(m_data, m_matchFlags);
  211. // 'f' or 'p' - test that the pattern does not match
  212. if (m_mode == 'f' || m_mode == 'p') {
  213. failIf(matches, wxT("match succeeded (should fail)"));
  214. return;
  215. }
  216. // otherwise 'm' or 'i' - test the pattern does match
  217. failIf(!matches, wxT("match failed"));
  218. if (m_compileFlags & wxRE_NOSUB)
  219. return;
  220. // check wxRegEx has correctly counted the number of subexpressions
  221. wxString msg;
  222. msg << wxT("GetMatchCount() == ") << re.GetMatchCount()
  223. << wxT(", expected ") << m_expected.size();
  224. failIf(m_expected.size() != re.GetMatchCount(), msg);
  225. for (size_t i = 0; i < m_expected.size(); i++) {
  226. wxString result;
  227. size_t start, len;
  228. msg.clear();
  229. msg << wxT("wxRegEx::GetMatch failed for match ") << i;
  230. failIf(!re.GetMatch(&start, &len, i), msg);
  231. // m - check the match returns the strings given
  232. if (m_mode == 'm')
  233. {
  234. if (start < INT_MAX)
  235. result = m_data.substr(start, len);
  236. else
  237. result = wxT("");
  238. }
  239. // i - check the match returns the offsets given
  240. else if (m_mode == 'i')
  241. {
  242. if (start > INT_MAX)
  243. result = wxT("-1 -1");
  244. else if (start + len > 0)
  245. result << start << wxT(" ") << start + len - 1;
  246. else
  247. result << start << wxT(" -1");
  248. }
  249. msg.clear();
  250. msg << wxT("match(") << i << wxT(") == ") << quote(result)
  251. << wxT(", expected == ") << quote(m_expected[i]);
  252. failIf(result != m_expected[i], msg);
  253. }
  254. }
  255. // assertion - adds some information about the test that failed
  256. //
  257. void RegExTestCase::fail(const wxString& msg) const
  258. {
  259. wxString str;
  260. wxArrayString::const_iterator it;
  261. str << (wxChar)m_mode << wxT(" ") << m_id << wxT(" ") << m_flags << wxT(" ")
  262. << quote(m_pattern) << wxT(" ") << quote(m_data);
  263. for (it = m_expected.begin(); it != m_expected.end(); ++it)
  264. str << wxT(" ") << quote(*it);
  265. if (str.length() > 77)
  266. str = str.substr(0, 74) + wxT("...");
  267. str << wxT("\n ") << msg;
  268. // no lossy convs so using utf8
  269. CPPUNIT_FAIL(string(str.mb_str(wxConvUTF8)));
  270. }
  271. // quote a string so that it can be displayed (static)
  272. //
  273. wxString RegExTestCase::quote(const wxString& arg)
  274. {
  275. const wxChar *needEscape = wxT("\a\b\t\n\v\f\r\"\\");
  276. const wxChar *escapes = wxT("abtnvfr\"\\");
  277. wxString str;
  278. for (size_t i = 0; i < arg.length(); i++) {
  279. wxChar ch = (wxChar)arg[i];
  280. const wxChar *p = wxStrchr(needEscape, ch);
  281. if (p)
  282. str += wxString::Format(wxT("\\%c"), escapes[p - needEscape]);
  283. else if (wxIscntrl(ch))
  284. str += wxString::Format(wxT("\\%03o"), ch);
  285. else
  286. str += (wxChar)ch;
  287. }
  288. return str.length() == arg.length() && str.find(' ') == wxString::npos ?
  289. str : wxT("\"") + str + wxT("\"");
  290. }
  291. ///////////////////////////////////////////////////////////////////////////////
  292. // Test suite
  293. class RegExTestSuite : public TestSuite
  294. {
  295. public:
  296. RegExTestSuite(string name) : TestSuite(name) { }
  297. void add(const char *mode, const char *id, const char *flags,
  298. const char *pattern, const char *data, const char *expected, ...);
  299. };
  300. // Add a testcase to the suite
  301. //
  302. void RegExTestSuite::add(
  303. const char *mode,
  304. const char *id,
  305. const char *flags,
  306. const char *pattern,
  307. const char *data,
  308. const char *expected, ...)
  309. {
  310. string name = getName() + "." + id;
  311. vector<const char *> expected_results;
  312. va_list ap;
  313. for (va_start(ap, expected); expected; expected = va_arg(ap, const char *))
  314. expected_results.push_back(expected);
  315. va_end(ap);
  316. try {
  317. addTest(new RegExTestCase(
  318. name, mode, id, flags, pattern, data, expected_results));
  319. }
  320. catch (Exception& e) {
  321. wxLogInfo(wxString::Format(wxT("skipping: %s\n %s\n"),
  322. wxString(name.c_str(), wxConvUTF8).c_str(),
  323. wxString(e.what(), wxConvUTF8).c_str()));
  324. }
  325. }
  326. // Include the generated tests
  327. //
  328. #include "regex.inc"
  329. #endif // wxHAS_REGEX_ADVANCED
  330. #endif // wxUSE_REGEX