/[pcre]/code/branches/pcre16/pcretest.c
ViewVC logotype

Diff of /code/branches/pcre16/pcretest.c

Parent Directory | Revision Log | View Patch

revision 822 by zherczeg, Fri Dec 23 20:37:29 2011 UTC | revision 823 by ph10, Sat Dec 24 17:43:22 2011 UTC
# Line 36  POSSIBILITY OF SUCH DAMAGE. Line 36  POSSIBILITY OF SUCH DAMAGE.
36  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
37  */  */
38    
39    /* This program now supports the testing of both the 8-bit and 16-bit PCRE
40    libraries in a single program. This is different from the modules such as
41    pcre_compile.c in the library itself, which are compiled separately for each
42    mode. If both modes are enabled, for example, pcre_compile.c is compiled twice
43    (the second time with COMPILE_PCRE16 defined). By contrast, pcretest.c is
44    compiled only once. Therefore, it must not make use of any of the macros from
45    pcre_internal.h that depend on COMPILE_PCRE8 or COMPILE_PCRE16. It does,
46    however, make use of SUPPORT_PCRE8 and SUPPORT_PCRE16 to ensure that it calls
47    only supported library functions. */
48    
49    
50  #ifdef HAVE_CONFIG_H  #ifdef HAVE_CONFIG_H
51  #include "config.h"  #include "config.h"
# Line 161  Makefile. */ Line 171  Makefile. */
171  #endif  #endif
172    
173  /* It is also possible, originally for the benefit of a version that was  /* It is also possible, originally for the benefit of a version that was
174  imported into Exim, to build pcretest without support for UTF8 (define NOUTF8),  imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
175  without the interface to the DFA matcher (NODFA). In fact, we automatically cut  NOUTF), without the interface to the DFA matcher (NODFA). In fact, we
176  out the UTF8 support if PCRE is built without it. */  automatically cut out the UTF support if PCRE is built without it. */
177    
178  #ifndef SUPPORT_UTF8  #ifndef SUPPORT_UTF
179  #ifndef NOUTF8  #ifndef NOUTF
180  #define NOUTF8  #define NOUTF
181  #endif  #endif
182  #endif  #endif
183    
# Line 177  only from one place and is handled diffe Line 187  only from one place and is handled diffe
187  using a single macro to do this in a generic way, because of the many different  using a single macro to do this in a generic way, because of the many different
188  argument requirements. We know that at least one of SUPPORT_PCRE8 and  argument requirements. We know that at least one of SUPPORT_PCRE8 and
189  SUPPORT_PCRE16 must be set. First define macros for each individual mode; then  SUPPORT_PCRE16 must be set. First define macros for each individual mode; then
190  use these in the definitions of generic macros. */  use these in the definitions of generic macros.
191    
192    **** Special note about the PCHARSxxx macros: the address of the string to be
193    printed is always given as two arguments: a base address followed by an offset.
194    The base address is cast to the correct data size for 8 or 16 bit data; the
195    offset is in units of this size. If the string were given as base+offset in one
196    argument, the casting might be incorrectly applied. */
197    
198  #ifdef SUPPORT_PCRE8  #ifdef SUPPORT_PCRE8
199    
# Line 605  COMPILE_PCRE16 is *not* set. */ Line 621  COMPILE_PCRE16 is *not* set. */
621  #endif  #endif
622    
623  static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };  static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
   
624  #endif  /* SUPPORT_PCRE16 */  #endif  /* SUPPORT_PCRE16 */
625    
626  /* If we have 8-bit support, default use_pcre16 to false; if there is also  /* If we have 8-bit support, default use_pcre16 to false; if there is also
# Line 631  static const char *errtexts[] = { Line 646  static const char *errtexts[] = {
646    NULL,  /* never returned by pcre_exec() or pcre_dfa_exec() */    NULL,  /* never returned by pcre_exec() or pcre_dfa_exec() */
647    "match limit exceeded",    "match limit exceeded",
648    "callout error code",    "callout error code",
649    NULL,  /* BADUTF8 is handled specially */    NULL,  /* BADUTF8/16 is handled specially */
650    "bad UTF-8 offset",    NULL,  /* BADUTF8/16 offset is handled specially */
651    NULL,  /* PARTIAL is handled specially */    NULL,  /* PARTIAL is handled specially */
652    "not used - internal error",    "not used - internal error",
653    "internal error - pattern overwritten?",    "internal error - pattern overwritten?",
# Line 646  static const char *errtexts[] = { Line 661  static const char *errtexts[] = {
661    "not used - internal error",    "not used - internal error",
662    "invalid combination of newline options",    "invalid combination of newline options",
663    "bad offset value",    "bad offset value",
664    NULL,  /* SHORTUTF8 is handled specially */    NULL,  /* SHORTUTF8/16 is handled specially */
665    "nested recursion at the same subject position",    "nested recursion at the same subject position",
666    "JIT stack limit reached",    "JIT stack limit reached",
667    "pattern compiled in wrong mode (8-bit/16-bit error)"    "pattern compiled in wrong mode (8-bit/16-bit error)"
# Line 1011  return (pcre_jit_stack *)arg; Line 1026  return (pcre_jit_stack *)arg;
1026  }  }
1027    
1028    
1029    #if !defined NOUTF
1030  /*************************************************  /*************************************************
1031  *            Convert UTF-8 string to value       *  *            Convert UTF-8 string to value       *
1032  *************************************************/  *************************************************/
# Line 1026  Returns:      >  0 => the number of byte Line 1042  Returns:      >  0 => the number of byte
1042                -6 to 0 => malformed UTF-8 character at offset = (-return)                -6 to 0 => malformed UTF-8 character at offset = (-return)
1043  */  */
1044    
 #if !defined NOUTF8  
   
1045  static int  static int
1046  utf82ord(pcre_uint8 *utf8bytes, int *vptr)  utf82ord(pcre_uint8 *utf8bytes, int *vptr)
1047  {  {
# Line 1068  if (j != i) return -(i+1); Line 1082  if (j != i) return -(i+1);
1082  *vptr = d;  *vptr = d;
1083  return i+1;  return i+1;
1084  }  }
1085    #endif  /* NOUTF */
 #endif  
1086    
1087    
1088    
1089    #if !defined NOUTF
1090  /*************************************************  /*************************************************
1091  *       Convert character value to UTF-8         *  *       Convert character value to UTF-8         *
1092  *************************************************/  *************************************************/
# Line 1087  Arguments: Line 1101  Arguments:
1101  Returns:     number of bytes placed in the buffer  Returns:     number of bytes placed in the buffer
1102  */  */
1103    
 #if !defined NOUTF8  
   
1104  static int  static int
1105  ord2utf8(int cvalue, pcre_uint8 *utf8bytes)  ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
1106  {  {
# Line 1104  for (j = i; j > 0; j--) Line 1116  for (j = i; j > 0; j--)
1116  *utf8bytes = utf8_table2[i] | cvalue;  *utf8bytes = utf8_table2[i] | cvalue;
1117  return i + 1;  return i + 1;
1118  }  }
   
1119  #endif  #endif
1120    
1121    
# Line 1120  double, because up to 0xffff uses no mor Line 1131  double, because up to 0xffff uses no mor
1131  in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-16. The  in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-16. The
1132  result is always left in buffer16.  result is always left in buffer16.
1133    
1134    Note that this function does not object to surrogate values. This is
1135    deliberate; it makes it possible to construct UTF-16 strings that are invalid,
1136    for the purpose of testing that they are correctly faulted.
1137    
1138  Arguments:  Arguments:
1139    p          points to a byte string    p          points to a byte string
1140    utf        true if UTF-8 (to be converted to UTF-16)    utf        true if UTF-8 (to be converted to UTF-16)
# Line 1127  Arguments: Line 1142  Arguments:
1142    
1143  Returns:     number of 16-bit data items used (excluding trailing zero)  Returns:     number of 16-bit data items used (excluding trailing zero)
1144               OR -1 if a UTF-8 string is malformed               OR -1 if a UTF-8 string is malformed
1145                 OR -2 if a value > 0x10ffff is encountered
1146  */  */
1147    
1148  static int  static int
# Line 1160  else Line 1176  else
1176      {      {
1177      int chlen = utf82ord(p, &c);      int chlen = utf82ord(p, &c);
1178      if (chlen <= 0) return -1;      if (chlen <= 0) return -1;
1179        if (c > 0x10ffff) return -2;
1180      p += chlen;      p += chlen;
1181      len -= chlen;      len -= chlen;
1182      if (c < 0x10000) *pp++ = c; else      if (c < 0x10000) *pp++ = c; else
# Line 1365  if (length < 0) Line 1382  if (length < 0)
1382    
1383  while (length-- > 0)  while (length-- > 0)
1384    {    {
1385  #if !defined NOUTF8  #if !defined NOUTF
1386    if (use_utf)    if (use_utf)
1387      {      {
1388      int rc = utf82ord(p, &c);      int rc = utf82ord(p, &c);
# Line 1399  int len = 0; Line 1416  int len = 0;
1416  while (*p++ != 0) len++;  while (*p++ != 0) len++;
1417  return len;  return len;
1418  }  }
1419    #endif  /* SUPPORT_PCRE16 */
1420    
1421    
1422    #ifdef SUPPORT_PCRE16
1423  /*************************************************  /*************************************************
1424  *           Print 16-bit character string        *  *           Print 16-bit character string        *
1425  *************************************************/  *************************************************/
# Line 1419  if (length < 0) Line 1437  if (length < 0)
1437  while (length-- > 0)  while (length-- > 0)
1438    {    {
1439    int c = *p++ & 0xffff;    int c = *p++ & 0xffff;
1440  #if !defined NOUTF8  #if !defined NOUTF
1441    if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)    if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
1442      {      {
1443      int d = *p & 0xffff;      int d = *p & 0xffff;
# Line 1436  while (length-- > 0) Line 1454  while (length-- > 0)
1454    
1455  return yield;  return yield;
1456  }  }
1457  #endif  #endif  /* SUPPORT_PCRE16 */
1458    
1459    
1460    
# Line 1462  if (pcre_get_stringnumber(re, (char *)(* Line 1480  if (pcre_get_stringnumber(re, (char *)(*
1480  *pp = npp;  *pp = npp;
1481  return p;  return p;
1482  }  }
1483  #endif  #endif  /* SUPPORT_PCRE8 */
1484    
1485    
1486    
# Line 1489  if (pcre16_get_stringnumber(re, (PCRE_SP Line 1507  if (pcre16_get_stringnumber(re, (PCRE_SP
1507  *pp = npp;  *pp = npp;
1508  return p;  return p;
1509  }  }
1510  #endif  #endif  /* SUPPORT_PCRE16 */
1511    
1512    
1513    
# Line 1680  if (rc < 0) fprintf(outfile, "Error %d f Line 1698  if (rc < 0) fprintf(outfile, "Error %d f
1698  *             Swap byte functions                *  *             Swap byte functions                *
1699  *************************************************/  *************************************************/
1700    
1701  /* The following functions swap the bytes of a pcre_uint16  /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
1702  and pcre_uint32 value.  value, respectively.
1703    
1704  Arguments:  Arguments:
1705    value        any number    value        any number
# Line 1721  static void Line 1739  static void
1739  regexflip(pcre *ere, pcre_extra *extra)  regexflip(pcre *ere, pcre_extra *extra)
1740  {  {
1741  real_pcre *re = (real_pcre *)ere;  real_pcre *re = (real_pcre *)ere;
 int op;  
   
1742  #ifdef SUPPORT_PCRE16  #ifdef SUPPORT_PCRE16
1743    int op;
1744  pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;  pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
1745  int length = re->name_count * re->name_entry_size;  int length = re->name_count * re->name_entry_size;
1746  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
# Line 2128  _setmode( _fileno( stdout ), _O_BINARY ) Line 2145  _setmode( _fileno( stdout ), _O_BINARY )
2145  #endif  #endif
2146    
2147  /* Get the version number: both pcre_version() and pcre16_version() give the  /* Get the version number: both pcre_version() and pcre16_version() give the
2148  same answer. We just need to ensure that we call one that is availab.e */  same answer. We just need to ensure that we call one that is available. */
2149    
2150  #ifdef SUPPORT_PCRE8  #ifdef SUPPORT_PCRE8
2151  version = pcre_version();  version = pcre_version();
# Line 2706  while (!done) Line 2723  while (!done)
2723  #ifdef SUPPORT_PCRE16  #ifdef SUPPORT_PCRE16
2724      if (use_pcre16)      if (use_pcre16)
2725        {        {
2726        if (to16(p, options & PCRE_UTF8, (int)strlen((char *)p)) < 0)        switch(to16(p, options & PCRE_UTF8, (int)strlen((char *)p)))
2727          {          {
2728            case -1:
2729          fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "          fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
2730            "converted to UTF-16\n");            "converted to UTF-16\n");
2731          goto SKIP_DATA;          goto SKIP_DATA;
2732    
2733            case -2:
2734            fprintf(outfile, "**Failed: character value greater than 0x10ffff "
2735              "cannot be converted to UTF-16\n");
2736            goto SKIP_DATA;
2737    
2738            default:
2739            break;
2740          }          }
2741        p = (pcre_uint8 *)buffer16;        p = (pcre_uint8 *)buffer16;
2742        }        }
# Line 3231  while (!done) Line 3257  while (!done)
3257          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
3258            c = c * 8 + *p++ - '0';            c = c * 8 + *p++ - '0';
3259    
3260  #if !defined NOUTF8  #if !defined NOUTF
3261          if (use_utf && c > 255)          if (use_utf && c > 255)
3262            {            {
3263            pcre_uint8 buff8[8];            pcre_uint8 buff8[8];
# Line 3247  while (!done) Line 3273  while (!done)
3273    
3274          /* Handle \x{..} specially - new Perl thing for utf8 */          /* Handle \x{..} specially - new Perl thing for utf8 */
3275    
3276  #if !defined NOUTF8  #if !defined NOUTF
3277          if (*p == '{')          if (*p == '{')
3278            {            {
3279            pcre_uint8 *pt = p;            pcre_uint8 *pt = p;
# Line 3593  while (!done) Line 3619  while (!done)
3619      if (use_pcre16)      if (use_pcre16)
3620        {        {
3621        len = to16(bptr, (((real_pcre *)re)->options) & PCRE_UTF8, len);        len = to16(bptr, (((real_pcre *)re)->options) & PCRE_UTF8, len);
3622        if (len < 0)        switch(len)
3623          {          {
3624            case -1:
3625          fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "          fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3626            "converted to UTF-16\n");            "converted to UTF-16\n");
3627          goto NEXT_DATA;          goto NEXT_DATA;
3628    
3629            case -2:
3630            fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3631              "cannot be converted to UTF-16\n");
3632            goto NEXT_DATA;
3633    
3634            default:
3635            break;
3636          }          }
3637        bptr = (pcre_uint8 *)buffer16;        bptr = (pcre_uint8 *)buffer16;
3638        }        }
# Line 4021  while (!done) Line 4056  while (!done)
4056    
4057              case PCRE_ERROR_BADUTF8:              case PCRE_ERROR_BADUTF8:
4058              case PCRE_ERROR_SHORTUTF8:              case PCRE_ERROR_SHORTUTF8:
4059              fprintf(outfile, "Error %d (%s UTF-8 string)", count,              fprintf(outfile, "Error %d (%s UTF-%s string)", count,
4060                (count == PCRE_ERROR_BADUTF8)? "bad" : "short");                (count == PCRE_ERROR_BADUTF8)? "bad" : "short",
4061                  use_pcre16? "16" : "8");
4062              if (use_size_offsets >= 2)              if (use_size_offsets >= 2)
4063                fprintf(outfile, " offset=%d reason=%d", use_offsets[0],                fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
4064                  use_offsets[1]);                  use_offsets[1]);
4065              fprintf(outfile, "\n");              fprintf(outfile, "\n");
4066              break;              break;
4067    
4068                case PCRE_ERROR_BADUTF8_OFFSET:
4069                fprintf(outfile, "Error %d (bad UTF-%s offset)\n", count,
4070                  use_pcre16? "16" : "8");
4071                break;
4072    
4073              default:              default:
4074              if (count < 0 && (-count) < sizeof(errtexts)/sizeof(const char *))              if (count < 0 && (-count) < sizeof(errtexts)/sizeof(const char *))

Legend:
Removed from v.822  
changed lines
  Added in v.823

  ViewVC Help
Powered by ViewVC 1.1.5