/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory | Revision Log | View Patch

revision 1087 by chpe, Tue Oct 16 15:55:38 2012 UTC revision 1189 by ph10, Tue Oct 30 16:34:17 2012 UTC
# Line 36  POSSIBILITY OF SUCH DAMAGE. Line 36  POSSIBILITY OF SUCH DAMAGE.
36  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
37  */  */
38    
39  /* This program now supports the testing of both the 8-bit and 16-bit PCRE  /* This program now supports the testing of all of the 8-bit, 16-bit, and
40  libraries in a single program. This is different from the modules such as  32-bit PCRE libraries in a single program. This is different from the modules
41  pcre_compile.c in the library itself, which are compiled separately for each  such as pcre_compile.c in the library itself, which are compiled separately for
42  mode. If both modes are enabled, for example, pcre_compile.c is compiled twice  each mode. If two modes are enabled, for example, pcre_compile.c is compiled
43  (the second time with COMPILE_PCRE16 defined). By contrast, pcretest.c is  twice. By contrast, pcretest.c is compiled only once. Therefore, it must not
44  compiled only once. Therefore, it must not make use of any of the macros from  make use of any of the macros from pcre_internal.h that depend on
45  pcre_internal.h that depend on COMPILE_PCRE8 or COMPILE_PCRE16. It does,  COMPILE_PCRE8, COMPILE_PCRE16, or COMPILE_PCRE32. It does, however, make use of
46  however, make use of SUPPORT_PCRE8 and SUPPORT_PCRE16 to ensure that it calls  SUPPORT_PCRE8, SUPPORT_PCRE16, and SUPPORT_PCRE32 to ensure that it calls only
47  only supported library functions. */  supported library functions. */
48    
49  #ifdef HAVE_CONFIG_H  #ifdef HAVE_CONFIG_H
50  #include "config.h"  #include "config.h"
# Line 133  here before pcre_internal.h so that the Line 133  here before pcre_internal.h so that the
133  appropriately for an application, not for building PCRE. */  appropriately for an application, not for building PCRE. */
134    
135  #include "pcre.h"  #include "pcre.h"
   
 #if defined SUPPORT_PCRE32 && !defined SUPPORT_PCRE8 && !defined SUPPORT_PCRE16  
 /* Configure internal macros to 32 bit mode. */  
 #define COMPILE_PCRE32  
 #endif  
 #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8 && !defined SUPPORT_PCRE32  
 /* Configure internal macros to 16 bit mode. */  
 #define COMPILE_PCRE16  
 #endif  
 #if defined SUPPORT_PCRE8 && !defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE32  
 /* Configure internal macros to 8 bit mode. */  
 #define COMPILE_PCRE8  
 #endif  
   
136  #include "pcre_internal.h"  #include "pcre_internal.h"
137    
138  /* The pcre_printint() function, which prints the internal form of a compiled  /* The pcre_printint() function, which prints the internal form of a compiled
# Line 302  argument, the casting might be incorrect Line 288  argument, the casting might be incorrect
288    
289  #define PCRE_JIT_STACK_FREE8(stack) \  #define PCRE_JIT_STACK_FREE8(stack) \
290    pcre_jit_stack_free(stack)    pcre_jit_stack_free(stack)
291    
292    #define pcre8_maketables pcre_maketables
293    
294  #endif /* SUPPORT_PCRE8 */  #endif /* SUPPORT_PCRE8 */
295    
# Line 399  argument, the casting might be incorrect Line 387  argument, the casting might be incorrect
387  #ifdef SUPPORT_PCRE32  #ifdef SUPPORT_PCRE32
388    
389  #define PCHARS32(lv, p, offset, len, f) \  #define PCHARS32(lv, p, offset, len, f) \
390    lv = pchars32((PCRE_SPTR32)(p) + offset, len, f)    lv = pchars32((PCRE_SPTR32)(p) + offset, len, use_utf, f)
391    
392  #define PCHARSV32(p, offset, len, f) \  #define PCHARSV32(p, offset, len, f)                \
393    (void)pchars32((PCRE_SPTR32)(p) + offset, len, f)    (void)pchars32((PCRE_SPTR32)(p) + offset, len, use_utf, f)
394    
395  #define READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re) \  #define READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re) \
396    p = read_capture_name32(p, cn32, re)    p = read_capture_name32(p, cn32, re)
# Line 484  argument, the casting might be incorrect Line 472  argument, the casting might be incorrect
472  #endif /* SUPPORT_PCRE32 */  #endif /* SUPPORT_PCRE32 */
473    
474    
475  /* ----- Both modes are supported; a runtime test is needed, except for  /* ----- More than one mode is supported; a runtime test is needed, except for
476  pcre_config(), and the JIT stack functions, when it doesn't matter which  pcre_config(), and the JIT stack functions, when it doesn't matter which
477  version is called. ----- */  available version is called. ----- */
478    
479  enum {  enum {
480    PCRE8_MODE,    PCRE8_MODE,
# Line 494  enum { Line 482  enum {
482    PCRE32_MODE    PCRE32_MODE
483  };  };
484    
485  #if (defined (SUPPORT_PCRE8) + defined (SUPPORT_PCRE16) + defined (SUPPORT_PCRE32)) >= 2  #if (defined (SUPPORT_PCRE8) + defined (SUPPORT_PCRE16) + \
486         defined (SUPPORT_PCRE32)) >= 2
487    
488  #define CHAR_SIZE (1 << pcre_mode)  #define CHAR_SIZE (1 << pcre_mode)
489    
490    /* There doesn't seem to be an easy way of writing these macros that can cope
491    with the 3 pairs of bit sizes plus all three bit sizes. So just handle all the
492    cases separately. */
493    
494    /* ----- All three modes supported ----- */
495    
496    #if defined(SUPPORT_PCRE8) && defined(SUPPORT_PCRE16) && defined(SUPPORT_PCRE32)
497    
498  #define PCHARS(lv, p, offset, len, f) \  #define PCHARS(lv, p, offset, len, f) \
499    if (pcre_mode == PCRE32_MODE) \    if (pcre_mode == PCRE32_MODE) \
500      PCHARS32(lv, p, offset, len, f); \      PCHARS32(lv, p, offset, len, f); \
# Line 696  enum { Line 693  enum {
693    else \    else \
694      PCRE_STUDY8(extra, re, options, error)      PCRE_STUDY8(extra, re, options, error)
695    
696    
697    /* ----- Two out of three modes are supported ----- */
698    
699    #else
700    
701    /* We can use some macro trickery to make a single set of definitions work in
702    the three different cases. */
703    
704    /* ----- 32-bit and 16-bit but not 8-bit supported ----- */
705    
706    #if defined(SUPPORT_PCRE32) && defined(SUPPORT_PCRE16)
707    #define BITONE 32
708    #define BITTWO 16
709    
710    /* ----- 32-bit and 8-bit but not 16-bit supported ----- */
711    
712    #elif defined(SUPPORT_PCRE32) && defined(SUPPORT_PCRE8)
713    #define BITONE 32
714    #define BITTWO 8
715    
716    /* ----- 16-bit and 8-bit but not 32-bit supported ----- */
717    
718    #else
719    #define BITONE 16
720    #define BITTWO 8
721    #endif
722    
723    #define glue(a,b) a##b
724    #define G(a,b) glue(a,b)
725    
726    
727    /* ----- Common macros for two-mode cases ----- */
728    
729    #define PCHARS(lv, p, offset, len, f) \
730      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
731        G(PCHARS,BITONE)(lv, p, offset, len, f); \
732      else \
733        G(PCHARS,BITTWO)(lv, p, offset, len, f)
734    
735    #define PCHARSV(p, offset, len, f) \
736      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
737        G(PCHARSV,BITONE)(p, offset, len, f); \
738      else \
739        G(PCHARSV,BITTWO)(p, offset, len, f)
740    
741    #define READ_CAPTURE_NAME(p, cn8, cn16, cn32, re) \
742      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
743        G(READ_CAPTURE_NAME,BITONE)(p, cn8, cn16, cn32, re); \
744      else \
745        G(READ_CAPTURE_NAME,BITTWO)(p, cn8, cn16, cn32, re)
746    
747    #define SET_PCRE_CALLOUT(callout) \
748      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
749        G(SET_PCRE_CALLOUT,BITONE)(callout); \
750      else \
751        G(SET_PCRE_CALLOUT,BITTWO)(callout)
752    
753    #define STRLEN(p) ((pcre_mode == G(G(PCRE,BITONE),_MODE)) ? \
754      G(STRLEN,BITONE)(p) : G(STRLEN,BITTWO)(p))
755    
756    #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
757      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
758        G(PCRE_ASSIGN_JIT_STACK,BITONE)(extra, callback, userdata); \
759      else \
760        G(PCRE_ASSIGN_JIT_STACK,BITTWO)(extra, callback, userdata)
761    
762    #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
763      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
764        G(PCRE_COMPILE,BITONE)(re, pat, options, error, erroffset, tables); \
765      else \
766        G(PCRE_COMPILE,BITTWO)(re, pat, options, error, erroffset, tables)
767    
768    #define PCRE_CONFIG G(G(pcre,BITONE),_config)
769    
770    #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
771        namesptr, cbuffer, size) \
772      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
773        G(PCRE_COPY_NAMED_SUBSTRING,BITONE)(rc, re, bptr, offsets, count, \
774          namesptr, cbuffer, size); \
775      else \
776        G(PCRE_COPY_NAMED_SUBSTRING,BITTWO)(rc, re, bptr, offsets, count, \
777          namesptr, cbuffer, size)
778    
779    #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
780      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
781        G(PCRE_COPY_SUBSTRING,BITONE)(rc, bptr, offsets, count, i, cbuffer, size); \
782      else \
783        G(PCRE_COPY_SUBSTRING,BITTWO)(rc, bptr, offsets, count, i, cbuffer, size)
784    
785    #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
786        offsets, size_offsets, workspace, size_workspace) \
787      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
788        G(PCRE_DFA_EXEC,BITONE)(count, re, extra, bptr, len, start_offset, options, \
789          offsets, size_offsets, workspace, size_workspace); \
790      else \
791        G(PCRE_DFA_EXEC,BITTWO)(count, re, extra, bptr, len, start_offset, options, \
792          offsets, size_offsets, workspace, size_workspace)
793    
794    #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
795        offsets, size_offsets) \
796      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
797        G(PCRE_EXEC,BITONE)(count, re, extra, bptr, len, start_offset, options, \
798          offsets, size_offsets); \
799      else \
800        G(PCRE_EXEC,BITTWO)(count, re, extra, bptr, len, start_offset, options, \
801          offsets, size_offsets)
802    
803    #define PCRE_FREE_STUDY(extra) \
804      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
805        G(PCRE_FREE_STUDY,BITONE)(extra); \
806      else \
807        G(PCRE_FREE_STUDY,BITTWO)(extra)
808    
809    #define PCRE_FREE_SUBSTRING(substring) \
810      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
811        G(PCRE_FREE_SUBSTRING,BITONE)(substring); \
812      else \
813        G(PCRE_FREE_SUBSTRING,BITTWO)(substring)
814    
815    #define PCRE_FREE_SUBSTRING_LIST(listptr) \
816      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
817        G(PCRE_FREE_SUBSTRING_LIST,BITONE)(listptr); \
818      else \
819        G(PCRE_FREE_SUBSTRING_LIST,BITTWO)(listptr)
820    
821    #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
822        getnamesptr, subsptr) \
823      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
824        G(PCRE_GET_NAMED_SUBSTRING,BITONE)(rc, re, bptr, offsets, count, \
825          getnamesptr, subsptr); \
826      else \
827        G(PCRE_GET_NAMED_SUBSTRING,BITTWO)(rc, re, bptr, offsets, count, \
828          getnamesptr, subsptr)
829    
830    #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
831      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
832        G(PCRE_GET_STRINGNUMBER,BITONE)(n, rc, ptr); \
833      else \
834        G(PCRE_GET_STRINGNUMBER,BITTWO)(n, rc, ptr)
835    
836    #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
837      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
838        G(PCRE_GET_SUBSTRING,BITONE)(rc, bptr, use_offsets, count, i, subsptr); \
839      else \
840        G(PCRE_GET_SUBSTRING,BITTWO)(rc, bptr, use_offsets, count, i, subsptr)
841    
842    #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
843      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
844        G(PCRE_GET_SUBSTRING_LIST,BITONE)(rc, bptr, offsets, count, listptr); \
845      else \
846        G(PCRE_GET_SUBSTRING_LIST,BITTWO)(rc, bptr, offsets, count, listptr)
847    
848    #define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
849      (pcre_mode == G(G(PCRE,BITONE),_MODE)) ? \
850         G(PCRE_JIT_STACK_ALLOC,BITONE)(startsize, maxsize) \
851        : G(PCRE_JIT_STACK_ALLOC,BITTWO)(startsize, maxsize)
852    
853    #define PCRE_JIT_STACK_FREE(stack) \
854      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
855        G(PCRE_JIT_STACK_FREE,BITONE)(stack); \
856      else \
857        G(PCRE_JIT_STACK_FREE,BITTWO)(stack)
858    
859    #define PCRE_MAKETABLES \
860      (pcre_mode == G(G(PCRE,BITONE),_MODE)) ? \
861        G(G(pcre,BITONE),_maketables)() : G(G(pcre,BITTWO),_maketables)()
862    
863    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
864      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
865        G(PCRE_PATTERN_TO_HOST_BYTE_ORDER,BITONE)(rc, re, extra, tables); \
866      else \
867        G(PCRE_PATTERN_TO_HOST_BYTE_ORDER,BITTWO)(rc, re, extra, tables)
868    
869    #define PCRE_PRINTINT(re, outfile, debug_lengths) \
870      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
871        G(PCRE_PRINTINT,BITONE)(re, outfile, debug_lengths); \
872      else \
873        G(PCRE_PRINTINT,BITTWO)(re, outfile, debug_lengths)
874    
875    #define PCRE_STUDY(extra, re, options, error) \
876      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
877        G(PCRE_STUDY,BITONE)(extra, re, options, error); \
878      else \
879        G(PCRE_STUDY,BITTWO)(extra, re, options, error)
880    
881    #endif  /* Two out of three modes */
882    
883    /* ----- End of cases where more than one mode is supported ----- */
884    
885    
886  /* ----- Only 8-bit mode is supported ----- */  /* ----- Only 8-bit mode is supported ----- */
887    
888  #elif defined SUPPORT_PCRE8  #elif defined SUPPORT_PCRE8
# Line 831  static const unsigned char *last_callout Line 1018  static const unsigned char *last_callout
1018    
1019  static int buffer_size = 50000;  static int buffer_size = 50000;
1020  static pcre_uint8 *buffer = NULL;  static pcre_uint8 *buffer = NULL;
 static pcre_uint8 *dbuffer = NULL;  
1021  static pcre_uint8 *pbuffer = NULL;  static pcre_uint8 *pbuffer = NULL;
1022    
1023  /* Another buffer is needed for translation to 16/32-bit character strings. It will  /* Just as a safety check, make sure that COMPILE_PCRE[16|32] are *not* set. */
 be obtained and extended as required. */  
   
 #if defined SUPPORT_PCRE8 && (defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32)  
   
 /* We need the table of operator lengths that is used for 16/32-bit compiling, in  
 order to swap bytes in a pattern for saving/reloading testing. Luckily, the  
 data is defined as a macro. However, we must ensure that LINK_SIZE is adjusted  
 appropriately for the 16/32-bit world. Just as a safety check, make sure that  
 COMPILE_PCRE[16|32] is *not* set. */  
1024    
1025  #ifdef COMPILE_PCRE16  #ifdef COMPILE_PCRE16
1026  #error COMPILE_PCRE16 must not be set when compiling pcretest.c  #error COMPILE_PCRE16 must not be set when compiling pcretest.c
# Line 853  COMPILE_PCRE[16|32] is *not* set. */ Line 1030  COMPILE_PCRE[16|32] is *not* set. */
1030  #error COMPILE_PCRE32 must not be set when compiling pcretest.c  #error COMPILE_PCRE32 must not be set when compiling pcretest.c
1031  #endif  #endif
1032    
1033    /* We need buffers for building 16/32-bit strings, and the tables of operator
1034    lengths that are used for 16/32-bit compiling, in order to swap bytes in a
1035    pattern for saving/reloading testing. Luckily, the data for these tables is
1036    defined as a macro. However, we must ensure that LINK_SIZE and IMM2_SIZE (which
1037    are used in the tables) are adjusted appropriately for the 16/32-bit world.
1038    LINK_SIZE is also used later in this program. */
1039    
1040    #ifdef SUPPORT_PCRE16
1041    #undef IMM2_SIZE
1042    #define IMM2_SIZE 1
1043    
1044  #if LINK_SIZE == 2  #if LINK_SIZE == 2
1045  #undef LINK_SIZE  #undef LINK_SIZE
1046  #define LINK_SIZE 1  #define LINK_SIZE 1
# Line 863  COMPILE_PCRE[16|32] is *not* set. */ Line 1051  COMPILE_PCRE[16|32] is *not* set. */
1051  #error LINK_SIZE must be either 2, 3, or 4  #error LINK_SIZE must be either 2, 3, or 4
1052  #endif  #endif
1053    
 #undef IMM2_SIZE  
 #define IMM2_SIZE 1  
   
 #endif /* SUPPORT_PCRE8 && (SUPPORT_PCRE16 || SUPPORT_PCRE32) */  
   
 #ifdef SUPPORT_PCRE16  
1054  static int buffer16_size = 0;  static int buffer16_size = 0;
1055  static pcre_uint16 *buffer16 = NULL;  static pcre_uint16 *buffer16 = NULL;
1056  static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };  static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
1057  #endif  /* SUPPORT_PCRE16 */  #endif  /* SUPPORT_PCRE16 */
1058    
1059  #ifdef SUPPORT_PCRE32  #ifdef SUPPORT_PCRE32
1060    #undef IMM2_SIZE
1061    #define IMM2_SIZE 1
1062    #undef LINK_SIZE
1063    #define LINK_SIZE 1
1064    
1065  static int buffer32_size = 0;  static int buffer32_size = 0;
1066  static pcre_uint32 *buffer32 = NULL;  static pcre_uint32 *buffer32 = NULL;
1067  static const pcre_uint32 OP_lengths32[] = { OP_LENGTHS };  static const pcre_uint32 OP_lengths32[] = { OP_LENGTHS };
1068  #endif  /* SUPPORT_PCRE32 */  #endif  /* SUPPORT_PCRE32 */
1069    
1070  /* If we have 8-bit support, default to it; if there is also  /* If we have 8-bit support, default to it; if there is also 16- or 32-bit
1071  16- or 32-bit support, it can be changed by an option. If there is no 8-bit support,  support, it can be changed by an option. If there is no 8-bit support, there
1072  there must be 16- or 32-bit support, so default it to 1. */  must be 16- or 32-bit support, so default it to 1. */
1073    
1074  #if defined SUPPORT_PCRE8  #if defined SUPPORT_PCRE8
1075  static int pcre_mode = PCRE8_MODE;  static int pcre_mode = PCRE8_MODE;
# Line 942  static const char *errtexts[] = { Line 1129  static const char *errtexts[] = {
1129    "JIT stack limit reached",    "JIT stack limit reached",
1130    "pattern compiled in wrong mode: 8-bit/16-bit error",    "pattern compiled in wrong mode: 8-bit/16-bit error",
1131    "pattern compiled with other endianness",    "pattern compiled with other endianness",
1132    "invalid data in workspace for DFA restart"    "invalid data in workspace for DFA restart",
1133      "bad JIT option",
1134      "bad length"
1135  };  };
1136    
1137    
# Line 1299  return sys_errlist[n]; Line 1488  return sys_errlist[n];
1488  *       Print newline configuration              *  *       Print newline configuration              *
1489  *************************************************/  *************************************************/
1490    
1491  /*  /*
1492  Arguments:  Arguments:
1493    rc         the return code from PCRE_CONFIG_NEWLINE    rc         the return code from PCRE_CONFIG_NEWLINE
1494    isc        TRUE if called from "-C newline"    isc        TRUE if called from "-C newline"
1495  Returns:     nothing  Returns:     nothing
1496  */  */
1497    
# Line 1318  switch(rc) Line 1507  switch(rc)
1507    case (CHAR_CR<<8 | CHAR_LF): s = "CRLF"; break;    case (CHAR_CR<<8 | CHAR_LF): s = "CRLF"; break;
1508    case -1: s = "ANY"; break;    case -1: s = "ANY"; break;
1509    case -2: s = "ANYCRLF"; break;    case -2: s = "ANYCRLF"; break;
1510    
1511    default:    default:
1512    printf("a non-standard value: 0x%04x\n", rc);    printf("a non-standard value: 0x%04x\n", rc);
1513    return;    return;
1514    }    }
1515    
1516  printf("%s\n", s);  printf("%s\n", s);
1517  }  }
# Line 1388  for (j = 0; j < i; j++) Line 1577  for (j = 0; j < i; j++)
1577  /* Check that encoding was the correct unique one */  /* Check that encoding was the correct unique one */
1578    
1579  for (j = 0; j < utf8_table1_size; j++)  for (j = 0; j < utf8_table1_size; j++)
1580    if (d <= utf8_table1[j]) break;    if (d <= (pcre_uint32)utf8_table1[j]) break;
1581  if (j != i) return -(i+1);  if (j != i) return -(i+1);
1582    
1583  /* Valid value */  /* Valid value */
# Line 1400  return i+1; Line 1589  return i+1;
1589    
1590    
1591    
1592  #if !defined NOUTF || defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32  #if defined SUPPORT_PCRE8 && !defined NOUTF
1593  /*************************************************  /*************************************************
1594  *       Convert character value to UTF-8         *  *       Convert character value to UTF-8         *
1595  *************************************************/  *************************************************/
# Line 1422  register int i, j; Line 1611  register int i, j;
1611  if (cvalue > 0x7fffffffu)  if (cvalue > 0x7fffffffu)
1612    return -1;    return -1;
1613  for (i = 0; i < utf8_table1_size; i++)  for (i = 0; i < utf8_table1_size; i++)
1614    if (cvalue <= utf8_table1[i]) break;    if (cvalue <= (pcre_uint32)utf8_table1[i]) break;
1615  utf8bytes += i;  utf8bytes += i;
1616  for (j = i; j > 0; j--)  for (j = i; j > 0; j--)
1617   {   {
# Line 1590  else Line 1779  else
1779  *pp = 0;  *pp = 0;
1780  return pp - buffer32;  return pp - buffer32;
1781  }  }
1782    
1783    /* Check that a 32-bit character string is valid UTF-32.
1784    
1785    Arguments:
1786      string       points to the string
1787      length       length of string, or -1 if the string is zero-terminated
1788    
1789    Returns:       TRUE  if the string is a valid UTF-32 string
1790                   FALSE otherwise
1791    */
1792    
1793    #ifdef SUPPORT_UTF
1794    static BOOL
1795    valid_utf32(pcre_uint32 *string, int length)
1796    {
1797    register pcre_uint32 *p;
1798    register pcre_uint32 c;
1799    
1800    for (p = string; length-- > 0; p++)
1801      {
1802      c = *p;
1803    
1804      if (c > 0x10ffffu)
1805        return FALSE;
1806    
1807      /* A surrogate */
1808      if ((c & 0xfffff800u) == 0xd800u)
1809        return FALSE;
1810    
1811      /* Non-character */
1812      if ((c & 0xfffeu) == 0xfffeu || (c >= 0xfdd0u && c <= 0xfdefu))
1813        return FALSE;
1814      }
1815    
1816    return TRUE;
1817    }
1818    #endif /* SUPPORT_UTF */
1819    
1820  #endif  #endif
1821    
1822  /*************************************************  /*************************************************
# Line 1666  for (;;) Line 1893  for (;;)
1893      {      {
1894      int new_buffer_size = 2*buffer_size;      int new_buffer_size = 2*buffer_size;
1895      pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);      pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
     pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);  
1896      pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);      pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1897    
1898      if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)      if (new_buffer == NULL || new_pbuffer == NULL)
1899        {        {
1900        fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);        fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1901        exit(1);        exit(1);
# Line 1684  for (;;) Line 1910  for (;;)
1910      here = new_buffer + (here - buffer);      here = new_buffer + (here - buffer);
1911    
1912      free(buffer);      free(buffer);
     free(dbuffer);  
1913      free(pbuffer);      free(pbuffer);
1914    
1915      buffer = new_buffer;      buffer = new_buffer;
     dbuffer = new_dbuffer;  
1916      pbuffer = new_pbuffer;      pbuffer = new_pbuffer;
1917      }      }
1918    }    }
# Line 1733  return(result); Line 1957  return(result);
1957    
1958  static int pchar(pcre_uint32 c, FILE *f)  static int pchar(pcre_uint32 c, FILE *f)
1959  {  {
1960  int n;  int n = 0;
1961  if (PRINTOK(c))  if (PRINTOK(c))
1962    {    {
1963    if (f != NULL) fprintf(f, "%c", c);    if (f != NULL) fprintf(f, "%c", c);
# Line 1878  return yield; Line 2102  return yield;
2102  /* Must handle UTF-32 strings in utf mode. Yields number of characters printed.  /* Must handle UTF-32 strings in utf mode. Yields number of characters printed.
2103  If handed a NULL file, just counts chars without printing. */  If handed a NULL file, just counts chars without printing. */
2104    
2105  static int pchars32(PCRE_SPTR32 p, int length, FILE *f)  #define UTF32_MASK (0x1fffffu)
2106    
2107    static int pchars32(PCRE_SPTR32 p, int length, BOOL utf, FILE *f)
2108  {  {
2109  int yield = 0;  int yield = 0;
2110    
# Line 1888  if (length < 0) Line 2114  if (length < 0)
2114  while (length-- > 0)  while (length-- > 0)
2115    {    {
2116    pcre_uint32 c = *p++;    pcre_uint32 c = *p++;
2117      if (utf) c &= UTF32_MASK;
2118    yield += pchar(c, f);    yield += pchar(c, f);
2119    }    }
2120    
# Line 2417  real_pcre32 *re = (real_pcre32 *)ere; Line 2644  real_pcre32 *re = (real_pcre32 *)ere;
2644  int op;  int op;
2645  pcre_uint32 *ptr = (pcre_uint32 *)re + re->name_table_offset;  pcre_uint32 *ptr = (pcre_uint32 *)re + re->name_table_offset;
2646  int length = re->name_count * re->name_entry_size;  int length = re->name_count * re->name_entry_size;
 #ifdef SUPPORT_UTF  
 BOOL utf = (re->options & PCRE_UTF32) != 0;  
 #endif /* SUPPORT_UTF */  
2647    
2648  /* Always flip the bytes in the main data block and study blocks. */  /* Always flip the bytes in the main data block and study blocks. */
2649    
# Line 2443  if (extra != NULL) Line 2667  if (extra != NULL)
2667    rsd->minlength = swap_uint32(rsd->minlength);    rsd->minlength = swap_uint32(rsd->minlength);
2668    }    }
2669    
2670  /* In 32-bit mode we must swap bytes  /* In 32-bit mode we must swap bytes in the name table, if present, and then in
2671  in the name table, if present, and then in the pattern itself. */  the pattern itself. */
2672    
2673  while(TRUE)  while(TRUE)
2674    {    {
# Line 2718  int done = 0; Line 2942  int done = 0;
2942  int all_use_dfa = 0;  int all_use_dfa = 0;
2943  int verify_jit = 0;  int verify_jit = 0;
2944  int yield = 0;  int yield = 0;
2945    #ifdef SUPPORT_PCRE32
2946    int mask_utf32 = 0;
2947    #endif
2948  int stack_size;  int stack_size;
2949    pcre_uint8 *dbuffer = NULL;
2950    size_t dbuffer_size = 1u << 14;
2951    
2952  #if !defined NOPOSIX  #if !defined NOPOSIX
2953  int posix = 0;  int posix = 0;
# Line 2758  pcre_uint8 *gn8ptr; Line 2987  pcre_uint8 *gn8ptr;
2987  #endif  #endif
2988    
2989  /* Get buffers from malloc() so that valgrind will check their misuse when  /* Get buffers from malloc() so that valgrind will check their misuse when
2990  debugging. They grow automatically when very long lines are read. The 16-  debugging. They grow automatically when very long lines are read. The 16-
2991  and 32-bit buffers (buffer16, buffer32) are obtained only if needed. */  and 32-bit buffers (buffer16, buffer32) are obtained only if needed. */
2992    
2993  buffer = (pcre_uint8 *)malloc(buffer_size);  buffer = (pcre_uint8 *)malloc(buffer_size);
 dbuffer = (pcre_uint8 *)malloc(buffer_size);  
2994  pbuffer = (pcre_uint8 *)malloc(buffer_size);  pbuffer = (pcre_uint8 *)malloc(buffer_size);
2995    
2996  /* The outfile variable is static so that new_malloc can use it. */  /* The outfile variable is static so that new_malloc can use it. */
# Line 2810  while (argc > 1 && argv[op][0] == '-') Line 3038  while (argc > 1 && argv[op][0] == '-')
3038        force_study_options = jit_study_bits[*arg - '1'];        force_study_options = jit_study_bits[*arg - '1'];
3039      else goto BAD_ARG;      else goto BAD_ARG;
3040      }      }
3041      else if (strcmp(arg, "-8") == 0)
3042        {
3043    #ifdef SUPPORT_PCRE8
3044        pcre_mode = PCRE8_MODE;
3045    #else
3046        printf("** This version of PCRE was built without 8-bit support\n");
3047        exit(1);
3048    #endif
3049        }
3050    else if (strcmp(arg, "-16") == 0)    else if (strcmp(arg, "-16") == 0)
3051      {      {
3052  #ifdef SUPPORT_PCRE16  #ifdef SUPPORT_PCRE16
# Line 2819  while (argc > 1 && argv[op][0] == '-') Line 3056  while (argc > 1 && argv[op][0] == '-')
3056      exit(1);      exit(1);
3057  #endif  #endif
3058      }      }
3059    else if (strcmp(arg, "-32") == 0)    else if (strcmp(arg, "-32") == 0 || strcmp(arg, "-32+") == 0)
3060      {      {
3061  #ifdef SUPPORT_PCRE32  #ifdef SUPPORT_PCRE32
3062      pcre_mode = PCRE32_MODE;      pcre_mode = PCRE32_MODE;
3063        mask_utf32 = (strcmp(arg, "-32+") == 0);
3064  #else  #else
3065      printf("** This version of PCRE was built without 32-bit support\n");      printf("** This version of PCRE was built without 32-bit support\n");
3066      exit(1);      exit(1);
# Line 2959  while (argc > 1 && argv[op][0] == '-') Line 3197  while (argc > 1 && argv[op][0] == '-')
3197        else if (strcmp(argv[op + 1], "newline") == 0)        else if (strcmp(argv[op + 1], "newline") == 0)
3198          {          {
3199          (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);          (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
3200          print_newline_config(rc, TRUE);          print_newline_config(rc, TRUE);
3201          }          }
3202        else if (strcmp(argv[op + 1], "ebcdic") == 0)        else if (strcmp(argv[op + 1], "ebcdic") == 0)
3203          {          {
3204  #ifdef EBCDIC  #ifdef EBCDIC
3205          printf("1\n");          printf("1\n");
3206          yield = 1;          yield = 1;
3207  #else  #else
3208          printf("0\n");          printf("0\n");
3209  #endif  #endif
3210          }          }
3211        else if (strcmp(argv[op + 1], "ebcdic-nl") == 0)        else if (strcmp(argv[op + 1], "ebcdic-nl") == 0)
3212          {          {
3213  #ifdef EBCDIC  #ifdef EBCDIC
3214          printf("0x%02x\n", CHAR_LF);          printf("0x%02x\n", CHAR_LF);
3215  #else  #else
3216          printf("0\n");          printf("0\n");
3217  #endif  #endif
3218          }          }
3219        else        else
3220          {          {
3221          printf("Unknown -C option: %s\n", argv[op + 1]);          printf("Unknown -C option: %s\n", argv[op + 1]);
3222          }          }
3223        goto EXIT;        goto EXIT;
3224        }        }
3225    
3226      /* No argument for -C: output all configuration information. */      /* No argument for -C: output all configuration information. */
3227    
3228      printf("PCRE version %s\n", version);      printf("PCRE version %s\n", version);
3229      printf("Compiled with\n");      printf("Compiled with\n");
3230    
3231  #ifdef EBCDIC  #ifdef EBCDIC
3232      printf("  EBCDIC code support: LF is 0x%02x\n", CHAR_LF);      printf("  EBCDIC code support: LF is 0x%02x\n", CHAR_LF);
3233  #endif  #endif
3234    
3235  /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both  /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
3236  are set, either both UTFs are supported or both are not supported. */  are set, either both UTFs are supported or both are not supported. */
# Line 3025  are set, either both UTFs are supported Line 3263  are set, either both UTFs are supported
3263      else      else
3264        printf("  No just-in-time compiler support\n");        printf("  No just-in-time compiler support\n");
3265      (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
3266      print_newline_config(rc, FALSE);      print_newline_config(rc, FALSE);
3267      (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
3268      printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :      printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :
3269                                       "all Unicode newlines");                                       "all Unicode newlines");
# Line 3279  while (!done) Line 3517  while (!done)
3517        PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);        PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
3518        if (rc == PCRE_ERROR_BADMODE)        if (rc == PCRE_ERROR_BADMODE)
3519          {          {
3520            pcre_uint16 flags_in_host_byte_order;
3521            if (REAL_PCRE_MAGIC(re) == MAGIC_NUMBER)
3522              flags_in_host_byte_order = REAL_PCRE_FLAGS(re);
3523            else
3524              flags_in_host_byte_order = swap_uint16(REAL_PCRE_FLAGS(re));
3525          /* Simulate the result of the function call below. */          /* Simulate the result of the function call below. */
3526          fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,          fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
3527            pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "",            pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "",
3528            PCRE_INFO_OPTIONS);            PCRE_INFO_OPTIONS);
3529          fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "          fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
3530            "%d-bit mode\n", 8 * CHAR_SIZE,            "%d-bit mode\n", 8 * CHAR_SIZE, 8 * (flags_in_host_byte_order & PCRE_MODE_MASK));
           8 * (REAL_PCRE_FLAGS(re) & PCRE_MODE_MASK));  
3531          new_free(re);          new_free(re);
3532          fclose(f);          fclose(f);
3533          continue;          continue;
# Line 3672  while (!done) Line 3914  while (!done)
3914    
3915        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &name_count);        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &name_count);
3916        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &name_entry_size);        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &name_entry_size);
3917          real_pcre_size = 0;
3918  #ifdef SUPPORT_PCRE8  #ifdef SUPPORT_PCRE8
3919        if (REAL_PCRE_FLAGS(re) & PCRE_MODE8)        if (REAL_PCRE_FLAGS(re) & PCRE_MODE8)
3920          real_pcre_size = sizeof(real_pcre);          real_pcre_size = sizeof(real_pcre);
# Line 3766  while (!done) Line 4009  while (!done)
4009        if (new_info(re, NULL, PCRE_INFO_SIZE, &size) +        if (new_info(re, NULL, PCRE_INFO_SIZE, &size) +
4010            new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +            new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
4011            new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +            new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
4012            new_info(re, NULL, PCRE_INFO_FIRSTLITERAL, &first_char) +            new_info(re, NULL, PCRE_INFO_FIRSTCHARACTER, &first_char) +
4013            new_info(re, NULL, PCRE_INFO_FIRSTLITERALSET, &first_char_set) +            new_info(re, NULL, PCRE_INFO_FIRSTCHARACTERFLAGS, &first_char_set) +
4014            new_info(re, NULL, PCRE_INFO_LASTLITERAL2, &need_char) +            new_info(re, NULL, PCRE_INFO_REQUIREDCHAR, &need_char) +
4015            new_info(re, NULL, PCRE_INFO_LASTLITERAL2SET, &need_char_set) +            new_info(re, NULL, PCRE_INFO_REQUIREDCHARFLAGS, &need_char_set) +
4016            new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +            new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
4017            new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +            new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
4018            new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +            new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
# Line 4060  while (!done) Line 4303  while (!done)
4303    
4304    for (;;)    for (;;)
4305      {      {
4306      pcre_uint8 *q;  #ifdef SUPPORT_PCRE8
4307        pcre_uint8 *q8;
4308    #endif
4309    #ifdef SUPPORT_PCRE16
4310        pcre_uint16 *q16;
4311    #endif
4312    #ifdef SUPPORT_PCRE32
4313        pcre_uint32 *q32;
4314    #endif
4315      pcre_uint8 *bptr;      pcre_uint8 *bptr;
4316      int *use_offsets = offsets;      int *use_offsets = offsets;
4317      int use_size_offsets = size_offsets;      int use_size_offsets = size_offsets;
# Line 4132  while (!done) Line 4383  while (!done)
4383      p = buffer;      p = buffer;
4384      while (isspace(*p)) p++;      while (isspace(*p)) p++;
4385    
4386      bptr = q = dbuffer;  #ifndef NOUTF
4387        /* Check that the data is well-formed UTF-8 if we're in UTF mode. To create
4388           invalid input to pcre_exec, you must use \x?? or \x{} sequences. */
4389        if (use_utf)
4390          {
4391          pcre_uint8 *q;
4392          pcre_uint32 cc;
4393          int n = 1;
4394    
4395          for (q = p; n > 0 && *q; q += n) n = utf82ord(q, &cc);
4396          if (n <= 0)
4397            {
4398            fprintf(outfile, "**Failed: invalid UTF-8 string cannot be used as input in UTF mode\n");
4399            goto NEXT_DATA;
4400            }
4401          }
4402    #endif
4403    
4404    #ifdef SUPPORT_VALGRIND
4405        /* Mark the dbuffer as addressable but undefined again. */
4406        if (dbuffer != NULL)
4407          {
4408          VALGRIND_MAKE_MEM_UNDEFINED(dbuffer, dbuffer_size * CHAR_SIZE);
4409          }
4410    #endif
4411    
4412        /* Allocate a buffer to hold the data line. len+1 is an upper bound on
4413           the number of pcre_uchar units that will be needed. */
4414        if (dbuffer == NULL || (size_t)len >= dbuffer_size)
4415          {
4416          dbuffer_size *= 2;
4417          dbuffer = (pcre_uint8 *)realloc(dbuffer, dbuffer_size * CHAR_SIZE);
4418          if (dbuffer == NULL)
4419            {
4420            fprintf(stderr, "pcretest: malloc(%d) failed\n", (int)dbuffer_size);
4421            exit(1);
4422            }
4423          }
4424    
4425    #ifdef SUPPORT_PCRE8
4426        q8 = (pcre_uint8 *) dbuffer;
4427    #endif
4428    #ifdef SUPPORT_PCRE16
4429        q16 = (pcre_uint16 *) dbuffer;
4430    #endif
4431    #ifdef SUPPORT_PCRE32
4432        q32 = (pcre_uint32 *) dbuffer;
4433    #endif
4434    
4435      while ((c = *p++) != 0)      while ((c = *p++) != 0)
4436        {        {
4437        int i = 0;        int i = 0;
# Line 4145  while (!done) Line 4444  while (!done)
4444    
4445        if (c != '\\')        if (c != '\\')
4446          {          {
4447          if (use_utf)  #ifndef NOUTF
4448            {          if (use_utf && HASUTF8EXTRALEN(c)) { GETUTF8INC(c, p); }
4449            *q++ = c;  #endif
           continue;  
           }  
4450          }          }
4451    
4452        /* Handle backslash escapes */        /* Handle backslash escapes */
# Line 4210  while (!done) Line 4507  while (!done)
4507            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
4508            p++;            p++;
4509            }            }
4510          if (use_utf)  #if !defined NOUTF && defined SUPPORT_PCRE8
4511            if (use_utf && (pcre_mode == PCRE8_MODE))
4512            {            {
4513            *q++ = c;            *q8++ = c;
4514            continue;            continue;
4515            }            }
4516    #endif
4517          break;          break;
4518    
4519          case 0:   /* \ followed by EOF allows for an empty line */          case 0:   /* \ followed by EOF allows for an empty line */
# Line 4420  while (!done) Line 4719  while (!done)
4719          continue;          continue;
4720          }          }
4721    
4722        /* We now have a character value in c that may be greater than 255. In        /* We now have a character value in c that may be greater than 255.
4723        16-bit or 32-bit mode, we always convert characters to UTF-8 so that        In 8-bit mode we convert to UTF-8 if we are in UTF mode. Values greater
4724        values greater than 255 can be passed to non-UTF 16- or 32-bit strings.        than 127 in UTF mode must have come from \x{...} or octal constructs
       In 8-bit       mode we convert to UTF-8 if we are in UTF mode. Values greater  
       than 127       in UTF mode must have come from \x{...} or octal constructs  
4725        because values from \x.. get this far only in non-UTF mode. */        because values from \x.. get this far only in non-UTF mode. */
4726    
4727  #if !defined NOUTF || defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32  #ifdef SUPPORT_PCRE8
4728        if (pcre_mode != PCRE8_MODE || use_utf)        if (pcre_mode == PCRE8_MODE)
4729          {          {
4730          pcre_uint8 buff8[8];  #ifndef NOUTF
4731          int ii, utn;          if (use_utf)
4732          utn = ord2utf8(c, buff8);            {
4733          for (ii = 0; ii < utn; ii++) *q++ = buff8[ii];            q8 += ord2utf8(c, q8);
4734              }
4735            else
4736    #endif
4737              {
4738              if (c > 0xffu)
4739                {
4740                fprintf(outfile, "** Character \\x{%x} is greater than 255 "
4741                  "and UTF-8 mode is not enabled.\n", c);
4742                fprintf(outfile, "** Truncation will probably give the wrong "
4743                  "result.\n");
4744                }
4745              *q8++ = c;
4746              }
4747          }          }
       else  
4748  #endif  #endif
4749    #ifdef SUPPORT_PCRE16
4750          if (pcre_mode == PCRE16_MODE)
4751          {          {
4752          if (c > 255)  #ifndef NOUTF
4753            if (use_utf)
4754              {
4755              if (c > 0x10ffffu)
4756                {
4757                fprintf(outfile, "** Failed: character \\x{%x} is greater than "
4758                  "0x10ffff and so cannot be converted to UTF-16\n", c);
4759                goto NEXT_DATA;
4760                }
4761              else if (c >= 0x10000u)
4762                {
4763                c-= 0x10000u;
4764                *q16++ = 0xD800 | (c >> 10);
4765                *q16++ = 0xDC00 | (c & 0x3ff);
4766                }
4767              else
4768                *q16++ = c;
4769              }
4770            else
4771    #endif
4772            {            {
4773            fprintf(outfile, "** Character \\x{%x} is greater than 255 "            if (c > 0xffffu)
4774              "and UTF-8 mode is not enabled.\n", c);              {
4775            fprintf(outfile, "** Truncation will probably give the wrong "              fprintf(outfile, "** Character \\x{%x} is greater than 0xffff "
4776              "result.\n");                "and UTF-16 mode is not enabled.\n", c);
4777                fprintf(outfile, "** Truncation will probably give the wrong "
4778                  "result.\n");
4779                }
4780    
4781              *q16++ = c;
4782            }            }
         *q++ = c;  
4783          }          }
4784    #endif
4785    #ifdef SUPPORT_PCRE32
4786          if (pcre_mode == PCRE32_MODE)
4787            {
4788            *q32++ = c;
4789            }
4790    #endif
4791    
4792        }        }
4793    
4794      /* Reached end of subject string */      /* Reached end of subject string */
4795    
4796      *q = 0;  #ifdef SUPPORT_PCRE8
4797      len = (int)(q - dbuffer);      if (pcre_mode == PCRE8_MODE)
4798        {
4799          *q8 = 0;
4800          len = (int)(q8 - (pcre_uint8 *)dbuffer);
4801        }
4802    #endif
4803    #ifdef SUPPORT_PCRE16
4804        if (pcre_mode == PCRE16_MODE)
4805        {
4806          *q16 = 0;
4807          len = (int)(q16 - (pcre_uint16 *)dbuffer);
4808        }
4809    #endif
4810    #ifdef SUPPORT_PCRE32
4811        if (pcre_mode == PCRE32_MODE)
4812        {
4813          *q32 = 0;
4814          len = (int)(q32 - (pcre_uint32 *)dbuffer);
4815        }
4816    #endif
4817    
4818    #if defined SUPPORT_UTF && defined SUPPORT_PCRE32
4819        /* If we're requested to test UTF-32 masking of high bits, change the data
4820        string to have high bits set, unless the string is invalid UTF-32.
4821        Since the JIT doesn't support this yet, only do it when not JITing. */
4822        if (use_utf && mask_utf32 && (study_options & PCRE_STUDY_ALLJIT) == 0 &&
4823            valid_utf32((pcre_uint32 *)dbuffer, len))
4824          {
4825          for (q32 = (pcre_uint32 *)dbuffer; *q32; q32++)
4826            *q32 |= ~(pcre_uint32)UTF32_MASK;
4827    
4828      /* Move the data to the end of the buffer so that a read over the end of        /* Need to pass NO_UTF32_CHECK so the high bits are allowed */
4829      the buffer will be seen by valgrind, even if it doesn't cause a crash. If        options |= PCRE_NO_UTF32_CHECK;
4830      we are using the POSIX interface, we must include the terminating zero. */        }
4831    #endif
4832    
4833        /* If we're compiling with explicit valgrind support, mark the data from after
4834        its end to the end of the buffer as unaddressable, so that a read over the end
4835        of the buffer will be seen by valgrind, even if it doesn't cause a crash.
4836        If we're not building with valgrind support, move the data to the end of the
4837        buffer so that such a read might at least cause a crash.
4838        If we are using the POSIX interface, we must include the terminating zero. */
4839    
4840        bptr = dbuffer;
4841    
4842  #if !defined NOPOSIX  #if !defined NOPOSIX
4843      if (posix || do_posix)      if (posix || do_posix)
4844        {        {
4845        memmove(bptr + buffer_size - len - 1, bptr, len + 1);  #ifdef SUPPORT_VALGRIND
4846        bptr += buffer_size - len - 1;        VALGRIND_MAKE_MEM_NOACCESS(dbuffer + len + 1, dbuffer_size - (len + 1));
4847    #else
4848          memmove(bptr + dbuffer_size - len - 1, bptr, len + 1);
4849          bptr += dbuffer_size - len - 1;
4850    #endif
4851        }        }
4852      else      else
4853  #endif  #endif
4854        {        {
4855        memmove(bptr + buffer_size - len, bptr, len);  #ifdef SUPPORT_VALGRIND
4856        bptr += buffer_size - len;        VALGRIND_MAKE_MEM_NOACCESS(dbuffer + len * CHAR_SIZE, (dbuffer_size - len) * CHAR_SIZE);
4857    #else
4858          bptr = memmove(bptr + (dbuffer_size - len) * CHAR_SIZE, bptr, len * CHAR_SIZE);
4859    #endif
4860        }        }
4861    
4862      if ((all_use_dfa || use_dfa) && find_match_limit)      if ((all_use_dfa || use_dfa) && find_match_limit)
# Line 4532  while (!done) Line 4920  while (!done)
4920    
4921      /* Handle matching via the native interface - repeats for /g and /G */      /* Handle matching via the native interface - repeats for /g and /G */
4922    
 #ifdef SUPPORT_PCRE16  
     if (pcre_mode == PCRE16_MODE)  
       {  
       len = to16(TRUE, bptr, REAL_PCRE_OPTIONS(re) & PCRE_UTF8, len);  
       switch(len)  
         {  
         case -1:  
         fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "  
           "converted to UTF-16\n");  
         goto NEXT_DATA;  
   
         case -2:  
         fprintf(outfile, "**Failed: character value greater than 0x10ffff "  
           "cannot be converted to UTF-16\n");  
         goto NEXT_DATA;  
   
         case -3:  
         fprintf(outfile, "**Failed: character value greater than 0xffff "  
           "cannot be converted to 16-bit in non-UTF mode\n");  
         goto NEXT_DATA;  
   
         default:  
         break;  
         }  
       bptr = (pcre_uint8 *)buffer16;  
       }  
 #endif  
   
 #ifdef SUPPORT_PCRE32  
     if (pcre_mode == PCRE32_MODE)  
       {  
       len = to32(TRUE, bptr, REAL_PCRE_OPTIONS(re) & PCRE_UTF32, len);  
       switch(len)  
         {  
         case -1:  
         fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "  
           "converted to UTF-32\n");  
         goto NEXT_DATA;  
   
         case -2:  
         fprintf(outfile, "**Failed: character value greater than 0x10ffff "  
           "cannot be converted to UTF-32\n");  
         goto NEXT_DATA;  
   
         case -3:  
         fprintf(outfile, "**Failed: character value is ill-formed UTF-32\n");  
         goto NEXT_DATA;  
   
         default:  
         break;  
         }  
       bptr = (pcre_uint8 *)buffer32;  
       }  
 #endif  
   
4923      /* Ensure that there is a JIT callback if we want to verify that JIT was      /* Ensure that there is a JIT callback if we want to verify that JIT was
4924      actually used. If jit_stack == NULL, no stack has yet been assigned. */      actually used. If jit_stack == NULL, no stack has yet been assigned. */
4925    
# Line 4808  while (!done) Line 5141  while (!done)
5141            int rc;            int rc;
5142            char copybuffer[256];            char copybuffer[256];
5143    
5144    #ifdef SUPPORT_PCRE32
5145              if (pcre_mode == PCRE32_MODE)
5146                {
5147                if (*(pcre_uint32 *)cnptr == 0) break;
5148                }
5149    #endif
5150    #ifdef SUPPORT_PCRE16
5151            if (pcre_mode == PCRE16_MODE)            if (pcre_mode == PCRE16_MODE)
5152              {              {
5153              if (*(pcre_uint16 *)cnptr == 0) break;              if (*(pcre_uint16 *)cnptr == 0) break;
5154              }              }
5155            else  #endif
5156    #ifdef SUPPORT_PCRE8
5157              if (pcre_mode == PCRE8_MODE)
5158              {              {
5159              if (*(pcre_uint8 *)cnptr == 0) break;              if (*(pcre_uint8 *)cnptr == 0) break;
5160              }              }
5161    #endif
5162    
5163            PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,            PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
5164              cnptr, copybuffer, sizeof(copybuffer));              cnptr, copybuffer, sizeof(copybuffer));
# Line 4863  while (!done) Line 5206  while (!done)
5206            int rc;            int rc;
5207            const char *substring;            const char *substring;
5208    
5209    #ifdef SUPPORT_PCRE32
5210              if (pcre_mode == PCRE32_MODE)
5211                {
5212                if (*(pcre_uint32 *)gnptr == 0) break;
5213                }
5214    #endif
5215    #ifdef SUPPORT_PCRE16
5216            if (pcre_mode == PCRE16_MODE)            if (pcre_mode == PCRE16_MODE)
5217              {              {
5218              if (*(pcre_uint16 *)gnptr == 0) break;              if (*(pcre_uint16 *)gnptr == 0) break;
5219              }              }
5220            else  #endif
5221    #ifdef SUPPORT_PCRE8
5222              if (pcre_mode == PCRE8_MODE)
5223              {              {
5224              if (*(pcre_uint8 *)gnptr == 0) break;              if (*(pcre_uint8 *)gnptr == 0) break;
5225              }              }
5226    #endif
5227    
5228            PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,            PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
5229              gnptr, &substring);              gnptr, &substring);

Legend:
Removed from v.1087  
changed lines
  Added in v.1189

  ViewVC Help
Powered by ViewVC 1.1.5