/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1087 by chpe, Tue Oct 16 15:55:38 2012 UTC revision 1202 by ph10, Sun Nov 4 16:13:29 2012 UTC
# Line 36  POSSIBILITY OF SUCH DAMAGE. Line 36  POSSIBILITY OF SUCH DAMAGE.
36  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
37  */  */
38    
39  /* This program now supports the testing of both the 8-bit and 16-bit PCRE  /* This program now supports the testing of all of the 8-bit, 16-bit, and
40  libraries in a single program. This is different from the modules such as  32-bit PCRE libraries in a single program. This is different from the modules
41  pcre_compile.c in the library itself, which are compiled separately for each  such as pcre_compile.c in the library itself, which are compiled separately for
42  mode. If both modes are enabled, for example, pcre_compile.c is compiled twice  each mode. If two modes are enabled, for example, pcre_compile.c is compiled
43  (the second time with COMPILE_PCRE16 defined). By contrast, pcretest.c is  twice. By contrast, pcretest.c is compiled only once. Therefore, it must not
44  compiled only once. Therefore, it must not make use of any of the macros from  make use of any of the macros from pcre_internal.h that depend on
45  pcre_internal.h that depend on COMPILE_PCRE8 or COMPILE_PCRE16. It does,  COMPILE_PCRE8, COMPILE_PCRE16, or COMPILE_PCRE32. It does, however, make use of
46  however, make use of SUPPORT_PCRE8 and SUPPORT_PCRE16 to ensure that it calls  SUPPORT_PCRE8, SUPPORT_PCRE16, and SUPPORT_PCRE32 to ensure that it calls only
47  only supported library functions. */  supported library functions. */
48    
49  #ifdef HAVE_CONFIG_H  #ifdef HAVE_CONFIG_H
50  #include "config.h"  #include "config.h"
# Line 133  here before pcre_internal.h so that the Line 133  here before pcre_internal.h so that the
133  appropriately for an application, not for building PCRE. */  appropriately for an application, not for building PCRE. */
134    
135  #include "pcre.h"  #include "pcre.h"
   
 #if defined SUPPORT_PCRE32 && !defined SUPPORT_PCRE8 && !defined SUPPORT_PCRE16  
 /* Configure internal macros to 32 bit mode. */  
 #define COMPILE_PCRE32  
 #endif  
 #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8 && !defined SUPPORT_PCRE32  
 /* Configure internal macros to 16 bit mode. */  
 #define COMPILE_PCRE16  
 #endif  
 #if defined SUPPORT_PCRE8 && !defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE32  
 /* Configure internal macros to 8 bit mode. */  
 #define COMPILE_PCRE8  
 #endif  
   
136  #include "pcre_internal.h"  #include "pcre_internal.h"
137    
138  /* The pcre_printint() function, which prints the internal form of a compiled  /* The pcre_printint() function, which prints the internal form of a compiled
# Line 302  argument, the casting might be incorrect Line 288  argument, the casting might be incorrect
288    
289  #define PCRE_JIT_STACK_FREE8(stack) \  #define PCRE_JIT_STACK_FREE8(stack) \
290    pcre_jit_stack_free(stack)    pcre_jit_stack_free(stack)
291    
292    #define pcre8_maketables pcre_maketables
293    
294  #endif /* SUPPORT_PCRE8 */  #endif /* SUPPORT_PCRE8 */
295    
# Line 399  argument, the casting might be incorrect Line 387  argument, the casting might be incorrect
387  #ifdef SUPPORT_PCRE32  #ifdef SUPPORT_PCRE32
388    
389  #define PCHARS32(lv, p, offset, len, f) \  #define PCHARS32(lv, p, offset, len, f) \
390    lv = pchars32((PCRE_SPTR32)(p) + offset, len, f)    lv = pchars32((PCRE_SPTR32)(p) + offset, len, use_utf, f)
391    
392  #define PCHARSV32(p, offset, len, f) \  #define PCHARSV32(p, offset, len, f)                \
393    (void)pchars32((PCRE_SPTR32)(p) + offset, len, f)    (void)pchars32((PCRE_SPTR32)(p) + offset, len, use_utf, f)
394    
395  #define READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re) \  #define READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re) \
396    p = read_capture_name32(p, cn32, re)    p = read_capture_name32(p, cn32, re)
# Line 484  argument, the casting might be incorrect Line 472  argument, the casting might be incorrect
472  #endif /* SUPPORT_PCRE32 */  #endif /* SUPPORT_PCRE32 */
473    
474    
475  /* ----- Both modes are supported; a runtime test is needed, except for  /* ----- More than one mode is supported; a runtime test is needed, except for
476  pcre_config(), and the JIT stack functions, when it doesn't matter which  pcre_config(), and the JIT stack functions, when it doesn't matter which
477  version is called. ----- */  available version is called. ----- */
478    
479  enum {  enum {
480    PCRE8_MODE,    PCRE8_MODE,
# Line 494  enum { Line 482  enum {
482    PCRE32_MODE    PCRE32_MODE
483  };  };
484    
485  #if (defined (SUPPORT_PCRE8) + defined (SUPPORT_PCRE16) + defined (SUPPORT_PCRE32)) >= 2  #if (defined (SUPPORT_PCRE8) + defined (SUPPORT_PCRE16) + \
486         defined (SUPPORT_PCRE32)) >= 2
487    
488  #define CHAR_SIZE (1 << pcre_mode)  #define CHAR_SIZE (1 << pcre_mode)
489    
490    /* There doesn't seem to be an easy way of writing these macros that can cope
491    with the 3 pairs of bit sizes plus all three bit sizes. So just handle all the
492    cases separately. */
493    
494    /* ----- All three modes supported ----- */
495    
496    #if defined(SUPPORT_PCRE8) && defined(SUPPORT_PCRE16) && defined(SUPPORT_PCRE32)
497    
498  #define PCHARS(lv, p, offset, len, f) \  #define PCHARS(lv, p, offset, len, f) \
499    if (pcre_mode == PCRE32_MODE) \    if (pcre_mode == PCRE32_MODE) \
500      PCHARS32(lv, p, offset, len, f); \      PCHARS32(lv, p, offset, len, f); \
# Line 696  enum { Line 693  enum {
693    else \    else \
694      PCRE_STUDY8(extra, re, options, error)      PCRE_STUDY8(extra, re, options, error)
695    
696    
697    /* ----- Two out of three modes are supported ----- */
698    
699    #else
700    
701    /* We can use some macro trickery to make a single set of definitions work in
702    the three different cases. */
703    
704    /* ----- 32-bit and 16-bit but not 8-bit supported ----- */
705    
706    #if defined(SUPPORT_PCRE32) && defined(SUPPORT_PCRE16)
707    #define BITONE 32
708    #define BITTWO 16
709    
710    /* ----- 32-bit and 8-bit but not 16-bit supported ----- */
711    
712    #elif defined(SUPPORT_PCRE32) && defined(SUPPORT_PCRE8)
713    #define BITONE 32
714    #define BITTWO 8
715    
716    /* ----- 16-bit and 8-bit but not 32-bit supported ----- */
717    
718    #else
719    #define BITONE 16
720    #define BITTWO 8
721    #endif
722    
723    #define glue(a,b) a##b
724    #define G(a,b) glue(a,b)
725    
726    
727    /* ----- Common macros for two-mode cases ----- */
728    
729    #define PCHARS(lv, p, offset, len, f) \
730      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
731        G(PCHARS,BITONE)(lv, p, offset, len, f); \
732      else \
733        G(PCHARS,BITTWO)(lv, p, offset, len, f)
734    
735    #define PCHARSV(p, offset, len, f) \
736      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
737        G(PCHARSV,BITONE)(p, offset, len, f); \
738      else \
739        G(PCHARSV,BITTWO)(p, offset, len, f)
740    
741    #define READ_CAPTURE_NAME(p, cn8, cn16, cn32, re) \
742      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
743        G(READ_CAPTURE_NAME,BITONE)(p, cn8, cn16, cn32, re); \
744      else \
745        G(READ_CAPTURE_NAME,BITTWO)(p, cn8, cn16, cn32, re)
746    
747    #define SET_PCRE_CALLOUT(callout) \
748      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
749        G(SET_PCRE_CALLOUT,BITONE)(callout); \
750      else \
751        G(SET_PCRE_CALLOUT,BITTWO)(callout)
752    
753    #define STRLEN(p) ((pcre_mode == G(G(PCRE,BITONE),_MODE)) ? \
754      G(STRLEN,BITONE)(p) : G(STRLEN,BITTWO)(p))
755    
756    #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
757      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
758        G(PCRE_ASSIGN_JIT_STACK,BITONE)(extra, callback, userdata); \
759      else \
760        G(PCRE_ASSIGN_JIT_STACK,BITTWO)(extra, callback, userdata)
761    
762    #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
763      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
764        G(PCRE_COMPILE,BITONE)(re, pat, options, error, erroffset, tables); \
765      else \
766        G(PCRE_COMPILE,BITTWO)(re, pat, options, error, erroffset, tables)
767    
768    #define PCRE_CONFIG G(G(pcre,BITONE),_config)
769    
770    #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
771        namesptr, cbuffer, size) \
772      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
773        G(PCRE_COPY_NAMED_SUBSTRING,BITONE)(rc, re, bptr, offsets, count, \
774          namesptr, cbuffer, size); \
775      else \
776        G(PCRE_COPY_NAMED_SUBSTRING,BITTWO)(rc, re, bptr, offsets, count, \
777          namesptr, cbuffer, size)
778    
779    #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
780      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
781        G(PCRE_COPY_SUBSTRING,BITONE)(rc, bptr, offsets, count, i, cbuffer, size); \
782      else \
783        G(PCRE_COPY_SUBSTRING,BITTWO)(rc, bptr, offsets, count, i, cbuffer, size)
784    
785    #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
786        offsets, size_offsets, workspace, size_workspace) \
787      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
788        G(PCRE_DFA_EXEC,BITONE)(count, re, extra, bptr, len, start_offset, options, \
789          offsets, size_offsets, workspace, size_workspace); \
790      else \
791        G(PCRE_DFA_EXEC,BITTWO)(count, re, extra, bptr, len, start_offset, options, \
792          offsets, size_offsets, workspace, size_workspace)
793    
794    #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
795        offsets, size_offsets) \
796      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
797        G(PCRE_EXEC,BITONE)(count, re, extra, bptr, len, start_offset, options, \
798          offsets, size_offsets); \
799      else \
800        G(PCRE_EXEC,BITTWO)(count, re, extra, bptr, len, start_offset, options, \
801          offsets, size_offsets)
802    
803    #define PCRE_FREE_STUDY(extra) \
804      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
805        G(PCRE_FREE_STUDY,BITONE)(extra); \
806      else \
807        G(PCRE_FREE_STUDY,BITTWO)(extra)
808    
809    #define PCRE_FREE_SUBSTRING(substring) \
810      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
811        G(PCRE_FREE_SUBSTRING,BITONE)(substring); \
812      else \
813        G(PCRE_FREE_SUBSTRING,BITTWO)(substring)
814    
815    #define PCRE_FREE_SUBSTRING_LIST(listptr) \
816      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
817        G(PCRE_FREE_SUBSTRING_LIST,BITONE)(listptr); \
818      else \
819        G(PCRE_FREE_SUBSTRING_LIST,BITTWO)(listptr)
820    
821    #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
822        getnamesptr, subsptr) \
823      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
824        G(PCRE_GET_NAMED_SUBSTRING,BITONE)(rc, re, bptr, offsets, count, \
825          getnamesptr, subsptr); \
826      else \
827        G(PCRE_GET_NAMED_SUBSTRING,BITTWO)(rc, re, bptr, offsets, count, \
828          getnamesptr, subsptr)
829    
830    #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
831      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
832        G(PCRE_GET_STRINGNUMBER,BITONE)(n, rc, ptr); \
833      else \
834        G(PCRE_GET_STRINGNUMBER,BITTWO)(n, rc, ptr)
835    
836    #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
837      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
838        G(PCRE_GET_SUBSTRING,BITONE)(rc, bptr, use_offsets, count, i, subsptr); \
839      else \
840        G(PCRE_GET_SUBSTRING,BITTWO)(rc, bptr, use_offsets, count, i, subsptr)
841    
842    #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
843      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
844        G(PCRE_GET_SUBSTRING_LIST,BITONE)(rc, bptr, offsets, count, listptr); \
845      else \
846        G(PCRE_GET_SUBSTRING_LIST,BITTWO)(rc, bptr, offsets, count, listptr)
847    
848    #define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
849      (pcre_mode == G(G(PCRE,BITONE),_MODE)) ? \
850         G(PCRE_JIT_STACK_ALLOC,BITONE)(startsize, maxsize) \
851        : G(PCRE_JIT_STACK_ALLOC,BITTWO)(startsize, maxsize)
852    
853    #define PCRE_JIT_STACK_FREE(stack) \
854      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
855        G(PCRE_JIT_STACK_FREE,BITONE)(stack); \
856      else \
857        G(PCRE_JIT_STACK_FREE,BITTWO)(stack)
858    
859    #define PCRE_MAKETABLES \
860      (pcre_mode == G(G(PCRE,BITONE),_MODE)) ? \
861        G(G(pcre,BITONE),_maketables)() : G(G(pcre,BITTWO),_maketables)()
862    
863    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
864      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
865        G(PCRE_PATTERN_TO_HOST_BYTE_ORDER,BITONE)(rc, re, extra, tables); \
866      else \
867        G(PCRE_PATTERN_TO_HOST_BYTE_ORDER,BITTWO)(rc, re, extra, tables)
868    
869    #define PCRE_PRINTINT(re, outfile, debug_lengths) \
870      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
871        G(PCRE_PRINTINT,BITONE)(re, outfile, debug_lengths); \
872      else \
873        G(PCRE_PRINTINT,BITTWO)(re, outfile, debug_lengths)
874    
875    #define PCRE_STUDY(extra, re, options, error) \
876      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
877        G(PCRE_STUDY,BITONE)(extra, re, options, error); \
878      else \
879        G(PCRE_STUDY,BITTWO)(extra, re, options, error)
880    
881    #endif  /* Two out of three modes */
882    
883    /* ----- End of cases where more than one mode is supported ----- */
884    
885    
886  /* ----- Only 8-bit mode is supported ----- */  /* ----- Only 8-bit mode is supported ----- */
887    
888  #elif defined SUPPORT_PCRE8  #elif defined SUPPORT_PCRE8
# Line 831  static const unsigned char *last_callout Line 1018  static const unsigned char *last_callout
1018    
1019  static int buffer_size = 50000;  static int buffer_size = 50000;
1020  static pcre_uint8 *buffer = NULL;  static pcre_uint8 *buffer = NULL;
 static pcre_uint8 *dbuffer = NULL;  
1021  static pcre_uint8 *pbuffer = NULL;  static pcre_uint8 *pbuffer = NULL;
1022    
1023  /* Another buffer is needed for translation to 16/32-bit character strings. It will  /* Just as a safety check, make sure that COMPILE_PCRE[16|32] are *not* set. */
 be obtained and extended as required. */  
   
 #if defined SUPPORT_PCRE8 && (defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32)  
   
 /* We need the table of operator lengths that is used for 16/32-bit compiling, in  
 order to swap bytes in a pattern for saving/reloading testing. Luckily, the  
 data is defined as a macro. However, we must ensure that LINK_SIZE is adjusted  
 appropriately for the 16/32-bit world. Just as a safety check, make sure that  
 COMPILE_PCRE[16|32] is *not* set. */  
1024    
1025  #ifdef COMPILE_PCRE16  #ifdef COMPILE_PCRE16
1026  #error COMPILE_PCRE16 must not be set when compiling pcretest.c  #error COMPILE_PCRE16 must not be set when compiling pcretest.c
# Line 853  COMPILE_PCRE[16|32] is *not* set. */ Line 1030  COMPILE_PCRE[16|32] is *not* set. */
1030  #error COMPILE_PCRE32 must not be set when compiling pcretest.c  #error COMPILE_PCRE32 must not be set when compiling pcretest.c
1031  #endif  #endif
1032    
1033    /* We need buffers for building 16/32-bit strings, and the tables of operator
1034    lengths that are used for 16/32-bit compiling, in order to swap bytes in a
1035    pattern for saving/reloading testing. Luckily, the data for these tables is
1036    defined as a macro. However, we must ensure that LINK_SIZE and IMM2_SIZE (which
1037    are used in the tables) are adjusted appropriately for the 16/32-bit world.
1038    LINK_SIZE is also used later in this program. */
1039    
1040    #ifdef SUPPORT_PCRE16
1041    #undef IMM2_SIZE
1042    #define IMM2_SIZE 1
1043    
1044  #if LINK_SIZE == 2  #if LINK_SIZE == 2
1045  #undef LINK_SIZE  #undef LINK_SIZE
1046  #define LINK_SIZE 1  #define LINK_SIZE 1
# Line 863  COMPILE_PCRE[16|32] is *not* set. */ Line 1051  COMPILE_PCRE[16|32] is *not* set. */
1051  #error LINK_SIZE must be either 2, 3, or 4  #error LINK_SIZE must be either 2, 3, or 4
1052  #endif  #endif
1053    
 #undef IMM2_SIZE  
 #define IMM2_SIZE 1  
   
 #endif /* SUPPORT_PCRE8 && (SUPPORT_PCRE16 || SUPPORT_PCRE32) */  
   
 #ifdef SUPPORT_PCRE16  
1054  static int buffer16_size = 0;  static int buffer16_size = 0;
1055  static pcre_uint16 *buffer16 = NULL;  static pcre_uint16 *buffer16 = NULL;
1056  static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };  static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
1057  #endif  /* SUPPORT_PCRE16 */  #endif  /* SUPPORT_PCRE16 */
1058    
1059  #ifdef SUPPORT_PCRE32  #ifdef SUPPORT_PCRE32
1060    #undef IMM2_SIZE
1061    #define IMM2_SIZE 1
1062    #undef LINK_SIZE
1063    #define LINK_SIZE 1
1064    
1065  static int buffer32_size = 0;  static int buffer32_size = 0;
1066  static pcre_uint32 *buffer32 = NULL;  static pcre_uint32 *buffer32 = NULL;
1067  static const pcre_uint32 OP_lengths32[] = { OP_LENGTHS };  static const pcre_uint32 OP_lengths32[] = { OP_LENGTHS };
1068  #endif  /* SUPPORT_PCRE32 */  #endif  /* SUPPORT_PCRE32 */
1069    
1070  /* If we have 8-bit support, default to it; if there is also  /* If we have 8-bit support, default to it; if there is also 16- or 32-bit
1071  16- or 32-bit support, it can be changed by an option. If there is no 8-bit support,  support, it can be changed by an option. If there is no 8-bit support, there
1072  there must be 16- or 32-bit support, so default it to 1. */  must be 16- or 32-bit support, so default it to 1. */
1073    
1074  #if defined SUPPORT_PCRE8  #if defined SUPPORT_PCRE8
1075  static int pcre_mode = PCRE8_MODE;  static int pcre_mode = PCRE8_MODE;
# Line 942  static const char *errtexts[] = { Line 1129  static const char *errtexts[] = {
1129    "JIT stack limit reached",    "JIT stack limit reached",
1130    "pattern compiled in wrong mode: 8-bit/16-bit error",    "pattern compiled in wrong mode: 8-bit/16-bit error",
1131    "pattern compiled with other endianness",    "pattern compiled with other endianness",
1132    "invalid data in workspace for DFA restart"    "invalid data in workspace for DFA restart",
1133      "bad JIT option",
1134      "bad length"
1135  };  };
1136    
1137    
# Line 1299  return sys_errlist[n]; Line 1488  return sys_errlist[n];
1488  *       Print newline configuration              *  *       Print newline configuration              *
1489  *************************************************/  *************************************************/
1490    
1491  /*  /*
1492  Arguments:  Arguments:
1493    rc         the return code from PCRE_CONFIG_NEWLINE    rc         the return code from PCRE_CONFIG_NEWLINE
1494    isc        TRUE if called from "-C newline"    isc        TRUE if called from "-C newline"
1495  Returns:     nothing  Returns:     nothing
1496  */  */
1497    
# Line 1318  switch(rc) Line 1507  switch(rc)
1507    case (CHAR_CR<<8 | CHAR_LF): s = "CRLF"; break;    case (CHAR_CR<<8 | CHAR_LF): s = "CRLF"; break;
1508    case -1: s = "ANY"; break;    case -1: s = "ANY"; break;
1509    case -2: s = "ANYCRLF"; break;    case -2: s = "ANYCRLF"; break;
1510    
1511    default:    default:
1512    printf("a non-standard value: 0x%04x\n", rc);    printf("a non-standard value: 0x%04x\n", rc);
1513    return;    return;
1514    }    }
1515    
1516  printf("%s\n", s);  printf("%s\n", s);
1517  }  }
# Line 1388  for (j = 0; j < i; j++) Line 1577  for (j = 0; j < i; j++)
1577  /* Check that encoding was the correct unique one */  /* Check that encoding was the correct unique one */
1578    
1579  for (j = 0; j < utf8_table1_size; j++)  for (j = 0; j < utf8_table1_size; j++)
1580    if (d <= utf8_table1[j]) break;    if (d <= (pcre_uint32)utf8_table1[j]) break;
1581  if (j != i) return -(i+1);  if (j != i) return -(i+1);
1582    
1583  /* Valid value */  /* Valid value */
# Line 1400  return i+1; Line 1589  return i+1;
1589    
1590    
1591    
1592  #if !defined NOUTF || defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32  #if defined SUPPORT_PCRE8 && !defined NOUTF
1593  /*************************************************  /*************************************************
1594  *       Convert character value to UTF-8         *  *       Convert character value to UTF-8         *
1595  *************************************************/  *************************************************/
# Line 1422  register int i, j; Line 1611  register int i, j;
1611  if (cvalue > 0x7fffffffu)  if (cvalue > 0x7fffffffu)
1612    return -1;    return -1;
1613  for (i = 0; i < utf8_table1_size; i++)  for (i = 0; i < utf8_table1_size; i++)
1614    if (cvalue <= utf8_table1[i]) break;    if (cvalue <= (pcre_uint32)utf8_table1[i]) break;
1615  utf8bytes += i;  utf8bytes += i;
1616  for (j = i; j > 0; j--)  for (j = i; j > 0; j--)
1617   {   {
# Line 1590  else Line 1779  else
1779  *pp = 0;  *pp = 0;
1780  return pp - buffer32;  return pp - buffer32;
1781  }  }
1782    
1783    /* Check that a 32-bit character string is valid UTF-32.
1784    
1785    Arguments:
1786      string       points to the string
1787      length       length of string, or -1 if the string is zero-terminated
1788    
1789    Returns:       TRUE  if the string is a valid UTF-32 string
1790                   FALSE otherwise
1791    */
1792    
1793    #ifdef NEVER
1794    
1795    #ifdef SUPPORT_UTF
1796    static BOOL
1797    valid_utf32(pcre_uint32 *string, int length)
1798    {
1799    register pcre_uint32 *p;
1800    register pcre_uint32 c;
1801    
1802    for (p = string; length-- > 0; p++)
1803      {
1804      c = *p;
1805    
1806      if (c > 0x10ffffu)
1807        return FALSE;
1808    
1809      /* A surrogate */
1810      if ((c & 0xfffff800u) == 0xd800u)
1811        return FALSE;
1812    
1813      /* Non-character */
1814      if ((c & 0xfffeu) == 0xfffeu || (c >= 0xfdd0u && c <= 0xfdefu))
1815        return FALSE;
1816      }
1817    
1818    return TRUE;
1819    }
1820    #endif /* SUPPORT_UTF */
1821    
1822    #endif /* NEVER */
1823    
1824    
1825  #endif  #endif
1826    
1827  /*************************************************  /*************************************************
# Line 1666  for (;;) Line 1898  for (;;)
1898      {      {
1899      int new_buffer_size = 2*buffer_size;      int new_buffer_size = 2*buffer_size;
1900      pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);      pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
     pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);  
1901      pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);      pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1902    
1903      if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)      if (new_buffer == NULL || new_pbuffer == NULL)
1904        {        {
1905        fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);        fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1906        exit(1);        exit(1);
# Line 1684  for (;;) Line 1915  for (;;)
1915      here = new_buffer + (here - buffer);      here = new_buffer + (here - buffer);
1916    
1917      free(buffer);      free(buffer);
     free(dbuffer);  
1918      free(pbuffer);      free(pbuffer);
1919    
1920      buffer = new_buffer;      buffer = new_buffer;
     dbuffer = new_dbuffer;  
1921      pbuffer = new_pbuffer;      pbuffer = new_pbuffer;
1922      }      }
1923    }    }
# Line 1733  return(result); Line 1962  return(result);
1962    
1963  static int pchar(pcre_uint32 c, FILE *f)  static int pchar(pcre_uint32 c, FILE *f)
1964  {  {
1965  int n;  int n = 0;
1966  if (PRINTOK(c))  if (PRINTOK(c))
1967    {    {
1968    if (f != NULL) fprintf(f, "%c", c);    if (f != NULL) fprintf(f, "%c", c);
# Line 1878  return yield; Line 2107  return yield;
2107  /* Must handle UTF-32 strings in utf mode. Yields number of characters printed.  /* Must handle UTF-32 strings in utf mode. Yields number of characters printed.
2108  If handed a NULL file, just counts chars without printing. */  If handed a NULL file, just counts chars without printing. */
2109    
2110  static int pchars32(PCRE_SPTR32 p, int length, FILE *f)  static int pchars32(PCRE_SPTR32 p, int length, BOOL utf, FILE *f)
2111  {  {
2112  int yield = 0;  int yield = 0;
2113    
2114    (void)(utf);  /* Avoid compiler warning */
2115    
2116  if (length < 0)  if (length < 0)
2117    length = strlen32(p);    length = strlen32(p);
2118    
# Line 2417  real_pcre32 *re = (real_pcre32 *)ere; Line 2648  real_pcre32 *re = (real_pcre32 *)ere;
2648  int op;  int op;
2649  pcre_uint32 *ptr = (pcre_uint32 *)re + re->name_table_offset;  pcre_uint32 *ptr = (pcre_uint32 *)re + re->name_table_offset;
2650  int length = re->name_count * re->name_entry_size;  int length = re->name_count * re->name_entry_size;
 #ifdef SUPPORT_UTF  
 BOOL utf = (re->options & PCRE_UTF32) != 0;  
 #endif /* SUPPORT_UTF */  
2651    
2652  /* Always flip the bytes in the main data block and study blocks. */  /* Always flip the bytes in the main data block and study blocks. */
2653    
# Line 2443  if (extra != NULL) Line 2671  if (extra != NULL)
2671    rsd->minlength = swap_uint32(rsd->minlength);    rsd->minlength = swap_uint32(rsd->minlength);
2672    }    }
2673    
2674  /* In 32-bit mode we must swap bytes  /* In 32-bit mode we must swap bytes in the name table, if present, and then in
2675  in the name table, if present, and then in the pattern itself. */  the pattern itself. */
2676    
2677  while(TRUE)  while(TRUE)
2678    {    {
# Line 2719  int all_use_dfa = 0; Line 2947  int all_use_dfa = 0;
2947  int verify_jit = 0;  int verify_jit = 0;
2948  int yield = 0;  int yield = 0;
2949  int stack_size;  int stack_size;
2950    pcre_uint8 *dbuffer = NULL;
2951    size_t dbuffer_size = 1u << 14;
2952    
2953  #if !defined NOPOSIX  #if !defined NOPOSIX
2954  int posix = 0;  int posix = 0;
# Line 2758  pcre_uint8 *gn8ptr; Line 2988  pcre_uint8 *gn8ptr;
2988  #endif  #endif
2989    
2990  /* Get buffers from malloc() so that valgrind will check their misuse when  /* Get buffers from malloc() so that valgrind will check their misuse when
2991  debugging. They grow automatically when very long lines are read. The 16-  debugging. They grow automatically when very long lines are read. The 16-
2992  and 32-bit buffers (buffer16, buffer32) are obtained only if needed. */  and 32-bit buffers (buffer16, buffer32) are obtained only if needed. */
2993    
2994  buffer = (pcre_uint8 *)malloc(buffer_size);  buffer = (pcre_uint8 *)malloc(buffer_size);
 dbuffer = (pcre_uint8 *)malloc(buffer_size);  
2995  pbuffer = (pcre_uint8 *)malloc(buffer_size);  pbuffer = (pcre_uint8 *)malloc(buffer_size);
2996    
2997  /* The outfile variable is static so that new_malloc can use it. */  /* The outfile variable is static so that new_malloc can use it. */
# Line 2810  while (argc > 1 && argv[op][0] == '-') Line 3039  while (argc > 1 && argv[op][0] == '-')
3039        force_study_options = jit_study_bits[*arg - '1'];        force_study_options = jit_study_bits[*arg - '1'];
3040      else goto BAD_ARG;      else goto BAD_ARG;
3041      }      }
3042      else if (strcmp(arg, "-8") == 0)
3043        {
3044    #ifdef SUPPORT_PCRE8
3045        pcre_mode = PCRE8_MODE;
3046    #else
3047        printf("** This version of PCRE was built without 8-bit support\n");
3048        exit(1);
3049    #endif
3050        }
3051    else if (strcmp(arg, "-16") == 0)    else if (strcmp(arg, "-16") == 0)
3052      {      {
3053  #ifdef SUPPORT_PCRE16  #ifdef SUPPORT_PCRE16
# Line 2959  while (argc > 1 && argv[op][0] == '-') Line 3197  while (argc > 1 && argv[op][0] == '-')
3197        else if (strcmp(argv[op + 1], "newline") == 0)        else if (strcmp(argv[op + 1], "newline") == 0)
3198          {          {
3199          (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);          (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
3200          print_newline_config(rc, TRUE);          print_newline_config(rc, TRUE);
3201          }          }
3202        else if (strcmp(argv[op + 1], "ebcdic") == 0)        else if (strcmp(argv[op + 1], "ebcdic") == 0)
3203          {          {
3204  #ifdef EBCDIC  #ifdef EBCDIC
3205          printf("1\n");          printf("1\n");
3206          yield = 1;          yield = 1;
3207  #else  #else
3208          printf("0\n");          printf("0\n");
3209  #endif  #endif
3210          }          }
3211        else if (strcmp(argv[op + 1], "ebcdic-nl") == 0)        else if (strcmp(argv[op + 1], "ebcdic-nl") == 0)
3212          {          {
3213  #ifdef EBCDIC  #ifdef EBCDIC
3214          printf("0x%02x\n", CHAR_LF);          printf("0x%02x\n", CHAR_LF);
3215  #else  #else
3216          printf("0\n");          printf("0\n");
3217  #endif  #endif
3218          }          }
3219        else        else
3220          {          {
3221          printf("Unknown -C option: %s\n", argv[op + 1]);          printf("Unknown -C option: %s\n", argv[op + 1]);
3222          }          }
3223        goto EXIT;        goto EXIT;
3224        }        }
3225    
3226      /* No argument for -C: output all configuration information. */      /* No argument for -C: output all configuration information. */
3227    
3228      printf("PCRE version %s\n", version);      printf("PCRE version %s\n", version);
3229      printf("Compiled with\n");      printf("Compiled with\n");
3230    
3231  #ifdef EBCDIC  #ifdef EBCDIC
3232      printf("  EBCDIC code support: LF is 0x%02x\n", CHAR_LF);      printf("  EBCDIC code support: LF is 0x%02x\n", CHAR_LF);
3233  #endif  #endif
3234    
3235  /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both  /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
3236  are set, either both UTFs are supported or both are not supported. */  are set, either both UTFs are supported or both are not supported. */
# Line 3025  are set, either both UTFs are supported Line 3263  are set, either both UTFs are supported
3263      else      else
3264        printf("  No just-in-time compiler support\n");        printf("  No just-in-time compiler support\n");
3265      (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
3266      print_newline_config(rc, FALSE);      print_newline_config(rc, FALSE);
3267      (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
3268      printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :      printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :
3269                                       "all Unicode newlines");                                       "all Unicode newlines");
# Line 3279  while (!done) Line 3517  while (!done)
3517        PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);        PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
3518        if (rc == PCRE_ERROR_BADMODE)        if (rc == PCRE_ERROR_BADMODE)
3519          {          {
3520            pcre_uint16 flags_in_host_byte_order;
3521            if (REAL_PCRE_MAGIC(re) == MAGIC_NUMBER)
3522              flags_in_host_byte_order = REAL_PCRE_FLAGS(re);
3523            else
3524              flags_in_host_byte_order = swap_uint16(REAL_PCRE_FLAGS(re));
3525          /* Simulate the result of the function call below. */          /* Simulate the result of the function call below. */
3526          fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,          fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
3527            pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "",            pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "",
3528            PCRE_INFO_OPTIONS);            PCRE_INFO_OPTIONS);
3529          fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "          fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
3530            "%d-bit mode\n", 8 * CHAR_SIZE,            "%d-bit mode\n", 8 * CHAR_SIZE, 8 * (flags_in_host_byte_order & PCRE_MODE_MASK));
           8 * (REAL_PCRE_FLAGS(re) & PCRE_MODE_MASK));  
3531          new_free(re);          new_free(re);
3532          fclose(f);          fclose(f);
3533          continue;          continue;
# Line 3672  while (!done) Line 3914  while (!done)
3914    
3915        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &name_count);        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &name_count);
3916        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &name_entry_size);        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &name_entry_size);
3917          real_pcre_size = 0;
3918  #ifdef SUPPORT_PCRE8  #ifdef SUPPORT_PCRE8
3919        if (REAL_PCRE_FLAGS(re) & PCRE_MODE8)        if (REAL_PCRE_FLAGS(re) & PCRE_MODE8)
3920          real_pcre_size = sizeof(real_pcre);          real_pcre_size = sizeof(real_pcre);
# Line 3766  while (!done) Line 4009  while (!done)
4009        if (new_info(re, NULL, PCRE_INFO_SIZE, &size) +        if (new_info(re, NULL, PCRE_INFO_SIZE, &size) +
4010            new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +            new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
4011            new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +            new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
4012            new_info(re, NULL, PCRE_INFO_FIRSTLITERAL, &first_char) +            new_info(re, NULL, PCRE_INFO_FIRSTCHARACTER, &first_char) +
4013            new_info(re, NULL, PCRE_INFO_FIRSTLITERALSET, &first_char_set) +            new_info(re, NULL, PCRE_INFO_FIRSTCHARACTERFLAGS, &first_char_set) +
4014            new_info(re, NULL, PCRE_INFO_LASTLITERAL2, &need_char) +            new_info(re, NULL, PCRE_INFO_REQUIREDCHAR, &need_char) +
4015            new_info(re, NULL, PCRE_INFO_LASTLITERAL2SET, &need_char_set) +            new_info(re, NULL, PCRE_INFO_REQUIREDCHARFLAGS, &need_char_set) +
4016            new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +            new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
4017            new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +            new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
4018            new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +            new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
# Line 4060  while (!done) Line 4303  while (!done)
4303    
4304    for (;;)    for (;;)
4305      {      {
4306      pcre_uint8 *q;  #ifdef SUPPORT_PCRE8
4307        pcre_uint8 *q8;
4308    #endif
4309    #ifdef SUPPORT_PCRE16
4310        pcre_uint16 *q16;
4311    #endif
4312    #ifdef SUPPORT_PCRE32
4313        pcre_uint32 *q32;
4314    #endif
4315      pcre_uint8 *bptr;      pcre_uint8 *bptr;
4316      int *use_offsets = offsets;      int *use_offsets = offsets;
4317      int use_size_offsets = size_offsets;      int use_size_offsets = size_offsets;
# Line 4132  while (!done) Line 4383  while (!done)
4383      p = buffer;      p = buffer;
4384      while (isspace(*p)) p++;      while (isspace(*p)) p++;
4385    
4386      bptr = q = dbuffer;  #ifndef NOUTF
4387        /* Check that the data is well-formed UTF-8 if we're in UTF mode. To create
4388           invalid input to pcre_exec, you must use \x?? or \x{} sequences. */
4389        if (use_utf)
4390          {
4391          pcre_uint8 *q;
4392          pcre_uint32 cc;
4393          int n = 1;
4394    
4395          for (q = p; n > 0 && *q; q += n) n = utf82ord(q, &cc);
4396          if (n <= 0)
4397            {
4398            fprintf(outfile, "**Failed: invalid UTF-8 string cannot be used as input in UTF mode\n");
4399            goto NEXT_DATA;
4400            }
4401          }
4402    #endif
4403    
4404    #ifdef SUPPORT_VALGRIND
4405        /* Mark the dbuffer as addressable but undefined again. */
4406        if (dbuffer != NULL)
4407          {
4408          VALGRIND_MAKE_MEM_UNDEFINED(dbuffer, dbuffer_size * CHAR_SIZE);
4409          }
4410    #endif
4411    
4412        /* Allocate a buffer to hold the data line. len+1 is an upper bound on
4413           the number of pcre_uchar units that will be needed. */
4414        if (dbuffer == NULL || (size_t)len >= dbuffer_size)
4415          {
4416          dbuffer_size *= 2;
4417          dbuffer = (pcre_uint8 *)realloc(dbuffer, dbuffer_size * CHAR_SIZE);
4418          if (dbuffer == NULL)
4419            {
4420            fprintf(stderr, "pcretest: malloc(%d) failed\n", (int)dbuffer_size);
4421            exit(1);
4422            }
4423          }
4424    
4425    #ifdef SUPPORT_PCRE8
4426        q8 = (pcre_uint8 *) dbuffer;
4427    #endif
4428    #ifdef SUPPORT_PCRE16
4429        q16 = (pcre_uint16 *) dbuffer;
4430    #endif
4431    #ifdef SUPPORT_PCRE32
4432        q32 = (pcre_uint32 *) dbuffer;
4433    #endif
4434    
4435      while ((c = *p++) != 0)      while ((c = *p++) != 0)
4436        {        {
4437        int i = 0;        int i = 0;
# Line 4145  while (!done) Line 4444  while (!done)
4444    
4445        if (c != '\\')        if (c != '\\')
4446          {          {
4447          if (use_utf)  #ifndef NOUTF
4448            {          if (use_utf && HASUTF8EXTRALEN(c)) { GETUTF8INC(c, p); }
4449            *q++ = c;  #endif
           continue;  
           }  
4450          }          }
4451    
4452        /* Handle backslash escapes */        /* Handle backslash escapes */
# Line 4210  while (!done) Line 4507  while (!done)
4507            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
4508            p++;            p++;
4509            }            }
4510          if (use_utf)  #if !defined NOUTF && defined SUPPORT_PCRE8
4511            if (use_utf && (pcre_mode == PCRE8_MODE))
4512            {            {
4513            *q++ = c;            *q8++ = c;
4514            continue;            continue;
4515            }            }
4516    #endif
4517          break;          break;
4518    
4519          case 0:   /* \ followed by EOF allows for an empty line */          case 0:   /* \ followed by EOF allows for an empty line */
# Line 4420  while (!done) Line 4719  while (!done)
4719          continue;          continue;
4720          }          }
4721    
4722        /* We now have a character value in c that may be greater than 255. In        /* We now have a character value in c that may be greater than 255.
4723        16-bit or 32-bit mode, we always convert characters to UTF-8 so that        In 8-bit mode we convert to UTF-8 if we are in UTF mode. Values greater
4724        values greater than 255 can be passed to non-UTF 16- or 32-bit strings.        than 127 in UTF mode must have come from \x{...} or octal constructs
       In 8-bit       mode we convert to UTF-8 if we are in UTF mode. Values greater  
       than 127       in UTF mode must have come from \x{...} or octal constructs  
4725        because values from \x.. get this far only in non-UTF mode. */        because values from \x.. get this far only in non-UTF mode. */
4726    
4727  #if !defined NOUTF || defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32  #ifdef SUPPORT_PCRE8
4728        if (pcre_mode != PCRE8_MODE || use_utf)        if (pcre_mode == PCRE8_MODE)
4729          {          {
4730          pcre_uint8 buff8[8];  #ifndef NOUTF
4731          int ii, utn;          if (use_utf)
4732          utn = ord2utf8(c, buff8);            {
4733          for (ii = 0; ii < utn; ii++) *q++ = buff8[ii];            if (c > 0x7fffffff)
4734                {
4735                fprintf(outfile, "** Character \\x{%x} is greater than 0x7fffffff "
4736                  "and so cannot be converted to UTF-8\n", c);
4737                goto NEXT_DATA;
4738                }
4739              q8 += ord2utf8(c, q8);
4740              }
4741            else
4742    #endif
4743              {
4744              if (c > 0xffu)
4745                {
4746                fprintf(outfile, "** Character \\x{%x} is greater than 255 "
4747                  "and UTF-8 mode is not enabled.\n", c);
4748                fprintf(outfile, "** Truncation will probably give the wrong "
4749                  "result.\n");
4750                }
4751              *q8++ = c;
4752              }
4753          }          }
       else  
4754  #endif  #endif
4755    #ifdef SUPPORT_PCRE16
4756          if (pcre_mode == PCRE16_MODE)
4757          {          {
4758          if (c > 255)  #ifndef NOUTF
4759            if (use_utf)
4760              {
4761              if (c > 0x10ffffu)
4762                {
4763                fprintf(outfile, "** Failed: character \\x{%x} is greater than "
4764                  "0x10ffff and so cannot be converted to UTF-16\n", c);
4765                goto NEXT_DATA;
4766                }
4767              else if (c >= 0x10000u)
4768                {
4769                c-= 0x10000u;
4770                *q16++ = 0xD800 | (c >> 10);
4771                *q16++ = 0xDC00 | (c & 0x3ff);
4772                }
4773              else
4774                *q16++ = c;
4775              }
4776            else
4777    #endif
4778            {            {
4779            fprintf(outfile, "** Character \\x{%x} is greater than 255 "            if (c > 0xffffu)
4780              "and UTF-8 mode is not enabled.\n", c);              {
4781            fprintf(outfile, "** Truncation will probably give the wrong "              fprintf(outfile, "** Character \\x{%x} is greater than 0xffff "
4782              "result.\n");                "and UTF-16 mode is not enabled.\n", c);
4783                fprintf(outfile, "** Truncation will probably give the wrong "
4784                  "result.\n");
4785                }
4786    
4787              *q16++ = c;
4788            }            }
         *q++ = c;  
4789          }          }
4790    #endif
4791    #ifdef SUPPORT_PCRE32
4792          if (pcre_mode == PCRE32_MODE)
4793            {
4794            *q32++ = c;
4795            }
4796    #endif
4797    
4798        }        }
4799    
4800      /* Reached end of subject string */      /* Reached end of subject string */
4801    
4802      *q = 0;  #ifdef SUPPORT_PCRE8
4803      len = (int)(q - dbuffer);      if (pcre_mode == PCRE8_MODE)
4804        {
4805          *q8 = 0;
4806          len = (int)(q8 - (pcre_uint8 *)dbuffer);
4807        }
4808    #endif
4809    #ifdef SUPPORT_PCRE16
4810        if (pcre_mode == PCRE16_MODE)
4811        {
4812          *q16 = 0;
4813          len = (int)(q16 - (pcre_uint16 *)dbuffer);
4814        }
4815    #endif
4816    #ifdef SUPPORT_PCRE32
4817        if (pcre_mode == PCRE32_MODE)
4818        {
4819          *q32 = 0;
4820          len = (int)(q32 - (pcre_uint32 *)dbuffer);
4821        }
4822    #endif
4823    
4824        /* If we're compiling with explicit valgrind support, mark the region from
4825        just after the end of the data to the end of the buffer as unaddressable, so
4826        that a read past the end of the data will be seen by valgrind, even if it
4827        doesn't cause a crash. If we're not building with valgrind support, move the
4828        data to the end of the buffer so that such a read might at least cause a crash.
4829        If we are using the POSIX interface, we must include the terminating zero. */
4830    
4831      /* Move the data to the end of the buffer so that a read over the end of      bptr = dbuffer;
     the buffer will be seen by valgrind, even if it doesn't cause a crash. If  
     we are using the POSIX interface, we must include the terminating zero. */  
4832    
4833  #if !defined NOPOSIX  #if !defined NOPOSIX
4834      if (posix || do_posix)      if (posix || do_posix)
4835        {        {
4836        memmove(bptr + buffer_size - len - 1, bptr, len + 1);  #ifdef SUPPORT_VALGRIND
4837        bptr += buffer_size - len - 1;        VALGRIND_MAKE_MEM_NOACCESS(dbuffer + len + 1, dbuffer_size - (len + 1));
4838    #else
4839          memmove(bptr + dbuffer_size - len - 1, bptr, len + 1);
4840          bptr += dbuffer_size - len - 1;
4841    #endif
4842        }        }
4843      else      else
4844  #endif  #endif
4845        {        {
4846        memmove(bptr + buffer_size - len, bptr, len);  #ifdef SUPPORT_VALGRIND
4847        bptr += buffer_size - len;        VALGRIND_MAKE_MEM_NOACCESS(dbuffer + len * CHAR_SIZE, (dbuffer_size - len) * CHAR_SIZE);
4848    #else
4849          bptr = memmove(bptr + (dbuffer_size - len) * CHAR_SIZE, bptr, len * CHAR_SIZE);
4850    #endif
4851        }        }
4852    
4853      if ((all_use_dfa || use_dfa) && find_match_limit)      if ((all_use_dfa || use_dfa) && find_match_limit)
# Line 4532  while (!done) Line 4911  while (!done)
4911    
4912      /* Handle matching via the native interface - repeats for /g and /G */      /* Handle matching via the native interface - repeats for /g and /G */
4913    
 #ifdef SUPPORT_PCRE16  
     if (pcre_mode == PCRE16_MODE)  
       {  
       len = to16(TRUE, bptr, REAL_PCRE_OPTIONS(re) & PCRE_UTF8, len);  
       switch(len)  
         {  
         case -1:  
         fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "  
           "converted to UTF-16\n");  
         goto NEXT_DATA;  
   
         case -2:  
         fprintf(outfile, "**Failed: character value greater than 0x10ffff "  
           "cannot be converted to UTF-16\n");  
         goto NEXT_DATA;  
   
         case -3:  
         fprintf(outfile, "**Failed: character value greater than 0xffff "  
           "cannot be converted to 16-bit in non-UTF mode\n");  
         goto NEXT_DATA;  
   
         default:  
         break;  
         }  
       bptr = (pcre_uint8 *)buffer16;  
       }  
 #endif  
   
 #ifdef SUPPORT_PCRE32  
     if (pcre_mode == PCRE32_MODE)  
       {  
       len = to32(TRUE, bptr, REAL_PCRE_OPTIONS(re) & PCRE_UTF32, len);  
       switch(len)  
         {  
         case -1:  
         fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "  
           "converted to UTF-32\n");  
         goto NEXT_DATA;  
   
         case -2:  
         fprintf(outfile, "**Failed: character value greater than 0x10ffff "  
           "cannot be converted to UTF-32\n");  
         goto NEXT_DATA;  
   
         case -3:  
         fprintf(outfile, "**Failed: character value is ill-formed UTF-32\n");  
         goto NEXT_DATA;  
   
         default:  
         break;  
         }  
       bptr = (pcre_uint8 *)buffer32;  
       }  
 #endif  
   
4914      /* Ensure that there is a JIT callback if we want to verify that JIT was      /* Ensure that there is a JIT callback if we want to verify that JIT was
4915      actually used. If jit_stack == NULL, no stack has yet been assigned. */      actually used. If jit_stack == NULL, no stack has yet been assigned. */
4916    
# Line 4808  while (!done) Line 5132  while (!done)
5132            int rc;            int rc;
5133            char copybuffer[256];            char copybuffer[256];
5134    
5135    #ifdef SUPPORT_PCRE32
5136              if (pcre_mode == PCRE32_MODE)
5137                {
5138                if (*(pcre_uint32 *)cnptr == 0) break;
5139                }
5140    #endif
5141    #ifdef SUPPORT_PCRE16
5142            if (pcre_mode == PCRE16_MODE)            if (pcre_mode == PCRE16_MODE)
5143              {              {
5144              if (*(pcre_uint16 *)cnptr == 0) break;              if (*(pcre_uint16 *)cnptr == 0) break;
5145              }              }
5146            else  #endif
5147    #ifdef SUPPORT_PCRE8
5148              if (pcre_mode == PCRE8_MODE)
5149              {              {
5150              if (*(pcre_uint8 *)cnptr == 0) break;              if (*(pcre_uint8 *)cnptr == 0) break;
5151              }              }
5152    #endif
5153    
5154            PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,            PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
5155              cnptr, copybuffer, sizeof(copybuffer));              cnptr, copybuffer, sizeof(copybuffer));
# Line 4863  while (!done) Line 5197  while (!done)
5197            int rc;            int rc;
5198            const char *substring;            const char *substring;
5199    
5200    #ifdef SUPPORT_PCRE32
5201              if (pcre_mode == PCRE32_MODE)
5202                {
5203                if (*(pcre_uint32 *)gnptr == 0) break;
5204                }
5205    #endif
5206    #ifdef SUPPORT_PCRE16
5207            if (pcre_mode == PCRE16_MODE)            if (pcre_mode == PCRE16_MODE)
5208              {              {
5209              if (*(pcre_uint16 *)gnptr == 0) break;              if (*(pcre_uint16 *)gnptr == 0) break;
5210              }              }
5211            else  #endif
5212    #ifdef SUPPORT_PCRE8
5213              if (pcre_mode == PCRE8_MODE)
5214              {              {
5215              if (*(pcre_uint8 *)gnptr == 0) break;              if (*(pcre_uint8 *)gnptr == 0) break;
5216              }              }
5217    #endif
5218    
5219            PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,            PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
5220              gnptr, &substring);              gnptr, &substring);

Legend:
Removed from v.1087  
changed lines
  Added in v.1202

  ViewVC Help
Powered by ViewVC 1.1.5