/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1087 by chpe, Tue Oct 16 15:55:38 2012 UTC revision 1309 by ph10, Fri Apr 5 15:35:59 2013 UTC
# Line 36  POSSIBILITY OF SUCH DAMAGE. Line 36  POSSIBILITY OF SUCH DAMAGE.
36  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
37  */  */
38    
39  /* This program now supports the testing of both the 8-bit and 16-bit PCRE  /* This program now supports the testing of all of the 8-bit, 16-bit, and
40  libraries in a single program. This is different from the modules such as  32-bit PCRE libraries in a single program. This is different from the modules
41  pcre_compile.c in the library itself, which are compiled separately for each  such as pcre_compile.c in the library itself, which are compiled separately for
42  mode. If both modes are enabled, for example, pcre_compile.c is compiled twice  each mode. If two modes are enabled, for example, pcre_compile.c is compiled
43  (the second time with COMPILE_PCRE16 defined). By contrast, pcretest.c is  twice. By contrast, pcretest.c is compiled only once. Therefore, it must not
44  compiled only once. Therefore, it must not make use of any of the macros from  make use of any of the macros from pcre_internal.h that depend on
45  pcre_internal.h that depend on COMPILE_PCRE8 or COMPILE_PCRE16. It does,  COMPILE_PCRE8, COMPILE_PCRE16, or COMPILE_PCRE32. It does, however, make use of
46  however, make use of SUPPORT_PCRE8 and SUPPORT_PCRE16 to ensure that it calls  SUPPORT_PCRE8, SUPPORT_PCRE16, and SUPPORT_PCRE32 to ensure that it calls only
47  only supported library functions. */  supported library functions. */
48    
49  #ifdef HAVE_CONFIG_H  #ifdef HAVE_CONFIG_H
50  #include "config.h"  #include "config.h"
# Line 121  input mode under Windows. */ Line 121  input mode under Windows. */
121  #endif  #endif
122  #endif  #endif
123    
124    #ifdef __VMS
125    #include <ssdef.h>
126    void vms_setsymbol( char *, char *, int );
127    #endif
128    
129    
130  #define PRIV(name) name  #define PRIV(name) name
131    
132  /* We have to include pcre_internal.h because we need the internal info for  /* We have to include pcre_internal.h because we need the internal info for
# Line 133  here before pcre_internal.h so that the Line 139  here before pcre_internal.h so that the
139  appropriately for an application, not for building PCRE. */  appropriately for an application, not for building PCRE. */
140    
141  #include "pcre.h"  #include "pcre.h"
   
 #if defined SUPPORT_PCRE32 && !defined SUPPORT_PCRE8 && !defined SUPPORT_PCRE16  
 /* Configure internal macros to 32 bit mode. */  
 #define COMPILE_PCRE32  
 #endif  
 #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8 && !defined SUPPORT_PCRE32  
 /* Configure internal macros to 16 bit mode. */  
 #define COMPILE_PCRE16  
 #endif  
 #if defined SUPPORT_PCRE8 && !defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE32  
 /* Configure internal macros to 8 bit mode. */  
 #define COMPILE_PCRE8  
 #endif  
   
142  #include "pcre_internal.h"  #include "pcre_internal.h"
143    
144  /* The pcre_printint() function, which prints the internal form of a compiled  /* The pcre_printint() function, which prints the internal form of a compiled
# Line 303  argument, the casting might be incorrect Line 295  argument, the casting might be incorrect
295  #define PCRE_JIT_STACK_FREE8(stack) \  #define PCRE_JIT_STACK_FREE8(stack) \
296    pcre_jit_stack_free(stack)    pcre_jit_stack_free(stack)
297    
298    #define pcre8_maketables pcre_maketables
299    
300  #endif /* SUPPORT_PCRE8 */  #endif /* SUPPORT_PCRE8 */
301    
302  /* -----------------------------------------------------------*/  /* -----------------------------------------------------------*/
# Line 399  argument, the casting might be incorrect Line 393  argument, the casting might be incorrect
393  #ifdef SUPPORT_PCRE32  #ifdef SUPPORT_PCRE32
394    
395  #define PCHARS32(lv, p, offset, len, f) \  #define PCHARS32(lv, p, offset, len, f) \
396    lv = pchars32((PCRE_SPTR32)(p) + offset, len, f)    lv = pchars32((PCRE_SPTR32)(p) + offset, len, use_utf, f)
397    
398  #define PCHARSV32(p, offset, len, f) \  #define PCHARSV32(p, offset, len, f)                \
399    (void)pchars32((PCRE_SPTR32)(p) + offset, len, f)    (void)pchars32((PCRE_SPTR32)(p) + offset, len, use_utf, f)
400    
401  #define READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re) \  #define READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re) \
402    p = read_capture_name32(p, cn32, re)    p = read_capture_name32(p, cn32, re)
# Line 484  argument, the casting might be incorrect Line 478  argument, the casting might be incorrect
478  #endif /* SUPPORT_PCRE32 */  #endif /* SUPPORT_PCRE32 */
479    
480    
481  /* ----- Both modes are supported; a runtime test is needed, except for  /* ----- More than one mode is supported; a runtime test is needed, except for
482  pcre_config(), and the JIT stack functions, when it doesn't matter which  pcre_config(), and the JIT stack functions, when it doesn't matter which
483  version is called. ----- */  available version is called. ----- */
484    
485  enum {  enum {
486    PCRE8_MODE,    PCRE8_MODE,
# Line 494  enum { Line 488  enum {
488    PCRE32_MODE    PCRE32_MODE
489  };  };
490    
491  #if (defined (SUPPORT_PCRE8) + defined (SUPPORT_PCRE16) + defined (SUPPORT_PCRE32)) >= 2  #if (defined (SUPPORT_PCRE8) + defined (SUPPORT_PCRE16) + \
492         defined (SUPPORT_PCRE32)) >= 2
493    
494  #define CHAR_SIZE (1 << pcre_mode)  #define CHAR_SIZE (1 << pcre_mode)
495    
496    /* There doesn't seem to be an easy way of writing these macros that can cope
497    with the 3 pairs of bit sizes plus all three bit sizes. So just handle all the
498    cases separately. */
499    
500    /* ----- All three modes supported ----- */
501    
502    #if defined(SUPPORT_PCRE8) && defined(SUPPORT_PCRE16) && defined(SUPPORT_PCRE32)
503    
504  #define PCHARS(lv, p, offset, len, f) \  #define PCHARS(lv, p, offset, len, f) \
505    if (pcre_mode == PCRE32_MODE) \    if (pcre_mode == PCRE32_MODE) \
506      PCHARS32(lv, p, offset, len, f); \      PCHARS32(lv, p, offset, len, f); \
# Line 696  enum { Line 699  enum {
699    else \    else \
700      PCRE_STUDY8(extra, re, options, error)      PCRE_STUDY8(extra, re, options, error)
701    
702    
703    /* ----- Two out of three modes are supported ----- */
704    
705    #else
706    
707    /* We can use some macro trickery to make a single set of definitions work in
708    the three different cases. */
709    
710    /* ----- 32-bit and 16-bit but not 8-bit supported ----- */
711    
712    #if defined(SUPPORT_PCRE32) && defined(SUPPORT_PCRE16)
713    #define BITONE 32
714    #define BITTWO 16
715    
716    /* ----- 32-bit and 8-bit but not 16-bit supported ----- */
717    
718    #elif defined(SUPPORT_PCRE32) && defined(SUPPORT_PCRE8)
719    #define BITONE 32
720    #define BITTWO 8
721    
722    /* ----- 16-bit and 8-bit but not 32-bit supported ----- */
723    
724    #else
725    #define BITONE 16
726    #define BITTWO 8
727    #endif
728    
729    #define glue(a,b) a##b
730    #define G(a,b) glue(a,b)
731    
732    
733    /* ----- Common macros for two-mode cases ----- */
734    
735    #define PCHARS(lv, p, offset, len, f) \
736      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
737        G(PCHARS,BITONE)(lv, p, offset, len, f); \
738      else \
739        G(PCHARS,BITTWO)(lv, p, offset, len, f)
740    
741    #define PCHARSV(p, offset, len, f) \
742      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
743        G(PCHARSV,BITONE)(p, offset, len, f); \
744      else \
745        G(PCHARSV,BITTWO)(p, offset, len, f)
746    
747    #define READ_CAPTURE_NAME(p, cn8, cn16, cn32, re) \
748      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
749        G(READ_CAPTURE_NAME,BITONE)(p, cn8, cn16, cn32, re); \
750      else \
751        G(READ_CAPTURE_NAME,BITTWO)(p, cn8, cn16, cn32, re)
752    
753    #define SET_PCRE_CALLOUT(callout) \
754      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
755        G(SET_PCRE_CALLOUT,BITONE)(callout); \
756      else \
757        G(SET_PCRE_CALLOUT,BITTWO)(callout)
758    
759    #define STRLEN(p) ((pcre_mode == G(G(PCRE,BITONE),_MODE)) ? \
760      G(STRLEN,BITONE)(p) : G(STRLEN,BITTWO)(p))
761    
762    #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
763      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
764        G(PCRE_ASSIGN_JIT_STACK,BITONE)(extra, callback, userdata); \
765      else \
766        G(PCRE_ASSIGN_JIT_STACK,BITTWO)(extra, callback, userdata)
767    
768    #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
769      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
770        G(PCRE_COMPILE,BITONE)(re, pat, options, error, erroffset, tables); \
771      else \
772        G(PCRE_COMPILE,BITTWO)(re, pat, options, error, erroffset, tables)
773    
774    #define PCRE_CONFIG G(G(pcre,BITONE),_config)
775    
776    #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
777        namesptr, cbuffer, size) \
778      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
779        G(PCRE_COPY_NAMED_SUBSTRING,BITONE)(rc, re, bptr, offsets, count, \
780          namesptr, cbuffer, size); \
781      else \
782        G(PCRE_COPY_NAMED_SUBSTRING,BITTWO)(rc, re, bptr, offsets, count, \
783          namesptr, cbuffer, size)
784    
785    #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
786      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
787        G(PCRE_COPY_SUBSTRING,BITONE)(rc, bptr, offsets, count, i, cbuffer, size); \
788      else \
789        G(PCRE_COPY_SUBSTRING,BITTWO)(rc, bptr, offsets, count, i, cbuffer, size)
790    
791    #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
792        offsets, size_offsets, workspace, size_workspace) \
793      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
794        G(PCRE_DFA_EXEC,BITONE)(count, re, extra, bptr, len, start_offset, options, \
795          offsets, size_offsets, workspace, size_workspace); \
796      else \
797        G(PCRE_DFA_EXEC,BITTWO)(count, re, extra, bptr, len, start_offset, options, \
798          offsets, size_offsets, workspace, size_workspace)
799    
800    #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
801        offsets, size_offsets) \
802      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
803        G(PCRE_EXEC,BITONE)(count, re, extra, bptr, len, start_offset, options, \
804          offsets, size_offsets); \
805      else \
806        G(PCRE_EXEC,BITTWO)(count, re, extra, bptr, len, start_offset, options, \
807          offsets, size_offsets)
808    
809    #define PCRE_FREE_STUDY(extra) \
810      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
811        G(PCRE_FREE_STUDY,BITONE)(extra); \
812      else \
813        G(PCRE_FREE_STUDY,BITTWO)(extra)
814    
815    #define PCRE_FREE_SUBSTRING(substring) \
816      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
817        G(PCRE_FREE_SUBSTRING,BITONE)(substring); \
818      else \
819        G(PCRE_FREE_SUBSTRING,BITTWO)(substring)
820    
821    #define PCRE_FREE_SUBSTRING_LIST(listptr) \
822      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
823        G(PCRE_FREE_SUBSTRING_LIST,BITONE)(listptr); \
824      else \
825        G(PCRE_FREE_SUBSTRING_LIST,BITTWO)(listptr)
826    
827    #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
828        getnamesptr, subsptr) \
829      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
830        G(PCRE_GET_NAMED_SUBSTRING,BITONE)(rc, re, bptr, offsets, count, \
831          getnamesptr, subsptr); \
832      else \
833        G(PCRE_GET_NAMED_SUBSTRING,BITTWO)(rc, re, bptr, offsets, count, \
834          getnamesptr, subsptr)
835    
836    #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
837      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
838        G(PCRE_GET_STRINGNUMBER,BITONE)(n, rc, ptr); \
839      else \
840        G(PCRE_GET_STRINGNUMBER,BITTWO)(n, rc, ptr)
841    
842    #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
843      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
844        G(PCRE_GET_SUBSTRING,BITONE)(rc, bptr, use_offsets, count, i, subsptr); \
845      else \
846        G(PCRE_GET_SUBSTRING,BITTWO)(rc, bptr, use_offsets, count, i, subsptr)
847    
848    #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
849      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
850        G(PCRE_GET_SUBSTRING_LIST,BITONE)(rc, bptr, offsets, count, listptr); \
851      else \
852        G(PCRE_GET_SUBSTRING_LIST,BITTWO)(rc, bptr, offsets, count, listptr)
853    
854    #define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
855      (pcre_mode == G(G(PCRE,BITONE),_MODE)) ? \
856         G(PCRE_JIT_STACK_ALLOC,BITONE)(startsize, maxsize) \
857        : G(PCRE_JIT_STACK_ALLOC,BITTWO)(startsize, maxsize)
858    
859    #define PCRE_JIT_STACK_FREE(stack) \
860      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
861        G(PCRE_JIT_STACK_FREE,BITONE)(stack); \
862      else \
863        G(PCRE_JIT_STACK_FREE,BITTWO)(stack)
864    
865    #define PCRE_MAKETABLES \
866      (pcre_mode == G(G(PCRE,BITONE),_MODE)) ? \
867        G(G(pcre,BITONE),_maketables)() : G(G(pcre,BITTWO),_maketables)()
868    
869    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
870      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
871        G(PCRE_PATTERN_TO_HOST_BYTE_ORDER,BITONE)(rc, re, extra, tables); \
872      else \
873        G(PCRE_PATTERN_TO_HOST_BYTE_ORDER,BITTWO)(rc, re, extra, tables)
874    
875    #define PCRE_PRINTINT(re, outfile, debug_lengths) \
876      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
877        G(PCRE_PRINTINT,BITONE)(re, outfile, debug_lengths); \
878      else \
879        G(PCRE_PRINTINT,BITTWO)(re, outfile, debug_lengths)
880    
881    #define PCRE_STUDY(extra, re, options, error) \
882      if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
883        G(PCRE_STUDY,BITONE)(extra, re, options, error); \
884      else \
885        G(PCRE_STUDY,BITTWO)(extra, re, options, error)
886    
887    #endif  /* Two out of three modes */
888    
889    /* ----- End of cases where more than one mode is supported ----- */
890    
891    
892  /* ----- Only 8-bit mode is supported ----- */  /* ----- Only 8-bit mode is supported ----- */
893    
894  #elif defined SUPPORT_PCRE8  #elif defined SUPPORT_PCRE8
# Line 831  static const unsigned char *last_callout Line 1024  static const unsigned char *last_callout
1024    
1025  static int buffer_size = 50000;  static int buffer_size = 50000;
1026  static pcre_uint8 *buffer = NULL;  static pcre_uint8 *buffer = NULL;
 static pcre_uint8 *dbuffer = NULL;  
1027  static pcre_uint8 *pbuffer = NULL;  static pcre_uint8 *pbuffer = NULL;
1028    
1029  /* Another buffer is needed translation to 16/32-bit character strings. It will  /* Just as a safety check, make sure that COMPILE_PCRE[16|32] are *not* set. */
 obtained and extended as required. */  
   
 #if defined SUPPORT_PCRE8 && (defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32)  
   
 /* We need the table of operator lengths that is used for 16/32-bit compiling, in  
 order to swap bytes in a pattern for saving/reloading testing. Luckily, the  
 data is defined as a macro. However, we must ensure that LINK_SIZE is adjusted  
 appropriately for the 16/32-bit world. Just as a safety check, make sure that  
 COMPILE_PCRE[16|32] is *not* set. */  
1030    
1031  #ifdef COMPILE_PCRE16  #ifdef COMPILE_PCRE16
1032  #error COMPILE_PCRE16 must not be set when compiling pcretest.c  #error COMPILE_PCRE16 must not be set when compiling pcretest.c
# Line 853  COMPILE_PCRE[16|32] is *not* set. */ Line 1036  COMPILE_PCRE[16|32] is *not* set. */
1036  #error COMPILE_PCRE32 must not be set when compiling pcretest.c  #error COMPILE_PCRE32 must not be set when compiling pcretest.c
1037  #endif  #endif
1038    
1039    /* We need buffers for building 16/32-bit strings, and the tables of operator
1040    lengths that are used for 16/32-bit compiling, in order to swap bytes in a
1041    pattern for saving/reloading testing. Luckily, the data for these tables is
1042    defined as a macro. However, we must ensure that LINK_SIZE and IMM2_SIZE (which
1043    are used in the tables) are adjusted appropriately for the 16/32-bit world.
1044    LINK_SIZE is also used later in this program. */
1045    
1046    #ifdef SUPPORT_PCRE16
1047    #undef IMM2_SIZE
1048    #define IMM2_SIZE 1
1049    
1050  #if LINK_SIZE == 2  #if LINK_SIZE == 2
1051  #undef LINK_SIZE  #undef LINK_SIZE
1052  #define LINK_SIZE 1  #define LINK_SIZE 1
# Line 863  COMPILE_PCRE[16|32] is *not* set. */ Line 1057  COMPILE_PCRE[16|32] is *not* set. */
1057  #error LINK_SIZE must be either 2, 3, or 4  #error LINK_SIZE must be either 2, 3, or 4
1058  #endif  #endif
1059    
 #undef IMM2_SIZE  
 #define IMM2_SIZE 1  
   
 #endif /* SUPPORT_PCRE8 && (SUPPORT_PCRE16 || SUPPORT_PCRE32) */  
   
 #ifdef SUPPORT_PCRE16  
1060  static int buffer16_size = 0;  static int buffer16_size = 0;
1061  static pcre_uint16 *buffer16 = NULL;  static pcre_uint16 *buffer16 = NULL;
1062  static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };  static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
1063  #endif  /* SUPPORT_PCRE16 */  #endif  /* SUPPORT_PCRE16 */
1064    
1065  #ifdef SUPPORT_PCRE32  #ifdef SUPPORT_PCRE32
1066    #undef IMM2_SIZE
1067    #define IMM2_SIZE 1
1068    #undef LINK_SIZE
1069    #define LINK_SIZE 1
1070    
1071  static int buffer32_size = 0;  static int buffer32_size = 0;
1072  static pcre_uint32 *buffer32 = NULL;  static pcre_uint32 *buffer32 = NULL;
1073  static const pcre_uint32 OP_lengths32[] = { OP_LENGTHS };  static const pcre_uint32 OP_lengths32[] = { OP_LENGTHS };
1074  #endif  /* SUPPORT_PCRE32 */  #endif  /* SUPPORT_PCRE32 */
1075    
1076  /* If we have 8-bit support, default to it; if there is also  /* If we have 8-bit support, default to it; if there is also 16-or 32-bit
1077  16-or 32-bit support, it can be changed by an option. If there is no 8-bit support,  support, it can be changed by an option. If there is no 8-bit support, there
1078  there must be 16-or 32-bit support, so default it to 1. */  must be 16-or 32-bit support, so default it to 1. */
1079    
1080  #if defined SUPPORT_PCRE8  #if defined SUPPORT_PCRE8
1081  static int pcre_mode = PCRE8_MODE;  static int pcre_mode = PCRE8_MODE;
# Line 942  static const char *errtexts[] = { Line 1135  static const char *errtexts[] = {
1135    "JIT stack limit reached",    "JIT stack limit reached",
1136    "pattern compiled in wrong mode: 8-bit/16-bit error",    "pattern compiled in wrong mode: 8-bit/16-bit error",
1137    "pattern compiled with other endianness",    "pattern compiled with other endianness",
1138    "invalid data in workspace for DFA restart"    "invalid data in workspace for DFA restart",
1139      "bad JIT option",
1140      "bad length"
1141  };  };
1142    
1143    
# Line 1299  return sys_errlist[n]; Line 1494  return sys_errlist[n];
1494  *       Print newline configuration              *  *       Print newline configuration              *
1495  *************************************************/  *************************************************/
1496    
1497  /*  /*
1498  Arguments:  Arguments:
1499    rc         the return code from PCRE_CONFIG_NEWLINE    rc         the return code from PCRE_CONFIG_NEWLINE
1500    isc        TRUE if called from "-C newline"    isc        TRUE if called from "-C newline"
1501  Returns:     nothing  Returns:     nothing
1502  */  */
1503    
# Line 1318  switch(rc) Line 1513  switch(rc)
1513    case (CHAR_CR<<8 | CHAR_LF): s = "CRLF"; break;    case (CHAR_CR<<8 | CHAR_LF): s = "CRLF"; break;
1514    case -1: s = "ANY"; break;    case -1: s = "ANY"; break;
1515    case -2: s = "ANYCRLF"; break;    case -2: s = "ANYCRLF"; break;
1516    
1517    default:    default:
1518    printf("a non-standard value: 0x%04x\n", rc);    printf("a non-standard value: 0x%04x\n", rc);
1519    return;    return;
1520    }    }
1521    
1522  printf("%s\n", s);  printf("%s\n", s);
1523  }  }
# Line 1388  for (j = 0; j < i; j++) Line 1583  for (j = 0; j < i; j++)
1583  /* Check that encoding was the correct unique one */  /* Check that encoding was the correct unique one */
1584    
1585  for (j = 0; j < utf8_table1_size; j++)  for (j = 0; j < utf8_table1_size; j++)
1586    if (d <= utf8_table1[j]) break;    if (d <= (pcre_uint32)utf8_table1[j]) break;
1587  if (j != i) return -(i+1);  if (j != i) return -(i+1);
1588    
1589  /* Valid value */  /* Valid value */
# Line 1400  return i+1; Line 1595  return i+1;
1595    
1596    
1597    
1598  #if !defined NOUTF || defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32  #if defined SUPPORT_PCRE8 && !defined NOUTF
1599  /*************************************************  /*************************************************
1600  *       Convert character value to UTF-8         *  *       Convert character value to UTF-8         *
1601  *************************************************/  *************************************************/
# Line 1422  register int i, j; Line 1617  register int i, j;
1617  if (cvalue > 0x7fffffffu)  if (cvalue > 0x7fffffffu)
1618    return -1;    return -1;
1619  for (i = 0; i < utf8_table1_size; i++)  for (i = 0; i < utf8_table1_size; i++)
1620    if (cvalue <= utf8_table1[i]) break;    if (cvalue <= (pcre_uint32)utf8_table1[i]) break;
1621  utf8bytes += i;  utf8bytes += i;
1622  for (j = i; j > 0; j--)  for (j = i; j > 0; j--)
1623   {   {
# Line 1590  else Line 1785  else
1785  *pp = 0;  *pp = 0;
1786  return pp - buffer32;  return pp - buffer32;
1787  }  }
1788  #endif  
1789    /* Check that a 32-bit character string is valid UTF-32.
1790    
1791    Arguments:
1792      string       points to the string
1793      length       length of string in 32-bit units; must be >= 0 (a negative value checks nothing)
1794    
1795    Returns:       TRUE  if the string is a valid UTF-32 string
1796                   FALSE otherwise
1797    */
1798    
1799    #ifdef NEVER   /* Not used */
1800    #ifdef SUPPORT_UTF
1801    static BOOL
1802    valid_utf32(pcre_uint32 *string, int length)
1803    {
1804    register pcre_uint32 *p;
1805    register pcre_uint32 c;
1806    
1807    for (p = string; length-- > 0; p++)
1808      {
1809      c = *p;
1810      if (c > 0x10ffffu) return FALSE;                 /* Too big */
1811      if ((c & 0xfffff800u) == 0xd800u) return FALSE;  /* Surrogate */
1812      }
1813    
1814    return TRUE;
1815    }
1816    #endif /* SUPPORT_UTF */
1817    #endif /* NEVER */
1818    #endif /* SUPPORT_PCRE32 */
1819    
1820    
1821  /*************************************************  /*************************************************
1822  *        Read or extend an input line            *  *        Read or extend an input line            *
# Line 1666  for (;;) Line 1892  for (;;)
1892      {      {
1893      int new_buffer_size = 2*buffer_size;      int new_buffer_size = 2*buffer_size;
1894      pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);      pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
     pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);  
1895      pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);      pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1896    
1897      if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)      if (new_buffer == NULL || new_pbuffer == NULL)
1898        {        {
1899        fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);        fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1900        exit(1);        exit(1);
# Line 1684  for (;;) Line 1909  for (;;)
1909      here = new_buffer + (here - buffer);      here = new_buffer + (here - buffer);
1910    
1911      free(buffer);      free(buffer);
     free(dbuffer);  
1912      free(pbuffer);      free(pbuffer);
1913    
1914      buffer = new_buffer;      buffer = new_buffer;
     dbuffer = new_dbuffer;  
1915      pbuffer = new_pbuffer;      pbuffer = new_pbuffer;
1916      }      }
1917    }    }
# Line 1733  return(result); Line 1956  return(result);
1956    
1957  static int pchar(pcre_uint32 c, FILE *f)  static int pchar(pcre_uint32 c, FILE *f)
1958  {  {
1959  int n;  int n = 0;
1960  if (PRINTOK(c))  if (PRINTOK(c))
1961    {    {
1962    if (f != NULL) fprintf(f, "%c", c);    if (f != NULL) fprintf(f, "%c", c);
# Line 1853  while (length-- > 0) Line 2076  while (length-- > 0)
2076    if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)    if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
2077      {      {
2078      int d = *p & 0xffff;      int d = *p & 0xffff;
2079      if (d >= 0xDC00 && d < 0xDFFF)      if (d >= 0xDC00 && d <= 0xDFFF)
2080        {        {
2081        c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;        c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
2082        length--;        length--;
# Line 1878  return yield; Line 2101  return yield;
2101  /* Must handle UTF-32 strings in utf mode. Yields number of characters printed.  /* Must handle UTF-32 strings in utf mode. Yields number of characters printed.
2102  If handed a NULL file, just counts chars without printing. */  If handed a NULL file, just counts chars without printing. */
2103    
2104  static int pchars32(PCRE_SPTR32 p, int length, FILE *f)  static int pchars32(PCRE_SPTR32 p, int length, BOOL utf, FILE *f)
2105  {  {
2106  int yield = 0;  int yield = 0;
2107    
2108    (void)(utf);  /* Avoid compiler warning */
2109    
2110  if (length < 0)  if (length < 0)
2111    length = strlen32(p);    length = strlen32(p);
2112    
# Line 2417  real_pcre32 *re = (real_pcre32 *)ere; Line 2642  real_pcre32 *re = (real_pcre32 *)ere;
2642  int op;  int op;
2643  pcre_uint32 *ptr = (pcre_uint32 *)re + re->name_table_offset;  pcre_uint32 *ptr = (pcre_uint32 *)re + re->name_table_offset;
2644  int length = re->name_count * re->name_entry_size;  int length = re->name_count * re->name_entry_size;
 #ifdef SUPPORT_UTF  
 BOOL utf = (re->options & PCRE_UTF32) != 0;  
 #endif /* SUPPORT_UTF */  
2645    
2646  /* Always flip the bytes in the main data block and study blocks. */  /* Always flip the bytes in the main data block and study blocks. */
2647    
# Line 2443  if (extra != NULL) Line 2665  if (extra != NULL)
2665    rsd->minlength = swap_uint32(rsd->minlength);    rsd->minlength = swap_uint32(rsd->minlength);
2666    }    }
2667    
2668  /* In 32-bit mode we must swap bytes  /* In 32-bit mode we must swap bytes in the name table, if present, and then in
2669  in the name table, if present, and then in the pattern itself. */  the pattern itself. */
2670    
2671  while(TRUE)  while(TRUE)
2672    {    {
# Line 2719  int all_use_dfa = 0; Line 2941  int all_use_dfa = 0;
2941  int verify_jit = 0;  int verify_jit = 0;
2942  int yield = 0;  int yield = 0;
2943  int stack_size;  int stack_size;
2944    pcre_uint8 *dbuffer = NULL;
2945    size_t dbuffer_size = 1u << 14;
2946    
2947  #if !defined NOPOSIX  #if !defined NOPOSIX
2948  int posix = 0;  int posix = 0;
# Line 2758  pcre_uint8 *gn8ptr; Line 2982  pcre_uint8 *gn8ptr;
2982  #endif  #endif
2983    
2984  /* Get buffers from malloc() so that valgrind will check their misuse when  /* Get buffers from malloc() so that valgrind will check their misuse when
2985  debugging. They grow automatically when very long lines are read. The 16-  debugging. They grow automatically when very long lines are read. The 16-
2986  and 32-bit buffers (buffer16, buffer32) are obtained only if needed. */  and 32-bit buffers (buffer16, buffer32) are obtained only if needed. */
2987    
2988  buffer = (pcre_uint8 *)malloc(buffer_size);  buffer = (pcre_uint8 *)malloc(buffer_size);
 dbuffer = (pcre_uint8 *)malloc(buffer_size);  
2989  pbuffer = (pcre_uint8 *)malloc(buffer_size);  pbuffer = (pcre_uint8 *)malloc(buffer_size);
2990    
2991  /* The outfile variable is static so that new_malloc can use it. */  /* The outfile variable is static so that new_malloc can use it. */
# Line 2810  while (argc > 1 && argv[op][0] == '-') Line 3033  while (argc > 1 && argv[op][0] == '-')
3033        force_study_options = jit_study_bits[*arg - '1'];        force_study_options = jit_study_bits[*arg - '1'];
3034      else goto BAD_ARG;      else goto BAD_ARG;
3035      }      }
3036      else if (strcmp(arg, "-8") == 0)
3037        {
3038    #ifdef SUPPORT_PCRE8
3039        pcre_mode = PCRE8_MODE;
3040    #else
3041        printf("** This version of PCRE was built without 8-bit support\n");
3042        exit(1);
3043    #endif
3044        }
3045    else if (strcmp(arg, "-16") == 0)    else if (strcmp(arg, "-16") == 0)
3046      {      {
3047  #ifdef SUPPORT_PCRE16  #ifdef SUPPORT_PCRE16
# Line 2861  while (argc > 1 && argv[op][0] == '-') Line 3093  while (argc > 1 && argv[op][0] == '-')
3093        ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),        ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
3094          *endptr == 0))          *endptr == 0))
3095      {      {
3096  #if defined(_WIN32) || defined(WIN32) || defined(__minix) || defined(NATIVE_ZOS)  #if defined(_WIN32) || defined(WIN32) || defined(__minix) || defined(NATIVE_ZOS) || defined(__VMS)
3097      printf("PCRE: -S not supported on this OS\n");      printf("PCRE: -S not supported on this OS\n");
3098      exit(1);      exit(1);
3099  #else  #else
# Line 2894  while (argc > 1 && argv[op][0] == '-') Line 3126  while (argc > 1 && argv[op][0] == '-')
3126          (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);          (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
3127          printf("%d\n", rc);          printf("%d\n", rc);
3128          yield = rc;          yield = rc;
3129    
3130    #ifdef __VMS
3131            vms_setsymbol("LINKSIZE",0,yield );
3132    #endif
3133          }          }
3134        else if (strcmp(argv[op + 1], "pcre8") == 0)        else if (strcmp(argv[op + 1], "pcre8") == 0)
3135          {          {
# Line 2904  while (argc > 1 && argv[op][0] == '-') Line 3140  while (argc > 1 && argv[op][0] == '-')
3140          printf("0\n");          printf("0\n");
3141          yield = 0;          yield = 0;
3142  #endif  #endif
3143    #ifdef __VMS
3144            vms_setsymbol("PCRE8",0,yield );
3145    #endif
3146          }          }
3147        else if (strcmp(argv[op + 1], "pcre16") == 0)        else if (strcmp(argv[op + 1], "pcre16") == 0)
3148          {          {
# Line 2914  while (argc > 1 && argv[op][0] == '-') Line 3153  while (argc > 1 && argv[op][0] == '-')
3153          printf("0\n");          printf("0\n");
3154          yield = 0;          yield = 0;
3155  #endif  #endif
3156    #ifdef __VMS
3157            vms_setsymbol("PCRE16",0,yield );
3158    #endif
3159          }          }
3160        else if (strcmp(argv[op + 1], "pcre32") == 0)        else if (strcmp(argv[op + 1], "pcre32") == 0)
3161          {          {
# Line 2924  while (argc > 1 && argv[op][0] == '-') Line 3166  while (argc > 1 && argv[op][0] == '-')
3166          printf("0\n");          printf("0\n");
3167          yield = 0;          yield = 0;
3168  #endif  #endif
3169          goto EXIT;  #ifdef __VMS
3170            vms_setsymbol("PCRE32",0,yield );
3171    #endif
3172          }          }
3173        if (strcmp(argv[op + 1], "utf") == 0)        else if (strcmp(argv[op + 1], "utf") == 0)
3174          {          {
3175  #ifdef SUPPORT_PCRE8  #ifdef SUPPORT_PCRE8
3176          if (pcre_mode == PCRE8_MODE)          if (pcre_mode == PCRE8_MODE)
# Line 2942  while (argc > 1 && argv[op][0] == '-') Line 3186  while (argc > 1 && argv[op][0] == '-')
3186  #endif  #endif
3187          printf("%d\n", rc);          printf("%d\n", rc);
3188          yield = rc;          yield = rc;
3189          goto EXIT;  #ifdef __VMS
3190            vms_setsymbol("UTF",0,yield );
3191    #endif
3192          }          }
3193        else if (strcmp(argv[op + 1], "ucp") == 0)        else if (strcmp(argv[op + 1], "ucp") == 0)
3194          {          {
# Line 2959  while (argc > 1 && argv[op][0] == '-') Line 3205  while (argc > 1 && argv[op][0] == '-')
3205        else if (strcmp(argv[op + 1], "newline") == 0)        else if (strcmp(argv[op + 1], "newline") == 0)
3206          {          {
3207          (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);          (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
3208          print_newline_config(rc, TRUE);          print_newline_config(rc, TRUE);
3209          }          }
3210        else if (strcmp(argv[op + 1], "ebcdic") == 0)        else if (strcmp(argv[op + 1], "ebcdic") == 0)
3211          {          {
3212  #ifdef EBCDIC  #ifdef EBCDIC
3213          printf("1\n");          printf("1\n");
3214          yield = 1;          yield = 1;
3215  #else  #else
3216          printf("0\n");          printf("0\n");
3217  #endif  #endif
3218          }          }
3219        else if (strcmp(argv[op + 1], "ebcdic-nl") == 0)        else if (strcmp(argv[op + 1], "ebcdic-nl") == 0)
3220          {          {
3221  #ifdef EBCDIC  #ifdef EBCDIC
3222          printf("0x%02x\n", CHAR_LF);          printf("0x%02x\n", CHAR_LF);
3223  #else  #else
3224          printf("0\n");          printf("0\n");
3225  #endif  #endif
3226          }          }
3227        else        else
3228          {          {
3229          printf("Unknown -C option: %s\n", argv[op + 1]);          printf("Unknown -C option: %s\n", argv[op + 1]);
3230          }          }
3231        goto EXIT;        goto EXIT;
3232        }        }
3233    
3234      /* No argument for -C: output all configuration information. */      /* No argument for -C: output all configuration information. */
3235    
3236      printf("PCRE version %s\n", version);      printf("PCRE version %s\n", version);
3237      printf("Compiled with\n");      printf("Compiled with\n");
3238    
3239  #ifdef EBCDIC  #ifdef EBCDIC
3240      printf("  EBCDIC code support: LF is 0x%02x\n", CHAR_LF);      printf("  EBCDIC code support: LF is 0x%02x\n", CHAR_LF);
3241  #endif  #endif
3242    
3243  /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both  /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
3244  are set, either both UTFs are supported or both are not supported. */  are set, either both UTFs are supported or both are not supported. */
# Line 3025  are set, either both UTFs are supported Line 3271  are set, either both UTFs are supported
3271      else      else
3272        printf("  No just-in-time compiler support\n");        printf("  No just-in-time compiler support\n");
3273      (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
3274      print_newline_config(rc, FALSE);      print_newline_config(rc, FALSE);
3275      (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
3276      printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :      printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :
3277                                       "all Unicode newlines");                                       "all Unicode newlines");
# Line 3279  while (!done) Line 3525  while (!done)
3525        PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);        PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
3526        if (rc == PCRE_ERROR_BADMODE)        if (rc == PCRE_ERROR_BADMODE)
3527          {          {
3528            pcre_uint16 flags_in_host_byte_order;
3529            if (REAL_PCRE_MAGIC(re) == MAGIC_NUMBER)
3530              flags_in_host_byte_order = REAL_PCRE_FLAGS(re);
3531            else
3532              flags_in_host_byte_order = swap_uint16(REAL_PCRE_FLAGS(re));
3533          /* Simulate the result of the function call below. */          /* Simulate the result of the function call below. */
3534          fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,          fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
3535            pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "",            pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "",
3536            PCRE_INFO_OPTIONS);            PCRE_INFO_OPTIONS);
3537          fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "          fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
3538            "%d-bit mode\n", 8 * CHAR_SIZE,            "%d-bit mode\n", 8 * CHAR_SIZE, 8 * (flags_in_host_byte_order & PCRE_MODE_MASK));
           8 * (REAL_PCRE_FLAGS(re) & PCRE_MODE_MASK));  
3539          new_free(re);          new_free(re);
3540          fclose(f);          fclose(f);
3541          continue;          continue;
# Line 3439  while (!done) Line 3689  while (!done)
3689        case 'Y': options |= PCRE_NO_START_OPTIMISE; break;        case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
3690        case 'Z': debug_lengths = 0; break;        case 'Z': debug_lengths = 0; break;
3691        case '8': options |= PCRE_UTF8; use_utf = 1; break;        case '8': options |= PCRE_UTF8; use_utf = 1; break;
3692          case '9': options |= PCRE_NEVER_UTF; break;
3693        case '?': options |= PCRE_NO_UTF8_CHECK; break;        case '?': options |= PCRE_NO_UTF8_CHECK; break;
3694    
3695        case 'T':        case 'T':
# Line 3672  while (!done) Line 3923  while (!done)
3923    
3924        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &name_count);        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &name_count);
3925        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &name_entry_size);        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &name_entry_size);
3926          real_pcre_size = 0;
3927  #ifdef SUPPORT_PCRE8  #ifdef SUPPORT_PCRE8
3928        if (REAL_PCRE_FLAGS(re) & PCRE_MODE8)        if (REAL_PCRE_FLAGS(re) & PCRE_MODE8)
3929          real_pcre_size = sizeof(real_pcre);          real_pcre_size = sizeof(real_pcre);
# Line 3766  while (!done) Line 4018  while (!done)
4018        if (new_info(re, NULL, PCRE_INFO_SIZE, &size) +        if (new_info(re, NULL, PCRE_INFO_SIZE, &size) +
4019            new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +            new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
4020            new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +            new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
4021            new_info(re, NULL, PCRE_INFO_FIRSTLITERAL, &first_char) +            new_info(re, NULL, PCRE_INFO_FIRSTCHARACTER, &first_char) +
4022            new_info(re, NULL, PCRE_INFO_FIRSTLITERALSET, &first_char_set) +            new_info(re, NULL, PCRE_INFO_FIRSTCHARACTERFLAGS, &first_char_set) +
4023            new_info(re, NULL, PCRE_INFO_LASTLITERAL2, &need_char) +            new_info(re, NULL, PCRE_INFO_REQUIREDCHAR, &need_char) +
4024            new_info(re, NULL, PCRE_INFO_LASTLITERAL2SET, &need_char_set) +            new_info(re, NULL, PCRE_INFO_REQUIREDCHARFLAGS, &need_char_set) +
4025            new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +            new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
4026            new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +            new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
4027            new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +            new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
# Line 4060  while (!done) Line 4312  while (!done)
4312    
4313    for (;;)    for (;;)
4314      {      {
4315      pcre_uint8 *q;  #ifdef SUPPORT_PCRE8
4316        pcre_uint8 *q8;
4317    #endif
4318    #ifdef SUPPORT_PCRE16
4319        pcre_uint16 *q16;
4320    #endif
4321    #ifdef SUPPORT_PCRE32
4322        pcre_uint32 *q32;
4323    #endif
4324      pcre_uint8 *bptr;      pcre_uint8 *bptr;
4325      int *use_offsets = offsets;      int *use_offsets = offsets;
4326      int use_size_offsets = size_offsets;      int use_size_offsets = size_offsets;
# Line 4132  while (!done) Line 4392  while (!done)
4392      p = buffer;      p = buffer;
4393      while (isspace(*p)) p++;      while (isspace(*p)) p++;
4394    
4395      bptr = q = dbuffer;  #ifndef NOUTF
4396        /* Check that the data is well-formed UTF-8 if we're in UTF mode. To create
4397           invalid input to pcre_exec, you must use \x?? or \x{} sequences. */
4398        if (use_utf)
4399          {
4400          pcre_uint8 *q;
4401          pcre_uint32 cc;
4402          int n = 1;
4403    
4404          for (q = p; n > 0 && *q; q += n) n = utf82ord(q, &cc);
4405          if (n <= 0)
4406            {
4407            fprintf(outfile, "**Failed: invalid UTF-8 string cannot be used as input in UTF mode\n");
4408            goto NEXT_DATA;
4409            }
4410          }
4411    #endif
4412    
4413    #ifdef SUPPORT_VALGRIND
4414        /* Mark the dbuffer as addressable but undefined again. */
4415        if (dbuffer != NULL)
4416          {
4417          VALGRIND_MAKE_MEM_UNDEFINED(dbuffer, dbuffer_size * CHAR_SIZE);
4418          }
4419    #endif
4420    
4421        /* Allocate a buffer to hold the data line. len+1 is an upper bound on
4422           the number of pcre_uchar units that will be needed. */
4423        if (dbuffer == NULL || (size_t)len >= dbuffer_size)
4424          {
4425          dbuffer_size *= 2;
4426          dbuffer = (pcre_uint8 *)realloc(dbuffer, dbuffer_size * CHAR_SIZE);
4427          if (dbuffer == NULL)
4428            {
4429            fprintf(stderr, "pcretest: malloc(%d) failed\n", (int)dbuffer_size);
4430            exit(1);
4431            }
4432          }
4433    
4434    #ifdef SUPPORT_PCRE8
4435        q8 = (pcre_uint8 *) dbuffer;
4436    #endif
4437    #ifdef SUPPORT_PCRE16
4438        q16 = (pcre_uint16 *) dbuffer;
4439    #endif
4440    #ifdef SUPPORT_PCRE32
4441        q32 = (pcre_uint32 *) dbuffer;
4442    #endif
4443    
4444      while ((c = *p++) != 0)      while ((c = *p++) != 0)
4445        {        {
4446        int i = 0;        int i = 0;
# Line 4145  while (!done) Line 4453  while (!done)
4453    
4454        if (c != '\\')        if (c != '\\')
4455          {          {
4456          if (use_utf)  #ifndef NOUTF
4457            {          if (use_utf && HASUTF8EXTRALEN(c)) { GETUTF8INC(c, p); }
4458            *q++ = c;  #endif
           continue;  
           }  
4459          }          }
4460    
4461        /* Handle backslash escapes */        /* Handle backslash escapes */
# Line 4210  while (!done) Line 4516  while (!done)
4516            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
4517            p++;            p++;
4518            }            }
4519          if (use_utf)  #if !defined NOUTF && defined SUPPORT_PCRE8
4520            if (use_utf && (pcre_mode == PCRE8_MODE))
4521            {            {
4522            *q++ = c;            *q8++ = c;
4523            continue;            continue;
4524            }            }
4525    #endif
4526          break;          break;
4527    
4528          case 0:   /* \ followed by EOF allows for an empty line */          case 0:   /* \ followed by EOF allows for an empty line */
# Line 4420  while (!done) Line 4728  while (!done)
4728          continue;          continue;
4729          }          }
4730    
4731        /* We now have a character value in c that may be greater than 255. In        /* We now have a character value in c that may be greater than 255.
4732        16-bit or 32-bit mode, we always convert characters to UTF-8 so that        In 8-bit mode we convert to UTF-8 if we are in UTF mode. Values greater
4733        values greater than 255 can be passed to non-UTF 16- or 32-bit strings.        than 127 in UTF mode must have come from \x{...} or octal constructs
       In 8-bit       mode we convert to UTF-8 if we are in UTF mode. Values greater  
       than 127       in UTF mode must have come from \x{...} or octal constructs  
4734        because values from \x.. get this far only in non-UTF mode. */        because values from \x.. get this far only in non-UTF mode. */
4735    
4736  #if !defined NOUTF || defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32  #ifdef SUPPORT_PCRE8
4737        if (pcre_mode != PCRE8_MODE || use_utf)        if (pcre_mode == PCRE8_MODE)
4738          {          {
4739          pcre_uint8 buff8[8];  #ifndef NOUTF
4740          int ii, utn;          if (use_utf)
4741          utn = ord2utf8(c, buff8);            {
4742          for (ii = 0; ii < utn; ii++) *q++ = buff8[ii];            if (c > 0x7fffffff)
4743                {
4744                fprintf(outfile, "** Character \\x{%x} is greater than 0x7fffffff "
4745                  "and so cannot be converted to UTF-8\n", c);
4746                goto NEXT_DATA;
4747                }
4748              q8 += ord2utf8(c, q8);
4749              }
4750            else
4751    #endif
4752              {
4753              if (c > 0xffu)
4754                {
4755                fprintf(outfile, "** Character \\x{%x} is greater than 255 "
4756                  "and UTF-8 mode is not enabled.\n", c);
4757                fprintf(outfile, "** Truncation will probably give the wrong "
4758                  "result.\n");
4759                }
4760              *q8++ = c;
4761              }
4762          }          }
       else  
4763  #endif  #endif
4764    #ifdef SUPPORT_PCRE16
4765          if (pcre_mode == PCRE16_MODE)
4766          {          {
4767          if (c > 255)  #ifndef NOUTF
4768            if (use_utf)
4769            {            {
4770            fprintf(outfile, "** Character \\x{%x} is greater than 255 "            if (c > 0x10ffffu)
4771              "and UTF-8 mode is not enabled.\n", c);              {
4772            fprintf(outfile, "** Truncation will probably give the wrong "              fprintf(outfile, "** Failed: character \\x{%x} is greater than "
4773              "result.\n");                "0x10ffff and so cannot be converted to UTF-16\n", c);
4774                goto NEXT_DATA;
4775                }
4776              else if (c >= 0x10000u)
4777                {
4778                c-= 0x10000u;
4779                *q16++ = 0xD800 | (c >> 10);
4780                *q16++ = 0xDC00 | (c & 0x3ff);
4781                }
4782              else
4783                *q16++ = c;
4784              }
4785            else
4786    #endif
4787              {
4788              if (c > 0xffffu)
4789                {
4790                fprintf(outfile, "** Character \\x{%x} is greater than 0xffff "
4791                  "and UTF-16 mode is not enabled.\n", c);
4792                fprintf(outfile, "** Truncation will probably give the wrong "
4793                  "result.\n");
4794                }
4795    
4796              *q16++ = c;
4797            }            }
         *q++ = c;  
4798          }          }
4799    #endif
4800    #ifdef SUPPORT_PCRE32
4801          if (pcre_mode == PCRE32_MODE)
4802            {
4803            *q32++ = c;
4804            }
4805    #endif
4806    
4807        }        }
4808    
4809      /* Reached end of subject string */      /* Reached end of subject string */
4810    
4811      *q = 0;  #ifdef SUPPORT_PCRE8
4812      len = (int)(q - dbuffer);      if (pcre_mode == PCRE8_MODE)
4813        {
4814          *q8 = 0;
4815          len = (int)(q8 - (pcre_uint8 *)dbuffer);
4816        }
4817    #endif
4818    #ifdef SUPPORT_PCRE16
4819        if (pcre_mode == PCRE16_MODE)
4820        {
4821          *q16 = 0;
4822          len = (int)(q16 - (pcre_uint16 *)dbuffer);
4823        }
4824    #endif
4825    #ifdef SUPPORT_PCRE32
4826        if (pcre_mode == PCRE32_MODE)
4827        {
4828          *q32 = 0;
4829          len = (int)(q32 - (pcre_uint32 *)dbuffer);
4830        }
4831    #endif
4832    
4833        /* If we're compiling with explicit valgrind support, mark the data from after
4834        its end to the end of the buffer as unaddressable, so that a read over the end
4835        of the buffer will be seen by valgrind, even if it doesn't cause a crash.
4836        If we're not building with valgrind support, move the data to the end of the
4837        buffer instead, so that a read over the end is more likely to cause a crash.
4838        If we are using the POSIX interface, we must include the terminating zero. */
4839    
4840      /* Move the data to the end of the buffer so that a read over the end of      bptr = dbuffer;
     the buffer will be seen by valgrind, even if it doesn't cause a crash. If  
     we are using the POSIX interface, we must include the terminating zero. */  
4841    
4842  #if !defined NOPOSIX  #if !defined NOPOSIX
4843      if (posix || do_posix)      if (posix || do_posix)
4844        {        {
4845        memmove(bptr + buffer_size - len - 1, bptr, len + 1);  #ifdef SUPPORT_VALGRIND
4846        bptr += buffer_size - len - 1;        VALGRIND_MAKE_MEM_NOACCESS(dbuffer + len + 1, dbuffer_size - (len + 1));
4847    #else
4848          memmove(bptr + dbuffer_size - len - 1, bptr, len + 1);
4849          bptr += dbuffer_size - len - 1;
4850    #endif
4851        }        }
4852      else      else
4853  #endif  #endif
4854        {        {
4855        memmove(bptr + buffer_size - len, bptr, len);  #ifdef SUPPORT_VALGRIND
4856        bptr += buffer_size - len;        VALGRIND_MAKE_MEM_NOACCESS(dbuffer + len * CHAR_SIZE, (dbuffer_size - len) * CHAR_SIZE);
4857    #else
4858          bptr = memmove(bptr + (dbuffer_size - len) * CHAR_SIZE, bptr, len * CHAR_SIZE);
4859    #endif
4860        }        }
4861    
4862      if ((all_use_dfa || use_dfa) && find_match_limit)      if ((all_use_dfa || use_dfa) && find_match_limit)
# Line 4532  while (!done) Line 4920  while (!done)
4920    
4921      /* Handle matching via the native interface - repeats for /g and /G */      /* Handle matching via the native interface - repeats for /g and /G */
4922    
 #ifdef SUPPORT_PCRE16  
     if (pcre_mode == PCRE16_MODE)  
       {  
       len = to16(TRUE, bptr, REAL_PCRE_OPTIONS(re) & PCRE_UTF8, len);  
       switch(len)  
         {  
         case -1:  
         fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "  
           "converted to UTF-16\n");  
         goto NEXT_DATA;  
   
         case -2:  
         fprintf(outfile, "**Failed: character value greater than 0x10ffff "  
           "cannot be converted to UTF-16\n");  
         goto NEXT_DATA;  
   
         case -3:  
         fprintf(outfile, "**Failed: character value greater than 0xffff "  
           "cannot be converted to 16-bit in non-UTF mode\n");  
         goto NEXT_DATA;  
   
         default:  
         break;  
         }  
       bptr = (pcre_uint8 *)buffer16;  
       }  
 #endif  
   
 #ifdef SUPPORT_PCRE32  
     if (pcre_mode == PCRE32_MODE)  
       {  
       len = to32(TRUE, bptr, REAL_PCRE_OPTIONS(re) & PCRE_UTF32, len);  
       switch(len)  
         {  
         case -1:  
         fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "  
           "converted to UTF-32\n");  
         goto NEXT_DATA;  
   
         case -2:  
         fprintf(outfile, "**Failed: character value greater than 0x10ffff "  
           "cannot be converted to UTF-32\n");  
         goto NEXT_DATA;  
   
         case -3:  
         fprintf(outfile, "**Failed: character value is ill-formed UTF-32\n");  
         goto NEXT_DATA;  
   
         default:  
         break;  
         }  
       bptr = (pcre_uint8 *)buffer32;  
       }  
 #endif  
   
4923      /* Ensure that there is a JIT callback if we want to verify that JIT was      /* Ensure that there is a JIT callback if we want to verify that JIT was
4924      actually used. If jit_stack == NULL, no stack has yet been assigned. */      actually used. If jit_stack == NULL, no stack has yet been assigned. */
4925    
# Line 4703  while (!done) Line 5036  while (!done)
5036          if (count == 0)          if (count == 0)
5037            {            {
5038            fprintf(outfile, "Matched, but too many substrings\n");            fprintf(outfile, "Matched, but too many substrings\n");
5039            count = use_size_offsets/3;            /* 2 is a special case; match can be returned */
5040              count = (use_size_offsets == 2)? 1 : use_size_offsets/3;
5041            }            }
5042          }          }
5043    
# Line 4717  while (!done) Line 5051  while (!done)
5051  #if !defined NODFA  #if !defined NODFA
5052          if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else          if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
5053  #endif  #endif
5054            maxcount = use_size_offsets/3;            /* 2 is a special case; match can be returned */
5055              maxcount = (use_size_offsets == 2)? 1 : use_size_offsets/3;
5056    
5057          /* This is a check against a lunatic return value. */          /* This is a check against a lunatic return value. */
5058    
# Line 4808  while (!done) Line 5143  while (!done)
5143            int rc;            int rc;
5144            char copybuffer[256];            char copybuffer[256];
5145    
5146    #ifdef SUPPORT_PCRE32
5147              if (pcre_mode == PCRE32_MODE)
5148                {
5149                if (*(pcre_uint32 *)cnptr == 0) break;
5150                }
5151    #endif
5152    #ifdef SUPPORT_PCRE16
5153            if (pcre_mode == PCRE16_MODE)            if (pcre_mode == PCRE16_MODE)
5154              {              {
5155              if (*(pcre_uint16 *)cnptr == 0) break;              if (*(pcre_uint16 *)cnptr == 0) break;
5156              }              }
5157            else  #endif
5158    #ifdef SUPPORT_PCRE8
5159              if (pcre_mode == PCRE8_MODE)
5160              {              {
5161              if (*(pcre_uint8 *)cnptr == 0) break;              if (*(pcre_uint8 *)cnptr == 0) break;
5162              }              }
5163    #endif
5164    
5165            PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,            PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
5166              cnptr, copybuffer, sizeof(copybuffer));              cnptr, copybuffer, sizeof(copybuffer));
# Line 4863  while (!done) Line 5208  while (!done)
5208            int rc;            int rc;
5209            const char *substring;            const char *substring;
5210    
5211    #ifdef SUPPORT_PCRE32
5212              if (pcre_mode == PCRE32_MODE)
5213                {
5214                if (*(pcre_uint32 *)gnptr == 0) break;
5215                }
5216    #endif
5217    #ifdef SUPPORT_PCRE16
5218            if (pcre_mode == PCRE16_MODE)            if (pcre_mode == PCRE16_MODE)
5219              {              {
5220              if (*(pcre_uint16 *)gnptr == 0) break;              if (*(pcre_uint16 *)gnptr == 0) break;
5221              }              }
5222            else  #endif
5223    #ifdef SUPPORT_PCRE8
5224              if (pcre_mode == PCRE8_MODE)
5225              {              {
5226              if (*(pcre_uint8 *)gnptr == 0) break;              if (*(pcre_uint8 *)gnptr == 0) break;
5227              }              }
5228    #endif
5229    
5230            PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,            PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
5231              gnptr, &substring);              gnptr, &substring);
# Line 4915  while (!done) Line 5270  while (!done)
5270            }            }
5271          }          }
5272    
5273        /* There was a partial match */        /* There was a partial match. If the bumpalong point is not the same as
5274          the first inspected character, show the offset explicitly. */
5275    
5276        else if (count == PCRE_ERROR_PARTIAL)        else if (count == PCRE_ERROR_PARTIAL)
5277          {          {
5278          if (markptr == NULL) fprintf(outfile, "Partial match");          fprintf(outfile, "Partial match");
5279          else          if (use_size_offsets > 2 && use_offsets[0] != use_offsets[2])
5280              fprintf(outfile, " at offset %d", use_offsets[2]);
5281            if (markptr != NULL)
5282            {            {
5283            fprintf(outfile, "Partial match, mark=");            fprintf(outfile, ", mark=");
5284            PCHARSV(markptr, 0, -1, outfile);            PCHARSV(markptr, 0, -1, outfile);
5285            }            }
5286          if (use_size_offsets > 1)          if (use_size_offsets > 1)
# Line 5136  if (dfa_workspace != NULL) Line 5494  if (dfa_workspace != NULL)
5494    free(dfa_workspace);    free(dfa_workspace);
5495  #endif  #endif
5496    
5497    #if defined(__VMS)
5498      yield = SS$_NORMAL;  /* Return values via DCL symbols */
5499    #endif
5500    
5501  return yield;  return yield;
5502  }  }
5503    

Legend:
Removed from v.1087  
changed lines
  Added in v.1309

  ViewVC Help
Powered by ViewVC 1.1.5