/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 895 by ph10, Fri Jan 20 12:12:03 2012 UTC revision 1117 by chpe, Tue Oct 16 15:57:27 2012 UTC
# Line 46  pcre_internal.h that depend on COMPILE_P Line 46  pcre_internal.h that depend on COMPILE_P
46  however, make use of SUPPORT_PCRE8 and SUPPORT_PCRE16 to ensure that it calls  however, make use of SUPPORT_PCRE8 and SUPPORT_PCRE16 to ensure that it calls
47  only supported library functions. */  only supported library functions. */
48    
   
49  #ifdef HAVE_CONFIG_H  #ifdef HAVE_CONFIG_H
50  #include "config.h"  #include "config.h"
51  #endif  #endif
# Line 59  only supported library functions. */ Line 58  only supported library functions. */
58  #include <locale.h>  #include <locale.h>
59  #include <errno.h>  #include <errno.h>
60    
61  #ifdef SUPPORT_LIBREADLINE  /* Both libreadline and libedit are optionally supported. The user-supplied
62    original patch uses readline/readline.h for libedit, but in at least one system
63    it is installed as editline/readline.h, so the configuration code now looks for
64    that first, falling back to readline/readline.h. */
65    
66    #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
67  #ifdef HAVE_UNISTD_H  #ifdef HAVE_UNISTD_H
68  #include <unistd.h>  #include <unistd.h>
69  #endif  #endif
70    #if defined(SUPPORT_LIBREADLINE)
71  #include <readline/readline.h>  #include <readline/readline.h>
72  #include <readline/history.h>  #include <readline/history.h>
73    #else
74    #if defined(HAVE_EDITLINE_READLINE_H)
75    #include <editline/readline.h>
76    #else
77    #include <readline/readline.h>
78    #endif
79    #endif
80  #endif  #endif
   
81    
82  /* A number of things vary for Windows builds. Originally, pcretest opened its  /* A number of things vary for Windows builds. Originally, pcretest opened its
83  input and output without "b"; then I was told that "b" was needed in some  input and output without "b"; then I was told that "b" was needed in some
# Line 101  input mode under Windows. */ Line 112  input mode under Windows. */
112  #else  #else
113  #include <sys/time.h>          /* These two includes are needed */  #include <sys/time.h>          /* These two includes are needed */
114  #include <sys/resource.h>      /* for setrlimit(). */  #include <sys/resource.h>      /* for setrlimit(). */
115    #if defined NATIVE_ZOS         /* z/OS uses non-binary I/O */
116    #define INPUT_MODE   "r"
117    #define OUTPUT_MODE  "w"
118    #else
119  #define INPUT_MODE   "rb"  #define INPUT_MODE   "rb"
120  #define OUTPUT_MODE  "wb"  #define OUTPUT_MODE  "wb"
121  #endif  #endif
122    #endif
123    
124    #define PRIV(name) name
125    
126  /* We have to include pcre_internal.h because we need the internal info for  /* We have to include pcre_internal.h because we need the internal info for
127  displaying the results of pcre_study() and we also need to know about the  displaying the results of pcre_study() and we also need to know about the
# Line 117  appropriately for an application, not fo Line 134  appropriately for an application, not fo
134    
135  #include "pcre.h"  #include "pcre.h"
136    
137  #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8  #if defined SUPPORT_PCRE32 && !defined SUPPORT_PCRE8 && !defined SUPPORT_PCRE16
138    /* Configure internal macros to 32 bit mode. */
139    #define COMPILE_PCRE32
140    #endif
141    #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8 && !defined SUPPORT_PCRE32
142  /* Configure internal macros to 16 bit mode. */  /* Configure internal macros to 16 bit mode. */
143  #define COMPILE_PCRE16  #define COMPILE_PCRE16
144  #endif  #endif
145    #if defined SUPPORT_PCRE8 && !defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE32
146    /* Configure internal macros to 8 bit mode. */
147    #define COMPILE_PCRE8
148    #endif
149    
150  #include "pcre_internal.h"  #include "pcre_internal.h"
151    
152  /* The pcre_printint() function, which prints the internal form of a compiled  /* The pcre_printint() function, which prints the internal form of a compiled
153  regex, is held in a separate file so that (a) it can be compiled in either  regex, is held in a separate file so that (a) it can be compiled in either
154  8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c  8-, 16- or 32-bit mode, and (b) it can be #included directly in pcre_compile.c
155  when that is compiled in debug mode. */  when that is compiled in debug mode. */
156    
157  #ifdef SUPPORT_PCRE8  #ifdef SUPPORT_PCRE8
# Line 135  void pcre_printint(pcre *external_re, FI Line 160  void pcre_printint(pcre *external_re, FI
160  #ifdef SUPPORT_PCRE16  #ifdef SUPPORT_PCRE16
161  void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);  void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
162  #endif  #endif
163    #ifdef SUPPORT_PCRE32
164    void pcre32_printint(pcre *external_re, FILE *f, BOOL print_lengths);
165    #endif
166    
167  /* We need access to some of the data tables that PCRE uses. So as not to have  /* We need access to some of the data tables that PCRE uses. So as not to have
168  to keep two copies, we include the source file here, changing the names of the  to keep two copies, we include the source files here, changing the names of the
169  external symbols to prevent clashes. */  external symbols to prevent clashes. */
170    
171  #define PCRE_INCLUDED  #define PCRE_INCLUDED
 #undef PRIV  
 #define PRIV(name) name  
172    
173  #include "pcre_tables.c"  #include "pcre_tables.c"
174    #include "pcre_ucd.c"
175    
176  /* The definition of the macro PRINTABLE, which determines whether to print an  /* The definition of the macro PRINTABLE, which determines whether to print an
177  output character as-is or as a hex value when showing compiled patterns, is  output character as-is or as a hex value when showing compiled patterns, is
# Line 160  that differ in their output from isprint Line 187  that differ in their output from isprint
187    
188  #define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))  #define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))
189    
190  /* Posix support is disabled in 16 bit only mode. */  /* Posix support is disabled in 16 or 32 bit only mode. */
191  #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8 && !defined NOPOSIX  #if !defined SUPPORT_PCRE8 && !defined NOPOSIX
192  #define NOPOSIX  #define NOPOSIX
193  #endif  #endif
194    
# Line 184  automatically cut out the UTF support if Line 211  automatically cut out the UTF support if
211  #endif  #endif
212  #endif  #endif
213    
214  /* To make the code a bit tidier for 8-bit and 16-bit support, we define macros  /* To make the code a bit tidier for 8/16/32-bit support, we define macros
215  for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called  for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
216  only from one place and is handled differently). I couldn't dream up any way of  only from one place and is handled differently). I couldn't dream up any way of
217  using a single macro to do this in a generic way, because of the many different  using a single macro to do this in a generic way, because of the many different
# Line 206  argument, the casting might be incorrect Line 233  argument, the casting might be incorrect
233  #define PCHARSV8(p, offset, len, f) \  #define PCHARSV8(p, offset, len, f) \
234    (void)pchars((pcre_uint8 *)(p) + offset, len, f)    (void)pchars((pcre_uint8 *)(p) + offset, len, f)
235    
236  #define READ_CAPTURE_NAME8(p, cn8, cn16, re) \  #define READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re) \
237    p = read_capture_name8(p, cn8, re)    p = read_capture_name8(p, cn8, re)
238    
239  #define STRLEN8(p) ((int)strlen((char *)p))  #define STRLEN8(p) ((int)strlen((char *)p))
# Line 288  argument, the casting might be incorrect Line 315  argument, the casting might be incorrect
315  #define PCHARSV16(p, offset, len, f) \  #define PCHARSV16(p, offset, len, f) \
316    (void)pchars16((PCRE_SPTR16)(p) + offset, len, f)    (void)pchars16((PCRE_SPTR16)(p) + offset, len, f)
317    
318  #define READ_CAPTURE_NAME16(p, cn8, cn16, re) \  #define READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re) \
319    p = read_capture_name16(p, cn16, re)    p = read_capture_name16(p, cn16, re)
320    
321  #define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))  #define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))
# Line 367  argument, the casting might be incorrect Line 394  argument, the casting might be incorrect
394    
395  #endif /* SUPPORT_PCRE16 */  #endif /* SUPPORT_PCRE16 */
396    
397    /* -----------------------------------------------------------*/
398    
399    #ifdef SUPPORT_PCRE32
400    
401    #define PCHARS32(lv, p, offset, len, f) \
402      lv = pchars32((PCRE_SPTR32)(p) + offset, len, use_utf, f)
403    
404    #define PCHARSV32(p, offset, len, f)                \
405      (void)pchars32((PCRE_SPTR32)(p) + offset, len, use_utf, f)
406    
407    #define READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re) \
408      p = read_capture_name32(p, cn32, re)
409    
410    #define STRLEN32(p) ((int)strlen32((PCRE_SPTR32)p))
411    
412    #define SET_PCRE_CALLOUT32(callout) \
413      pcre32_callout = (int (*)(pcre32_callout_block *))callout
414    
415    #define PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata) \
416      pcre32_assign_jit_stack((pcre32_extra *)extra, \
417        (pcre32_jit_callback)callback, userdata)
418    
419    #define PCRE_COMPILE32(re, pat, options, error, erroffset, tables) \
420      re = (pcre *)pcre32_compile((PCRE_SPTR32)pat, options, error, erroffset, \
421        tables)
422    
423    #define PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
424        namesptr, cbuffer, size) \
425      rc = pcre32_copy_named_substring((pcre32 *)re, (PCRE_SPTR32)bptr, offsets, \
426        count, (PCRE_SPTR32)namesptr, (PCRE_UCHAR32 *)cbuffer, size/2)
427    
428    #define PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size) \
429      rc = pcre32_copy_substring((PCRE_SPTR32)bptr, offsets, count, i, \
430        (PCRE_UCHAR32 *)cbuffer, size/2)
431    
432    #define PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \
433        offsets, size_offsets, workspace, size_workspace) \
434      count = pcre32_dfa_exec((pcre32 *)re, (pcre32_extra *)extra, \
435        (PCRE_SPTR32)bptr, len, start_offset, options, offsets, size_offsets, \
436        workspace, size_workspace)
437    
438    #define PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \
439        offsets, size_offsets) \
440      count = pcre32_exec((pcre32 *)re, (pcre32_extra *)extra, (PCRE_SPTR32)bptr, \
441        len, start_offset, options, offsets, size_offsets)
442    
443    #define PCRE_FREE_STUDY32(extra) \
444      pcre32_free_study((pcre32_extra *)extra)
445    
446    #define PCRE_FREE_SUBSTRING32(substring) \
447      pcre32_free_substring((PCRE_SPTR32)substring)
448    
449    #define PCRE_FREE_SUBSTRING_LIST32(listptr) \
450      pcre32_free_substring_list((PCRE_SPTR32 *)listptr)
451    
452    #define PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
453        getnamesptr, subsptr) \
454      rc = pcre32_get_named_substring((pcre32 *)re, (PCRE_SPTR32)bptr, offsets, \
455        count, (PCRE_SPTR32)getnamesptr, (PCRE_SPTR32 *)(void*)subsptr)
456    
457    #define PCRE_GET_STRINGNUMBER32(n, rc, ptr) \
458      n = pcre32_get_stringnumber(re, (PCRE_SPTR32)ptr)
459    
460    #define PCRE_GET_SUBSTRING32(rc, bptr, offsets, count, i, subsptr) \
461      rc = pcre32_get_substring((PCRE_SPTR32)bptr, offsets, count, i, \
462        (PCRE_SPTR32 *)(void*)subsptr)
463    
464    #define PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr) \
465      rc = pcre32_get_substring_list((PCRE_SPTR32)bptr, offsets, count, \
466        (PCRE_SPTR32 **)(void*)listptr)
467    
468    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables) \
469      rc = pcre32_pattern_to_host_byte_order((pcre32 *)re, (pcre32_extra *)extra, \
470        tables)
471    
472    #define PCRE_PRINTINT32(re, outfile, debug_lengths) \
473      pcre32_printint(re, outfile, debug_lengths)
474    
475    #define PCRE_STUDY32(extra, re, options, error) \
476      extra = (pcre_extra *)pcre32_study((pcre32 *)re, options, error)
477    
478    #define PCRE_JIT_STACK_ALLOC32(startsize, maxsize) \
479      (pcre_jit_stack *)pcre32_jit_stack_alloc(startsize, maxsize)
480    
481    #define PCRE_JIT_STACK_FREE32(stack) \
482      pcre32_jit_stack_free((pcre32_jit_stack *)stack)
483    
484    #endif /* SUPPORT_PCRE32 */
485    
486    
487  /* ----- Both modes are supported; a runtime test is needed, except for  /* ----- Both modes are supported; a runtime test is needed, except for
488  pcre_config(), and the JIT stack functions, when it doesn't matter which  pcre_config(), and the JIT stack functions, when it doesn't matter which
489  version is called. ----- */  version is called. ----- */
490    
491  #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16  enum {
492      PCRE8_MODE,
493      PCRE16_MODE,
494      PCRE32_MODE
495    };
496    
497    #if (defined (SUPPORT_PCRE8) + defined (SUPPORT_PCRE16) + defined (SUPPORT_PCRE32)) >= 2
498    
499  #define CHAR_SIZE (use_pcre16? 2:1)  #define CHAR_SIZE (1 << pcre_mode)
500    
501  #define PCHARS(lv, p, offset, len, f) \  #define PCHARS(lv, p, offset, len, f) \
502    if (use_pcre16) \    if (pcre_mode == PCRE32_MODE) \
503        PCHARS32(lv, p, offset, len, f); \
504      else if (pcre_mode == PCRE16_MODE) \
505      PCHARS16(lv, p, offset, len, f); \      PCHARS16(lv, p, offset, len, f); \
506    else \    else \
507      PCHARS8(lv, p, offset, len, f)      PCHARS8(lv, p, offset, len, f)
508    
509  #define PCHARSV(p, offset, len, f) \  #define PCHARSV(p, offset, len, f) \
510    if (use_pcre16) \    if (pcre_mode == PCRE32_MODE) \
511        PCHARSV32(p, offset, len, f); \
512      else if (pcre_mode == PCRE16_MODE) \
513      PCHARSV16(p, offset, len, f); \      PCHARSV16(p, offset, len, f); \
514    else \    else \
515      PCHARSV8(p, offset, len, f)      PCHARSV8(p, offset, len, f)
516    
517  #define READ_CAPTURE_NAME(p, cn8, cn16, re) \  #define READ_CAPTURE_NAME(p, cn8, cn16, cn32, re) \
518    if (use_pcre16) \    if (pcre_mode == PCRE32_MODE) \
519      READ_CAPTURE_NAME16(p, cn8, cn16, re); \      READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re); \
520      else if (pcre_mode == PCRE16_MODE) \
521        READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re); \
522    else \    else \
523      READ_CAPTURE_NAME8(p, cn8, cn16, re)      READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re)
524    
525  #define SET_PCRE_CALLOUT(callout) \  #define SET_PCRE_CALLOUT(callout) \
526    if (use_pcre16) \    if (pcre_mode == PCRE32_MODE) \
527        SET_PCRE_CALLOUT32(callout); \
528      else if (pcre_mode == PCRE16_MODE) \
529      SET_PCRE_CALLOUT16(callout); \      SET_PCRE_CALLOUT16(callout); \
530    else \    else \
531      SET_PCRE_CALLOUT8(callout)      SET_PCRE_CALLOUT8(callout)
532    
533  #define STRLEN(p) (use_pcre16? STRLEN16(p) : STRLEN8(p))  #define STRLEN(p) (pcre_mode == PCRE32_MODE ? STRLEN32(p) : pcre_mode == PCRE16_MODE ? STRLEN16(p) : STRLEN8(p))
534    
535  #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \  #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
536    if (use_pcre16) \    if (pcre_mode == PCRE32_MODE) \
537        PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata); \
538      else if (pcre_mode == PCRE16_MODE) \
539      PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \      PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \
540    else \    else \
541      PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata)      PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata)
542    
543  #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \  #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
544    if (use_pcre16) \    if (pcre_mode == PCRE32_MODE) \
545        PCRE_COMPILE32(re, pat, options, error, erroffset, tables); \
546      else if (pcre_mode == PCRE16_MODE) \
547      PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \      PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
548    else \    else \
549      PCRE_COMPILE8(re, pat, options, error, erroffset, tables)      PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
# Line 418  version is called. ----- */ Line 552  version is called. ----- */
552    
553  #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \  #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
554      namesptr, cbuffer, size) \      namesptr, cbuffer, size) \
555    if (use_pcre16) \    if (pcre_mode == PCRE32_MODE) \
556        PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
557          namesptr, cbuffer, size); \
558      else if (pcre_mode == PCRE16_MODE) \
559      PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \      PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
560        namesptr, cbuffer, size); \        namesptr, cbuffer, size); \
561    else \    else \
# Line 426  version is called. ----- */ Line 563  version is called. ----- */
563        namesptr, cbuffer, size)        namesptr, cbuffer, size)
564    
565  #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \  #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
566    if (use_pcre16) \    if (pcre_mode == PCRE32_MODE) \
567        PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size); \
568      else if (pcre_mode == PCRE16_MODE) \
569      PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \      PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
570    else \    else \
571      PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size)      PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size)
572    
573  #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \  #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
574      offsets, size_offsets, workspace, size_workspace) \      offsets, size_offsets, workspace, size_workspace) \
575    if (use_pcre16) \    if (pcre_mode == PCRE32_MODE) \
576        PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \
577          offsets, size_offsets, workspace, size_workspace); \
578      else if (pcre_mode == PCRE16_MODE) \
579      PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \      PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
580        offsets, size_offsets, workspace, size_workspace); \        offsets, size_offsets, workspace, size_workspace); \
581    else \    else \
# Line 442  version is called. ----- */ Line 584  version is called. ----- */
584    
585  #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \  #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
586      offsets, size_offsets) \      offsets, size_offsets) \
587    if (use_pcre16) \    if (pcre_mode == PCRE32_MODE) \
588        PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \
589          offsets, size_offsets); \
590      else if (pcre_mode == PCRE16_MODE) \
591      PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \      PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
592        offsets, size_offsets); \        offsets, size_offsets); \
593    else \    else \
# Line 450  version is called. ----- */ Line 595  version is called. ----- */
595        offsets, size_offsets)        offsets, size_offsets)
596    
597  #define PCRE_FREE_STUDY(extra) \  #define PCRE_FREE_STUDY(extra) \
598    if (use_pcre16) \    if (pcre_mode == PCRE32_MODE) \
599        PCRE_FREE_STUDY32(extra); \
600      else if (pcre_mode == PCRE16_MODE) \
601      PCRE_FREE_STUDY16(extra); \      PCRE_FREE_STUDY16(extra); \
602    else \    else \
603      PCRE_FREE_STUDY8(extra)      PCRE_FREE_STUDY8(extra)
604    
605  #define PCRE_FREE_SUBSTRING(substring) \  #define PCRE_FREE_SUBSTRING(substring) \
606    if (use_pcre16) \    if (pcre_mode == PCRE32_MODE) \
607        PCRE_FREE_SUBSTRING32(substring); \
608      else if (pcre_mode == PCRE16_MODE) \
609      PCRE_FREE_SUBSTRING16(substring); \      PCRE_FREE_SUBSTRING16(substring); \
610    else \    else \
611      PCRE_FREE_SUBSTRING8(substring)      PCRE_FREE_SUBSTRING8(substring)
612    
613  #define PCRE_FREE_SUBSTRING_LIST(listptr) \  #define PCRE_FREE_SUBSTRING_LIST(listptr) \
614    if (use_pcre16) \    if (pcre_mode == PCRE32_MODE) \
615        PCRE_FREE_SUBSTRING_LIST32(listptr); \
616      else if (pcre_mode == PCRE16_MODE) \
617      PCRE_FREE_SUBSTRING_LIST16(listptr); \      PCRE_FREE_SUBSTRING_LIST16(listptr); \
618    else \    else \
619      PCRE_FREE_SUBSTRING_LIST8(listptr)      PCRE_FREE_SUBSTRING_LIST8(listptr)
620    
621  #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \  #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
622      getnamesptr, subsptr) \      getnamesptr, subsptr) \
623    if (use_pcre16) \    if (pcre_mode == PCRE32_MODE) \
624        PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
625          getnamesptr, subsptr); \
626      else if (pcre_mode == PCRE16_MODE) \
627      PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \      PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
628        getnamesptr, subsptr); \        getnamesptr, subsptr); \
629    else \    else \
# Line 477  version is called. ----- */ Line 631  version is called. ----- */
631        getnamesptr, subsptr)        getnamesptr, subsptr)
632    
633  #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \  #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
634    if (use_pcre16) \    if (pcre_mode == PCRE32_MODE) \
635        PCRE_GET_STRINGNUMBER32(n, rc, ptr); \
636      else if (pcre_mode == PCRE16_MODE) \
637      PCRE_GET_STRINGNUMBER16(n, rc, ptr); \      PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
638    else \    else \
639      PCRE_GET_STRINGNUMBER8(n, rc, ptr)      PCRE_GET_STRINGNUMBER8(n, rc, ptr)
640    
641  #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \  #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
642    if (use_pcre16) \    if (pcre_mode == PCRE32_MODE) \
643        PCRE_GET_SUBSTRING32(rc, bptr, use_offsets, count, i, subsptr); \
644      else if (pcre_mode == PCRE16_MODE) \
645      PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \      PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
646    else \    else \
647      PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr)      PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr)
648    
649  #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \  #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
650    if (use_pcre16) \    if (pcre_mode == PCRE32_MODE) \
651        PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr); \
652      else if (pcre_mode == PCRE16_MODE) \
653      PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \      PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
654    else \    else \
655      PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr)      PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr)
656    
657  #define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \  #define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
658    (use_pcre16 ? \    (pcre_mode == PCRE32_MODE ? \
659       PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \       PCRE_JIT_STACK_ALLOC32(startsize, maxsize) \
660      :PCRE_JIT_STACK_ALLOC8(startsize, maxsize))      : pcre_mode == PCRE16_MODE ? \
661          PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
662          : PCRE_JIT_STACK_ALLOC8(startsize, maxsize))
663    
664  #define PCRE_JIT_STACK_FREE(stack) \  #define PCRE_JIT_STACK_FREE(stack) \
665    if (use_pcre16) \    if (pcre_mode == PCRE32_MODE) \
666        PCRE_JIT_STACK_FREE32(stack); \
667      else if (pcre_mode == PCRE16_MODE) \
668      PCRE_JIT_STACK_FREE16(stack); \      PCRE_JIT_STACK_FREE16(stack); \
669    else \    else \
670      PCRE_JIT_STACK_FREE8(stack)      PCRE_JIT_STACK_FREE8(stack)
671    
672  #define PCRE_MAKETABLES \  #define PCRE_MAKETABLES \
673    (use_pcre16? pcre16_maketables() : pcre_maketables())    (pcre_mode == PCRE32_MODE ? pcre32_maketables() : pcre_mode == PCRE16_MODE ? pcre16_maketables() : pcre_maketables())
674    
675  #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \  #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
676    if (use_pcre16) \    if (pcre_mode == PCRE32_MODE) \
677        PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables); \
678      else if (pcre_mode == PCRE16_MODE) \
679      PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \      PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \
680    else \    else \
681      PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables)      PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables)
682    
683  #define PCRE_PRINTINT(re, outfile, debug_lengths) \  #define PCRE_PRINTINT(re, outfile, debug_lengths) \
684    if (use_pcre16) \    if (pcre_mode == PCRE32_MODE) \
685        PCRE_PRINTINT32(re, outfile, debug_lengths); \
686      else if (pcre_mode == PCRE16_MODE) \
687      PCRE_PRINTINT16(re, outfile, debug_lengths); \      PCRE_PRINTINT16(re, outfile, debug_lengths); \
688    else \    else \
689      PCRE_PRINTINT8(re, outfile, debug_lengths)      PCRE_PRINTINT8(re, outfile, debug_lengths)
690    
691  #define PCRE_STUDY(extra, re, options, error) \  #define PCRE_STUDY(extra, re, options, error) \
692    if (use_pcre16) \    if (pcre_mode == PCRE32_MODE) \
693        PCRE_STUDY32(extra, re, options, error); \
694      else if (pcre_mode == PCRE16_MODE) \
695      PCRE_STUDY16(extra, re, options, error); \      PCRE_STUDY16(extra, re, options, error); \
696    else \    else \
697      PCRE_STUDY8(extra, re, options, error)      PCRE_STUDY8(extra, re, options, error)
# Line 558  version is called. ----- */ Line 728  version is called. ----- */
728    
729  /* ----- Only 16-bit mode is supported ----- */  /* ----- Only 16-bit mode is supported ----- */
730    
731  #else  #elif defined SUPPORT_PCRE16
732  #define CHAR_SIZE                 2  #define CHAR_SIZE                 2
733  #define PCHARS                    PCHARS16  #define PCHARS                    PCHARS16
734  #define PCHARSV                   PCHARSV16  #define PCHARSV                   PCHARSV16
# Line 585  version is called. ----- */ Line 755  version is called. ----- */
755  #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16  #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
756  #define PCRE_PRINTINT             PCRE_PRINTINT16  #define PCRE_PRINTINT             PCRE_PRINTINT16
757  #define PCRE_STUDY                PCRE_STUDY16  #define PCRE_STUDY                PCRE_STUDY16
758    
759    /* ----- Only 32-bit mode is supported ----- */
760    
761    #elif defined SUPPORT_PCRE32
762    #define CHAR_SIZE                 4
763    #define PCHARS                    PCHARS32
764    #define PCHARSV                   PCHARSV32
765    #define READ_CAPTURE_NAME         READ_CAPTURE_NAME32
766    #define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT32
767    #define STRLEN                    STRLEN32
768    #define PCRE_ASSIGN_JIT_STACK     PCRE_ASSIGN_JIT_STACK32
769    #define PCRE_COMPILE              PCRE_COMPILE32
770    #define PCRE_CONFIG               pcre32_config
771    #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING32
772    #define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING32
773    #define PCRE_DFA_EXEC             PCRE_DFA_EXEC32
774    #define PCRE_EXEC                 PCRE_EXEC32
775    #define PCRE_FREE_STUDY           PCRE_FREE_STUDY32
776    #define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING32
777    #define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST32
778    #define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING32
779    #define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER32
780    #define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING32
781    #define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST32
782    #define PCRE_JIT_STACK_ALLOC      PCRE_JIT_STACK_ALLOC32
783    #define PCRE_JIT_STACK_FREE       PCRE_JIT_STACK_FREE32
784    #define PCRE_MAKETABLES           pcre32_maketables()
785    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER32
786    #define PCRE_PRINTINT             PCRE_PRINTINT32
787    #define PCRE_STUDY                PCRE_STUDY32
788    
789  #endif  #endif
790    
791  /* ----- End of mode-specific function call macros ----- */  /* ----- End of mode-specific function call macros ----- */
# Line 600  version is called. ----- */ Line 801  version is called. ----- */
801  #endif  #endif
802  #endif  #endif
803    
804    #if !defined NODFA
805    #define DFA_WS_DIMENSION 1000
806    #endif
807    
808  /* This is the default loop count for timing. */  /* This is the default loop count for timing. */
809    
810  #define LOOPREPEAT 500000  #define LOOPREPEAT 500000
# Line 614  static int callout_fail_count; Line 819  static int callout_fail_count;
819  static int callout_fail_id;  static int callout_fail_id;
820  static int debug_lengths;  static int debug_lengths;
821  static int first_callout;  static int first_callout;
822    static int jit_was_used;
823  static int locale_set = 0;  static int locale_set = 0;
824  static int show_malloc;  static int show_malloc;
825  static int use_utf;  static int use_utf;
# Line 625  static const unsigned char *last_callout Line 831  static const unsigned char *last_callout
831    
832  static int buffer_size = 50000;  static int buffer_size = 50000;
833  static pcre_uint8 *buffer = NULL;  static pcre_uint8 *buffer = NULL;
 static pcre_uint8 *dbuffer = NULL;  
834  static pcre_uint8 *pbuffer = NULL;  static pcre_uint8 *pbuffer = NULL;
835    
836  /* Another buffer is needed translation to 16-bit character strings. It will  /* Another buffer is needed for translation to 16/32-bit character strings. It will be
837  obtained and extended as required. */  obtained and extended as required. */
838    
839  #ifdef SUPPORT_PCRE16  #if defined SUPPORT_PCRE8 && (defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32)
 static int buffer16_size = 0;  
 static pcre_uint16 *buffer16 = NULL;  
   
 #ifdef SUPPORT_PCRE8  
840    
841  /* We need the table of operator lengths that is used for 16-bit compiling, in  /* We need the table of operator lengths that is used for 16/32-bit compiling, in
842  order to swap bytes in a pattern for saving/reloading testing. Luckily, the  order to swap bytes in a pattern for saving/reloading testing. Luckily, the
843  data is defined as a macro. However, we must ensure that LINK_SIZE is adjusted  data is defined as a macro. However, we must ensure that LINK_SIZE is adjusted
844  appropriately for the 16-bit world. Just as a safety check, make sure that  appropriately for the 16/32-bit world. Just as a safety check, make sure that
845  COMPILE_PCRE16 is *not* set. */  COMPILE_PCRE[16|32] is *not* set. */
846    
847  #ifdef COMPILE_PCRE16  #ifdef COMPILE_PCRE16
848  #error COMPILE_PCRE16 must not be set when compiling pcretest.c  #error COMPILE_PCRE16 must not be set when compiling pcretest.c
849  #endif  #endif
850    
851    #ifdef COMPILE_PCRE32
852    #error COMPILE_PCRE32 must not be set when compiling pcretest.c
853    #endif
854    
855  #if LINK_SIZE == 2  #if LINK_SIZE == 2
856  #undef LINK_SIZE  #undef LINK_SIZE
857  #define LINK_SIZE 1  #define LINK_SIZE 1
# Line 660  COMPILE_PCRE16 is *not* set. */ Line 865  COMPILE_PCRE16 is *not* set. */
865  #undef IMM2_SIZE  #undef IMM2_SIZE
866  #define IMM2_SIZE 1  #define IMM2_SIZE 1
867    
868  #endif /* SUPPORT_PCRE8 */  #endif /* SUPPORT_PCRE8 && (SUPPORT_PCRE16 || SUPPORT_PCRE32) */
869    
870    #ifdef SUPPORT_PCRE16
871    static int buffer16_size = 0;
872    static pcre_uint16 *buffer16 = NULL;
873  static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };  static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
874  #endif  /* SUPPORT_PCRE16 */  #endif  /* SUPPORT_PCRE16 */
875    
876  /* If we have 8-bit support, default use_pcre16 to false; if there is also  #ifdef SUPPORT_PCRE32
877  16-bit support, it can be changed by an option. If there is no 8-bit support,  static int buffer32_size = 0;
878  there must be 16-bit support, so default it to 1. */  static pcre_uint32 *buffer32 = NULL;
879    static const pcre_uint32 OP_lengths32[] = { OP_LENGTHS };
880    #endif  /* SUPPORT_PCRE32 */
881    
882    /* If we have 8-bit support, default to it; if there is also 16- or 32-bit
883    support, it can be changed by an option. If there is no 8-bit support, there
884    must be 16- or 32-bit support, so default to 16-bit mode, else 32-bit mode. */
885    
886  #ifdef SUPPORT_PCRE8  #if defined SUPPORT_PCRE8
887  static int use_pcre16 = 0;  static int pcre_mode = PCRE8_MODE;
888  #else  #elif defined SUPPORT_PCRE16
889  static int use_pcre16 = 1;  static int pcre_mode = PCRE16_MODE;
890    #elif defined SUPPORT_PCRE32
891    static int pcre_mode = PCRE32_MODE;
892  #endif  #endif
893    
894    /* JIT study options for -s+n and /S+n where '1' <= n <= '7'. */
895    
896    static int jit_study_bits[] =
897      {
898      PCRE_STUDY_JIT_COMPILE,
899      PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
900      PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
901      PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
902      PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
903      PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
904      PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE +
905        PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE
906    };
907    
908    #define PCRE_STUDY_ALLJIT (PCRE_STUDY_JIT_COMPILE | \
909      PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE | PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE)
910    
911  /* Textual explanations for runtime error codes */  /* Textual explanations for runtime error codes */
912    
913  static const char *errtexts[] = {  static const char *errtexts[] = {
# Line 706  static const char *errtexts[] = { Line 939  static const char *errtexts[] = {
939    NULL,  /* SHORTUTF8/16 is handled specially */    NULL,  /* SHORTUTF8/16 is handled specially */
940    "nested recursion at the same subject position",    "nested recursion at the same subject position",
941    "JIT stack limit reached",    "JIT stack limit reached",
942    "pattern compiled in wrong mode: 8-bit/16-bit error"    "pattern compiled in wrong mode: 8-bit/16-bit error",
943      "pattern compiled with other endianness",
944      "invalid data in workspace for DFA restart"
945  };  };
946    
947    
# Line 1058  return sys_errlist[n]; Line 1293  return sys_errlist[n];
1293  #endif /* HAVE_STRERROR */  #endif /* HAVE_STRERROR */
1294    
1295    
1296    
1297    /*************************************************
1298    *       Print newline configuration              *
1299    *************************************************/
1300    
1301    /*
1302    Arguments:
1303      rc         the return code from PCRE_CONFIG_NEWLINE
1304      isc        TRUE if called from "-C newline"
1305    Returns:     nothing
1306    */
1307    
1308    static void
1309    print_newline_config(int rc, BOOL isc)
1310    {
1311    const char *s = NULL;
1312    if (!isc) printf("  Newline sequence is ");
1313    switch(rc)
1314      {
1315      case CHAR_CR: s = "CR"; break;
1316      case CHAR_LF: s = "LF"; break;
1317      case (CHAR_CR<<8 | CHAR_LF): s = "CRLF"; break;
1318      case -1: s = "ANY"; break;
1319      case -2: s = "ANYCRLF"; break;
1320    
1321      default:
1322      printf("a non-standard value: 0x%04x\n", rc);
1323      return;
1324      }
1325    
1326    printf("%s\n", s);
1327    }
1328    
1329    
1330    
1331  /*************************************************  /*************************************************
1332  *         JIT memory callback                    *  *         JIT memory callback                    *
1333  *************************************************/  *************************************************/
1334    
1335  static pcre_jit_stack* jit_callback(void *arg)  static pcre_jit_stack* jit_callback(void *arg)
1336  {  {
1337    jit_was_used = TRUE;
1338  return (pcre_jit_stack *)arg;  return (pcre_jit_stack *)arg;
1339  }  }
1340    
1341    
1342  #if !defined NOUTF || defined SUPPORT_PCRE16  #if !defined NOUTF || defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32
1343  /*************************************************  /*************************************************
1344  *            Convert UTF-8 string to value       *  *            Convert UTF-8 string to value       *
1345  *************************************************/  *************************************************/
# Line 1085  Returns:      >  0 => the number of byte Line 1356  Returns:      >  0 => the number of byte
1356  */  */
1357    
1358  static int  static int
1359  utf82ord(pcre_uint8 *utf8bytes, int *vptr)  utf82ord(pcre_uint8 *utf8bytes, pcre_uint32 *vptr)
1360  {  {
1361  int c = *utf8bytes++;  pcre_uint32 c = *utf8bytes++;
1362  int d = c;  pcre_uint32 d = c;
1363  int i, j, s;  int i, j, s;
1364    
1365  for (i = -1; i < 6; i++)               /* i is number of additional bytes */  for (i = -1; i < 6; i++)               /* i is number of additional bytes */
# Line 1128  return i+1; Line 1399  return i+1;
1399    
1400    
1401    
1402  #if !defined NOUTF || defined SUPPORT_PCRE16  #if !defined NOUTF || defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32
1403  /*************************************************  /*************************************************
1404  *       Convert character value to UTF-8         *  *       Convert character value to UTF-8         *
1405  *************************************************/  *************************************************/
# Line 1144  Returns:     number of characters placed Line 1415  Returns:     number of characters placed
1415  */  */
1416    
1417  static int  static int
1418  ord2utf8(int cvalue, pcre_uint8 *utf8bytes)  ord2utf8(pcre_uint32 cvalue, pcre_uint8 *utf8bytes)
1419  {  {
1420  register int i, j;  register int i, j;
1421    if (cvalue > 0x7fffffffu)
1422      return -1;
1423  for (i = 0; i < utf8_table1_size; i++)  for (i = 0; i < utf8_table1_size; i++)
1424    if (cvalue <= utf8_table1[i]) break;    if (cvalue <= utf8_table1[i]) break;
1425  utf8bytes += i;  utf8bytes += i;
# Line 1217  if (!utf && !data) Line 1490  if (!utf && !data)
1490    
1491  else  else
1492    {    {
1493    int c = 0;    pcre_uint32 c = 0;
1494    while (len > 0)    while (len > 0)
1495      {      {
1496      int chlen = utf82ord(p, &c);      int chlen = utf82ord(p, &c);
# Line 1240  return pp - buffer16; Line 1513  return pp - buffer16;
1513  }  }
1514  #endif  #endif
1515    
1516    #ifdef SUPPORT_PCRE32
1517    /*************************************************
1518    *         Convert a string to 32-bit             *
1519    *************************************************/
1520    
1521    /* In non-UTF mode, the space needed for a 32-bit string is exactly four times the
1522    8-bit size. For a UTF-8 string, the size needed for UTF-32 is no more than four
1523    times, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4
1524    in UTF-32. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-32. The
1525    result is always left in buffer32.
1526    
1527    Note that this function does not object to surrogate values. This is
1528    deliberate; it makes it possible to construct UTF-32 strings that are invalid,
1529    for the purpose of testing that they are correctly faulted.
1530    
1531    Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1532    in UTF-8 so that values greater than 255 can be handled.
1533    
1534    Arguments:
1535      data       TRUE if converting a data line; FALSE for a regex
1536      p          points to a byte string
1537      utf        true if UTF-8 (to be converted to UTF-32)
1538      len        number of bytes in the string (excluding trailing zero)
1539    
1540    Returns:     number of 32-bit data items used (excluding trailing zero)
1541                 OR -1 if a UTF-8 string is malformed
1542                 OR -2 if a value > 0x10ffff is encountered
1543                 OR -3 if an ill-formed value is encountered (i.e. a surrogate)
1544    */
1545    
1546    static int
1547    to32(int data, pcre_uint8 *p, int utf, int len)
1548    {
1549    pcre_uint32 *pp;
1550    
1551    if (buffer32_size < 4*len + 4)
1552      {
1553      if (buffer32 != NULL) free(buffer32);
1554      buffer32_size = 4*len + 4;
1555      buffer32 = (pcre_uint32 *)malloc(buffer32_size);
1556      if (buffer32 == NULL)
1557        {
1558        fprintf(stderr, "pcretest: malloc(%d) failed for buffer32\n", buffer32_size);
1559        exit(1);
1560        }
1561      }
1562    
1563    pp = buffer32;
1564    
1565    if (!utf && !data)
1566      {
1567      while (len-- > 0) *pp++ = *p++;
1568      }
1569    
1570    else
1571      {
1572      pcre_uint32 c = 0;
1573      while (len > 0)
1574        {
1575        int chlen = utf82ord(p, &c);
1576        if (chlen <= 0) return -1;
1577        if (utf)
1578          {
1579          if (c > 0x10ffff) return -2;
1580          if (!data && (c & 0xfffff800u) == 0xd800u) return -3;
1581          }
1582    
1583        p += chlen;
1584        len -= chlen;
1585        *pp++ = c;
1586        }
1587      }
1588    
1589    *pp = 0;
1590    return pp - buffer32;
1591    }
1592    
1593    /* Check that a 32-bit character string is valid UTF-32.
1594    
1595    Arguments:
1596      string       points to the string
1597      length       length of string, or -1 if the string is zero-terminated
1598    
1599    Returns:       TRUE  if the string is a valid UTF-32 string
1600                   FALSE otherwise
1601    */
1602    
1603    #ifdef SUPPORT_UTF
1604    static BOOL
1605    valid_utf32(pcre_uint32 *string, int length)
1606    {
1607    register pcre_uint32 *p;
1608    register pcre_uint32 c;
1609    
1610    for (p = string; length-- > 0; p++)
1611      {
1612      c = *p;
1613    
1614      if (c > 0x10ffffu)
1615        return FALSE;
1616    
1617      /* A surrogate */
1618      if ((c & 0xfffff800u) == 0xd800u)
1619        return FALSE;
1620    
1621      /* Non-character */
1622      if ((c & 0xfffeu) == 0xfffeu ||
1623          c >= 0xfdd0u && c <= 0xfdefu)
1624        return FALSE;
1625      }
1626    
1627    return TRUE;
1628    }
1629    #endif /* SUPPORT_UTF */
1630    
1631    #endif
1632    
1633  /*************************************************  /*************************************************
1634  *        Read or extend an input line            *  *        Read or extend an input line            *
# Line 1271  pcre_uint8 *here = start; Line 1660  pcre_uint8 *here = start;
1660    
1661  for (;;)  for (;;)
1662    {    {
1663    int rlen = (int)(buffer_size - (here - buffer));    size_t rlen = (size_t)(buffer_size - (here - buffer));
1664    
1665    if (rlen > 1000)    if (rlen > 1000)
1666      {      {
1667      int dlen;      int dlen;
1668    
1669      /* If libreadline support is required, use readline() to read a line if the      /* If libreadline or libedit support is required, use readline() to read a
1670      input is a terminal. Note that readline() removes the trailing newline, so      line if the input is a terminal. Note that readline() removes the trailing
1671      we must put it back again, to be compatible with fgets(). */      newline, so we must put it back again, to be compatible with fgets(). */
1672    
1673  #ifdef SUPPORT_LIBREADLINE  #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
1674      if (isatty(fileno(f)))      if (isatty(fileno(f)))
1675        {        {
1676        size_t len;        size_t len;
# Line 1315  for (;;) Line 1704  for (;;)
1704      {      {
1705      int new_buffer_size = 2*buffer_size;      int new_buffer_size = 2*buffer_size;
1706      pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);      pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
     pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);  
1707      pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);      pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1708    
1709      if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)      if (new_buffer == NULL || new_pbuffer == NULL)
1710        {        {
1711        fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);        fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1712        exit(1);        exit(1);
# Line 1333  for (;;) Line 1721  for (;;)
1721      here = new_buffer + (here - buffer);      here = new_buffer + (here - buffer);
1722    
1723      free(buffer);      free(buffer);
     free(dbuffer);  
1724      free(pbuffer);      free(pbuffer);
1725    
1726      buffer = new_buffer;      buffer = new_buffer;
     dbuffer = new_dbuffer;  
1727      pbuffer = new_pbuffer;      pbuffer = new_pbuffer;
1728      }      }
1729    }    }
# Line 1380  return(result); Line 1766  return(result);
1766    
1767  /* Print a single character either literally, or as a hex escape. */  /* Print a single character either literally, or as a hex escape. */
1768    
1769  static int pchar(int c, FILE *f)  static int pchar(pcre_uint32 c, FILE *f)
1770  {  {
1771    int n;
1772  if (PRINTOK(c))  if (PRINTOK(c))
1773    {    {
1774    if (f != NULL) fprintf(f, "%c", c);    if (f != NULL) fprintf(f, "%c", c);
# Line 1402  if (c < 0x100) Line 1789  if (c < 0x100)
1789      }      }
1790    }    }
1791    
1792  if (f != NULL) fprintf(f, "\\x{%02x}", c);  if (f != NULL) n = fprintf(f, "\\x{%02x}", c);
1793  return (c <= 0x000000ff)? 6 :  return n >= 0 ? n : 0;
        (c <= 0x00000fff)? 7 :  
        (c <= 0x0000ffff)? 8 :  
        (c <= 0x000fffff)? 9 : 10;  
1794  }  }
1795    
1796    
# Line 1421  If handed a NULL file, just counts chars Line 1805  If handed a NULL file, just counts chars
1805    
1806  static int pchars(pcre_uint8 *p, int length, FILE *f)  static int pchars(pcre_uint8 *p, int length, FILE *f)
1807  {  {
1808  int c = 0;  pcre_uint32 c = 0;
1809  int yield = 0;  int yield = 0;
1810    
1811  if (length < 0)  if (length < 0)
# Line 1466  return len; Line 1850  return len;
1850  #endif  /* SUPPORT_PCRE16 */  #endif  /* SUPPORT_PCRE16 */
1851    
1852    
1853    
1854    #ifdef SUPPORT_PCRE32
1855    /*************************************************
1856    *    Find length of 0-terminated 32-bit string   *
1857    *************************************************/
1858    
1859    static int strlen32(PCRE_SPTR32 p)
1860    {
1861    int len = 0;
1862    while (*p++ != 0) len++;
1863    return len;
1864    }
1865    #endif  /* SUPPORT_PCRE32 */
1866    
1867    
1868    
1869  #ifdef SUPPORT_PCRE16  #ifdef SUPPORT_PCRE16
1870  /*************************************************  /*************************************************
1871  *           Print 16-bit character string        *  *           Print 16-bit character string        *
# Line 1483  if (length < 0) Line 1883  if (length < 0)
1883    
1884  while (length-- > 0)  while (length-- > 0)
1885    {    {
1886    int c = *p++ & 0xffff;    pcre_uint32 c = *p++ & 0xffff;
1887  #if !defined NOUTF  #if !defined NOUTF
1888    if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)    if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
1889      {      {
# Line 1505  return yield; Line 1905  return yield;
1905    
1906    
1907    
1908    #ifdef SUPPORT_PCRE32
1909    /*************************************************
1910    *           Print 32-bit character string        *
1911    *************************************************/
1912    
1913    /* Must handle UTF-32 strings in utf mode. Yields number of characters printed.
1914    If handed a NULL file, just counts chars without printing. */
1915    
1916    #define UTF32_MASK (0x1fffffu)
1917    
1918    static int pchars32(PCRE_SPTR32 p, int length, BOOL utf, FILE *f)
1919    {
1920    int yield = 0;
1921    
1922    if (length < 0)
1923      length = strlen32(p);
1924    
1925    while (length-- > 0)
1926      {
1927      pcre_uint32 c = *p++;
1928      if (utf) c &= UTF32_MASK;
1929      yield += pchar(c, f);
1930      }
1931    
1932    return yield;
1933    }
1934    #endif  /* SUPPORT_PCRE32 */
1935    
1936    
1937    
1938  #ifdef SUPPORT_PCRE8  #ifdef SUPPORT_PCRE8
1939  /*************************************************  /*************************************************
1940  *     Read a capture name (8-bit) and check it   *  *     Read a capture name (8-bit) and check it   *
# Line 1558  return p; Line 1988  return p;
1988    
1989    
1990    
1991    #ifdef SUPPORT_PCRE32
1992    /*************************************************
1993    *     Read a capture name (32-bit) and check it  *
1994    *************************************************/
1995    
1996    /* Note that the text being read is 8-bit. */
1997    
1998    static pcre_uint8 *
1999    read_capture_name32(pcre_uint8 *p, pcre_uint32 **pp, pcre *re)
2000    {
2001    pcre_uint32 *npp = *pp;
2002    while (isalnum(*p)) *npp++ = *p++;
2003    *npp++ = 0;
2004    *npp = 0;
2005    if (pcre32_get_stringnumber((pcre32 *)re, (PCRE_SPTR32)(*pp)) < 0)
2006      {
2007      fprintf(outfile, "no parentheses with name \"");
2008      PCHARSV(*pp, 0, -1, outfile);
2009      fprintf(outfile, "\"\n");
2010      }
2011    *pp = npp;
2012    return p;
2013    }
2014    #endif  /* SUPPORT_PCRE32 */
2015    
2016    
2017    
2018  /*************************************************  /*************************************************
2019  *              Callout function                  *  *              Callout function                  *
2020  *************************************************/  *************************************************/
# Line 1715  free(block); Line 2172  free(block);
2172  *************************************************/  *************************************************/
2173    
2174  /* Get one piece of information from the pcre_fullinfo() function. When only  /* Get one piece of information from the pcre_fullinfo() function. When only
2175  one of 8-bit or 16-bit is supported, use_pcre16 should always have the correct  one of 8-, 16- or 32-bit is supported, pcre_mode should always have the correct
2176  value, but the code is defensive.  value, but the code is defensive.
2177    
2178  Arguments:  Arguments:
# Line 1732  new_info(pcre *re, pcre_extra *study, in Line 2189  new_info(pcre *re, pcre_extra *study, in
2189  {  {
2190  int rc;  int rc;
2191    
2192  if (use_pcre16)  if (pcre_mode == PCRE32_MODE)
2193    #ifdef SUPPORT_PCRE32
2194      rc = pcre32_fullinfo((pcre32 *)re, (pcre32_extra *)study, option, ptr);
2195    #else
2196      rc = PCRE_ERROR_BADMODE;
2197    #endif
2198    else if (pcre_mode == PCRE16_MODE)
2199  #ifdef SUPPORT_PCRE16  #ifdef SUPPORT_PCRE16
2200    rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr);    rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr);
2201  #else  #else
# Line 1748  else Line 2211  else
2211  if (rc < 0)  if (rc < 0)
2212    {    {
2213    fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,    fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
2214      use_pcre16? "16" : "", option);      pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "", option);
2215    if (rc == PCRE_ERROR_BADMODE)    if (rc == PCRE_ERROR_BADMODE)
2216      fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "      fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
2217        "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");        "%d-bit mode\n", 8 * CHAR_SIZE,
2218          8 * (REAL_PCRE_FLAGS(re) & PCRE_MODE_MASK));
2219    }    }
2220    
2221  return rc;  return rc;
# Line 1800  bytes in the pattern itself. This is to Line 2264  bytes in the pattern itself. This is to
2264  ability to reload byte-flipped patterns, e.g. those compiled on a different  ability to reload byte-flipped patterns, e.g. those compiled on a different
2265  architecture. */  architecture. */
2266    
2267    #if defined SUPPORT_PCRE8 || defined SUPPORT_PCRE16
2268  static void  static void
2269  regexflip(pcre *ere, pcre_extra *extra)  regexflip8_or_16(pcre *ere, pcre_extra *extra)
2270  {  {
2271  REAL_PCRE *re = (REAL_PCRE *)ere;  real_pcre8_or_16 *re = (real_pcre8_or_16 *)ere;
2272  #ifdef SUPPORT_PCRE16  #ifdef SUPPORT_PCRE16
2273  int op;  int op;
2274  pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;  pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
# Line 1840  if (extra != NULL) Line 2305  if (extra != NULL)
2305  in the name table, if present, and then in the pattern itself. */  in the name table, if present, and then in the pattern itself. */
2306    
2307  #ifdef SUPPORT_PCRE16  #ifdef SUPPORT_PCRE16
2308  if (!use_pcre16) return;  if (pcre_mode != PCRE16_MODE) return;
2309    
2310  while(TRUE)  while(TRUE)
2311    {    {
# Line 1978  while(TRUE) Line 2443  while(TRUE)
2443  /* Control should never reach here in 16 bit mode. */  /* Control should never reach here in 16 bit mode. */
2444  #endif /* SUPPORT_PCRE16 */  #endif /* SUPPORT_PCRE16 */
2445  }  }
2446    #endif /* SUPPORT_PCRE[8|16] */
2447    
2448    
2449    
2450  /*************************************************  #if defined SUPPORT_PCRE32
2451  *        Check match or recursion limit          *  static void
2452  *************************************************/  regexflip_32(pcre *ere, pcre_extra *extra)
   
 static int  
 check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,  
   int start_offset, int options, int *use_offsets, int use_size_offsets,  
   int flag, unsigned long int *limit, int errnumber, const char *msg)  
2453  {  {
2454  int count;  real_pcre32 *re = (real_pcre32 *)ere;
2455  int min = 0;  int op;
2456  int mid = 64;  pcre_uint32 *ptr = (pcre_uint32 *)re + re->name_table_offset;
2457  int max = -1;  int length = re->name_count * re->name_entry_size;
2458    #ifdef SUPPORT_UTF
2459    BOOL utf = (re->options & PCRE_UTF32) != 0;
2460    #endif /* SUPPORT_UTF */
2461    
2462  extra->flags |= flag;  /* Always flip the bytes in the main data block and study blocks. */
2463    
2464  for (;;)  re->magic_number = REVERSED_MAGIC_NUMBER;
2465    {  re->size = swap_uint32(re->size);
2466    *limit = mid;  re->options = swap_uint32(re->options);
2467    re->flags = swap_uint16(re->flags);
2468    re->top_bracket = swap_uint16(re->top_bracket);
2469    re->top_backref = swap_uint16(re->top_backref);
2470    re->first_char = swap_uint32(re->first_char);
2471    re->req_char = swap_uint32(re->req_char);
2472    re->name_table_offset = swap_uint16(re->name_table_offset);
2473    re->name_entry_size = swap_uint16(re->name_entry_size);
2474    re->name_count = swap_uint16(re->name_count);
2475    
2476    PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,  if (extra != NULL)
2477      {
2478      pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
2479      rsd->size = swap_uint32(rsd->size);
2480      rsd->flags = swap_uint32(rsd->flags);
2481      rsd->minlength = swap_uint32(rsd->minlength);
2482      }
2483    
2484    /* In 32-bit mode we must swap bytes
2485    in the name table, if present, and then in the pattern itself. */
2486    
2487    while(TRUE)
2488      {
2489      /* Swap previous characters. */
2490      while (length-- > 0)
2491        {
2492        *ptr = swap_uint32(*ptr);
2493        ptr++;
2494        }
2495    
2496      /* Get next opcode. */
2497    
2498      length = 0;
2499      op = *ptr;
2500      *ptr++ = swap_uint32(op);
2501    
2502      switch (op)
2503        {
2504        case OP_END:
2505        return;
2506    
2507        default:
2508        length = OP_lengths32[op] - 1;
2509        break;
2510    
2511        case OP_CLASS:
2512        case OP_NCLASS:
2513        /* Skip the character bit map. */
2514        ptr += 32/sizeof(pcre_uint32);
2515        length = 0;
2516        break;
2517    
2518        case OP_XCLASS:
2519        /* LINK_SIZE can only be 1 in 32-bit mode. */
2520        length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
2521    
2522        /* Reverse the size of the XCLASS instance. */
2523        *ptr = swap_uint32(*ptr);
2524        ptr++;
2525    
2526        op = *ptr;
2527        *ptr = swap_uint32(op);
2528        ptr++;
2529        if ((op & XCL_MAP) != 0)
2530          {
2531          /* Skip the character bit map. */
2532          ptr += 32/sizeof(pcre_uint32);
2533          length -= 32/sizeof(pcre_uint32);
2534          }
2535        break;
2536        }
2537      }
2538    /* Control should never reach here in 32 bit mode. */
2539    }
2540    
2541    #endif /* SUPPORT_PCRE32 */
2542    
2543    
2544    
2545    static void
2546    regexflip(pcre *ere, pcre_extra *extra)
2547    {
2548    #if defined SUPPORT_PCRE32
2549      if (REAL_PCRE_FLAGS(ere) & PCRE_MODE32)
2550        regexflip_32(ere, extra);
2551    #endif
2552    #if defined SUPPORT_PCRE8 || defined SUPPORT_PCRE16
2553      if (REAL_PCRE_FLAGS(ere) & (PCRE_MODE8 | PCRE_MODE16))
2554        regexflip8_or_16(ere, extra);
2555    #endif
2556    }
2557    
2558    
2559    
2560    /*************************************************
2561    *        Check match or recursion limit          *
2562    *************************************************/
2563    
2564    static int
2565    check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
2566      int start_offset, int options, int *use_offsets, int use_size_offsets,
2567      int flag, unsigned long int *limit, int errnumber, const char *msg)
2568    {
2569    int count;
2570    int min = 0;
2571    int mid = 64;
2572    int max = -1;
2573    
2574    extra->flags |= flag;
2575    
2576    for (;;)
2577      {
2578      *limit = mid;
2579    
2580      PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
2581      use_offsets, use_size_offsets);      use_offsets, use_size_offsets);
2582    
2583    if (count == errnumber)    if (count == errnumber)
# Line 2097  usage(void) Line 2673  usage(void)
2673  {  {
2674  printf("Usage:     pcretest [options] [<input file> [<output file>]]\n\n");  printf("Usage:     pcretest [options] [<input file> [<output file>]]\n\n");
2675  printf("Input and output default to stdin and stdout.\n");  printf("Input and output default to stdin and stdout.\n");
2676  #ifdef SUPPORT_LIBREADLINE  #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
2677  printf("If input is a terminal, readline() is used to read from it.\n");  printf("If input is a terminal, readline() is used to read from it.\n");
2678  #else  #else
2679  printf("This version of pcretest is not linked with readline().\n");  printf("This version of pcretest is not linked with readline().\n");
# Line 2106  printf("\nOptions:\n"); Line 2682  printf("\nOptions:\n");
2682  #ifdef SUPPORT_PCRE16  #ifdef SUPPORT_PCRE16
2683  printf("  -16      use the 16-bit library\n");  printf("  -16      use the 16-bit library\n");
2684  #endif  #endif
2685    #ifdef SUPPORT_PCRE32
2686    printf("  -32      use the 32-bit library\n");
2687    #endif
2688  printf("  -b       show compiled code\n");  printf("  -b       show compiled code\n");
2689  printf("  -C       show PCRE compile-time options and exit\n");  printf("  -C       show PCRE compile-time options and exit\n");
2690  printf("  -C arg   show a specific compile-time option\n");  printf("  -C arg   show a specific compile-time option\n");
# Line 2113  printf("           and exit with its val Line 2692  printf("           and exit with its val
2692  printf("     linksize     internal link size [2, 3, 4]\n");  printf("     linksize     internal link size [2, 3, 4]\n");
2693  printf("     pcre8        8 bit library support enabled [0, 1]\n");  printf("     pcre8        8 bit library support enabled [0, 1]\n");
2694  printf("     pcre16       16 bit library support enabled [0, 1]\n");  printf("     pcre16       16 bit library support enabled [0, 1]\n");
2695    printf("     pcre32       32 bit library support enabled [0, 1]\n");
2696  printf("     utf          Unicode Transformation Format supported [0, 1]\n");  printf("     utf          Unicode Transformation Format supported [0, 1]\n");
2697  printf("     ucp          Unicode Properties supported [0, 1]\n");  printf("     ucp          Unicode Properties supported [0, 1]\n");
2698  printf("     jit          Just-in-time compiler supported [0, 1]\n");  printf("     jit          Just-in-time compiler supported [0, 1]\n");
# Line 2133  printf("  -q       quiet: do not output Line 2713  printf("  -q       quiet: do not output
2713  printf("  -S <n>   set stack size to <n> megabytes\n");  printf("  -S <n>   set stack size to <n> megabytes\n");
2714  printf("  -s       force each pattern to be studied at basic level\n"  printf("  -s       force each pattern to be studied at basic level\n"
2715         "  -s+      force each pattern to be studied, using JIT if available\n"         "  -s+      force each pattern to be studied, using JIT if available\n"
2716           "  -s++     ditto, verifying when JIT was actually used\n"
2717           "  -s+n     force each pattern to be studied, using JIT if available,\n"
2718           "             where 1 <= n <= 7 selects JIT options\n"
2719           "  -s++n    ditto, verifying when JIT was actually used\n"
2720         "  -t       time compilation and execution\n");         "  -t       time compilation and execution\n");
2721  printf("  -t <n>   time compilation and execution, repeating <n> times\n");  printf("  -t <n>   time compilation and execution, repeating <n> times\n");
2722  printf("  -tm      time execution (matching) only\n");  printf("  -tm      time execution (matching) only\n");
# Line 2167  int quiet = 0; Line 2751  int quiet = 0;
2751  int size_offsets = 45;  int size_offsets = 45;
2752  int size_offsets_max;  int size_offsets_max;
2753  int *offsets = NULL;  int *offsets = NULL;
 #if !defined NOPOSIX  
 int posix = 0;  
 #endif  
2754  int debug = 0;  int debug = 0;
2755  int done = 0;  int done = 0;
2756  int all_use_dfa = 0;  int all_use_dfa = 0;
2757    int verify_jit = 0;
2758  int yield = 0;  int yield = 0;
2759    int mask_utf32 = 0;
2760  int stack_size;  int stack_size;
2761    pcre_uint8 *dbuffer = NULL;
2762    size_t dbuffer_size = 1u << 14;
2763    
2764    #if !defined NOPOSIX
2765    int posix = 0;
2766    #endif
2767    #if !defined NODFA
2768    int *dfa_workspace = NULL;
2769    #endif
2770    
2771  pcre_jit_stack *jit_stack = NULL;  pcre_jit_stack *jit_stack = NULL;
2772    
2773  /* These vectors store, end-to-end, a list of zero-terminated captured  /* These vectors store, end-to-end, a list of zero-terminated captured
2774  substring names, each list itself being terminated by an empty name. Assume  substring names, each list itself being terminated by an empty name. Assume
2775  that 1024 is plenty long enough for the few names we'll be testing. It is  that 1024 is plenty long enough for the few names we'll be testing. It is
2776  easiest to keep separate 8-bit and 16-bit versions, using the 16-bit version  easiest to keep separate 8-, 16- and 32-bit versions, using the 32-bit version
2777  for the actual memory, to ensure alignment. */  for the actual memory, to ensure alignment. */
2778    
2779  pcre_uint16 copynames[1024];  pcre_uint32 copynames[1024];
2780  pcre_uint16 getnames[1024];  pcre_uint32 getnames[1024];
2781    
2782    #ifdef SUPPORT_PCRE32
2783    pcre_uint32 *cn32ptr;
2784    pcre_uint32 *gn32ptr;
2785    #endif
2786    
2787  #ifdef SUPPORT_PCRE16  #ifdef SUPPORT_PCRE16
2788    pcre_uint16 *copynames16 = (pcre_uint16 *)copynames;
2789    pcre_uint16 *getnames16 = (pcre_uint16 *)getnames;
2790  pcre_uint16 *cn16ptr;  pcre_uint16 *cn16ptr;
2791  pcre_uint16 *gn16ptr;  pcre_uint16 *gn16ptr;
2792  #endif  #endif
# Line 2200  pcre_uint8 *gn8ptr; Line 2799  pcre_uint8 *gn8ptr;
2799  #endif  #endif
2800    
2801  /* Get buffers from malloc() so that valgrind will check their misuse when  /* Get buffers from malloc() so that valgrind will check their misuse when
2802  debugging. They grow automatically when very long lines are read. The 16-bit  debugging. They grow automatically when very long lines are read. The 16-
2803  buffer (buffer16) is obtained only if needed. */  and 32-bit buffers (buffer16, buffer32) are obtained only if needed. */
2804    
2805  buffer = (pcre_uint8 *)malloc(buffer_size);  buffer = (pcre_uint8 *)malloc(buffer_size);
 dbuffer = (pcre_uint8 *)malloc(buffer_size);  
2806  pbuffer = (pcre_uint8 *)malloc(buffer_size);  pbuffer = (pcre_uint8 *)malloc(buffer_size);
2807    
2808  /* The outfile variable is static so that new_malloc can use it. */  /* The outfile variable is static so that new_malloc can use it. */
# Line 2223  _setmode( _fileno( stdout ), _O_BINARY ) Line 2821  _setmode( _fileno( stdout ), _O_BINARY )
2821  /* Get the version number: both pcre_version() and pcre16_version() give the  /* Get the version number: both pcre_version() and pcre16_version() give the
2822  same answer. We just need to ensure that we call one that is available. */  same answer. We just need to ensure that we call one that is available. */
2823    
2824  #ifdef SUPPORT_PCRE8  #if defined SUPPORT_PCRE8
2825  version = pcre_version();  version = pcre_version();
2826  #else  #elif defined SUPPORT_PCRE16
2827  version = pcre16_version();  version = pcre16_version();
2828    #elif defined SUPPORT_PCRE32
2829    version = pcre32_version();
2830  #endif  #endif
2831    
2832  /* Scan options */  /* Scan options */
# Line 2234  version = pcre16_version(); Line 2834  version = pcre16_version();
2834  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
2835    {    {
2836    pcre_uint8 *endptr;    pcre_uint8 *endptr;
2837      char *arg = argv[op];
2838    
2839    if (strcmp(argv[op], "-m") == 0) showstore = 1;    if (strcmp(arg, "-m") == 0) showstore = 1;
2840    else if (strcmp(argv[op], "-s") == 0) force_study = 0;    else if (strcmp(arg, "-s") == 0) force_study = 0;
2841    else if (strcmp(argv[op], "-s+") == 0)  
2842      else if (strncmp(arg, "-s+", 3) == 0)
2843      {      {
2844        arg += 3;
2845        if (*arg == '+') { arg++; verify_jit = TRUE; }
2846      force_study = 1;      force_study = 1;
2847      force_study_options = PCRE_STUDY_JIT_COMPILE;      if (*arg == 0)
2848          force_study_options = jit_study_bits[6];
2849        else if (*arg >= '1' && *arg <= '7')
2850          force_study_options = jit_study_bits[*arg - '1'];
2851        else goto BAD_ARG;
2852        }
2853      else if (strcmp(arg, "-8") == 0)
2854        {
2855    #ifdef SUPPORT_PCRE8
2856        pcre_mode = PCRE8_MODE;
2857    #else
2858        printf("** This version of PCRE was built without 8-bit support\n");
2859        exit(1);
2860    #endif
2861      }      }
2862    else if (strcmp(argv[op], "-16") == 0)    else if (strcmp(arg, "-16") == 0)
2863      {      {
2864  #ifdef SUPPORT_PCRE16  #ifdef SUPPORT_PCRE16
2865      use_pcre16 = 1;      pcre_mode = PCRE16_MODE;
2866  #else  #else
2867      printf("** This version of PCRE was built without 16-bit support\n");      printf("** This version of PCRE was built without 16-bit support\n");
2868      exit(1);      exit(1);
2869  #endif  #endif
2870      }      }
2871    else if (strcmp(argv[op], "-q") == 0) quiet = 1;    else if (strcmp(arg, "-32") == 0 || strcmp(arg, "-32+") == 0)
2872    else if (strcmp(argv[op], "-b") == 0) debug = 1;      {
2873    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;  #ifdef SUPPORT_PCRE32
2874    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;      pcre_mode = PCRE32_MODE;
2875    else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;      mask_utf32 = (strcmp(arg, "-32+") == 0);
2876    #else
2877        printf("** This version of PCRE was built without 32-bit support\n");
2878        exit(1);
2879    #endif
2880        }
2881      else if (strcmp(arg, "-q") == 0) quiet = 1;
2882      else if (strcmp(arg, "-b") == 0) debug = 1;
2883      else if (strcmp(arg, "-i") == 0) showinfo = 1;
2884      else if (strcmp(arg, "-d") == 0) showinfo = debug = 1;
2885      else if (strcmp(arg, "-M") == 0) default_find_match_limit = TRUE;
2886  #if !defined NODFA  #if !defined NODFA
2887    else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;    else if (strcmp(arg, "-dfa") == 0) all_use_dfa = 1;
2888  #endif  #endif
2889    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&    else if (strcmp(arg, "-o") == 0 && argc > 2 &&
2890        ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),        ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2891          *endptr == 0))          *endptr == 0))
2892      {      {
2893      op++;      op++;
2894      argc--;      argc--;
2895      }      }
2896    else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)    else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0)
2897      {      {
2898      int both = argv[op][2] == 0;      int both = arg[2] == 0;
2899      int temp;      int temp;
2900      if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),      if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
2901                       *endptr == 0))                       *endptr == 0))
# Line 2280  while (argc > 1 && argv[op][0] == '-') Line 2907  while (argc > 1 && argv[op][0] == '-')
2907      else timeitm = LOOPREPEAT;      else timeitm = LOOPREPEAT;
2908      if (both) timeit = timeitm;      if (both) timeit = timeitm;
2909      }      }
2910    else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&    else if (strcmp(arg, "-S") == 0 && argc > 2 &&
2911        ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),        ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2912          *endptr == 0))          *endptr == 0))
2913      {      {
2914  #if defined(_WIN32) || defined(WIN32) || defined(__minix)  #if defined(_WIN32) || defined(WIN32) || defined(__minix) || defined(NATIVE_ZOS)
2915      printf("PCRE: -S not supported on this OS\n");      printf("PCRE: -S not supported on this OS\n");
2916      exit(1);      exit(1);
2917  #else  #else
# Line 2303  while (argc > 1 && argv[op][0] == '-') Line 2930  while (argc > 1 && argv[op][0] == '-')
2930  #endif  #endif
2931      }      }
2932  #if !defined NOPOSIX  #if !defined NOPOSIX
2933    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(arg, "-p") == 0) posix = 1;
2934  #endif  #endif
2935    else if (strcmp(argv[op], "-C") == 0)    else if (strcmp(arg, "-C") == 0)
2936      {      {
2937      int rc;      int rc;
2938      unsigned long int lrc;      unsigned long int lrc;
# Line 2317  while (argc > 1 && argv[op][0] == '-') Line 2944  while (argc > 1 && argv[op][0] == '-')
2944          (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);          (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2945          printf("%d\n", rc);          printf("%d\n", rc);
2946          yield = rc;          yield = rc;
         goto EXIT;  
2947          }          }
2948        if (strcmp(argv[op + 1], "pcre8") == 0)        else if (strcmp(argv[op + 1], "pcre8") == 0)
2949          {          {
2950  #ifdef SUPPORT_PCRE8  #ifdef SUPPORT_PCRE8
2951          printf("1\n");          printf("1\n");
# Line 2328  while (argc > 1 && argv[op][0] == '-') Line 2954  while (argc > 1 && argv[op][0] == '-')
2954          printf("0\n");          printf("0\n");
2955          yield = 0;          yield = 0;
2956  #endif  #endif
         goto EXIT;  
2957          }          }
2958        if (strcmp(argv[op + 1], "pcre16") == 0)        else if (strcmp(argv[op + 1], "pcre16") == 0)
2959          {          {
2960  #ifdef SUPPORT_PCRE16  #ifdef SUPPORT_PCRE16
2961          printf("1\n");          printf("1\n");
# Line 2339  while (argc > 1 && argv[op][0] == '-') Line 2964  while (argc > 1 && argv[op][0] == '-')
2964          printf("0\n");          printf("0\n");
2965          yield = 0;          yield = 0;
2966  #endif  #endif
2967            }
2968          else if (strcmp(argv[op + 1], "pcre32") == 0)
2969            {
2970    #ifdef SUPPORT_PCRE32
2971            printf("1\n");
2972            yield = 1;
2973    #else
2974            printf("0\n");
2975            yield = 0;
2976    #endif
2977          goto EXIT;          goto EXIT;
2978          }          }
2979        if (strcmp(argv[op + 1], "utf") == 0)        if (strcmp(argv[op + 1], "utf") == 0)
2980          {          {
2981  #ifdef SUPPORT_PCRE8  #ifdef SUPPORT_PCRE8
2982          (void)pcre_config(PCRE_CONFIG_UTF8, &rc);          if (pcre_mode == PCRE8_MODE)
2983          printf("%d\n", rc);            (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2984          yield = rc;  #endif
2985  #else  #ifdef SUPPORT_PCRE16
2986          (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);          if (pcre_mode == PCRE16_MODE)
2987              (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2988    #endif
2989    #ifdef SUPPORT_PCRE32
2990            if (pcre_mode == PCRE32_MODE)
2991              (void)pcre32_config(PCRE_CONFIG_UTF32, &rc);
2992    #endif
2993          printf("%d\n", rc);          printf("%d\n", rc);
2994          yield = rc;          yield = rc;
 #endif  
2995          goto EXIT;          goto EXIT;
2996          }          }
2997        if (strcmp(argv[op + 1], "ucp") == 0)        else if (strcmp(argv[op + 1], "ucp") == 0)
2998          {          {
2999          (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);          (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
3000          printf("%d\n", rc);          printf("%d\n", rc);
3001          yield = rc;          yield = rc;
         goto EXIT;  
3002          }          }
3003        if (strcmp(argv[op + 1], "jit") == 0)        else if (strcmp(argv[op + 1], "jit") == 0)
3004          {          {
3005          (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);          (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
3006          printf("%d\n", rc);          printf("%d\n", rc);
3007          yield = rc;          yield = rc;
         goto EXIT;  
3008          }          }
3009        if (strcmp(argv[op + 1], "newline") == 0)        else if (strcmp(argv[op + 1], "newline") == 0)
3010          {          {
3011          (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);          (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
3012          /* Note that these values are always the ASCII values, even          print_newline_config(rc, TRUE);
         in EBCDIC environments. CR is 13 and NL is 10. */  
         printf("%s\n", (rc == 13)? "CR" :  
           (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :  
           (rc == -2)? "ANYCRLF" :  
           (rc == -1)? "ANY" : "???");  
         goto EXIT;  
3013          }          }
3014        printf("Unknown -C option: %s\n", argv[op + 1]);        else if (strcmp(argv[op + 1], "ebcdic") == 0)
3015            {
3016    #ifdef EBCDIC
3017            printf("1\n");
3018            yield = 1;
3019    #else
3020            printf("0\n");
3021    #endif
3022            }
3023          else if (strcmp(argv[op + 1], "ebcdic-nl") == 0)
3024            {
3025    #ifdef EBCDIC
3026            printf("0x%02x\n", CHAR_LF);
3027    #else
3028            printf("0\n");
3029    #endif
3030            }
3031          else
3032            {
3033            printf("Unknown -C option: %s\n", argv[op + 1]);
3034            }
3035        goto EXIT;        goto EXIT;
3036        }        }
3037    
3038        /* No argument for -C: output all configuration information. */
3039    
3040      printf("PCRE version %s\n", version);      printf("PCRE version %s\n", version);
3041      printf("Compiled with\n");      printf("Compiled with\n");
3042    
3043    #ifdef EBCDIC
3044        printf("  EBCDIC code support: LF is 0x%02x\n", CHAR_LF);
3045    #endif
3046    
3047  /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both  /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
3048  are set, either both UTFs are supported or both are not supported. */  are set, either both UTFs are supported or both are not supported. */
3049    
3050  #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16  #ifdef SUPPORT_PCRE8
3051      printf("  8-bit and 16-bit support\n");      printf("  8-bit support\n");
     (void)pcre_config(PCRE_CONFIG_UTF8, &rc);  
     if (rc)  
       printf("  UTF-8 and UTF-16 support\n");  
     else  
       printf("  No UTF-8 or UTF-16 support\n");  
 #elif defined SUPPORT_PCRE8  
     printf("  8-bit support only\n");  
3052      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
3053      printf("  %sUTF-8 support\n", rc? "" : "No ");        printf ("  %sUTF-8 support\n", rc ? "" : "No ");
3054  #else  #endif
3055      printf("  16-bit support only\n");  #ifdef SUPPORT_PCRE16
3056        printf("  16-bit support\n");
3057      (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);      (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
3058      printf("  %sUTF-16 support\n", rc? "" : "No ");      printf ("  %sUTF-16 support\n", rc ? "" : "No ");
3059    #endif
3060    #ifdef SUPPORT_PCRE32
3061        printf("  32-bit support\n");
3062        (void)pcre32_config(PCRE_CONFIG_UTF32, &rc);
3063        printf ("  %sUTF-32 support\n", rc ? "" : "No ");
3064  #endif  #endif
3065    
3066      (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
# Line 2412  are set, either both UTFs are supported Line 3069  are set, either both UTFs are supported
3069      if (rc)      if (rc)
3070        {        {
3071        const char *arch;        const char *arch;
3072        (void)PCRE_CONFIG(PCRE_CONFIG_JITTARGET, &arch);        (void)PCRE_CONFIG(PCRE_CONFIG_JITTARGET, (void *)(&arch));
3073        printf("  Just-in-time compiler support: %s\n", arch);        printf("  Just-in-time compiler support: %s\n", arch);
3074        }        }
3075      else      else
3076        printf("  No just-in-time compiler support\n");        printf("  No just-in-time compiler support\n");
3077      (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
3078      /* Note that these values are always the ASCII values, even      print_newline_config(rc, FALSE);
     in EBCDIC environments. CR is 13 and NL is 10. */  
     printf("  Newline sequence is %s\n", (rc == 13)? "CR" :  
       (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :  
       (rc == -2)? "ANYCRLF" :  
       (rc == -1)? "ANY" : "???");  
3079      (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
3080      printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :      printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :
3081                                       "all Unicode newlines");                                       "all Unicode newlines");
# Line 2438  are set, either both UTFs are supported Line 3090  are set, either both UTFs are supported
3090      (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
3091      printf("  Match recursion uses %s", rc? "stack" : "heap");      printf("  Match recursion uses %s", rc? "stack" : "heap");
3092      if (showstore)      if (showstore)
3093        {        {
3094        PCRE_EXEC(stack_size, NULL, NULL, NULL, -1, -1, 0, NULL, 0);        PCRE_EXEC(stack_size, NULL, NULL, NULL, -999, -999, 0, NULL, 0);
3095        printf(": %sframe size = %d bytes", rc? "approximate " : "", -stack_size);        printf(": %sframe size = %d bytes", rc? "approximate " : "", -stack_size);
3096        }        }
3097      printf("\n");      printf("\n");
3098      goto EXIT;      goto EXIT;
3099      }      }
3100    else if (strcmp(argv[op], "-help") == 0 ||    else if (strcmp(arg, "-help") == 0 ||
3101             strcmp(argv[op], "--help") == 0)             strcmp(arg, "--help") == 0)
3102      {      {
3103      usage();      usage();
3104      goto EXIT;      goto EXIT;
3105      }      }
3106    else    else
3107      {      {
3108      printf("** Unknown or malformed option %s\n", argv[op]);      BAD_ARG:
3109        printf("** Unknown or malformed option %s\n", arg);
3110      usage();      usage();
3111      yield = 1;      yield = 1;
3112      goto EXIT;      goto EXIT;
# Line 2514  pcre16_stack_malloc = stack_malloc; Line 3167  pcre16_stack_malloc = stack_malloc;
3167  pcre16_stack_free = stack_free;  pcre16_stack_free = stack_free;
3168  #endif  #endif
3169    
3170    #ifdef SUPPORT_PCRE32
3171    pcre32_malloc = new_malloc;
3172    pcre32_free = new_free;
3173    pcre32_stack_malloc = stack_malloc;
3174    pcre32_stack_free = stack_free;
3175    #endif
3176    
3177  /* Heading line unless quiet, then prompt for first regex if stdin */  /* Heading line unless quiet, then prompt for first regex if stdin */
3178    
3179  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
# Line 2551  while (!done) Line 3211  while (!done)
3211    int do_flip = 0;    int do_flip = 0;
3212    int erroroffset, len, delimiter, poffset;    int erroroffset, len, delimiter, poffset;
3213    
3214    #if !defined NODFA
3215      int dfa_matched = 0;
3216    #endif
3217    
3218    use_utf = 0;    use_utf = 0;
3219    debug_lengths = 1;    debug_lengths = 1;
3220    
# Line 2598  while (!done) Line 3262  while (!done)
3262        (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];        (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
3263    
3264      re = (pcre *)new_malloc(true_size);      re = (pcre *)new_malloc(true_size);
3265        if (re == NULL)
3266          {
3267          printf("** Failed to get %d bytes of memory for pcre object\n",
3268            (int)true_size);
3269          yield = 1;
3270          goto EXIT;
3271          }
3272      regex_gotten_store = first_gotten_store;      regex_gotten_store = first_gotten_store;
3273    
3274      if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;      if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
3275    
3276      magic = ((REAL_PCRE *)re)->magic_number;      magic = REAL_PCRE_MAGIC(re);
3277      if (magic != MAGIC_NUMBER)      if (magic != MAGIC_NUMBER)
3278        {        {
3279        if (swap_uint32(magic) == MAGIC_NUMBER)        if (swap_uint32(magic) == MAGIC_NUMBER)
# Line 2612  while (!done) Line 3283  while (!done)
3283        else        else
3284          {          {
3285          fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);          fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
3286            new_free(re);
3287          fclose(f);          fclose(f);
3288          continue;          continue;
3289          }          }
# Line 2641  while (!done) Line 3313  while (!done)
3313            {            {
3314            PCRE_FREE_STUDY(extra);            PCRE_FREE_STUDY(extra);
3315            }            }
3316          if (re != NULL) new_free(re);          new_free(re);
3317          fclose(f);          fclose(f);
3318          continue;          continue;
3319          }          }
# Line 2659  while (!done) Line 3331  while (!done)
3331          {          {
3332          /* Simulate the result of the function call below. */          /* Simulate the result of the function call below. */
3333          fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,          fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
3334            use_pcre16? "16" : "", PCRE_INFO_OPTIONS);            pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "",
3335          fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "            PCRE_INFO_OPTIONS);
3336            "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");          fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
3337              "%d-bit mode\n", 8 * CHAR_SIZE,
3338              8 * (REAL_PCRE_FLAGS(re) & PCRE_MODE_MASK));
3339            new_free(re);
3340            fclose(f);
3341          continue;          continue;
3342          }          }
3343        }        }
3344    
3345      /* Need to know if UTF-8 for printing data strings. */      /* Need to know if UTF-8 for printing data strings. */
3346    
3347      if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0) continue;      if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
3348          {
3349          new_free(re);
3350          fclose(f);
3351          continue;
3352          }
3353      use_utf = (get_options & PCRE_UTF8) != 0;      use_utf = (get_options & PCRE_UTF8) != 0;
3354    
3355      fclose(f);      fclose(f);
# Line 2727  while (!done) Line 3408  while (!done)
3408    /* Look for options after final delimiter */    /* Look for options after final delimiter */
3409    
3410    options = 0;    options = 0;
3411    study_options = 0;    study_options = force_study_options;
3412    log_store = showstore;  /* default from command line */    log_store = showstore;  /* default from command line */
3413    
3414    while (*pp != 0)    while (*pp != 0)
# Line 2764  while (!done) Line 3445  while (!done)
3445  #endif  #endif
3446    
3447        case 'S':        case 'S':
3448        if (do_study == 0)        do_study = 1;
3449          for (;;)
3450          {          {
3451          do_study = 1;          switch (*pp++)
         if (*pp == '+')  
3452            {            {
3453            study_options |= PCRE_STUDY_JIT_COMPILE;            case 'S':
3454            pp++;            do_study = 0;
3455              no_force_study = 1;
3456              break;
3457    
3458              case '!':
3459              study_options |= PCRE_STUDY_EXTRA_NEEDED;
3460              break;
3461    
3462              case '+':
3463              if (*pp == '+')
3464                {
3465                verify_jit = TRUE;
3466                pp++;
3467                }
3468              if (*pp >= '1' && *pp <= '7')
3469                study_options |= jit_study_bits[*pp++ - '1'];
3470              else
3471                study_options |= jit_study_bits[6];
3472              break;
3473    
3474              case '-':
3475              study_options &= ~PCRE_STUDY_ALLJIT;
3476              break;
3477    
3478              default:
3479              pp--;
3480              goto ENDLOOP;
3481            }            }
3482          }          }
3483        else        ENDLOOP:
         {  
         do_study = 0;  
         no_force_study = 1;  
         }  
3484        break;        break;
3485    
3486        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
# Line 2896  while (!done) Line 3599  while (!done)
3599  #endif  /* !defined NOPOSIX */  #endif  /* !defined NOPOSIX */
3600    
3601      {      {
3602      /* In 16-bit mode, convert the input. */      /* In 16- or 32-bit mode, convert the input. */
3603    
3604  #ifdef SUPPORT_PCRE16  #ifdef SUPPORT_PCRE16
3605      if (use_pcre16)      if (pcre_mode == PCRE16_MODE)
3606        {        {
3607        switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))        switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
3608          {          {
# Line 2925  while (!done) Line 3628  while (!done)
3628        }        }
3629  #endif  #endif
3630    
3631    #ifdef SUPPORT_PCRE32
3632        if (pcre_mode == PCRE32_MODE)
3633          {
3634          switch(to32(FALSE, p, options & PCRE_UTF32, (int)strlen((char *)p)))
3635            {
3636            case -1:
3637            fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3638              "converted to UTF-32\n");
3639            goto SKIP_DATA;
3640    
3641            case -2:
3642            fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3643              "cannot be converted to UTF-32\n");
3644            goto SKIP_DATA;
3645    
3646            case -3:
3647            fprintf(outfile, "**Failed: character value is ill-formed UTF-32\n");
3648            goto SKIP_DATA;
3649    
3650            default:
3651            break;
3652            }
3653          p = (pcre_uint8 *)buffer32;
3654          }
3655    #endif
3656    
3657      /* Compile many times when timing */      /* Compile many times when timing */
3658    
3659      if (timeit > 0)      if (timeit > 0)
# Line 2982  while (!done) Line 3711  while (!done)
3711      /* Extract the size for possible writing before possibly flipping it,      /* Extract the size for possible writing before possibly flipping it,
3712      and remember the store that was got. */      and remember the store that was got. */
3713    
3714      true_size = ((REAL_PCRE *)re)->size;      true_size = REAL_PCRE_SIZE(re);
3715      regex_gotten_store = first_gotten_store;      regex_gotten_store = first_gotten_store;
3716    
3717      /* Output code size information if requested */      /* Output code size information if requested */
3718    
3719      if (log_store)      if (log_store)
3720          {
3721          int name_count, name_entry_size, real_pcre_size;
3722    
3723          new_info(re, NULL, PCRE_INFO_NAMECOUNT, &name_count);
3724          new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &name_entry_size);
3725    #ifdef SUPPORT_PCRE8
3726          if (REAL_PCRE_FLAGS(re) & PCRE_MODE8)
3727            real_pcre_size = sizeof(real_pcre);
3728    #endif
3729    #ifdef SUPPORT_PCRE16
3730          if (REAL_PCRE_FLAGS(re) & PCRE_MODE16)
3731            real_pcre_size = sizeof(real_pcre16);
3732    #endif
3733    #ifdef SUPPORT_PCRE32
3734          if (REAL_PCRE_FLAGS(re) & PCRE_MODE32)
3735            real_pcre_size = sizeof(real_pcre32);
3736    #endif
3737        fprintf(outfile, "Memory allocation (code space): %d\n",        fprintf(outfile, "Memory allocation (code space): %d\n",
3738          (int)(first_gotten_store -          (int)(first_gotten_store - real_pcre_size - name_count * name_entry_size));
3739                sizeof(REAL_PCRE) -        }
               ((REAL_PCRE *)re)->name_count * ((REAL_PCRE *)re)->name_entry_size));  
3740    
3741      /* If -s or /S was present, study the regex to generate additional info to      /* If -s or /S was present, study the regex to generate additional info to
3742      help with the matching, unless the pattern has the SS option, which      help with the matching, unless the pattern has the SS option, which
# Line 3007  while (!done) Line 3752  while (!done)
3752          clock_t start_time = clock();          clock_t start_time = clock();
3753          for (i = 0; i < timeit; i++)          for (i = 0; i < timeit; i++)
3754            {            {
3755            PCRE_STUDY(extra, re, study_options | force_study_options, &error);            PCRE_STUDY(extra, re, study_options, &error);
3756            }            }
3757          time_taken = clock() - start_time;          time_taken = clock() - start_time;
3758          if (extra != NULL)          if (extra != NULL)
# Line 3018  while (!done) Line 3763  while (!done)
3763            (((double)time_taken * 1000.0) / (double)timeit) /            (((double)time_taken * 1000.0) / (double)timeit) /
3764              (double)CLOCKS_PER_SEC);              (double)CLOCKS_PER_SEC);
3765          }          }
3766        PCRE_STUDY(extra, re, study_options | force_study_options, &error);        PCRE_STUDY(extra, re, study_options, &error);
3767        if (error != NULL)        if (error != NULL)
3768          fprintf(outfile, "Failed to study: %s\n", error);          fprintf(outfile, "Failed to study: %s\n", error);
3769        else if (extra != NULL)        else if (extra != NULL)
# Line 3062  while (!done) Line 3807  while (!done)
3807      if (do_showinfo)      if (do_showinfo)
3808        {        {
3809        unsigned long int all_options;        unsigned long int all_options;
3810        int count, backrefmax, first_char, need_char, okpartial, jchanged,        pcre_uint32 first_char, need_char;
3811          hascrorlf;        int count, backrefmax, first_char_set, need_char_set, okpartial, jchanged,
3812            hascrorlf, maxlookbehind;
3813        int nameentrysize, namecount;        int nameentrysize, namecount;
3814        const pcre_uint8 *nametable;        const pcre_uint8 *nametable;
3815    
3816        if (new_info(re, NULL, PCRE_INFO_SIZE, &size) +        if (new_info(re, NULL, PCRE_INFO_SIZE, &size) +
3817            new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +            new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
3818            new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +            new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
3819            new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char) +            new_info(re, NULL, PCRE_INFO_FIRSTCHARACTER, &first_char) +
3820            new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char) +            new_info(re, NULL, PCRE_INFO_FIRSTCHARACTERFLAGS, &first_char_set) +
3821              new_info(re, NULL, PCRE_INFO_REQUIREDCHAR, &need_char) +
3822              new_info(re, NULL, PCRE_INFO_REQUIREDCHARFLAGS, &need_char_set) +
3823            new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +            new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
3824            new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +            new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
3825            new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +            new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
3826            new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +            new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
3827            new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +            new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
3828            new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf)            new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf) +
3829              new_info(re, NULL, PCRE_INFO_MAXLOOKBEHIND, &maxlookbehind)
3830            != 0)            != 0)
3831          goto SKIP_DATA;          goto SKIP_DATA;
3832    
# Line 3094  while (!done) Line 3843  while (!done)
3843          fprintf(outfile, "Named capturing subpatterns:\n");          fprintf(outfile, "Named capturing subpatterns:\n");
3844          while (namecount-- > 0)          while (namecount-- > 0)
3845            {            {
3846  #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16            int imm2_size = pcre_mode == PCRE8_MODE ? 2 : 1;
           int imm2_size = use_pcre16 ? 1 : 2;  
 #else  
           int imm2_size = IMM2_SIZE;  
 #endif  
3847            int length = (int)STRLEN(nametable + imm2_size);            int length = (int)STRLEN(nametable + imm2_size);
3848            fprintf(outfile, "  ");            fprintf(outfile, "  ");
3849            PCHARSV(nametable, imm2_size, length, outfile);            PCHARSV(nametable, imm2_size, length, outfile);
3850            while (length++ < nameentrysize - imm2_size) putc(' ', outfile);            while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
3851  #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16  #ifdef SUPPORT_PCRE32
3852            fprintf(outfile, "%3d\n", use_pcre16?            if (pcre_mode == PCRE32_MODE)
3853               (int)(((PCRE_SPTR16)nametable)[0])              fprintf(outfile, "%3d\n", (int)(((PCRE_SPTR32)nametable)[0]));
3854              :((int)nametable[0] << 8) | (int)nametable[1]);  #endif
3855            nametable += nameentrysize * (use_pcre16 ? 2 : 1);  #ifdef SUPPORT_PCRE16
3856  #else            if (pcre_mode == PCRE16_MODE)
3857            fprintf(outfile, "%3d\n", GET2(nametable, 0));              fprintf(outfile, "%3d\n", (int)(((PCRE_SPTR16)nametable)[0]));
 #ifdef SUPPORT_PCRE8  
           nametable += nameentrysize;  
 #else  
           nametable += nameentrysize * 2;  
3858  #endif  #endif
3859    #ifdef SUPPORT_PCRE8
3860              if (pcre_mode == PCRE8_MODE)
3861                fprintf(outfile, "%3d\n", ((int)nametable[0] << 8) | (int)nametable[1]);
3862  #endif  #endif
3863              nametable += nameentrysize * CHAR_SIZE;
3864            }            }
3865          }          }
3866    
3867        if (!okpartial) fprintf(outfile, "Partial matching not supported\n");        if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
3868        if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");        if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
3869    
3870        all_options = ((REAL_PCRE *)re)->options;        all_options = REAL_PCRE_OPTIONS(re);
3871        if (do_flip) all_options = swap_uint32(all_options);        if (do_flip) all_options = swap_uint32(all_options);
3872    
3873        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
# Line 3173  while (!done) Line 3918  while (!done)
3918          break;          break;
3919          }          }
3920    
3921        if (first_char == -1)        if (first_char_set == 2)
3922          {          {
3923          fprintf(outfile, "First char at start or follows newline\n");          fprintf(outfile, "First char at start or follows newline\n");
3924          }          }
3925        else if (first_char < 0)        else if (first_char_set == 1)
         {  
         fprintf(outfile, "No first char\n");  
         }  
       else  
3926          {          {
3927          const char *caseless =          const char *caseless =
3928            ((((REAL_PCRE *)re)->flags & PCRE_FCH_CASELESS) == 0)?            ((REAL_PCRE_FLAGS(re) & PCRE_FCH_CASELESS) == 0)?
3929            "" : " (caseless)";            "" : " (caseless)";
3930    
3931          if (PRINTOK(first_char))          if (PRINTOK(first_char))
# Line 3196  while (!done) Line 3937  while (!done)
3937            fprintf(outfile, "%s\n", caseless);            fprintf(outfile, "%s\n", caseless);
3938            }            }
3939          }          }
3940          else
3941            {
3942            fprintf(outfile, "No first char\n");
3943            }
3944    
3945        if (need_char < 0)        if (need_char_set == 0)
3946          {          {
3947          fprintf(outfile, "No need char\n");          fprintf(outfile, "No need char\n");
3948          }          }
3949        else        else
3950          {          {
3951          const char *caseless =          const char *caseless =
3952            ((((REAL_PCRE *)re)->flags & PCRE_RCH_CASELESS) == 0)?            ((REAL_PCRE_FLAGS(re) & PCRE_RCH_CASELESS) == 0)?
3953            "" : " (caseless)";            "" : " (caseless)";
3954    
3955          if (PRINTOK(need_char))          if (PRINTOK(need_char))
# Line 3217  while (!done) Line 3962  while (!done)
3962            }            }
3963          }          }
3964    
3965          if (maxlookbehind > 0)
3966            fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
3967    
3968        /* Don't output study size; at present it is in any case a fixed        /* Don't output study size; at present it is in any case a fixed
3969        value, but it varies, depending on the computer architecture, and        value, but it varies, depending on the computer architecture, and
3970        so messes up the test suite. (And with the /F option, it might be        so messes up the test suite. (And with the /F option, it might be
# Line 3274  while (!done) Line 4022  while (!done)
4022    
4023          /* Show this only if the JIT was set by /S, not by -s. */          /* Show this only if the JIT was set by /S, not by -s. */
4024    
4025          if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)          if ((study_options & PCRE_STUDY_ALLJIT) != 0 &&
4026                (force_study_options & PCRE_STUDY_ALLJIT) == 0)
4027            {            {
4028            int jit;            int jit;
4029            if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)            if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)
# Line 3361  while (!done) Line 4110  while (!done)
4110    
4111    for (;;)    for (;;)
4112      {      {
4113      pcre_uint8 *q;  #ifdef SUPPORT_PCRE8
4114        pcre_uint8 *q8;
4115    #endif
4116    #ifdef SUPPORT_PCRE16
4117        pcre_uint16 *q16;
4118    #endif
4119    #ifdef SUPPORT_PCRE32
4120        pcre_uint32 *q32;
4121    #endif
4122      pcre_uint8 *bptr;      pcre_uint8 *bptr;
4123      int *use_offsets = offsets;      int *use_offsets = offsets;
4124      int use_size_offsets = size_offsets;      int use_size_offsets = size_offsets;
4125      int callout_data = 0;      int callout_data = 0;
4126      int callout_data_set = 0;      int callout_data_set = 0;
4127      int count, c;      int count;
4128        pcre_uint32 c;
4129      int copystrings = 0;      int copystrings = 0;
4130      int find_match_limit = default_find_match_limit;      int find_match_limit = default_find_match_limit;
4131      int getstrings = 0;      int getstrings = 0;
# Line 3381  while (!done) Line 4139  while (!done)
4139      *copynames = 0;      *copynames = 0;
4140      *getnames = 0;      *getnames = 0;
4141    
4142    #ifdef SUPPORT_PCRE32
4143        cn32ptr = copynames;
4144        gn32ptr = getnames;
4145    #endif
4146  #ifdef SUPPORT_PCRE16  #ifdef SUPPORT_PCRE16
4147      cn16ptr = copynames;      cn16ptr = copynames16;
4148      gn16ptr = getnames;      gn16ptr = getnames16;
4149  #endif  #endif
4150  #ifdef SUPPORT_PCRE8  #ifdef SUPPORT_PCRE8
4151      cn8ptr = copynames8;      cn8ptr = copynames8;
4152      gn8ptr = getnames8;      gn8ptr = getnames8;
4153  #endif  #endif
4154    
4155      SET_PCRE_CALLOUT(callout);      SET_PCRE_CALLOUT(callout);
4156      first_callout = 1;      first_callout = 1;
# Line 3428  while (!done) Line 4190  while (!done)
4190      p = buffer;      p = buffer;
4191      while (isspace(*p)) p++;      while (isspace(*p)) p++;
4192    
4193      bptr = q = dbuffer;  #ifndef NOUTF
4194        /* Check that the data is well-formed UTF-8 if we're in UTF mode. To create
4195           invalid input to pcre_exec, you must use \x?? or \x{} sequences. */
4196        if (use_utf)
4197          {
4198          char *q;
4199          pcre_uint32 c;
4200          int n = 1;
4201    
4202          for (q = p; n > 0 && *q; q += n) n = utf82ord(q, &c);
4203          if (n <= 0)
4204            {
4205            fprintf(outfile, "**Failed: invalid UTF-8 string cannot be used as input in UTF mode\n");
4206            goto NEXT_DATA;
4207            }
4208          }
4209    #endif
4210    
4211        /* Allocate a buffer to hold the data line. len+1 is an upper bound on
4212           the number of pcre_uchar units that will be needed. */
4213        if (dbuffer == NULL || len >= dbuffer_size)
4214          {
4215          dbuffer_size *= 2;
4216          dbuffer = (pcre_uint8 *)realloc(dbuffer, dbuffer_size * CHAR_SIZE);
4217          if (dbuffer == NULL)
4218            {
4219            fprintf(stderr, "pcretest: malloc(%d) failed\n", dbuffer_size);
4220            exit(1);
4221            }
4222          }
4223    
4224    #ifdef SUPPORT_PCRE8
4225        q8 = (pcre_uint8 *) dbuffer;
4226    #endif
4227    #ifdef SUPPORT_PCRE16
4228        q16 = (pcre_uint16 *) dbuffer;
4229    #endif
4230    #ifdef SUPPORT_PCRE32
4231        q32 = (pcre_uint32 *) dbuffer;
4232    #endif
4233    
4234      while ((c = *p++) != 0)      while ((c = *p++) != 0)
4235        {        {
4236        int i = 0;        int i = 0;
# Line 3437  while (!done) Line 4239  while (!done)
4239        /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.        /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.
4240        In non-UTF mode, allow the value of the byte to fall through to later,        In non-UTF mode, allow the value of the byte to fall through to later,
4241        where values greater than 127 are turned into UTF-8 when running in        where values greater than 127 are turned into UTF-8 when running in
4242        16-bit mode. */        16-bit or 32-bit mode. */
4243    
4244        if (c != '\\')        if (c != '\\')
4245          {          {
4246          if (use_utf)  #ifndef NOUTF
4247            {          if (use_utf && HASUTF8EXTRALEN(c)) { GETUTF8INC(c, p); }
4248            *q++ = c;  #endif
           continue;  
           }  
4249          }          }
4250    
4251        /* Handle backslash escapes */        /* Handle backslash escapes */
# Line 3483  while (!done) Line 4283  while (!done)
4283              {              {
4284              if (++i == 9)              if (++i == 9)
4285                fprintf(outfile, "** Too many hex digits in \\x{...} item; "                fprintf(outfile, "** Too many hex digits in \\x{...} item; "
4286                                 "using only the first eight.\n");                                 "using only the first eight.\n");
4287              else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);              else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
4288              }              }
4289            if (*pt == '}')            if (*pt == '}')
4290              {              {
4291              p = pt + 1;              p = pt + 1;
# Line 3498  while (!done) Line 4298  while (!done)
4298          allows UTF-8 characters to be constructed byte by byte, and also allows          allows UTF-8 characters to be constructed byte by byte, and also allows
4299          invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.          invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.
4300          Otherwise, pass it down to later code so that it can be turned into          Otherwise, pass it down to later code so that it can be turned into
4301          UTF-8 when running in 16-bit mode. */          UTF-8 when running in 16/32-bit mode. */
4302    
4303          c = 0;          c = 0;
4304          while (i++ < 2 && isxdigit(*p))          while (i++ < 2 && isxdigit(*p))
# Line 3506  while (!done) Line 4306  while (!done)
4306            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
4307            p++;            p++;
4308            }            }
4309          if (use_utf)  #if !defined NOUTF && defined SUPPORT_PCRE8
4310            if (use_utf && (pcre_mode == PCRE8_MODE))
4311            {            {
4312            *q++ = c;            *q8++ = c;
4313            continue;            continue;
4314            }            }
4315    #endif
4316          break;          break;
4317    
4318          case 0:   /* \ followed by EOF allows for an empty line */          case 0:   /* \ followed by EOF allows for an empty line */
# Line 3543  while (!done) Line 4345  while (!done)
4345            }            }
4346          else if (isalnum(*p))          else if (isalnum(*p))
4347            {            {
4348            READ_CAPTURE_NAME(p, &cn8ptr, &cn16ptr, re);            READ_CAPTURE_NAME(p, &cn8ptr, &cn16ptr, &cn32ptr, re);
4349            }            }
4350          else if (*p == '+')          else if (*p == '+')
4351            {            {
# Line 3606  while (!done) Line 4408  while (!done)
4408            }            }
4409          else if (isalnum(*p))          else if (isalnum(*p))
4410            {            {
4411            READ_CAPTURE_NAME(p, &gn8ptr, &gn16ptr, re);            READ_CAPTURE_NAME(p, &gn8ptr, &gn16ptr, &gn32ptr, re);
4412            }            }
4413          continue;          continue;
4414    
# Line 3654  while (!done) Line 4456  while (!done)
4456            }            }
4457          use_size_offsets = n;          use_size_offsets = n;
4458          if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */          if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */
4459              else use_offsets = offsets + size_offsets_max - n;  /* To catch overruns */
4460          continue;          continue;
4461    
4462          case 'P':          case 'P':
# Line 3716  while (!done) Line 4519  while (!done)
4519          }          }
4520    
4521        /* We now have a character value in c that may be greater than 255. In        /* We now have a character value in c that may be greater than 255. In
4522        16-bit mode, we always convert characters to UTF-8 so that values greater        16-bit or 32-bit mode, we always convert characters to UTF-8 so that
4523        than 255 can be passed to non-UTF 16-bit strings. In 8-bit mode we        values greater than 255 can be passed to non-UTF 16- or 32-bit strings.
4524        convert to UTF-8 if we are in UTF mode. Values greater than 127 in UTF        In 8-bit mode we convert to UTF-8 if we are in UTF mode. Values greater
4525        mode must have come from \x{...} or octal constructs because values from        than 127 in UTF mode must have come from \x{...} or octal constructs
4526        \x.. get this far only in non-UTF mode. */        because values from \x.. get this far only in non-UTF mode. */
4527    
4528  #if !defined NOUTF || defined SUPPORT_PCRE16  #ifdef SUPPORT_PCRE8
4529        if (use_pcre16 || use_utf)        if (pcre_mode == PCRE8_MODE)
4530          {          {
4531          pcre_uint8 buff8[8];  #ifndef NOUTF
4532          int ii, utn;          if (use_utf)
4533          utn = ord2utf8(c, buff8);            {
4534          for (ii = 0; ii < utn; ii++) *q++ = buff8[ii];            q8 += ord2utf8(c, q8);
4535              }
4536            else
4537    #endif
4538              {
4539              if (c > 0xffu)
4540                {
4541                fprintf(outfile, "** Character \\x{%x} is greater than 255 "
4542                  "and UTF-8 mode is not enabled.\n", c);
4543                fprintf(outfile, "** Truncation will probably give the wrong "
4544                  "result.\n");
4545                }
4546    
4547              *q8++ = c;
4548              }
4549          }          }
       else  
4550  #endif  #endif
4551    #ifdef SUPPORT_PCRE16
4552          if (pcre_mode == PCRE16_MODE)
4553          {          {
4554          if (c > 255)  #ifndef NOUTF
4555            if (use_utf)
4556              {
4557              if (c > 0x10ffffu)
4558                {
4559                fprintf(outfile, "**Failed: character value greater than 0x10ffff "
4560                  "cannot be converted to UTF-16\n");
4561                goto NEXT_DATA;
4562                }
4563              else if (c >= 0x10000u)
4564                {
4565                c-= 0x10000u;
4566                *q16++ = 0xD800 | (c >> 10);
4567                *q16++ = 0xDC00 | (c & 0x3ff);
4568                }
4569              else
4570                *q16++ = c;
4571              }
4572            else
4573    #endif
4574            {            {
4575            fprintf(outfile, "** Character \\x{%x} is greater than 255 "            if (c > 0xffffu)
4576              "and UTF-8 mode is not enabled.\n", c);              {
4577            fprintf(outfile, "** Truncation will probably give the wrong "              fprintf(outfile, "** Character value is greater than 0xffff "
4578              "result.\n");                "and UTF-16 mode is not enabled.\n", c);
4579                fprintf(outfile, "** Truncation will probably give the wrong "
4580                  "result.\n");
4581                }
4582    
4583              *q16++ = c;
4584            }            }
         *q++ = c;  
4585          }          }
4586    #endif
4587    #ifdef SUPPORT_PCRE32
4588          if (pcre_mode == PCRE32_MODE)
4589            {
4590            *q32++ = c;
4591            }
4592    #endif
4593    
4594        }        }
4595    
4596      /* Reached end of subject string */      /* Reached end of subject string */
4597    
4598      *q = 0;  #ifdef SUPPORT_PCRE8
4599      len = (int)(q - dbuffer);      if (pcre_mode == PCRE8_MODE)
4600        {
4601          *q8 = 0;
4602          len = (int)(q8 - (pcre_uint8 *)dbuffer);
4603        }
4604    #endif
4605    #ifdef SUPPORT_PCRE16
4606        if (pcre_mode == PCRE16_MODE)
4607        {
4608          *q16 = 0;
4609          len = (int)(q16 - (pcre_uint16 *)dbuffer);
4610        }
4611    #endif
4612    #ifdef SUPPORT_PCRE32
4613        if (pcre_mode == PCRE32_MODE)
4614        {
4615          *q32 = 0;
4616          len = (int)(q32 - (pcre_uint32 *)dbuffer);
4617        }
4618    #endif
4619    
4620    #if defined SUPPORT_UTF && defined SUPPORT_PCRE32
4621        /* If we're requested to test UTF-32 masking of high bits, change the data
4622        string to have high bits set, unless the string is invalid UTF-32.
4623        Since the JIT doesn't support this yet, only do it when not JITing. */
4624        if (use_utf && mask_utf32 && (study_options & PCRE_STUDY_ALLJIT) == 0 &&
4625            valid_utf32((pcre_uint32 *)dbuffer, len))
4626          {
4627          for (q32 = (pcre_uint32 *)dbuffer; *q32; q32++)
4628            *q32 |= ~(pcre_uint32)UTF32_MASK;
4629    
4630          options |= PCRE_NO_UTF32_CHECK;
4631          }
4632    #endif
4633    
4634      /* Move the data to the end of the buffer so that a read over the end of      /* Move the data to the end of the buffer so that a read over the end of
4635      the buffer will be seen by valgrind, even if it doesn't cause a crash. If      the buffer will be seen by valgrind, even if it doesn't cause a crash. If
4636      we are using the POSIX interface, we must include the terminating zero. */      we are using the POSIX interface, we must include the terminating zero. */
4637    
4638        bptr = dbuffer;
4639    
4640  #if !defined NOPOSIX  #if !defined NOPOSIX
4641      if (posix || do_posix)      if (posix || do_posix)
4642        {        {
4643        memmove(bptr + buffer_size - len - 1, bptr, len + 1);        memmove(bptr + dbuffer_size - len - 1, bptr, len + 1);
4644        bptr += buffer_size - len - 1;        bptr += dbuffer_size - len - 1;
4645        }        }
4646      else      else
4647  #endif  #endif
4648        {        {
4649        memmove(bptr + buffer_size - len, bptr, len);        bptr = memmove(bptr + (dbuffer_size - len) * CHAR_SIZE, bptr, len * CHAR_SIZE);
       bptr += buffer_size - len;  
4650        }        }
4651    
4652      if ((all_use_dfa || use_dfa) && find_match_limit)      if ((all_use_dfa || use_dfa) && find_match_limit)
# Line 3794  while (!done) Line 4677  while (!done)
4677          (void)regerror(rc, &preg, (char *)buffer, buffer_size);          (void)regerror(rc, &preg, (char *)buffer, buffer_size);
4678          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
4679          }          }
4680        else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)        else if ((REAL_PCRE_OPTIONS(preg.re_pcre) & PCRE_NO_AUTO_CAPTURE) != 0)
               != 0)  
4681          {          {
4682          fprintf(outfile, "Matched with REG_NOSUB\n");          fprintf(outfile, "Matched with REG_NOSUB\n");
4683          }          }
# Line 3828  while (!done) Line 4710  while (!done)
4710    
4711      /* Handle matching via the native interface - repeats for /g and /G */      /* Handle matching via the native interface - repeats for /g and /G */
4712    
4713  #ifdef SUPPORT_PCRE16      /* Ensure that there is a JIT callback if we want to verify that JIT was
4714      if (use_pcre16)      actually used. If jit_stack == NULL, no stack has yet been assigned. */
       {  
       len = to16(TRUE, bptr, (((REAL_PCRE *)re)->options) & PCRE_UTF8, len);  
       switch(len)  
         {  
         case -1:  
         fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "  
           "converted to UTF-16\n");  
         goto NEXT_DATA;  
4715    
4716          case -2:      if (verify_jit && jit_stack == NULL && extra != NULL)
4717          fprintf(outfile, "**Failed: character value greater than 0x10ffff "         { PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack); }
           "cannot be converted to UTF-16\n");  
         goto NEXT_DATA;  
   
         case -3:  
         fprintf(outfile, "**Failed: character value greater than 0xffff "  
           "cannot be converted to 16-bit in non-UTF mode\n");  
         goto NEXT_DATA;  
   
         default:  
         break;  
         }  
       bptr = (pcre_uint8 *)buffer16;  
       }  
 #endif  
4718    
4719      for (;; gmatched++)    /* Loop for /g or /G */      for (;; gmatched++)    /* Loop for /g or /G */
4720        {        {
4721        markptr = NULL;        markptr = NULL;
4722          jit_was_used = FALSE;
4723    
4724        if (timeitm > 0)        if (timeitm > 0)
4725          {          {
# Line 3869  while (!done) Line 4730  while (!done)
4730  #if !defined NODFA  #if !defined NODFA
4731          if (all_use_dfa || use_dfa)          if (all_use_dfa || use_dfa)
4732            {            {
4733            int workspace[1000];            if ((options & PCRE_DFA_RESTART) != 0)
4734                {
4735                fprintf(outfile, "Timing DFA restarts is not supported\n");
4736                break;
4737                }
4738              if (dfa_workspace == NULL)
4739                dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
4740            for (i = 0; i < timeitm; i++)            for (i = 0; i < timeitm; i++)
4741              {              {
4742              PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,              PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
4743                (options | g_notempty), use_offsets, use_size_offsets, workspace,                (options | g_notempty), use_offsets, use_size_offsets,
4744                (sizeof(workspace)/sizeof(int)));                dfa_workspace, DFA_WS_DIMENSION);
4745              }              }
4746            }            }
4747          else          else
# Line 3900  while (!done) Line 4767  while (!done)
4767    
4768        if (find_match_limit)        if (find_match_limit)
4769          {          {
4770          if (extra == NULL)          if (extra != NULL) { PCRE_FREE_STUDY(extra); }
4771            {          extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4772            extra = (pcre_extra *)malloc(sizeof(pcre_extra));          extra->flags = 0;
           extra->flags = 0;  
           }  
         else extra->flags &= ~PCRE_EXTRA_EXECUTABLE_JIT;  
4773    
4774          (void)check_match_limit(re, extra, bptr, len, start_offset,          (void)check_match_limit(re, extra, bptr, len, start_offset,
4775            options|g_notempty, use_offsets, use_size_offsets,            options|g_notempty, use_offsets, use_size_offsets,
# Line 3940  while (!done) Line 4804  while (!done)
4804  #if !defined NODFA  #if !defined NODFA
4805        else if (all_use_dfa || use_dfa)        else if (all_use_dfa || use_dfa)
4806          {          {
4807          int workspace[1000];          if (dfa_workspace == NULL)
4808              dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
4809            if (dfa_matched++ == 0)
4810              dfa_workspace[0] = -1;  /* To catch bad restart */
4811          PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,          PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
4812            (options | g_notempty), use_offsets, use_size_offsets, workspace,            (options | g_notempty), use_offsets, use_size_offsets, dfa_workspace,
4813            (sizeof(workspace)/sizeof(int)));            DFA_WS_DIMENSION);
4814          if (count == 0)          if (count == 0)
4815            {            {
4816            fprintf(outfile, "Matched, but too many subsidiary matches\n");            fprintf(outfile, "Matched, but too many subsidiary matches\n");
# Line 4020  while (!done) Line 4887  while (!done)
4887              fprintf(outfile, "%2d: ", i/2);              fprintf(outfile, "%2d: ", i/2);
4888              PCHARSV(bptr, use_offsets[i],              PCHARSV(bptr, use_offsets[i],
4889                use_offsets[i+1] - use_offsets[i], outfile);                use_offsets[i+1] - use_offsets[i], outfile);
4890                if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
4891              fprintf(outfile, "\n");              fprintf(outfile, "\n");
4892              if (do_showcaprest || (i == 0 && do_showrest))              if (do_showcaprest || (i == 0 && do_showrest))
4893                {                {
# Line 4063  while (!done) Line 4931  while (!done)
4931            int rc;            int rc;
4932            char copybuffer[256];            char copybuffer[256];
4933    
4934            if (use_pcre16)  #ifdef SUPPORT_PCRE32
4935              if (pcre_mode == PCRE32_MODE)
4936                {
4937                if (*(pcre_uint32 *)cnptr == 0) break;
4938                }
4939    #endif
4940    #ifdef SUPPORT_PCRE16
4941              if (pcre_mode == PCRE16_MODE)
4942              {              {
4943              if (*(pcre_uint16 *)cnptr == 0) break;              if (*(pcre_uint16 *)cnptr == 0) break;
4944              }              }
4945            else  #endif
4946    #ifdef SUPPORT_PCRE8
4947              if (pcre_mode == PCRE8_MODE)
4948              {              {
4949              if (*(pcre_uint8 *)cnptr == 0) break;              if (*(pcre_uint8 *)cnptr == 0) break;
4950              }              }
4951    #endif
4952    
4953            PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,            PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
4954              cnptr, copybuffer, sizeof(copybuffer));              cnptr, copybuffer, sizeof(copybuffer));
# Line 4118  while (!done) Line 4996  while (!done)
4996            int rc;            int rc;
4997            const char *substring;            const char *substring;
4998    
4999            if (use_pcre16)  #ifdef SUPPORT_PCRE32
5000              if (pcre_mode == PCRE32_MODE)
5001                {
5002                if (*(pcre_uint32 *)gnptr == 0) break;
5003                }
5004    #endif
5005    #ifdef SUPPORT_PCRE16
5006              if (pcre_mode == PCRE16_MODE)
5007              {              {
5008              if (*(pcre_uint16 *)gnptr == 0) break;              if (*(pcre_uint16 *)gnptr == 0) break;
5009              }              }
5010            else  #endif
5011    #ifdef SUPPORT_PCRE8
5012              if (pcre_mode == PCRE8_MODE)
5013              {              {
5014              if (*(pcre_uint8 *)gnptr == 0) break;              if (*(pcre_uint8 *)gnptr == 0) break;
5015              }              }
5016    #endif
5017    
5018            PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,            PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
5019              gnptr, &substring);              gnptr, &substring);
# Line 4186  while (!done) Line 5074  while (!done)
5074            PCHARSV(bptr, use_offsets[0], use_offsets[1] - use_offsets[0],            PCHARSV(bptr, use_offsets[0], use_offsets[1] - use_offsets[0],
5075              outfile);              outfile);
5076            }            }
5077            if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
5078          fprintf(outfile, "\n");          fprintf(outfile, "\n");
5079          break;  /* Out of the /g loop */          break;  /* Out of the /g loop */
5080          }          }
# Line 4211  while (!done) Line 5100  while (!done)
5100          if (g_notempty != 0)          if (g_notempty != 0)
5101            {            {
5102            int onechar = 1;            int onechar = 1;
5103            unsigned int obits = ((REAL_PCRE *)re)->options;            unsigned int obits = REAL_PCRE_OPTIONS(re);
5104            use_offsets[0] = start_offset;            use_offsets[0] = start_offset;
5105            if ((obits & PCRE_NEWLINE_BITS) == 0)            if ((obits & PCRE_NEWLINE_BITS) == 0)
5106              {              {
# Line 4229  while (!done) Line 5118  while (!done)
5118                 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||                 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
5119                 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)                 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
5120                &&                &&
5121                start_offset < len - 1 &&                start_offset < len - 1 && (
5122  #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16  #ifdef SUPPORT_PCRE8
5123                (use_pcre16?                (pcre_mode == PCRE8_MODE &&
5124                     ((PCRE_SPTR16)bptr)[start_offset] == '\r'                 bptr[start_offset] == '\r' &&
5125                  && ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n'                 bptr[start_offset + 1] == '\n') ||
5126                :  #endif
5127                     bptr[start_offset] == '\r'  #ifdef SUPPORT_PCRE16
5128                  && bptr[start_offset + 1] == '\n')                (pcre_mode == PCRE16_MODE &&
5129  #elif defined SUPPORT_PCRE16                 ((PCRE_SPTR16)bptr)[start_offset] == '\r' &&
5130                   ((PCRE_SPTR16)bptr)[start_offset] == '\r'                 ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n') ||
5131                && ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n'  #endif
5132  #else  #ifdef SUPPORT_PCRE32
5133                   bptr[start_offset] == '\r'                (pcre_mode == PCRE32_MODE &&
5134                && bptr[start_offset + 1] == '\n'                 ((PCRE_SPTR32)bptr)[start_offset] == '\r' &&
5135                   ((PCRE_SPTR32)bptr)[start_offset + 1] == '\n') ||
5136  #endif  #endif
5137                )                0))
5138              onechar++;              onechar++;
5139            else if (use_utf)            else if (use_utf)
5140              {              {
# Line 4265  while (!done) Line 5155  while (!done)
5155                {                {
5156                if (markptr == NULL)                if (markptr == NULL)
5157                  {                  {
5158                  fprintf(outfile, "No match\n");                  fprintf(outfile, "No match");
5159                  }                  }
5160                else                else
5161                  {                  {
5162                  fprintf(outfile, "No match, mark = ");                  fprintf(outfile, "No match, mark = ");
5163                  PCHARSV(markptr, 0, -1, outfile);                  PCHARSV(markptr, 0, -1, outfile);
                 putc('\n', outfile);  
5164                  }                  }
5165                  if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
5166                  putc('\n', outfile);
5167                }                }
5168              break;              break;
5169    
5170              case PCRE_ERROR_BADUTF8:              case PCRE_ERROR_BADUTF8:
5171              case PCRE_ERROR_SHORTUTF8:              case PCRE_ERROR_SHORTUTF8:
5172              fprintf(outfile, "Error %d (%s UTF-%s string)", count,              fprintf(outfile, "Error %d (%s UTF-%d string)", count,
5173                (count == PCRE_ERROR_BADUTF8)? "bad" : "short",                (count == PCRE_ERROR_BADUTF8)? "bad" : "short",
5174                use_pcre16? "16" : "8");                8 * CHAR_SIZE);
5175              if (use_size_offsets >= 2)              if (use_size_offsets >= 2)
5176                fprintf(outfile, " offset=%d reason=%d", use_offsets[0],                fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
5177                  use_offsets[1]);                  use_offsets[1]);
# Line 4288  while (!done) Line 5179  while (!done)
5179              break;              break;
5180    
5181              case PCRE_ERROR_BADUTF8_OFFSET:              case PCRE_ERROR_BADUTF8_OFFSET:
5182              fprintf(outfile, "Error %d (bad UTF-%s offset)\n", count,              fprintf(outfile, "Error %d (bad UTF-%d offset)\n", count,
5183                use_pcre16? "16" : "8");                8 * CHAR_SIZE);
5184              break;              break;
5185    
5186              default:              default:
5187              if (count < 0 && (-count) < sizeof(errtexts)/sizeof(const char *))              if (count < 0 &&
5188                    (-count) < (int)(sizeof(errtexts)/sizeof(const char *)))
5189                fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);                fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
5190              else              else
5191                fprintf(outfile, "Error %d (Unexpected value)\n", count);                fprintf(outfile, "Error %d (Unexpected value)\n", count);
# Line 4378  free(offsets); Line 5270  free(offsets);
5270  #ifdef SUPPORT_PCRE16  #ifdef SUPPORT_PCRE16
5271  if (buffer16 != NULL) free(buffer16);  if (buffer16 != NULL) free(buffer16);
5272  #endif  #endif
5273    #ifdef SUPPORT_PCRE32
5274    if (buffer32 != NULL) free(buffer32);
5275    #endif
5276    
5277    #if !defined NODFA
5278    if (dfa_workspace != NULL)
5279      free(dfa_workspace);
5280    #endif
5281    
5282  return yield;  return yield;
5283  }  }

Legend:
Removed from v.895  
changed lines
  Added in v.1117

  ViewVC Help
Powered by ViewVC 1.1.5