/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 895 by ph10, Fri Jan 20 12:12:03 2012 UTC revision 1101 by chpe, Tue Oct 16 15:56:30 2012 UTC
# Line 46  pcre_internal.h that depend on COMPILE_P Line 46  pcre_internal.h that depend on COMPILE_P
46  however, make use of SUPPORT_PCRE8 and SUPPORT_PCRE16 to ensure that it calls  however, make use of SUPPORT_PCRE8 and SUPPORT_PCRE16 to ensure that it calls
47  only supported library functions. */  only supported library functions. */
48    
   
49  #ifdef HAVE_CONFIG_H  #ifdef HAVE_CONFIG_H
50  #include "config.h"  #include "config.h"
51  #endif  #endif
# Line 59  only supported library functions. */ Line 58  only supported library functions. */
58  #include <locale.h>  #include <locale.h>
59  #include <errno.h>  #include <errno.h>
60    
61  #ifdef SUPPORT_LIBREADLINE  /* Both libreadline and libedit are optionally supported. The user-supplied
62    original patch uses readline/readline.h for libedit, but in at least one system
63    it is installed as editline/readline.h, so the configuration code now looks for
64    that first, falling back to readline/readline.h. */
65    
66    #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
67  #ifdef HAVE_UNISTD_H  #ifdef HAVE_UNISTD_H
68  #include <unistd.h>  #include <unistd.h>
69  #endif  #endif
70    #if defined(SUPPORT_LIBREADLINE)
71  #include <readline/readline.h>  #include <readline/readline.h>
72  #include <readline/history.h>  #include <readline/history.h>
73    #else
74    #if defined(HAVE_EDITLINE_READLINE_H)
75    #include <editline/readline.h>
76    #else
77    #include <readline/readline.h>
78    #endif
79    #endif
80  #endif  #endif
   
81    
82  /* A number of things vary for Windows builds. Originally, pcretest opened its  /* A number of things vary for Windows builds. Originally, pcretest opened its
83  input and output without "b"; then I was told that "b" was needed in some  input and output without "b"; then I was told that "b" was needed in some
# Line 101  input mode under Windows. */ Line 112  input mode under Windows. */
112  #else  #else
113  #include <sys/time.h>          /* These two includes are needed */  #include <sys/time.h>          /* These two includes are needed */
114  #include <sys/resource.h>      /* for setrlimit(). */  #include <sys/resource.h>      /* for setrlimit(). */
115    #if defined NATIVE_ZOS         /* z/OS uses non-binary I/O */
116    #define INPUT_MODE   "r"
117    #define OUTPUT_MODE  "w"
118    #else
119  #define INPUT_MODE   "rb"  #define INPUT_MODE   "rb"
120  #define OUTPUT_MODE  "wb"  #define OUTPUT_MODE  "wb"
121  #endif  #endif
122    #endif
123    
124    #define PRIV(name) name
125    
126  /* We have to include pcre_internal.h because we need the internal info for  /* We have to include pcre_internal.h because we need the internal info for
127  displaying the results of pcre_study() and we also need to know about the  displaying the results of pcre_study() and we also need to know about the
# Line 117  appropriately for an application, not fo Line 134  appropriately for an application, not fo
134    
135  #include "pcre.h"  #include "pcre.h"
136    
137  #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8  #if defined SUPPORT_PCRE32 && !defined SUPPORT_PCRE8 && !defined SUPPORT_PCRE16
138    /* Configure internal macros to 32 bit mode. */
139    #define COMPILE_PCRE32
140    #endif
141    #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8 && !defined SUPPORT_PCRE32
142  /* Configure internal macros to 16 bit mode. */  /* Configure internal macros to 16 bit mode. */
143  #define COMPILE_PCRE16  #define COMPILE_PCRE16
144  #endif  #endif
145    #if defined SUPPORT_PCRE8 && !defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE32
146    /* Configure internal macros to 8 bit mode. */
147    #define COMPILE_PCRE8
148    #endif
149    
150  #include "pcre_internal.h"  #include "pcre_internal.h"
151    
152  /* The pcre_printint() function, which prints the internal form of a compiled  /* The pcre_printint() function, which prints the internal form of a compiled
153  regex, is held in a separate file so that (a) it can be compiled in either  regex, is held in a separate file so that (a) it can be compiled in either
154  8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c  8-, 16- or 32-bit mode, and (b) it can be #included directly in pcre_compile.c
155  when that is compiled in debug mode. */  when that is compiled in debug mode. */
156    
157  #ifdef SUPPORT_PCRE8  #ifdef SUPPORT_PCRE8
# Line 135  void pcre_printint(pcre *external_re, FI Line 160  void pcre_printint(pcre *external_re, FI
160  #ifdef SUPPORT_PCRE16  #ifdef SUPPORT_PCRE16
161  void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);  void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
162  #endif  #endif
163    #ifdef SUPPORT_PCRE32
164    void pcre32_printint(pcre *external_re, FILE *f, BOOL print_lengths);
165    #endif
166    
167  /* We need access to some of the data tables that PCRE uses. So as not to have  /* We need access to some of the data tables that PCRE uses. So as not to have
168  to keep two copies, we include the source file here, changing the names of the  to keep two copies, we include the source files here, changing the names of the
169  external symbols to prevent clashes. */  external symbols to prevent clashes. */
170    
171  #define PCRE_INCLUDED  #define PCRE_INCLUDED
 #undef PRIV  
 #define PRIV(name) name  
172    
173  #include "pcre_tables.c"  #include "pcre_tables.c"
174    #include "pcre_ucd.c"
175    
176  /* The definition of the macro PRINTABLE, which determines whether to print an  /* The definition of the macro PRINTABLE, which determines whether to print an
177  output character as-is or as a hex value when showing compiled patterns, is  output character as-is or as a hex value when showing compiled patterns, is
# Line 160  that differ in their output from isprint Line 187  that differ in their output from isprint
187    
188  #define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))  #define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))
189    
190  /* Posix support is disabled in 16 bit only mode. */  /* Posix support is disabled in 16 or 32 bit only mode. */
191  #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8 && !defined NOPOSIX  #if !defined SUPPORT_PCRE8 && !defined NOPOSIX
192  #define NOPOSIX  #define NOPOSIX
193  #endif  #endif
194    
# Line 184  automatically cut out the UTF support if Line 211  automatically cut out the UTF support if
211  #endif  #endif
212  #endif  #endif
213    
214  /* To make the code a bit tidier for 8-bit and 16-bit support, we define macros  /* To make the code a bit tidier for 8/16/32-bit support, we define macros
215  for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called  for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
216  only from one place and is handled differently). I couldn't dream up any way of  only from one place and is handled differently). I couldn't dream up any way of
217  using a single macro to do this in a generic way, because of the many different  using a single macro to do this in a generic way, because of the many different
# Line 206  argument, the casting might be incorrect Line 233  argument, the casting might be incorrect
233  #define PCHARSV8(p, offset, len, f) \  #define PCHARSV8(p, offset, len, f) \
234    (void)pchars((pcre_uint8 *)(p) + offset, len, f)    (void)pchars((pcre_uint8 *)(p) + offset, len, f)
235    
236  #define READ_CAPTURE_NAME8(p, cn8, cn16, re) \  #define READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re) \
237    p = read_capture_name8(p, cn8, re)    p = read_capture_name8(p, cn8, re)
238    
239  #define STRLEN8(p) ((int)strlen((char *)p))  #define STRLEN8(p) ((int)strlen((char *)p))
# Line 288  argument, the casting might be incorrect Line 315  argument, the casting might be incorrect
315  #define PCHARSV16(p, offset, len, f) \  #define PCHARSV16(p, offset, len, f) \
316    (void)pchars16((PCRE_SPTR16)(p) + offset, len, f)    (void)pchars16((PCRE_SPTR16)(p) + offset, len, f)
317    
318  #define READ_CAPTURE_NAME16(p, cn8, cn16, re) \  #define READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re) \
319    p = read_capture_name16(p, cn16, re)    p = read_capture_name16(p, cn16, re)
320    
321  #define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))  #define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))
# Line 367  argument, the casting might be incorrect Line 394  argument, the casting might be incorrect
394    
395  #endif /* SUPPORT_PCRE16 */  #endif /* SUPPORT_PCRE16 */
396    
397    /* -----------------------------------------------------------*/
398    
399    #ifdef SUPPORT_PCRE32
400    
401    #define PCHARS32(lv, p, offset, len, f) \
402      lv = pchars32((PCRE_SPTR32)(p) + offset, len, f)
403    
404    #define PCHARSV32(p, offset, len, f) \
405      (void)pchars32((PCRE_SPTR32)(p) + offset, len, f)
406    
407    #define READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re) \
408      p = read_capture_name32(p, cn32, re)
409    
410    #define STRLEN32(p) ((int)strlen32((PCRE_SPTR32)p))
411    
412    #define SET_PCRE_CALLOUT32(callout) \
413      pcre32_callout = (int (*)(pcre32_callout_block *))callout
414    
415    #define PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata) \
416      pcre32_assign_jit_stack((pcre32_extra *)extra, \
417        (pcre32_jit_callback)callback, userdata)
418    
419    #define PCRE_COMPILE32(re, pat, options, error, erroffset, tables) \
420      re = (pcre *)pcre32_compile((PCRE_SPTR32)pat, options, error, erroffset, \
421        tables)
422    
423    #define PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
424        namesptr, cbuffer, size) \
425      rc = pcre32_copy_named_substring((pcre32 *)re, (PCRE_SPTR32)bptr, offsets, \
426        count, (PCRE_SPTR32)namesptr, (PCRE_UCHAR32 *)cbuffer, size/2)
427    
428    #define PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size) \
429      rc = pcre32_copy_substring((PCRE_SPTR32)bptr, offsets, count, i, \
430        (PCRE_UCHAR32 *)cbuffer, size/2)
431    
432    #define PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \
433        offsets, size_offsets, workspace, size_workspace) \
434      count = pcre32_dfa_exec((pcre32 *)re, (pcre32_extra *)extra, \
435        (PCRE_SPTR32)bptr, len, start_offset, options, offsets, size_offsets, \
436        workspace, size_workspace)
437    
438    #define PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \
439        offsets, size_offsets) \
440      count = pcre32_exec((pcre32 *)re, (pcre32_extra *)extra, (PCRE_SPTR32)bptr, \
441        len, start_offset, options, offsets, size_offsets)
442    
443    #define PCRE_FREE_STUDY32(extra) \
444      pcre32_free_study((pcre32_extra *)extra)
445    
446    #define PCRE_FREE_SUBSTRING32(substring) \
447      pcre32_free_substring((PCRE_SPTR32)substring)
448    
449    #define PCRE_FREE_SUBSTRING_LIST32(listptr) \
450      pcre32_free_substring_list((PCRE_SPTR32 *)listptr)
451    
452    #define PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
453        getnamesptr, subsptr) \
454      rc = pcre32_get_named_substring((pcre32 *)re, (PCRE_SPTR32)bptr, offsets, \
455        count, (PCRE_SPTR32)getnamesptr, (PCRE_SPTR32 *)(void*)subsptr)
456    
457    #define PCRE_GET_STRINGNUMBER32(n, rc, ptr) \
458      n = pcre32_get_stringnumber(re, (PCRE_SPTR32)ptr)
459    
460    #define PCRE_GET_SUBSTRING32(rc, bptr, offsets, count, i, subsptr) \
461      rc = pcre32_get_substring((PCRE_SPTR32)bptr, offsets, count, i, \
462        (PCRE_SPTR32 *)(void*)subsptr)
463    
464    #define PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr) \
465      rc = pcre32_get_substring_list((PCRE_SPTR32)bptr, offsets, count, \
466        (PCRE_SPTR32 **)(void*)listptr)
467    
468    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables) \
469      rc = pcre32_pattern_to_host_byte_order((pcre32 *)re, (pcre32_extra *)extra, \
470        tables)
471    
472    #define PCRE_PRINTINT32(re, outfile, debug_lengths) \
473      pcre32_printint(re, outfile, debug_lengths)
474    
475    #define PCRE_STUDY32(extra, re, options, error) \
476      extra = (pcre_extra *)pcre32_study((pcre32 *)re, options, error)
477    
478    #define PCRE_JIT_STACK_ALLOC32(startsize, maxsize) \
479      (pcre_jit_stack *)pcre32_jit_stack_alloc(startsize, maxsize)
480    
481    #define PCRE_JIT_STACK_FREE32(stack) \
482      pcre32_jit_stack_free((pcre32_jit_stack *)stack)
483    
484    #endif /* SUPPORT_PCRE32 */
485    
486    
487  /* ----- Both modes are supported; a runtime test is needed, except for  /* ----- Two or more modes are supported; a runtime test is needed, except for
488  pcre_config(), and the JIT stack functions, when it doesn't matter which  pcre_config(), and the JIT stack functions, when it doesn't matter which
489  version is called. ----- */  version is called. ----- */
490    
491  #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16  enum {
492      PCRE8_MODE,
493      PCRE16_MODE,
494      PCRE32_MODE
495    };
496    
497    #if (defined (SUPPORT_PCRE8) + defined (SUPPORT_PCRE16) + defined (SUPPORT_PCRE32)) >= 2
498    
499  #define CHAR_SIZE (use_pcre16? 2:1)  #define CHAR_SIZE (1 << pcre_mode)
500    
501  #define PCHARS(lv, p, offset, len, f) \  #define PCHARS(lv, p, offset, len, f) \
502    if (use_pcre16) \    if (pcre_mode == PCRE32_MODE) \
503        PCHARS32(lv, p, offset, len, f); \
504      else if (pcre_mode == PCRE16_MODE) \
505      PCHARS16(lv, p, offset, len, f); \      PCHARS16(lv, p, offset, len, f); \
506    else \    else \
507      PCHARS8(lv, p, offset, len, f)      PCHARS8(lv, p, offset, len, f)
508    
509  #define PCHARSV(p, offset, len, f) \  #define PCHARSV(p, offset, len, f) \
510    if (use_pcre16) \    if (pcre_mode == PCRE32_MODE) \
511        PCHARSV32(p, offset, len, f); \
512      else if (pcre_mode == PCRE16_MODE) \
513      PCHARSV16(p, offset, len, f); \      PCHARSV16(p, offset, len, f); \
514    else \    else \
515      PCHARSV8(p, offset, len, f)      PCHARSV8(p, offset, len, f)
516    
517  #define READ_CAPTURE_NAME(p, cn8, cn16, re) \  #define READ_CAPTURE_NAME(p, cn8, cn16, cn32, re) \
518    if (use_pcre16) \    if (pcre_mode == PCRE32_MODE) \
519      READ_CAPTURE_NAME16(p, cn8, cn16, re); \      READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re); \
520      else if (pcre_mode == PCRE16_MODE) \
521        READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re); \
522    else \    else \
523      READ_CAPTURE_NAME8(p, cn8, cn16, re)      READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re)
524    
525  #define SET_PCRE_CALLOUT(callout) \  #define SET_PCRE_CALLOUT(callout) \
526    if (use_pcre16) \    if (pcre_mode == PCRE32_MODE) \
527        SET_PCRE_CALLOUT32(callout); \
528      else if (pcre_mode == PCRE16_MODE) \
529      SET_PCRE_CALLOUT16(callout); \      SET_PCRE_CALLOUT16(callout); \
530    else \    else \
531      SET_PCRE_CALLOUT8(callout)      SET_PCRE_CALLOUT8(callout)
532    
533  #define STRLEN(p) (use_pcre16? STRLEN16(p) : STRLEN8(p))  #define STRLEN(p) (pcre_mode == PCRE32_MODE ? STRLEN32(p) : pcre_mode == PCRE16_MODE ? STRLEN16(p) : STRLEN8(p))
534    
535  #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \  #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
536    if (use_pcre16) \    if (pcre_mode == PCRE32_MODE) \
537        PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata); \
538      else if (pcre_mode == PCRE16_MODE) \
539      PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \      PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \
540    else \    else \
541      PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata)      PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata)
542    
543  #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \  #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
544    if (use_pcre16) \    if (pcre_mode == PCRE32_MODE) \
545        PCRE_COMPILE32(re, pat, options, error, erroffset, tables); \
546      else if (pcre_mode == PCRE16_MODE) \
547      PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \      PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
548    else \    else \
549      PCRE_COMPILE8(re, pat, options, error, erroffset, tables)      PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
# Line 418  version is called. ----- */ Line 552  version is called. ----- */
552    
553  #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \  #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
554      namesptr, cbuffer, size) \      namesptr, cbuffer, size) \
555    if (use_pcre16) \    if (pcre_mode == PCRE32_MODE) \
556        PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
557          namesptr, cbuffer, size); \
558      else if (pcre_mode == PCRE16_MODE) \
559      PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \      PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
560        namesptr, cbuffer, size); \        namesptr, cbuffer, size); \
561    else \    else \
# Line 426  version is called. ----- */ Line 563  version is called. ----- */
563        namesptr, cbuffer, size)        namesptr, cbuffer, size)
564    
565  #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \  #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
566    if (use_pcre16) \    if (pcre_mode == PCRE32_MODE) \
567        PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size); \
568      else if (pcre_mode == PCRE16_MODE) \
569      PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \      PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
570    else \    else \
571      PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size)      PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size)
572    
573  #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \  #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
574      offsets, size_offsets, workspace, size_workspace) \      offsets, size_offsets, workspace, size_workspace) \
575    if (use_pcre16) \    if (pcre_mode == PCRE32_MODE) \
576        PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \
577          offsets, size_offsets, workspace, size_workspace); \
578      else if (pcre_mode == PCRE16_MODE) \
579      PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \      PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
580        offsets, size_offsets, workspace, size_workspace); \        offsets, size_offsets, workspace, size_workspace); \
581    else \    else \
# Line 442  version is called. ----- */ Line 584  version is called. ----- */
584    
585  #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \  #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
586      offsets, size_offsets) \      offsets, size_offsets) \
587    if (use_pcre16) \    if (pcre_mode == PCRE32_MODE) \
588        PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \
589          offsets, size_offsets); \
590      else if (pcre_mode == PCRE16_MODE) \
591      PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \      PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
592        offsets, size_offsets); \        offsets, size_offsets); \
593    else \    else \
# Line 450  version is called. ----- */ Line 595  version is called. ----- */
595        offsets, size_offsets)        offsets, size_offsets)
596    
597  #define PCRE_FREE_STUDY(extra) \  #define PCRE_FREE_STUDY(extra) \
598    if (use_pcre16) \    if (pcre_mode == PCRE32_MODE) \
599        PCRE_FREE_STUDY32(extra); \
600      else if (pcre_mode == PCRE16_MODE) \
601      PCRE_FREE_STUDY16(extra); \      PCRE_FREE_STUDY16(extra); \
602    else \    else \
603      PCRE_FREE_STUDY8(extra)      PCRE_FREE_STUDY8(extra)
604    
605  #define PCRE_FREE_SUBSTRING(substring) \  #define PCRE_FREE_SUBSTRING(substring) \
606    if (use_pcre16) \    if (pcre_mode == PCRE32_MODE) \
607        PCRE_FREE_SUBSTRING32(substring); \
608      else if (pcre_mode == PCRE16_MODE) \
609      PCRE_FREE_SUBSTRING16(substring); \      PCRE_FREE_SUBSTRING16(substring); \
610    else \    else \
611      PCRE_FREE_SUBSTRING8(substring)      PCRE_FREE_SUBSTRING8(substring)
612    
613  #define PCRE_FREE_SUBSTRING_LIST(listptr) \  #define PCRE_FREE_SUBSTRING_LIST(listptr) \
614    if (use_pcre16) \    if (pcre_mode == PCRE32_MODE) \
615        PCRE_FREE_SUBSTRING_LIST32(listptr); \
616      else if (pcre_mode == PCRE16_MODE) \
617      PCRE_FREE_SUBSTRING_LIST16(listptr); \      PCRE_FREE_SUBSTRING_LIST16(listptr); \
618    else \    else \
619      PCRE_FREE_SUBSTRING_LIST8(listptr)      PCRE_FREE_SUBSTRING_LIST8(listptr)
620    
621  #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \  #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
622      getnamesptr, subsptr) \      getnamesptr, subsptr) \
623    if (use_pcre16) \    if (pcre_mode == PCRE32_MODE) \
624        PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
625          getnamesptr, subsptr); \
626      else if (pcre_mode == PCRE16_MODE) \
627      PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \      PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
628        getnamesptr, subsptr); \        getnamesptr, subsptr); \
629    else \    else \
# Line 477  version is called. ----- */ Line 631  version is called. ----- */
631        getnamesptr, subsptr)        getnamesptr, subsptr)
632    
633  #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \  #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
634    if (use_pcre16) \    if (pcre_mode == PCRE32_MODE) \
635        PCRE_GET_STRINGNUMBER32(n, rc, ptr); \
636      else if (pcre_mode == PCRE16_MODE) \
637      PCRE_GET_STRINGNUMBER16(n, rc, ptr); \      PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
638    else \    else \
639      PCRE_GET_STRINGNUMBER8(n, rc, ptr)      PCRE_GET_STRINGNUMBER8(n, rc, ptr)
640    
641  #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \  #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
642    if (use_pcre16) \    if (pcre_mode == PCRE32_MODE) \
643        PCRE_GET_SUBSTRING32(rc, bptr, use_offsets, count, i, subsptr); \
644      else if (pcre_mode == PCRE16_MODE) \
645      PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \      PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
646    else \    else \
647      PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr)      PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr)
648    
649  #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \  #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
650    if (use_pcre16) \    if (pcre_mode == PCRE32_MODE) \
651        PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr); \
652      else if (pcre_mode == PCRE16_MODE) \
653      PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \      PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
654    else \    else \
655      PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr)      PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr)
656    
657  #define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \  #define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
658    (use_pcre16 ? \    (pcre_mode == PCRE32_MODE ? \
659       PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \       PCRE_JIT_STACK_ALLOC32(startsize, maxsize) \
660      :PCRE_JIT_STACK_ALLOC8(startsize, maxsize))      : pcre_mode == PCRE16_MODE ? \
661          PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
662          : PCRE_JIT_STACK_ALLOC8(startsize, maxsize))
663    
664  #define PCRE_JIT_STACK_FREE(stack) \  #define PCRE_JIT_STACK_FREE(stack) \
665    if (use_pcre16) \    if (pcre_mode == PCRE32_MODE) \
666        PCRE_JIT_STACK_FREE32(stack); \
667      else if (pcre_mode == PCRE16_MODE) \
668      PCRE_JIT_STACK_FREE16(stack); \      PCRE_JIT_STACK_FREE16(stack); \
669    else \    else \
670      PCRE_JIT_STACK_FREE8(stack)      PCRE_JIT_STACK_FREE8(stack)
671    
672  #define PCRE_MAKETABLES \  #define PCRE_MAKETABLES \
673    (use_pcre16? pcre16_maketables() : pcre_maketables())    (pcre_mode == PCRE32_MODE ? pcre32_maketables() : pcre_mode == PCRE16_MODE ? pcre16_maketables() : pcre_maketables())
674    
675  #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \  #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
676    if (use_pcre16) \    if (pcre_mode == PCRE32_MODE) \
677        PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables); \
678      else if (pcre_mode == PCRE16_MODE) \
679      PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \      PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \
680    else \    else \
681      PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables)      PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables)
682    
683  #define PCRE_PRINTINT(re, outfile, debug_lengths) \  #define PCRE_PRINTINT(re, outfile, debug_lengths) \
684    if (use_pcre16) \    if (pcre_mode == PCRE32_MODE) \
685        PCRE_PRINTINT32(re, outfile, debug_lengths); \
686      else if (pcre_mode == PCRE16_MODE) \
687      PCRE_PRINTINT16(re, outfile, debug_lengths); \      PCRE_PRINTINT16(re, outfile, debug_lengths); \
688    else \    else \
689      PCRE_PRINTINT8(re, outfile, debug_lengths)      PCRE_PRINTINT8(re, outfile, debug_lengths)
690    
691  #define PCRE_STUDY(extra, re, options, error) \  #define PCRE_STUDY(extra, re, options, error) \
692    if (use_pcre16) \    if (pcre_mode == PCRE32_MODE) \
693        PCRE_STUDY32(extra, re, options, error); \
694      else if (pcre_mode == PCRE16_MODE) \
695      PCRE_STUDY16(extra, re, options, error); \      PCRE_STUDY16(extra, re, options, error); \
696    else \    else \
697      PCRE_STUDY8(extra, re, options, error)      PCRE_STUDY8(extra, re, options, error)
# Line 558  version is called. ----- */ Line 728  version is called. ----- */
728    
729  /* ----- Only 16-bit mode is supported ----- */  /* ----- Only 16-bit mode is supported ----- */
730    
731  #else  #elif defined SUPPORT_PCRE16
732  #define CHAR_SIZE                 2  #define CHAR_SIZE                 2
733  #define PCHARS                    PCHARS16  #define PCHARS                    PCHARS16
734  #define PCHARSV                   PCHARSV16  #define PCHARSV                   PCHARSV16
# Line 585  version is called. ----- */ Line 755  version is called. ----- */
755  #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16  #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
756  #define PCRE_PRINTINT             PCRE_PRINTINT16  #define PCRE_PRINTINT             PCRE_PRINTINT16
757  #define PCRE_STUDY                PCRE_STUDY16  #define PCRE_STUDY                PCRE_STUDY16
758    
759    /* ----- Only 32-bit mode is supported ----- */
760    
761    #elif defined SUPPORT_PCRE32
762    #define CHAR_SIZE                 4
763    #define PCHARS                    PCHARS32
764    #define PCHARSV                   PCHARSV32
765    #define READ_CAPTURE_NAME         READ_CAPTURE_NAME32
766    #define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT32
767    #define STRLEN                    STRLEN32
768    #define PCRE_ASSIGN_JIT_STACK     PCRE_ASSIGN_JIT_STACK32
769    #define PCRE_COMPILE              PCRE_COMPILE32
770    #define PCRE_CONFIG               pcre32_config
771    #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING32
772    #define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING32
773    #define PCRE_DFA_EXEC             PCRE_DFA_EXEC32
774    #define PCRE_EXEC                 PCRE_EXEC32
775    #define PCRE_FREE_STUDY           PCRE_FREE_STUDY32
776    #define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING32
777    #define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST32
778    #define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING32
779    #define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER32
780    #define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING32
781    #define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST32
782    #define PCRE_JIT_STACK_ALLOC      PCRE_JIT_STACK_ALLOC32
783    #define PCRE_JIT_STACK_FREE       PCRE_JIT_STACK_FREE32
784    #define PCRE_MAKETABLES           pcre32_maketables()
785    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER32
786    #define PCRE_PRINTINT             PCRE_PRINTINT32
787    #define PCRE_STUDY                PCRE_STUDY32
788    
789  #endif  #endif
790    
791  /* ----- End of mode-specific function call macros ----- */  /* ----- End of mode-specific function call macros ----- */
# Line 600  version is called. ----- */ Line 801  version is called. ----- */
801  #endif  #endif
802  #endif  #endif
803    
804    #if !defined NODFA
805    #define DFA_WS_DIMENSION 1000
806    #endif
807    
808  /* This is the default loop count for timing. */  /* This is the default loop count for timing. */
809    
810  #define LOOPREPEAT 500000  #define LOOPREPEAT 500000
# Line 614  static int callout_fail_count; Line 819  static int callout_fail_count;
819  static int callout_fail_id;  static int callout_fail_id;
820  static int debug_lengths;  static int debug_lengths;
821  static int first_callout;  static int first_callout;
822    static int jit_was_used;
823  static int locale_set = 0;  static int locale_set = 0;
824  static int show_malloc;  static int show_malloc;
825  static int use_utf;  static int use_utf;
# Line 625  static const unsigned char *last_callout Line 831  static const unsigned char *last_callout
831    
832  static int buffer_size = 50000;  static int buffer_size = 50000;
833  static pcre_uint8 *buffer = NULL;  static pcre_uint8 *buffer = NULL;
 static pcre_uint8 *dbuffer = NULL;  
834  static pcre_uint8 *pbuffer = NULL;  static pcre_uint8 *pbuffer = NULL;
835    
836  /* Another buffer is needed translation to 16-bit character strings. It will  /* Another buffer is needed for translation to 16/32-bit character strings. It will
837  obtained and extended as required. */  be obtained and extended as required. */
838    
839  #ifdef SUPPORT_PCRE16  #if defined SUPPORT_PCRE8 && (defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32)
 static int buffer16_size = 0;  
 static pcre_uint16 *buffer16 = NULL;  
   
 #ifdef SUPPORT_PCRE8  
840    
841  /* We need the table of operator lengths that is used for 16-bit compiling, in  /* We need the table of operator lengths that is used for 16/32-bit compiling, in
842  order to swap bytes in a pattern for saving/reloading testing. Luckily, the  order to swap bytes in a pattern for saving/reloading testing. Luckily, the
843  data is defined as a macro. However, we must ensure that LINK_SIZE is adjusted  data is defined as a macro. However, we must ensure that LINK_SIZE is adjusted
844  appropriately for the 16-bit world. Just as a safety check, make sure that  appropriately for the 16/32-bit world. Just as a safety check, make sure that
845  COMPILE_PCRE16 is *not* set. */  COMPILE_PCRE[16|32] is *not* set. */
846    
847  #ifdef COMPILE_PCRE16  #ifdef COMPILE_PCRE16
848  #error COMPILE_PCRE16 must not be set when compiling pcretest.c  #error COMPILE_PCRE16 must not be set when compiling pcretest.c
849  #endif  #endif
850    
851    #ifdef COMPILE_PCRE32
852    #error COMPILE_PCRE32 must not be set when compiling pcretest.c
853    #endif
854    
855  #if LINK_SIZE == 2  #if LINK_SIZE == 2
856  #undef LINK_SIZE  #undef LINK_SIZE
857  #define LINK_SIZE 1  #define LINK_SIZE 1
# Line 660  COMPILE_PCRE16 is *not* set. */ Line 865  COMPILE_PCRE16 is *not* set. */
865  #undef IMM2_SIZE  #undef IMM2_SIZE
866  #define IMM2_SIZE 1  #define IMM2_SIZE 1
867    
868  #endif /* SUPPORT_PCRE8 */  #endif /* SUPPORT_PCRE8 && (SUPPORT_PCRE16 || SUPPORT_PCRE32) */
869    
870    #ifdef SUPPORT_PCRE16
871    static int buffer16_size = 0;
872    static pcre_uint16 *buffer16 = NULL;
873  static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };  static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
874  #endif  /* SUPPORT_PCRE16 */  #endif  /* SUPPORT_PCRE16 */
875    
876  /* If we have 8-bit support, default use_pcre16 to false; if there is also  #ifdef SUPPORT_PCRE32
877  16-bit support, it can be changed by an option. If there is no 8-bit support,  static int buffer32_size = 0;
878  there must be 16-bit support, so default it to 1. */  static pcre_uint32 *buffer32 = NULL;
879    static const pcre_uint32 OP_lengths32[] = { OP_LENGTHS };
880    #endif  /* SUPPORT_PCRE32 */
881    
882    /* If we have 8-bit support, default to it; if there is also
883    16- or 32-bit support, it can be changed by an option. If there is no 8-bit support,
884    there must be 16- or 32-bit support, so default to 16-bit if available, else 32-bit. */
885    
886  #ifdef SUPPORT_PCRE8  #if defined SUPPORT_PCRE8
887  static int use_pcre16 = 0;  static int pcre_mode = PCRE8_MODE;
888  #else  #elif defined SUPPORT_PCRE16
889  static int use_pcre16 = 1;  static int pcre_mode = PCRE16_MODE;
890    #elif defined SUPPORT_PCRE32
891    static int pcre_mode = PCRE32_MODE;
892  #endif  #endif
893    
894    /* JIT study options for -s+n and /S+n where '1' <= n <= '7'. */
895    
896    static int jit_study_bits[] =
897      {
898      PCRE_STUDY_JIT_COMPILE,
899      PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
900      PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
901      PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
902      PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
903      PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
904      PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE +
905        PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE
906    };
907    
908    #define PCRE_STUDY_ALLJIT (PCRE_STUDY_JIT_COMPILE | \
909      PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE | PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE)
910    
911  /* Textual explanations for runtime error codes */  /* Textual explanations for runtime error codes */
912    
913  static const char *errtexts[] = {  static const char *errtexts[] = {
# Line 706  static const char *errtexts[] = { Line 939  static const char *errtexts[] = {
939    NULL,  /* SHORTUTF8/16 is handled specially */    NULL,  /* SHORTUTF8/16 is handled specially */
940    "nested recursion at the same subject position",    "nested recursion at the same subject position",
941    "JIT stack limit reached",    "JIT stack limit reached",
942    "pattern compiled in wrong mode: 8-bit/16-bit error"    "pattern compiled in wrong mode: 8-bit/16-bit error",
943      "pattern compiled with other endianness",
944      "invalid data in workspace for DFA restart"
945  };  };
946    
947    
# Line 1058  return sys_errlist[n]; Line 1293  return sys_errlist[n];
1293  #endif /* HAVE_STRERROR */  #endif /* HAVE_STRERROR */
1294    
1295    
1296    
1297    /*************************************************
1298    *       Print newline configuration              *
1299    *************************************************/
1300    
1301    /*
1302    Arguments:
1303      rc         the return code from PCRE_CONFIG_NEWLINE
1304      isc        TRUE if called from "-C newline"
1305    Returns:     nothing
1306    */
1307    
1308    static void
1309    print_newline_config(int rc, BOOL isc)
1310    {
1311    const char *s = NULL;
1312    if (!isc) printf("  Newline sequence is ");
1313    switch(rc)
1314      {
1315      case CHAR_CR: s = "CR"; break;
1316      case CHAR_LF: s = "LF"; break;
1317      case (CHAR_CR<<8 | CHAR_LF): s = "CRLF"; break;
1318      case -1: s = "ANY"; break;
1319      case -2: s = "ANYCRLF"; break;
1320    
1321      default:
1322      printf("a non-standard value: 0x%04x\n", rc);
1323      return;
1324      }
1325    
1326    printf("%s\n", s);
1327    }
1328    
1329    
1330    
1331  /*************************************************  /*************************************************
1332  *         JIT memory callback                    *  *         JIT memory callback                    *
1333  *************************************************/  *************************************************/
1334    
1335  static pcre_jit_stack* jit_callback(void *arg)  static pcre_jit_stack* jit_callback(void *arg)
1336  {  {
1337    jit_was_used = TRUE;
1338  return (pcre_jit_stack *)arg;  return (pcre_jit_stack *)arg;
1339  }  }
1340    
1341    
1342  #if !defined NOUTF || defined SUPPORT_PCRE16  #if !defined NOUTF || defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32
1343  /*************************************************  /*************************************************
1344  *            Convert UTF-8 string to value       *  *            Convert UTF-8 string to value       *
1345  *************************************************/  *************************************************/
# Line 1085  Returns:      >  0 => the number of byte Line 1356  Returns:      >  0 => the number of byte
1356  */  */
1357    
1358  static int  static int
1359  utf82ord(pcre_uint8 *utf8bytes, int *vptr)  utf82ord(pcre_uint8 *utf8bytes, pcre_uint32 *vptr)
1360  {  {
1361  int c = *utf8bytes++;  pcre_uint32 c = *utf8bytes++;
1362  int d = c;  pcre_uint32 d = c;
1363  int i, j, s;  int i, j, s;
1364    
1365  for (i = -1; i < 6; i++)               /* i is number of additional bytes */  for (i = -1; i < 6; i++)               /* i is number of additional bytes */
# Line 1128  return i+1; Line 1399  return i+1;
1399    
1400    
1401    
1402  #if !defined NOUTF || defined SUPPORT_PCRE16  #if !defined NOUTF || defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32
1403  /*************************************************  /*************************************************
1404  *       Convert character value to UTF-8         *  *       Convert character value to UTF-8         *
1405  *************************************************/  *************************************************/
# Line 1144  Returns:     number of characters placed Line 1415  Returns:     number of characters placed
1415  */  */
1416    
1417  static int  static int
1418  ord2utf8(int cvalue, pcre_uint8 *utf8bytes)  ord2utf8(pcre_uint32 cvalue, pcre_uint8 *utf8bytes)
1419  {  {
1420  register int i, j;  register int i, j;
1421    if (cvalue > 0x7fffffffu)
1422      return -1;
1423  for (i = 0; i < utf8_table1_size; i++)  for (i = 0; i < utf8_table1_size; i++)
1424    if (cvalue <= utf8_table1[i]) break;    if (cvalue <= utf8_table1[i]) break;
1425  utf8bytes += i;  utf8bytes += i;
# Line 1217  if (!utf && !data) Line 1490  if (!utf && !data)
1490    
1491  else  else
1492    {    {
1493    int c = 0;    pcre_uint32 c = 0;
1494    while (len > 0)    while (len > 0)
1495      {      {
1496      int chlen = utf82ord(p, &c);      int chlen = utf82ord(p, &c);
# Line 1240  return pp - buffer16; Line 1513  return pp - buffer16;
1513  }  }
1514  #endif  #endif
1515    
1516    #ifdef SUPPORT_PCRE32
1517    /*************************************************
1518    *         Convert a string to 32-bit             *
1519    *************************************************/
1520    
1521    /* In non-UTF mode, the space needed for a 32-bit string is exactly four times the
1522    8-bit size. For a UTF-8 string, the size needed for UTF-32 is no more than four
1523    times, because every character occupies at least one byte in UTF-8 and exactly
1524    four bytes in UTF-32 (the worst case being a string of one-byte characters). The
1525    result is always left in buffer32.
1526    
1527    Note that for data lines this function does not object to surrogate values
1528    (in a UTF pattern they yield -3). This is deliberate; it makes it possible to
1529    construct invalid UTF-32 subject strings, to test that they are correctly faulted.
1530    
1531    Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1532    in UTF-8 so that values greater than 255 can be handled.
1533    
1534    Arguments:
1535      data       TRUE if converting a data line; FALSE for a regex
1536      p          points to a byte string
1537      utf        true if UTF-8 (to be converted to UTF-32)
1538      len        number of bytes in the string (excluding trailing zero)
1539    
1540    Returns:     number of 32-bit data items used (excluding trailing zero)
1541                 OR -1 if a UTF-8 string is malformed
1542                 OR -2 if a value > 0x10ffff is encountered
1543                 OR -3 if an ill-formed value is encountered (i.e. a surrogate)
1544    */
1545    
1546    static int
1547    to32(int data, pcre_uint8 *p, int utf, int len)
1548    {
1549    pcre_uint32 *pp;
1550    
1551    if (buffer32_size < 4*len + 4)
1552      {
1553      if (buffer32 != NULL) free(buffer32);
1554      buffer32_size = 4*len + 4;
1555      buffer32 = (pcre_uint32 *)malloc(buffer32_size);
1556      if (buffer32 == NULL)
1557        {
1558        fprintf(stderr, "pcretest: malloc(%d) failed for buffer32\n", buffer32_size);
1559        exit(1);
1560        }
1561      }
1562    
1563    pp = buffer32;
1564    
1565    if (!utf && !data)
1566      {
1567      while (len-- > 0) *pp++ = *p++;
1568      }
1569    
1570    else
1571      {
1572      pcre_uint32 c = 0;
1573      while (len > 0)
1574        {
1575        int chlen = utf82ord(p, &c);
1576        if (chlen <= 0) return -1;
1577        if (utf)
1578          {
1579          if (c > 0x10ffff) return -2;
1580          if (!data && (c & 0xfffff800u) == 0xd800u) return -3;
1581          }
1582    
1583        p += chlen;
1584        len -= chlen;
1585        *pp++ = c;
1586        }
1587      }
1588    
1589    *pp = 0;
1590    return pp - buffer32;
1591    }
1592    #endif
1593    
1594  /*************************************************  /*************************************************
1595  *        Read or extend an input line            *  *        Read or extend an input line            *
# Line 1271  pcre_uint8 *here = start; Line 1621  pcre_uint8 *here = start;
1621    
1622  for (;;)  for (;;)
1623    {    {
1624    int rlen = (int)(buffer_size - (here - buffer));    size_t rlen = (size_t)(buffer_size - (here - buffer));
1625    
1626    if (rlen > 1000)    if (rlen > 1000)
1627      {      {
1628      int dlen;      int dlen;
1629    
1630      /* If libreadline support is required, use readline() to read a line if the      /* If libreadline or libedit support is required, use readline() to read a
1631      input is a terminal. Note that readline() removes the trailing newline, so      line if the input is a terminal. Note that readline() removes the trailing
1632      we must put it back again, to be compatible with fgets(). */      newline, so we must put it back again, to be compatible with fgets(). */
1633    
1634  #ifdef SUPPORT_LIBREADLINE  #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
1635      if (isatty(fileno(f)))      if (isatty(fileno(f)))
1636        {        {
1637        size_t len;        size_t len;
# Line 1315  for (;;) Line 1665  for (;;)
1665      {      {
1666      int new_buffer_size = 2*buffer_size;      int new_buffer_size = 2*buffer_size;
1667      pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);      pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
     pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);  
1668      pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);      pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1669    
1670      if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)      if (new_buffer == NULL || new_pbuffer == NULL)
1671        {        {
1672        fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);        fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1673        exit(1);        exit(1);
# Line 1333  for (;;) Line 1682  for (;;)
1682      here = new_buffer + (here - buffer);      here = new_buffer + (here - buffer);
1683    
1684      free(buffer);      free(buffer);
     free(dbuffer);  
1685      free(pbuffer);      free(pbuffer);
1686    
1687      buffer = new_buffer;      buffer = new_buffer;
     dbuffer = new_dbuffer;  
1688      pbuffer = new_pbuffer;      pbuffer = new_pbuffer;
1689      }      }
1690    }    }
# Line 1380  return(result); Line 1727  return(result);
1727    
1728  /* Print a single character either literally, or as a hex escape. */  /* Print a single character either literally, or as a hex escape. */
1729    
1730  static int pchar(int c, FILE *f)  static int pchar(pcre_uint32 c, FILE *f)
1731  {  {
1732    int n;
1733  if (PRINTOK(c))  if (PRINTOK(c))
1734    {    {
1735    if (f != NULL) fprintf(f, "%c", c);    if (f != NULL) fprintf(f, "%c", c);
# Line 1402  if (c < 0x100) Line 1750  if (c < 0x100)
1750      }      }
1751    }    }
1752    
1753  if (f != NULL) fprintf(f, "\\x{%02x}", c);  if (f != NULL) n = fprintf(f, "\\x{%02x}", c);
1754  return (c <= 0x000000ff)? 6 :  return n >= 0 ? n : 0;
        (c <= 0x00000fff)? 7 :  
        (c <= 0x0000ffff)? 8 :  
        (c <= 0x000fffff)? 9 : 10;  
1755  }  }
1756    
1757    
# Line 1421  If handed a NULL file, just counts chars Line 1766  If handed a NULL file, just counts chars
1766    
1767  static int pchars(pcre_uint8 *p, int length, FILE *f)  static int pchars(pcre_uint8 *p, int length, FILE *f)
1768  {  {
1769  int c = 0;  pcre_uint32 c = 0;
1770  int yield = 0;  int yield = 0;
1771    
1772  if (length < 0)  if (length < 0)
# Line 1466  return len; Line 1811  return len;
1811  #endif  /* SUPPORT_PCRE16 */  #endif  /* SUPPORT_PCRE16 */
1812    
1813    
1814    
1815    #ifdef SUPPORT_PCRE32
1816    /*************************************************
1817    *    Find length of 0-terminated 32-bit string   *
1818    *************************************************/
1819    
1820    static int strlen32(PCRE_SPTR32 p)
1821    {
1822    int len = 0;
1823    while (*p++ != 0) len++;
1824    return len;
1825    }
1826    #endif  /* SUPPORT_PCRE32 */
1827    
1828    
1829    
1830  #ifdef SUPPORT_PCRE16  #ifdef SUPPORT_PCRE16
1831  /*************************************************  /*************************************************
1832  *           Print 16-bit character string        *  *           Print 16-bit character string        *
# Line 1483  if (length < 0) Line 1844  if (length < 0)
1844    
1845  while (length-- > 0)  while (length-- > 0)
1846    {    {
1847    int c = *p++ & 0xffff;    pcre_uint32 c = *p++ & 0xffff;
1848  #if !defined NOUTF  #if !defined NOUTF
1849    if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)    if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
1850      {      {
# Line 1505  return yield; Line 1866  return yield;
1866    
1867    
1868    
1869    #ifdef SUPPORT_PCRE32
1870    /*************************************************
1871    *           Print 32-bit character string        *
1872    *************************************************/
1873    
1874    /* Must handle UTF-32 strings in utf mode. Yields number of characters printed.
1875    If handed a NULL file, just counts chars without printing. */
1876    
1877    static int pchars32(PCRE_SPTR32 p, int length, FILE *f)
1878    {
1879    int yield = 0;
1880    
1881    if (length < 0)
1882      length = strlen32(p);
1883    
1884    while (length-- > 0)
1885      {
1886      pcre_uint32 c = *p++;
1887      yield += pchar(c, f);
1888      }
1889    
1890    return yield;
1891    }
1892    #endif  /* SUPPORT_PCRE32 */
1893    
1894    
1895    
1896  #ifdef SUPPORT_PCRE8  #ifdef SUPPORT_PCRE8
1897  /*************************************************  /*************************************************
1898  *     Read a capture name (8-bit) and check it   *  *     Read a capture name (8-bit) and check it   *
# Line 1558  return p; Line 1946  return p;
1946    
1947    
1948    
1949    #ifdef SUPPORT_PCRE32
1950    /*************************************************
1951    *     Read a capture name (32-bit) and check it  *
1952    *************************************************/
1953    
1954    /* Note that the text being read is 8-bit. */
1955    
1956    static pcre_uint8 *
1957    read_capture_name32(pcre_uint8 *p, pcre_uint32 **pp, pcre *re)
1958    {
1959    pcre_uint32 *npp = *pp;
1960    while (isalnum(*p)) *npp++ = *p++;
1961    *npp++ = 0;
1962    *npp = 0;
1963    if (pcre32_get_stringnumber((pcre32 *)re, (PCRE_SPTR32)(*pp)) < 0)
1964      {
1965      fprintf(outfile, "no parentheses with name \"");
1966      PCHARSV(*pp, 0, -1, outfile);
1967      fprintf(outfile, "\"\n");
1968      }
1969    *pp = npp;
1970    return p;
1971    }
1972    #endif  /* SUPPORT_PCRE32 */
1973    
1974    
1975    
1976  /*************************************************  /*************************************************
1977  *              Callout function                  *  *              Callout function                  *
1978  *************************************************/  *************************************************/
# Line 1715  free(block); Line 2130  free(block);
2130  *************************************************/  *************************************************/
2131    
2132  /* Get one piece of information from the pcre_fullinfo() function. When only  /* Get one piece of information from the pcre_fullinfo() function. When only
2133  one of 8-bit or 16-bit is supported, use_pcre16 should always have the correct  one of 8-, 16- or 32-bit is supported, pcre_mode should always have the correct
2134  value, but the code is defensive.  value, but the code is defensive.
2135    
2136  Arguments:  Arguments:
# Line 1732  new_info(pcre *re, pcre_extra *study, in Line 2147  new_info(pcre *re, pcre_extra *study, in
2147  {  {
2148  int rc;  int rc;
2149    
2150  if (use_pcre16)  if (pcre_mode == PCRE32_MODE)
2151    #ifdef SUPPORT_PCRE32
2152      rc = pcre32_fullinfo((pcre32 *)re, (pcre32_extra *)study, option, ptr);
2153    #else
2154      rc = PCRE_ERROR_BADMODE;
2155    #endif
2156    else if (pcre_mode == PCRE16_MODE)
2157  #ifdef SUPPORT_PCRE16  #ifdef SUPPORT_PCRE16
2158    rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr);    rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr);
2159  #else  #else
# Line 1748  else Line 2169  else
2169  if (rc < 0)  if (rc < 0)
2170    {    {
2171    fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,    fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
2172      use_pcre16? "16" : "", option);      pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "", option);
2173    if (rc == PCRE_ERROR_BADMODE)    if (rc == PCRE_ERROR_BADMODE)
2174      fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "      fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
2175        "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");        "%d-bit mode\n", 8 * CHAR_SIZE,
2176          8 * (REAL_PCRE_FLAGS(re) & PCRE_MODE_MASK));
2177    }    }
2178    
2179  return rc;  return rc;
# Line 1800  bytes in the pattern itself. This is to Line 2222  bytes in the pattern itself. This is to
2222  ability to reload byte-flipped patterns, e.g. those compiled on a different  ability to reload byte-flipped patterns, e.g. those compiled on a different
2223  architecture. */  architecture. */
2224    
2225    #if defined SUPPORT_PCRE8 || defined SUPPORT_PCRE16
2226  static void  static void
2227  regexflip(pcre *ere, pcre_extra *extra)  regexflip8_or_16(pcre *ere, pcre_extra *extra)
2228  {  {
2229  REAL_PCRE *re = (REAL_PCRE *)ere;  real_pcre8_or_16 *re = (real_pcre8_or_16 *)ere;
2230  #ifdef SUPPORT_PCRE16  #ifdef SUPPORT_PCRE16
2231  int op;  int op;
2232  pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;  pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
# Line 1840  if (extra != NULL) Line 2263  if (extra != NULL)
2263  in the name table, if present, and then in the pattern itself. */  in the name table, if present, and then in the pattern itself. */
2264    
2265  #ifdef SUPPORT_PCRE16  #ifdef SUPPORT_PCRE16
2266  if (!use_pcre16) return;  if (pcre_mode != PCRE16_MODE) return;
2267    
2268  while(TRUE)  while(TRUE)
2269    {    {
# Line 1978  while(TRUE) Line 2401  while(TRUE)
2401  /* Control should never reach here in 16 bit mode. */  /* Control should never reach here in 16 bit mode. */
2402  #endif /* SUPPORT_PCRE16 */  #endif /* SUPPORT_PCRE16 */
2403  }  }
2404    #endif /* SUPPORT_PCRE[8|16] */
2405    
2406    
2407    
2408  /*************************************************  #if defined SUPPORT_PCRE32
2409  *        Check match or recursion limit          *  static void
2410  *************************************************/  regexflip_32(pcre *ere, pcre_extra *extra)
   
 static int  
 check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,  
   int start_offset, int options, int *use_offsets, int use_size_offsets,  
   int flag, unsigned long int *limit, int errnumber, const char *msg)  
2411  {  {
2412  int count;  real_pcre32 *re = (real_pcre32 *)ere;
2413  int min = 0;  int op;
2414  int mid = 64;  pcre_uint32 *ptr = (pcre_uint32 *)re + re->name_table_offset;
2415  int max = -1;  int length = re->name_count * re->name_entry_size;
2416    #ifdef SUPPORT_UTF
2417    BOOL utf = (re->options & PCRE_UTF32) != 0;
2418    #endif /* SUPPORT_UTF */
2419    
2420  extra->flags |= flag;  /* Always flip the bytes in the main data block and study blocks. */
2421    
2422  for (;;)  re->magic_number = REVERSED_MAGIC_NUMBER;
2423    re->size = swap_uint32(re->size);
2424    re->options = swap_uint32(re->options);
2425    re->flags = swap_uint16(re->flags);
2426    re->top_bracket = swap_uint16(re->top_bracket);
2427    re->top_backref = swap_uint16(re->top_backref);
2428    re->first_char = swap_uint32(re->first_char);
2429    re->req_char = swap_uint32(re->req_char);
2430    re->name_table_offset = swap_uint16(re->name_table_offset);
2431    re->name_entry_size = swap_uint16(re->name_entry_size);
2432    re->name_count = swap_uint16(re->name_count);
2433    
2434    if (extra != NULL)
2435    {    {
2436    *limit = mid;    pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
2437      rsd->size = swap_uint32(rsd->size);
2438      rsd->flags = swap_uint32(rsd->flags);
2439      rsd->minlength = swap_uint32(rsd->minlength);
2440      }
2441    
2442    PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,  /* In 32-bit mode we must swap bytes
2443      use_offsets, use_size_offsets);  in the name table, if present, and then in the pattern itself. */
2444    
2445    if (count == errnumber)  while(TRUE)
2446      {
2447      /* Swap previous characters. */
2448      while (length-- > 0)
2449      {      {
2450      /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */      *ptr = swap_uint32(*ptr);
2451      min = mid;      ptr++;
     mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;  
2452      }      }
2453    
2454    else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||    /* Get next opcode. */
2455    
2456      length = 0;
2457      op = *ptr;
2458      *ptr++ = swap_uint32(op);
2459    
2460      switch (op)
2461        {
2462        case OP_END:
2463        return;
2464    
2465        default:
2466        length = OP_lengths32[op] - 1;
2467        break;
2468    
2469        case OP_CLASS:
2470        case OP_NCLASS:
2471        /* Skip the character bit map. */
2472        ptr += 32/sizeof(pcre_uint32);
2473        length = 0;
2474        break;
2475    
2476        case OP_XCLASS:
2477        /* LINK_SIZE can only be 1 in 32-bit mode. */
2478        length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
2479    
2480        /* Reverse the size of the XCLASS instance. */
2481        *ptr = swap_uint32(*ptr);
2482        ptr++;
2483    
2484        op = *ptr;
2485        *ptr = swap_uint32(op);
2486        ptr++;
2487        if ((op & XCL_MAP) != 0)
2488          {
2489          /* Skip the character bit map. */
2490          ptr += 32/sizeof(pcre_uint32);
2491          length -= 32/sizeof(pcre_uint32);
2492          }
2493        break;
2494        }
2495      }
2496    /* Control should never reach here in 32 bit mode. */
2497    }
2498    
2499    #endif /* SUPPORT_PCRE32 */
2500    
2501    
2502    
2503    static void
2504    regexflip(pcre *ere, pcre_extra *extra)
2505    {
2506    #if defined SUPPORT_PCRE32
2507      if (REAL_PCRE_FLAGS(ere) & PCRE_MODE32)
2508        regexflip_32(ere, extra);
2509    #endif
2510    #if defined SUPPORT_PCRE8 || defined SUPPORT_PCRE16
2511      if (REAL_PCRE_FLAGS(ere) & (PCRE_MODE8 | PCRE_MODE16))
2512        regexflip8_or_16(ere, extra);
2513    #endif
2514    }
2515    
2516    
2517    
2518    /*************************************************
2519    *        Check match or recursion limit          *
2520    *************************************************/
2521    
2522    static int
2523    check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
2524      int start_offset, int options, int *use_offsets, int use_size_offsets,
2525      int flag, unsigned long int *limit, int errnumber, const char *msg)
2526    {
2527    int count;
2528    int min = 0;
2529    int mid = 64;
2530    int max = -1;
2531    
2532    extra->flags |= flag;
2533    
2534    for (;;)
2535      {
2536      *limit = mid;
2537    
2538      PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
2539        use_offsets, use_size_offsets);
2540    
2541      if (count == errnumber)
2542        {
2543        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2544        min = mid;
2545        mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
2546        }
2547    
2548      else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
2549                           count == PCRE_ERROR_PARTIAL)                           count == PCRE_ERROR_PARTIAL)
2550      {      {
2551      if (mid == min + 1)      if (mid == min + 1)
# Line 2097  usage(void) Line 2631  usage(void)
2631  {  {
2632  printf("Usage:     pcretest [options] [<input file> [<output file>]]\n\n");  printf("Usage:     pcretest [options] [<input file> [<output file>]]\n\n");
2633  printf("Input and output default to stdin and stdout.\n");  printf("Input and output default to stdin and stdout.\n");
2634  #ifdef SUPPORT_LIBREADLINE  #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
2635  printf("If input is a terminal, readline() is used to read from it.\n");  printf("If input is a terminal, readline() is used to read from it.\n");
2636  #else  #else
2637  printf("This version of pcretest is not linked with readline().\n");  printf("This version of pcretest is not linked with readline().\n");
# Line 2106  printf("\nOptions:\n"); Line 2640  printf("\nOptions:\n");
2640  #ifdef SUPPORT_PCRE16  #ifdef SUPPORT_PCRE16
2641  printf("  -16      use the 16-bit library\n");  printf("  -16      use the 16-bit library\n");
2642  #endif  #endif
2643    #ifdef SUPPORT_PCRE32
2644    printf("  -32      use the 32-bit library\n");
2645    #endif
2646  printf("  -b       show compiled code\n");  printf("  -b       show compiled code\n");
2647  printf("  -C       show PCRE compile-time options and exit\n");  printf("  -C       show PCRE compile-time options and exit\n");
2648  printf("  -C arg   show a specific compile-time option\n");  printf("  -C arg   show a specific compile-time option\n");
# Line 2113  printf("           and exit with its val Line 2650  printf("           and exit with its val
2650  printf("     linksize     internal link size [2, 3, 4]\n");  printf("     linksize     internal link size [2, 3, 4]\n");
2651  printf("     pcre8        8 bit library support enabled [0, 1]\n");  printf("     pcre8        8 bit library support enabled [0, 1]\n");
2652  printf("     pcre16       16 bit library support enabled [0, 1]\n");  printf("     pcre16       16 bit library support enabled [0, 1]\n");
2653    printf("     pcre32       32 bit library support enabled [0, 1]\n");
2654  printf("     utf          Unicode Transformation Format supported [0, 1]\n");  printf("     utf          Unicode Transformation Format supported [0, 1]\n");
2655  printf("     ucp          Unicode Properties supported [0, 1]\n");  printf("     ucp          Unicode Properties supported [0, 1]\n");
2656  printf("     jit          Just-in-time compiler supported [0, 1]\n");  printf("     jit          Just-in-time compiler supported [0, 1]\n");
# Line 2133  printf("  -q       quiet: do not output Line 2671  printf("  -q       quiet: do not output
2671  printf("  -S <n>   set stack size to <n> megabytes\n");  printf("  -S <n>   set stack size to <n> megabytes\n");
2672  printf("  -s       force each pattern to be studied at basic level\n"  printf("  -s       force each pattern to be studied at basic level\n"
2673         "  -s+      force each pattern to be studied, using JIT if available\n"         "  -s+      force each pattern to be studied, using JIT if available\n"
2674           "  -s++     ditto, verifying when JIT was actually used\n"
2675           "  -s+n     force each pattern to be studied, using JIT if available,\n"
2676           "             where 1 <= n <= 7 selects JIT options\n"
2677           "  -s++n    ditto, verifying when JIT was actually used\n"
2678         "  -t       time compilation and execution\n");         "  -t       time compilation and execution\n");
2679  printf("  -t <n>   time compilation and execution, repeating <n> times\n");  printf("  -t <n>   time compilation and execution, repeating <n> times\n");
2680  printf("  -tm      time execution (matching) only\n");  printf("  -tm      time execution (matching) only\n");
# Line 2167  int quiet = 0; Line 2709  int quiet = 0;
2709  int size_offsets = 45;  int size_offsets = 45;
2710  int size_offsets_max;  int size_offsets_max;
2711  int *offsets = NULL;  int *offsets = NULL;
 #if !defined NOPOSIX  
 int posix = 0;  
 #endif  
2712  int debug = 0;  int debug = 0;
2713  int done = 0;  int done = 0;
2714  int all_use_dfa = 0;  int all_use_dfa = 0;
2715    int verify_jit = 0;
2716  int yield = 0;  int yield = 0;
2717  int stack_size;  int stack_size;
2718    pcre_uint8 *dbuffer = NULL;
2719    size_t dbuffer_size = 1u << 14;
2720    
2721    #if !defined NOPOSIX
2722    int posix = 0;
2723    #endif
2724    #if !defined NODFA
2725    int *dfa_workspace = NULL;
2726    #endif
2727    
2728  pcre_jit_stack *jit_stack = NULL;  pcre_jit_stack *jit_stack = NULL;
2729    
2730  /* These vectors store, end-to-end, a list of zero-terminated captured  /* These vectors store, end-to-end, a list of zero-terminated captured
2731  substring names, each list itself being terminated by an empty name. Assume  substring names, each list itself being terminated by an empty name. Assume
2732  that 1024 is plenty long enough for the few names we'll be testing. It is  that 1024 is plenty long enough for the few names we'll be testing. It is
2733  easiest to keep separate 8-bit and 16-bit versions, using the 16-bit version  easiest to keep separate 8-, 16- and 32-bit versions, using the 32-bit version
2734  for the actual memory, to ensure alignment. */  for the actual memory, to ensure alignment. */
2735    
2736  pcre_uint16 copynames[1024];  pcre_uint32 copynames[1024];
2737  pcre_uint16 getnames[1024];  pcre_uint32 getnames[1024];
2738    
2739    #ifdef SUPPORT_PCRE32
2740    pcre_uint32 *cn32ptr;
2741    pcre_uint32 *gn32ptr;
2742    #endif
2743    
2744  #ifdef SUPPORT_PCRE16  #ifdef SUPPORT_PCRE16
2745    pcre_uint16 *copynames16 = (pcre_uint16 *)copynames;
2746    pcre_uint16 *getnames16 = (pcre_uint16 *)getnames;
2747  pcre_uint16 *cn16ptr;  pcre_uint16 *cn16ptr;
2748  pcre_uint16 *gn16ptr;  pcre_uint16 *gn16ptr;
2749  #endif  #endif
# Line 2200  pcre_uint8 *gn8ptr; Line 2756  pcre_uint8 *gn8ptr;
2756  #endif  #endif
2757    
2758  /* Get buffers from malloc() so that valgrind will check their misuse when  /* Get buffers from malloc() so that valgrind will check their misuse when
2759  debugging. They grow automatically when very long lines are read. The 16-bit  debugging. They grow automatically when very long lines are read. The 16-
2760  buffer (buffer16) is obtained only if needed. */  and 32-bit buffers (buffer16, buffer32) are obtained only if needed. */
2761    
2762  buffer = (pcre_uint8 *)malloc(buffer_size);  buffer = (pcre_uint8 *)malloc(buffer_size);
 dbuffer = (pcre_uint8 *)malloc(buffer_size);  
2763  pbuffer = (pcre_uint8 *)malloc(buffer_size);  pbuffer = (pcre_uint8 *)malloc(buffer_size);
2764    
2765  /* The outfile variable is static so that new_malloc can use it. */  /* The outfile variable is static so that new_malloc can use it. */
# Line 2223  _setmode( _fileno( stdout ), _O_BINARY ) Line 2778  _setmode( _fileno( stdout ), _O_BINARY )
2778  /* Get the version number: both pcre_version() and pcre16_version() give the  /* Get the version number: both pcre_version() and pcre16_version() give the
2779  same answer. We just need to ensure that we call one that is available. */  same answer. We just need to ensure that we call one that is available. */
2780    
2781  #ifdef SUPPORT_PCRE8  #if defined SUPPORT_PCRE8
2782  version = pcre_version();  version = pcre_version();
2783  #else  #elif defined SUPPORT_PCRE16
2784  version = pcre16_version();  version = pcre16_version();
2785    #elif defined SUPPORT_PCRE32
2786    version = pcre32_version();
2787  #endif  #endif
2788    
2789  /* Scan options */  /* Scan options */
# Line 2234  version = pcre16_version(); Line 2791  version = pcre16_version();
2791  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
2792    {    {
2793    pcre_uint8 *endptr;    pcre_uint8 *endptr;
2794      char *arg = argv[op];
2795    
2796      if (strcmp(arg, "-m") == 0) showstore = 1;
2797      else if (strcmp(arg, "-s") == 0) force_study = 0;
2798    
2799    if (strcmp(argv[op], "-m") == 0) showstore = 1;    else if (strncmp(arg, "-s+", 3) == 0)
   else if (strcmp(argv[op], "-s") == 0) force_study = 0;  
   else if (strcmp(argv[op], "-s+") == 0)  
2800      {      {
2801        arg += 3;
2802        if (*arg == '+') { arg++; verify_jit = TRUE; }
2803      force_study = 1;      force_study = 1;
2804      force_study_options = PCRE_STUDY_JIT_COMPILE;      if (*arg == 0)
2805          force_study_options = jit_study_bits[6];
2806        else if (*arg >= '1' && *arg <= '7')
2807          force_study_options = jit_study_bits[*arg - '1'];
2808        else goto BAD_ARG;
2809        }
2810      else if (strcmp(arg, "-8") == 0)
2811        {
2812    #ifdef SUPPORT_PCRE8
2813        pcre_mode = PCRE8_MODE;
2814    #else
2815        printf("** This version of PCRE was built without 8-bit support\n");
2816        exit(1);
2817    #endif
2818      }      }
2819    else if (strcmp(argv[op], "-16") == 0)    else if (strcmp(arg, "-16") == 0)
2820      {      {
2821  #ifdef SUPPORT_PCRE16  #ifdef SUPPORT_PCRE16
2822      use_pcre16 = 1;      pcre_mode = PCRE16_MODE;
2823  #else  #else
2824      printf("** This version of PCRE was built without 16-bit support\n");      printf("** This version of PCRE was built without 16-bit support\n");
2825      exit(1);      exit(1);
2826  #endif  #endif
2827      }      }
2828    else if (strcmp(argv[op], "-q") == 0) quiet = 1;    else if (strcmp(arg, "-32") == 0)
2829    else if (strcmp(argv[op], "-b") == 0) debug = 1;      {
2830    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;  #ifdef SUPPORT_PCRE32
2831    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;      pcre_mode = PCRE32_MODE;
2832    else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;  #else
2833        printf("** This version of PCRE was built without 32-bit support\n");
2834        exit(1);
2835    #endif
2836        }
2837      else if (strcmp(arg, "-q") == 0) quiet = 1;
2838      else if (strcmp(arg, "-b") == 0) debug = 1;
2839      else if (strcmp(arg, "-i") == 0) showinfo = 1;
2840      else if (strcmp(arg, "-d") == 0) showinfo = debug = 1;
2841      else if (strcmp(arg, "-M") == 0) default_find_match_limit = TRUE;
2842  #if !defined NODFA  #if !defined NODFA
2843    else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;    else if (strcmp(arg, "-dfa") == 0) all_use_dfa = 1;
2844  #endif  #endif
2845    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&    else if (strcmp(arg, "-o") == 0 && argc > 2 &&
2846        ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),        ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2847          *endptr == 0))          *endptr == 0))
2848      {      {
2849      op++;      op++;
2850      argc--;      argc--;
2851      }      }
2852    else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)    else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0)
2853      {      {
2854      int both = argv[op][2] == 0;      int both = arg[2] == 0;
2855      int temp;      int temp;
2856      if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),      if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
2857                       *endptr == 0))                       *endptr == 0))
# Line 2280  while (argc > 1 && argv[op][0] == '-') Line 2863  while (argc > 1 && argv[op][0] == '-')
2863      else timeitm = LOOPREPEAT;      else timeitm = LOOPREPEAT;
2864      if (both) timeit = timeitm;      if (both) timeit = timeitm;
2865      }      }
2866    else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&    else if (strcmp(arg, "-S") == 0 && argc > 2 &&
2867        ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),        ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2868          *endptr == 0))          *endptr == 0))
2869      {      {
2870  #if defined(_WIN32) || defined(WIN32) || defined(__minix)  #if defined(_WIN32) || defined(WIN32) || defined(__minix) || defined(NATIVE_ZOS)
2871      printf("PCRE: -S not supported on this OS\n");      printf("PCRE: -S not supported on this OS\n");
2872      exit(1);      exit(1);
2873  #else  #else
# Line 2303  while (argc > 1 && argv[op][0] == '-') Line 2886  while (argc > 1 && argv[op][0] == '-')
2886  #endif  #endif
2887      }      }
2888  #if !defined NOPOSIX  #if !defined NOPOSIX
2889    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(arg, "-p") == 0) posix = 1;
2890  #endif  #endif
2891    else if (strcmp(argv[op], "-C") == 0)    else if (strcmp(arg, "-C") == 0)
2892      {      {
2893      int rc;      int rc;
2894      unsigned long int lrc;      unsigned long int lrc;
# Line 2317  while (argc > 1 && argv[op][0] == '-') Line 2900  while (argc > 1 && argv[op][0] == '-')
2900          (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);          (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2901          printf("%d\n", rc);          printf("%d\n", rc);
2902          yield = rc;          yield = rc;
         goto EXIT;  
2903          }          }
2904        if (strcmp(argv[op + 1], "pcre8") == 0)        else if (strcmp(argv[op + 1], "pcre8") == 0)
2905          {          {
2906  #ifdef SUPPORT_PCRE8  #ifdef SUPPORT_PCRE8
2907          printf("1\n");          printf("1\n");
# Line 2328  while (argc > 1 && argv[op][0] == '-') Line 2910  while (argc > 1 && argv[op][0] == '-')
2910          printf("0\n");          printf("0\n");
2911          yield = 0;          yield = 0;
2912  #endif  #endif
         goto EXIT;  
2913          }          }
2914        if (strcmp(argv[op + 1], "pcre16") == 0)        else if (strcmp(argv[op + 1], "pcre16") == 0)
2915          {          {
2916  #ifdef SUPPORT_PCRE16  #ifdef SUPPORT_PCRE16
2917          printf("1\n");          printf("1\n");
# Line 2339  while (argc > 1 && argv[op][0] == '-') Line 2920  while (argc > 1 && argv[op][0] == '-')
2920          printf("0\n");          printf("0\n");
2921          yield = 0;          yield = 0;
2922  #endif  #endif
2923            }
2924          else if (strcmp(argv[op + 1], "pcre32") == 0)
2925            {
2926    #ifdef SUPPORT_PCRE32
2927            printf("1\n");
2928            yield = 1;
2929    #else
2930            printf("0\n");
2931            yield = 0;
2932    #endif
2933          goto EXIT;          goto EXIT;
2934          }          }
2935        if (strcmp(argv[op + 1], "utf") == 0)        if (strcmp(argv[op + 1], "utf") == 0)
2936          {          {
2937  #ifdef SUPPORT_PCRE8  #ifdef SUPPORT_PCRE8
2938          (void)pcre_config(PCRE_CONFIG_UTF8, &rc);          if (pcre_mode == PCRE8_MODE)
2939          printf("%d\n", rc);            (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2940          yield = rc;  #endif
2941  #else  #ifdef SUPPORT_PCRE16
2942          (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);          if (pcre_mode == PCRE16_MODE)
2943              (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2944    #endif
2945    #ifdef SUPPORT_PCRE32
2946            if (pcre_mode == PCRE32_MODE)
2947              (void)pcre32_config(PCRE_CONFIG_UTF32, &rc);
2948    #endif
2949          printf("%d\n", rc);          printf("%d\n", rc);
2950          yield = rc;          yield = rc;
 #endif  
2951          goto EXIT;          goto EXIT;
2952          }          }
2953        if (strcmp(argv[op + 1], "ucp") == 0)        else if (strcmp(argv[op + 1], "ucp") == 0)
2954          {          {
2955          (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);          (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2956          printf("%d\n", rc);          printf("%d\n", rc);
2957          yield = rc;          yield = rc;
         goto EXIT;  
2958          }          }
2959        if (strcmp(argv[op + 1], "jit") == 0)        else if (strcmp(argv[op + 1], "jit") == 0)
2960          {          {
2961          (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);          (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2962          printf("%d\n", rc);          printf("%d\n", rc);
2963          yield = rc;          yield = rc;
         goto EXIT;  
2964          }          }
2965        if (strcmp(argv[op + 1], "newline") == 0)        else if (strcmp(argv[op + 1], "newline") == 0)
2966          {          {
2967          (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);          (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2968          /* Note that these values are always the ASCII values, even          print_newline_config(rc, TRUE);
         in EBCDIC environments. CR is 13 and NL is 10. */  
         printf("%s\n", (rc == 13)? "CR" :  
           (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :  
           (rc == -2)? "ANYCRLF" :  
           (rc == -1)? "ANY" : "???");  
         goto EXIT;  
2969          }          }
2970        printf("Unknown -C option: %s\n", argv[op + 1]);        else if (strcmp(argv[op + 1], "ebcdic") == 0)
2971            {
2972    #ifdef EBCDIC
2973            printf("1\n");
2974            yield = 1;
2975    #else
2976            printf("0\n");
2977    #endif
2978            }
2979          else if (strcmp(argv[op + 1], "ebcdic-nl") == 0)
2980            {
2981    #ifdef EBCDIC
2982            printf("0x%02x\n", CHAR_LF);
2983    #else
2984            printf("0\n");
2985    #endif
2986            }
2987          else
2988            {
2989            printf("Unknown -C option: %s\n", argv[op + 1]);
2990            }
2991        goto EXIT;        goto EXIT;
2992        }        }
2993    
2994        /* No argument for -C: output all configuration information. */
2995    
2996      printf("PCRE version %s\n", version);      printf("PCRE version %s\n", version);
2997      printf("Compiled with\n");      printf("Compiled with\n");
2998    
2999    #ifdef EBCDIC
3000        printf("  EBCDIC code support: LF is 0x%02x\n", CHAR_LF);
3001    #endif
3002    
3003  /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both  /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
3004  are set, either both UTFs are supported or both are not supported. */  are set, either both UTFs are supported or both are not supported. */
3005    
3006  #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16  #ifdef SUPPORT_PCRE8
3007      printf("  8-bit and 16-bit support\n");      printf("  8-bit support\n");
     (void)pcre_config(PCRE_CONFIG_UTF8, &rc);  
     if (rc)  
       printf("  UTF-8 and UTF-16 support\n");  
     else  
       printf("  No UTF-8 or UTF-16 support\n");  
 #elif defined SUPPORT_PCRE8  
     printf("  8-bit support only\n");  
3008      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
3009      printf("  %sUTF-8 support\n", rc? "" : "No ");        printf ("  %sUTF-8 support\n", rc ? "" : "No ");
3010  #else  #endif
3011      printf("  16-bit support only\n");  #ifdef SUPPORT_PCRE16
3012        printf("  16-bit support\n");
3013      (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);      (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
3014      printf("  %sUTF-16 support\n", rc? "" : "No ");      printf ("  %sUTF-16 support\n", rc ? "" : "No ");
3015    #endif
3016    #ifdef SUPPORT_PCRE32
3017        printf("  32-bit support\n");
3018        (void)pcre32_config(PCRE_CONFIG_UTF32, &rc);
3019        printf ("  %sUTF-32 support\n", rc ? "" : "No ");
3020  #endif  #endif
3021    
3022      (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
# Line 2412  are set, either both UTFs are supported Line 3025  are set, either both UTFs are supported
3025      if (rc)      if (rc)
3026        {        {
3027        const char *arch;        const char *arch;
3028        (void)PCRE_CONFIG(PCRE_CONFIG_JITTARGET, &arch);        (void)PCRE_CONFIG(PCRE_CONFIG_JITTARGET, (void *)(&arch));
3029        printf("  Just-in-time compiler support: %s\n", arch);        printf("  Just-in-time compiler support: %s\n", arch);
3030        }        }
3031      else      else
3032        printf("  No just-in-time compiler support\n");        printf("  No just-in-time compiler support\n");
3033      (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
3034      /* Note that these values are always the ASCII values, even      print_newline_config(rc, FALSE);
     in EBCDIC environments. CR is 13 and NL is 10. */  
     printf("  Newline sequence is %s\n", (rc == 13)? "CR" :  
       (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :  
       (rc == -2)? "ANYCRLF" :  
       (rc == -1)? "ANY" : "???");  
3035      (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
3036      printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :      printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :
3037                                       "all Unicode newlines");                                       "all Unicode newlines");
# Line 2438  are set, either both UTFs are supported Line 3046  are set, either both UTFs are supported
3046      (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
3047      printf("  Match recursion uses %s", rc? "stack" : "heap");      printf("  Match recursion uses %s", rc? "stack" : "heap");
3048      if (showstore)      if (showstore)
3049        {        {
3050        PCRE_EXEC(stack_size, NULL, NULL, NULL, -1, -1, 0, NULL, 0);        PCRE_EXEC(stack_size, NULL, NULL, NULL, -999, -999, 0, NULL, 0);
3051        printf(": %sframe size = %d bytes", rc? "approximate " : "", -stack_size);        printf(": %sframe size = %d bytes", rc? "approximate " : "", -stack_size);
3052        }        }
3053      printf("\n");      printf("\n");
3054      goto EXIT;      goto EXIT;
3055      }      }
3056    else if (strcmp(argv[op], "-help") == 0 ||    else if (strcmp(arg, "-help") == 0 ||
3057             strcmp(argv[op], "--help") == 0)             strcmp(arg, "--help") == 0)
3058      {      {
3059      usage();      usage();
3060      goto EXIT;      goto EXIT;
3061      }      }
3062    else    else
3063      {      {
3064      printf("** Unknown or malformed option %s\n", argv[op]);      BAD_ARG:
3065        printf("** Unknown or malformed option %s\n", arg);
3066      usage();      usage();
3067      yield = 1;      yield = 1;
3068      goto EXIT;      goto EXIT;
# Line 2514  pcre16_stack_malloc = stack_malloc; Line 3123  pcre16_stack_malloc = stack_malloc;
3123  pcre16_stack_free = stack_free;  pcre16_stack_free = stack_free;
3124  #endif  #endif
3125    
3126    #ifdef SUPPORT_PCRE32
3127    pcre32_malloc = new_malloc;
3128    pcre32_free = new_free;
3129    pcre32_stack_malloc = stack_malloc;
3130    pcre32_stack_free = stack_free;
3131    #endif
3132    
3133  /* Heading line unless quiet, then prompt for first regex if stdin */  /* Heading line unless quiet, then prompt for first regex if stdin */
3134    
3135  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
# Line 2551  while (!done) Line 3167  while (!done)
3167    int do_flip = 0;    int do_flip = 0;
3168    int erroroffset, len, delimiter, poffset;    int erroroffset, len, delimiter, poffset;
3169    
3170    #if !defined NODFA
3171      int dfa_matched = 0;
3172    #endif
3173    
3174    use_utf = 0;    use_utf = 0;
3175    debug_lengths = 1;    debug_lengths = 1;
3176    
# Line 2598  while (!done) Line 3218  while (!done)
3218        (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];        (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
3219    
3220      re = (pcre *)new_malloc(true_size);      re = (pcre *)new_malloc(true_size);
3221        if (re == NULL)
3222          {
3223          printf("** Failed to get %d bytes of memory for pcre object\n",
3224            (int)true_size);
3225          yield = 1;
3226          goto EXIT;
3227          }
3228      regex_gotten_store = first_gotten_store;      regex_gotten_store = first_gotten_store;
3229    
3230      if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;      if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
3231    
3232      magic = ((REAL_PCRE *)re)->magic_number;      magic = REAL_PCRE_MAGIC(re);
3233      if (magic != MAGIC_NUMBER)      if (magic != MAGIC_NUMBER)
3234        {        {
3235        if (swap_uint32(magic) == MAGIC_NUMBER)        if (swap_uint32(magic) == MAGIC_NUMBER)
# Line 2612  while (!done) Line 3239  while (!done)
3239        else        else
3240          {          {
3241          fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);          fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
3242            new_free(re);
3243          fclose(f);          fclose(f);
3244          continue;          continue;
3245          }          }
# Line 2641  while (!done) Line 3269  while (!done)
3269            {            {
3270            PCRE_FREE_STUDY(extra);            PCRE_FREE_STUDY(extra);
3271            }            }
3272          if (re != NULL) new_free(re);          new_free(re);
3273          fclose(f);          fclose(f);
3274          continue;          continue;
3275          }          }
# Line 2659  while (!done) Line 3287  while (!done)
3287          {          {
3288          /* Simulate the result of the function call below. */          /* Simulate the result of the function call below. */
3289          fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,          fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
3290            use_pcre16? "16" : "", PCRE_INFO_OPTIONS);            pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "",
3291          fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "            PCRE_INFO_OPTIONS);
3292            "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");          fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
3293              "%d-bit mode\n", 8 * CHAR_SIZE,
3294              8 * (REAL_PCRE_FLAGS(re) & PCRE_MODE_MASK));
3295            new_free(re);
3296            fclose(f);
3297          continue;          continue;
3298          }          }
3299        }        }
3300    
3301      /* Need to know if UTF-8 for printing data strings. */      /* Need to know if UTF-8 for printing data strings. */
3302    
3303      if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0) continue;      if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
3304          {
3305          new_free(re);
3306          fclose(f);
3307          continue;
3308          }
3309      use_utf = (get_options & PCRE_UTF8) != 0;      use_utf = (get_options & PCRE_UTF8) != 0;
3310    
3311      fclose(f);      fclose(f);
# Line 2727  while (!done) Line 3364  while (!done)
3364    /* Look for options after final delimiter */    /* Look for options after final delimiter */
3365    
3366    options = 0;    options = 0;
3367    study_options = 0;    study_options = force_study_options;
3368    log_store = showstore;  /* default from command line */    log_store = showstore;  /* default from command line */
3369    
3370    while (*pp != 0)    while (*pp != 0)
# Line 2764  while (!done) Line 3401  while (!done)
3401  #endif  #endif
3402    
3403        case 'S':        case 'S':
3404        if (do_study == 0)        do_study = 1;
3405          for (;;)
3406          {          {
3407          do_study = 1;          switch (*pp++)
         if (*pp == '+')  
3408            {            {
3409            study_options |= PCRE_STUDY_JIT_COMPILE;            case 'S':
3410            pp++;            do_study = 0;
3411              no_force_study = 1;
3412              break;
3413    
3414              case '!':
3415              study_options |= PCRE_STUDY_EXTRA_NEEDED;
3416              break;
3417    
3418              case '+':
3419              if (*pp == '+')
3420                {
3421                verify_jit = TRUE;
3422                pp++;
3423                }
3424              if (*pp >= '1' && *pp <= '7')
3425                study_options |= jit_study_bits[*pp++ - '1'];
3426              else
3427                study_options |= jit_study_bits[6];
3428              break;
3429    
3430              case '-':
3431              study_options &= ~PCRE_STUDY_ALLJIT;
3432              break;
3433    
3434              default:
3435              pp--;
3436              goto ENDLOOP;
3437            }            }
3438          }          }
3439        else        ENDLOOP:
         {  
         do_study = 0;  
         no_force_study = 1;  
         }  
3440        break;        break;
3441    
3442        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
# Line 2896  while (!done) Line 3555  while (!done)
3555  #endif  /* !defined NOPOSIX */  #endif  /* !defined NOPOSIX */
3556    
3557      {      {
3558      /* In 16-bit mode, convert the input. */      /* In 16- or 32-bit mode, convert the input. */
3559    
3560  #ifdef SUPPORT_PCRE16  #ifdef SUPPORT_PCRE16
3561      if (use_pcre16)      if (pcre_mode == PCRE16_MODE)
3562        {        {
3563        switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))        switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
3564          {          {
# Line 2925  while (!done) Line 3584  while (!done)
3584        }        }
3585  #endif  #endif
3586    
3587    #ifdef SUPPORT_PCRE32
3588        if (pcre_mode == PCRE32_MODE)
3589          {
3590          switch(to32(FALSE, p, options & PCRE_UTF32, (int)strlen((char *)p)))
3591            {
3592            case -1:
3593            fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3594              "converted to UTF-32\n");
3595            goto SKIP_DATA;
3596    
3597            case -2:
3598            fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3599              "cannot be converted to UTF-32\n");
3600            goto SKIP_DATA;
3601    
3602            case -3:
3603            fprintf(outfile, "**Failed: character value is ill-formed UTF-32\n");
3604            goto SKIP_DATA;
3605    
3606            default:
3607            break;
3608            }
3609          p = (pcre_uint8 *)buffer32;
3610          }
3611    #endif
3612    
3613      /* Compile many times when timing */      /* Compile many times when timing */
3614    
3615      if (timeit > 0)      if (timeit > 0)
# Line 2982  while (!done) Line 3667  while (!done)
3667      /* Extract the size for possible writing before possibly flipping it,      /* Extract the size for possible writing before possibly flipping it,
3668      and remember the store that was got. */      and remember the store that was got. */
3669    
3670      true_size = ((REAL_PCRE *)re)->size;      true_size = REAL_PCRE_SIZE(re);
3671      regex_gotten_store = first_gotten_store;      regex_gotten_store = first_gotten_store;
3672    
3673      /* Output code size information if requested */      /* Output code size information if requested */
3674    
3675      if (log_store)      if (log_store)
3676          {
3677          int name_count, name_entry_size, real_pcre_size;
3678    
3679          new_info(re, NULL, PCRE_INFO_NAMECOUNT, &name_count);
3680          new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &name_entry_size);
3681    #ifdef SUPPORT_PCRE8
3682          if (REAL_PCRE_FLAGS(re) & PCRE_MODE8)
3683            real_pcre_size = sizeof(real_pcre);
3684    #endif
3685    #ifdef SUPPORT_PCRE16
3686          if (REAL_PCRE_FLAGS(re) & PCRE_MODE16)
3687            real_pcre_size = sizeof(real_pcre16);
3688    #endif
3689    #ifdef SUPPORT_PCRE32
3690          if (REAL_PCRE_FLAGS(re) & PCRE_MODE32)
3691            real_pcre_size = sizeof(real_pcre32);
3692    #endif
3693        fprintf(outfile, "Memory allocation (code space): %d\n",        fprintf(outfile, "Memory allocation (code space): %d\n",
3694          (int)(first_gotten_store -          (int)(first_gotten_store - real_pcre_size - name_count * name_entry_size));
3695                sizeof(REAL_PCRE) -        }
               ((REAL_PCRE *)re)->name_count * ((REAL_PCRE *)re)->name_entry_size));  
3696    
3697      /* If -s or /S was present, study the regex to generate additional info to      /* If -s or /S was present, study the regex to generate additional info to
3698      help with the matching, unless the pattern has the SS option, which      help with the matching, unless the pattern has the SS option, which
# Line 3007  while (!done) Line 3708  while (!done)
3708          clock_t start_time = clock();          clock_t start_time = clock();
3709          for (i = 0; i < timeit; i++)          for (i = 0; i < timeit; i++)
3710            {            {
3711            PCRE_STUDY(extra, re, study_options | force_study_options, &error);            PCRE_STUDY(extra, re, study_options, &error);
3712            }            }
3713          time_taken = clock() - start_time;          time_taken = clock() - start_time;
3714          if (extra != NULL)          if (extra != NULL)
# Line 3018  while (!done) Line 3719  while (!done)
3719            (((double)time_taken * 1000.0) / (double)timeit) /            (((double)time_taken * 1000.0) / (double)timeit) /
3720              (double)CLOCKS_PER_SEC);              (double)CLOCKS_PER_SEC);
3721          }          }
3722        PCRE_STUDY(extra, re, study_options | force_study_options, &error);        PCRE_STUDY(extra, re, study_options, &error);
3723        if (error != NULL)        if (error != NULL)
3724          fprintf(outfile, "Failed to study: %s\n", error);          fprintf(outfile, "Failed to study: %s\n", error);
3725        else if (extra != NULL)        else if (extra != NULL)
# Line 3062  while (!done) Line 3763  while (!done)
3763      if (do_showinfo)      if (do_showinfo)
3764        {        {
3765        unsigned long int all_options;        unsigned long int all_options;
3766        int count, backrefmax, first_char, need_char, okpartial, jchanged,        pcre_uint32 first_char, need_char;
3767          hascrorlf;        int count, backrefmax, first_char_set, need_char_set, okpartial, jchanged,
3768            hascrorlf, maxlookbehind;
3769        int nameentrysize, namecount;        int nameentrysize, namecount;
3770        const pcre_uint8 *nametable;        const pcre_uint8 *nametable;
3771    
3772        if (new_info(re, NULL, PCRE_INFO_SIZE, &size) +        if (new_info(re, NULL, PCRE_INFO_SIZE, &size) +
3773            new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +            new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
3774            new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +            new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
3775            new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char) +            new_info(re, NULL, PCRE_INFO_FIRSTLITERAL, &first_char) +
3776            new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char) +            new_info(re, NULL, PCRE_INFO_FIRSTLITERALSET, &first_char_set) +
3777              new_info(re, NULL, PCRE_INFO_LASTLITERAL2, &need_char) +
3778              new_info(re, NULL, PCRE_INFO_LASTLITERAL2SET, &need_char_set) +
3779            new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +            new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
3780            new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +            new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
3781            new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +            new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
3782            new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +            new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
3783            new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +            new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
3784            new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf)            new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf) +
3785              new_info(re, NULL, PCRE_INFO_MAXLOOKBEHIND, &maxlookbehind)
3786            != 0)            != 0)
3787          goto SKIP_DATA;          goto SKIP_DATA;
3788    
# Line 3094  while (!done) Line 3799  while (!done)
3799          fprintf(outfile, "Named capturing subpatterns:\n");          fprintf(outfile, "Named capturing subpatterns:\n");
3800          while (namecount-- > 0)          while (namecount-- > 0)
3801            {            {
3802  #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16            int imm2_size = pcre_mode == PCRE8_MODE ? 2 : 1;
           int imm2_size = use_pcre16 ? 1 : 2;  
 #else  
           int imm2_size = IMM2_SIZE;  
 #endif  
3803            int length = (int)STRLEN(nametable + imm2_size);            int length = (int)STRLEN(nametable + imm2_size);
3804            fprintf(outfile, "  ");            fprintf(outfile, "  ");
3805            PCHARSV(nametable, imm2_size, length, outfile);            PCHARSV(nametable, imm2_size, length, outfile);
3806            while (length++ < nameentrysize - imm2_size) putc(' ', outfile);            while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
3807  #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16  #ifdef SUPPORT_PCRE32
3808            fprintf(outfile, "%3d\n", use_pcre16?            if (pcre_mode == PCRE32_MODE)
3809               (int)(((PCRE_SPTR16)nametable)[0])              fprintf(outfile, "%3d\n", (int)(((PCRE_SPTR32)nametable)[0]));
             :((int)nametable[0] << 8) | (int)nametable[1]);  
           nametable += nameentrysize * (use_pcre16 ? 2 : 1);  
 #else  
           fprintf(outfile, "%3d\n", GET2(nametable, 0));  
 #ifdef SUPPORT_PCRE8  
           nametable += nameentrysize;  
 #else  
           nametable += nameentrysize * 2;  
3810  #endif  #endif
3811    #ifdef SUPPORT_PCRE16
3812              if (pcre_mode == PCRE16_MODE)
3813                fprintf(outfile, "%3d\n", (int)(((PCRE_SPTR16)nametable)[0]));
3814  #endif  #endif
3815    #ifdef SUPPORT_PCRE8
3816              if (pcre_mode == PCRE8_MODE)
3817                fprintf(outfile, "%3d\n", ((int)nametable[0] << 8) | (int)nametable[1]);
3818    #endif
3819              nametable += nameentrysize * CHAR_SIZE;
3820            }            }
3821          }          }
3822    
3823        if (!okpartial) fprintf(outfile, "Partial matching not supported\n");        if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
3824        if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");        if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
3825    
3826        all_options = ((REAL_PCRE *)re)->options;        all_options = REAL_PCRE_OPTIONS(re);
3827        if (do_flip) all_options = swap_uint32(all_options);        if (do_flip) all_options = swap_uint32(all_options);
3828    
3829        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
# Line 3173  while (!done) Line 3874  while (!done)
3874          break;          break;
3875          }          }
3876    
3877        if (first_char == -1)        if (first_char_set == 2)
3878          {          {
3879          fprintf(outfile, "First char at start or follows newline\n");          fprintf(outfile, "First char at start or follows newline\n");
3880          }          }
3881        else if (first_char < 0)        else if (first_char_set == 1)
         {  
         fprintf(outfile, "No first char\n");  
         }  
       else  
3882          {          {
3883          const char *caseless =          const char *caseless =
3884            ((((REAL_PCRE *)re)->flags & PCRE_FCH_CASELESS) == 0)?            ((REAL_PCRE_FLAGS(re) & PCRE_FCH_CASELESS) == 0)?
3885            "" : " (caseless)";            "" : " (caseless)";
3886    
3887          if (PRINTOK(first_char))          if (PRINTOK(first_char))
# Line 3196  while (!done) Line 3893  while (!done)
3893            fprintf(outfile, "%s\n", caseless);            fprintf(outfile, "%s\n", caseless);
3894            }            }
3895          }          }
3896          else
3897            {
3898            fprintf(outfile, "No first char\n");
3899            }
3900    
3901        if (need_char < 0)        if (need_char_set == 0)
3902          {          {
3903          fprintf(outfile, "No need char\n");          fprintf(outfile, "No need char\n");
3904          }          }
3905        else        else
3906          {          {
3907          const char *caseless =          const char *caseless =
3908            ((((REAL_PCRE *)re)->flags & PCRE_RCH_CASELESS) == 0)?            ((REAL_PCRE_FLAGS(re) & PCRE_RCH_CASELESS) == 0)?
3909            "" : " (caseless)";            "" : " (caseless)";
3910    
3911          if (PRINTOK(need_char))          if (PRINTOK(need_char))
# Line 3217  while (!done) Line 3918  while (!done)
3918            }            }
3919          }          }
3920    
3921          if (maxlookbehind > 0)
3922            fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
3923    
3924        /* Don't output study size; at present it is in any case a fixed        /* Don't output study size; at present it is in any case a fixed
3925        value, but it varies, depending on the computer architecture, and        value, but it varies, depending on the computer architecture, and
3926        so messes up the test suite. (And with the /F option, it might be        so messes up the test suite. (And with the /F option, it might be
# Line 3274  while (!done) Line 3978  while (!done)
3978    
3979          /* Show this only if the JIT was set by /S, not by -s. */          /* Show this only if the JIT was set by /S, not by -s. */
3980    
3981          if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)          if ((study_options & PCRE_STUDY_ALLJIT) != 0 &&
3982                (force_study_options & PCRE_STUDY_ALLJIT) == 0)
3983            {            {
3984            int jit;            int jit;
3985            if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)            if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)
# Line 3361  while (!done) Line 4066  while (!done)
4066    
4067    for (;;)    for (;;)
4068      {      {
4069      pcre_uint8 *q;  #ifdef SUPPORT_PCRE8
4070        pcre_uint8 *q8;
4071    #endif
4072    #ifdef SUPPORT_PCRE16
4073        pcre_uint16 *q16;
4074    #endif
4075    #ifdef SUPPORT_PCRE32
4076        pcre_uint32 *q32;
4077    #endif
4078      pcre_uint8 *bptr;      pcre_uint8 *bptr;
4079      int *use_offsets = offsets;      int *use_offsets = offsets;
4080      int use_size_offsets = size_offsets;      int use_size_offsets = size_offsets;
4081      int callout_data = 0;      int callout_data = 0;
4082      int callout_data_set = 0;      int callout_data_set = 0;
4083      int count, c;      int count;
4084        pcre_uint32 c;
4085      int copystrings = 0;      int copystrings = 0;
4086      int find_match_limit = default_find_match_limit;      int find_match_limit = default_find_match_limit;
4087      int getstrings = 0;      int getstrings = 0;
# Line 3381  while (!done) Line 4095  while (!done)
4095      *copynames = 0;      *copynames = 0;
4096      *getnames = 0;      *getnames = 0;
4097    
4098    #ifdef SUPPORT_PCRE32
4099        cn32ptr = copynames;
4100        gn32ptr = getnames;
4101    #endif
4102  #ifdef SUPPORT_PCRE16  #ifdef SUPPORT_PCRE16
4103      cn16ptr = copynames;      cn16ptr = copynames16;
4104      gn16ptr = getnames;      gn16ptr = getnames16;
4105  #endif  #endif
4106  #ifdef SUPPORT_PCRE8  #ifdef SUPPORT_PCRE8
4107      cn8ptr = copynames8;      cn8ptr = copynames8;
4108      gn8ptr = getnames8;      gn8ptr = getnames8;
4109  #endif  #endif
4110    
4111      SET_PCRE_CALLOUT(callout);      SET_PCRE_CALLOUT(callout);
4112      first_callout = 1;      first_callout = 1;
# Line 3428  while (!done) Line 4146  while (!done)
4146      p = buffer;      p = buffer;
4147      while (isspace(*p)) p++;      while (isspace(*p)) p++;
4148    
4149      bptr = q = dbuffer;  #ifndef NOUTF
4150        /* Check that the data is well-formed UTF-8 if we're in UTF mode. To create
4151           invalid input to pcre_exec, you must use \x?? or \x{} sequences. */
4152        if (use_utf)
4153          {
4154          char *q;
4155          pcre_uint32 c;
4156          int n = 1;
4157    
4158          for (q = p; n > 0 && *q; q += n) n = utf82ord(q, &c);
4159          if (n <= 0)
4160            {
4161            fprintf(outfile, "**Failed: invalid UTF-8 string cannot be used as input in UTF mode\n");
4162            goto NEXT_DATA;
4163            }
4164          }
4165    #endif
4166    
4167        /* Allocate a buffer to hold the data line. len+1 is an upper bound on
4168           the number of pcre_uchar units that will be needed. */
4169        if (dbuffer == NULL || len >= dbuffer_size)
4170          {
4171          dbuffer_size *= 2;
4172          dbuffer = (pcre_uint8 *)realloc(dbuffer, dbuffer_size * CHAR_SIZE);
4173          if (dbuffer == NULL)
4174            {
4175            fprintf(stderr, "pcretest: malloc(%d) failed\n", dbuffer_size);
4176            exit(1);
4177            }
4178          }
4179    
4180    #ifdef SUPPORT_PCRE8
4181        q8 = (pcre_uint8 *) dbuffer;
4182    #endif
4183    #ifdef SUPPORT_PCRE16
4184        q16 = (pcre_uint16 *) dbuffer;
4185    #endif
4186    #ifdef SUPPORT_PCRE32
4187        q32 = (pcre_uint32 *) dbuffer;
4188    #endif
4189    
4190      while ((c = *p++) != 0)      while ((c = *p++) != 0)
4191        {        {
4192        int i = 0;        int i = 0;
# Line 3437  while (!done) Line 4195  while (!done)
4195        /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.        /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.
4196        In non-UTF mode, allow the value of the byte to fall through to later,        In non-UTF mode, allow the value of the byte to fall through to later,
4197        where values greater than 127 are turned into UTF-8 when running in        where values greater than 127 are turned into UTF-8 when running in
4198        16-bit mode. */        16-bit or 32-bit mode. */
4199    
4200        if (c != '\\')        if (c != '\\')
4201          {          {
4202          if (use_utf)  #ifndef NOUTF
4203            {          if (use_utf && HASUTF8EXTRALEN(c)) { GETUTF8INC(c, p); }
4204            *q++ = c;  #endif
           continue;  
           }  
4205          }          }
4206    
4207        /* Handle backslash escapes */        /* Handle backslash escapes */
# Line 3483  while (!done) Line 4239  while (!done)
4239              {              {
4240              if (++i == 9)              if (++i == 9)
4241                fprintf(outfile, "** Too many hex digits in \\x{...} item; "                fprintf(outfile, "** Too many hex digits in \\x{...} item; "
4242                                 "using only the first eight.\n");                                 "using only the first eight.\n");
4243              else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);              else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
4244              }              }
4245            if (*pt == '}')            if (*pt == '}')
4246              {              {
4247              p = pt + 1;              p = pt + 1;
# Line 3498  while (!done) Line 4254  while (!done)
4254          allows UTF-8 characters to be constructed byte by byte, and also allows          allows UTF-8 characters to be constructed byte by byte, and also allows
4255          invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.          invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.
4256          Otherwise, pass it down to later code so that it can be turned into          Otherwise, pass it down to later code so that it can be turned into
4257          UTF-8 when running in 16-bit mode. */          UTF-8 when running in 16/32-bit mode. */
4258    
4259          c = 0;          c = 0;
4260          while (i++ < 2 && isxdigit(*p))          while (i++ < 2 && isxdigit(*p))
# Line 3506  while (!done) Line 4262  while (!done)
4262            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
4263            p++;            p++;
4264            }            }
4265          if (use_utf)  #if !defined NOUTF && defined SUPPORT_PCRE8
4266            if (use_utf && (pcre_mode == PCRE8_MODE))
4267            {            {
4268            *q++ = c;            *q8++ = c;
4269            continue;            continue;
4270            }            }
4271    #endif
4272          break;          break;
4273    
4274          case 0:   /* \ followed by EOF allows for an empty line */          case 0:   /* \ followed by EOF allows for an empty line */
# Line 3543  while (!done) Line 4301  while (!done)
4301            }            }
4302          else if (isalnum(*p))          else if (isalnum(*p))
4303            {            {
4304            READ_CAPTURE_NAME(p, &cn8ptr, &cn16ptr, re);            READ_CAPTURE_NAME(p, &cn8ptr, &cn16ptr, &cn32ptr, re);
4305            }            }
4306          else if (*p == '+')          else if (*p == '+')
4307            {            {
# Line 3606  while (!done) Line 4364  while (!done)
4364            }            }
4365          else if (isalnum(*p))          else if (isalnum(*p))
4366            {            {
4367            READ_CAPTURE_NAME(p, &gn8ptr, &gn16ptr, re);            READ_CAPTURE_NAME(p, &gn8ptr, &gn16ptr, &gn32ptr, re);
4368            }            }
4369          continue;          continue;
4370    
# Line 3654  while (!done) Line 4412  while (!done)
4412            }            }
4413          use_size_offsets = n;          use_size_offsets = n;
4414          if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */          if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */
4415              else use_offsets = offsets + size_offsets_max - n;  /* To catch overruns */
4416          continue;          continue;
4417    
4418          case 'P':          case 'P':
# Line 3716  while (!done) Line 4475  while (!done)
4475          }          }
4476    
4477        /* We now have a character value in c that may be greater than 255. In        /* We now have a character value in c that may be greater than 255. In
4478        16-bit mode, we always convert characters to UTF-8 so that values greater        16-bit or 32-bit mode, we always convert characters to UTF-8 so that
4479        than 255 can be passed to non-UTF 16-bit strings. In 8-bit mode we        values greater than 255 can be passed to non-UTF 16- or 32-bit strings.
4480        convert to UTF-8 if we are in UTF mode. Values greater than 127 in UTF        In 8-bit       mode we convert to UTF-8 if we are in UTF mode. Values greater
4481        mode must have come from \x{...} or octal constructs because values from        than 127       in UTF mode must have come from \x{...} or octal constructs
4482        \x.. get this far only in non-UTF mode. */        because values from \x.. get this far only in non-UTF mode. */
4483    
4484  #if !defined NOUTF || defined SUPPORT_PCRE16  #ifdef SUPPORT_PCRE8
4485        if (use_pcre16 || use_utf)        if (pcre_mode == PCRE8_MODE)
4486          {          {
4487          pcre_uint8 buff8[8];  #ifndef NOUTF
4488          int ii, utn;          if (use_utf)
4489          utn = ord2utf8(c, buff8);            {
4490          for (ii = 0; ii < utn; ii++) *q++ = buff8[ii];            q8 += ord2utf8(c, q8);
4491              }
4492            else
4493    #endif
4494              {
4495              if (c > 0xffu)
4496                {
4497                fprintf(outfile, "** Character \\x{%x} is greater than 255 "
4498                  "and UTF-8 mode is not enabled.\n", c);
4499                fprintf(outfile, "** Truncation will probably give the wrong "
4500                  "result.\n");
4501                }
4502    
4503              *q8++ = c;
4504              }
4505          }          }
       else  
4506  #endif  #endif
4507    #ifdef SUPPORT_PCRE16
4508          if (pcre_mode == PCRE16_MODE)
4509          {          {
4510          if (c > 255)  #ifndef NOUTF
4511            if (use_utf)
4512            {            {
4513            fprintf(outfile, "** Character \\x{%x} is greater than 255 "            if (c > 0x10ffffu)
4514              "and UTF-8 mode is not enabled.\n", c);              {
4515            fprintf(outfile, "** Truncation will probably give the wrong "              fprintf(outfile, "**Failed: character value greater than 0x10ffff "
4516              "result.\n");                "cannot be converted to UTF-16\n");
4517                goto NEXT_DATA;
4518                }
4519              else if (c >= 0x10000u)
4520                {
4521                c-= 0x10000u;
4522                *q16++ = 0xD800 | (c >> 10);
4523                *q16++ = 0xDC00 | (c & 0x3ff);
4524                }
4525              else
4526                *q16++ = c;
4527            }            }
4528          *q++ = c;          else
4529    #endif
4530              {
4531              if (c > 0xffffu)
4532                {
4533                fprintf(outfile, "** Character value is greater than 0xffff "
4534                  "and UTF-16 mode is not enabled.\n", c);
4535                fprintf(outfile, "** Truncation will probably give the wrong "
4536                  "result.\n");
4537                }
4538    
4539              *q16++ = c;
4540              }
4541            }
4542    #endif
4543    #ifdef SUPPORT_PCRE32
4544          if (pcre_mode == PCRE32_MODE)
4545            {
4546            *q32++ = c;
4547          }          }
4548    #endif
4549    
4550        }        }
4551    
4552      /* Reached end of subject string */      /* Reached end of subject string */
4553    
4554      *q = 0;  #ifdef SUPPORT_PCRE8
4555      len = (int)(q - dbuffer);      if (pcre_mode == PCRE8_MODE)
4556        {
4557          *q8 = 0;
4558          len = (int)(q8 - (pcre_uint8 *)dbuffer);
4559        }
4560    #endif
4561    #ifdef SUPPORT_PCRE16
4562        if (pcre_mode == PCRE16_MODE)
4563        {
4564          *q16 = 0;
4565          len = (int)(q16 - (pcre_uint16 *)dbuffer);
4566        }
4567    #endif
4568    #ifdef SUPPORT_PCRE32
4569        if (pcre_mode == PCRE32_MODE)
4570        {
4571          *q32 = 0;
4572          len = (int)(q32 - (pcre_uint32 *)dbuffer);
4573        }
4574    #endif
4575    
4576      /* Move the data to the end of the buffer so that a read over the end of      /* Move the data to the end of the buffer so that a read over the end of
4577      the buffer will be seen by valgrind, even if it doesn't cause a crash. If      the buffer will be seen by valgrind, even if it doesn't cause a crash. If
4578      we are using the POSIX interface, we must include the terminating zero. */      we are using the POSIX interface, we must include the terminating zero. */
4579    
4580        bptr = dbuffer;
4581    
4582  #if !defined NOPOSIX  #if !defined NOPOSIX
4583      if (posix || do_posix)      if (posix || do_posix)
4584        {        {
4585        memmove(bptr + buffer_size - len - 1, bptr, len + 1);        memmove(bptr + dbuffer_size - len - 1, bptr, len + 1);
4586        bptr += buffer_size - len - 1;        bptr += dbuffer_size - len - 1;
4587        }        }
4588      else      else
4589  #endif  #endif
4590        {        {
4591        memmove(bptr + buffer_size - len, bptr, len);        bptr = memmove(bptr + (dbuffer_size - len) * CHAR_SIZE, bptr, len * CHAR_SIZE);
       bptr += buffer_size - len;  
4592        }        }
4593    
4594      if ((all_use_dfa || use_dfa) && find_match_limit)      if ((all_use_dfa || use_dfa) && find_match_limit)
# Line 3794  while (!done) Line 4619  while (!done)
4619          (void)regerror(rc, &preg, (char *)buffer, buffer_size);          (void)regerror(rc, &preg, (char *)buffer, buffer_size);
4620          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
4621          }          }
4622        else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)        else if ((REAL_PCRE_OPTIONS(preg.re_pcre) & PCRE_NO_AUTO_CAPTURE) != 0)
               != 0)  
4623          {          {
4624          fprintf(outfile, "Matched with REG_NOSUB\n");          fprintf(outfile, "Matched with REG_NOSUB\n");
4625          }          }
# Line 3828  while (!done) Line 4652  while (!done)
4652    
4653      /* Handle matching via the native interface - repeats for /g and /G */      /* Handle matching via the native interface - repeats for /g and /G */
4654    
4655  #ifdef SUPPORT_PCRE16      /* Ensure that there is a JIT callback if we want to verify that JIT was
4656      if (use_pcre16)      actually used. If jit_stack == NULL, no stack has yet been assigned. */
       {  
       len = to16(TRUE, bptr, (((REAL_PCRE *)re)->options) & PCRE_UTF8, len);  
       switch(len)  
         {  
         case -1:  
         fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "  
           "converted to UTF-16\n");  
         goto NEXT_DATA;  
   
         case -2:  
         fprintf(outfile, "**Failed: character value greater than 0x10ffff "  
           "cannot be converted to UTF-16\n");  
         goto NEXT_DATA;  
   
         case -3:  
         fprintf(outfile, "**Failed: character value greater than 0xffff "  
           "cannot be converted to 16-bit in non-UTF mode\n");  
         goto NEXT_DATA;  
4657    
4658          default:      if (verify_jit && jit_stack == NULL && extra != NULL)
4659          break;         { PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack); }
         }  
       bptr = (pcre_uint8 *)buffer16;  
       }  
 #endif  
4660    
4661      for (;; gmatched++)    /* Loop for /g or /G */      for (;; gmatched++)    /* Loop for /g or /G */
4662        {        {
4663        markptr = NULL;        markptr = NULL;
4664          jit_was_used = FALSE;
4665    
4666        if (timeitm > 0)        if (timeitm > 0)
4667          {          {
# Line 3869  while (!done) Line 4672  while (!done)
4672  #if !defined NODFA  #if !defined NODFA
4673          if (all_use_dfa || use_dfa)          if (all_use_dfa || use_dfa)
4674            {            {
4675            int workspace[1000];            if ((options & PCRE_DFA_RESTART) != 0)
4676                {
4677                fprintf(outfile, "Timing DFA restarts is not supported\n");
4678                break;
4679                }
4680              if (dfa_workspace == NULL)
4681                dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
4682            for (i = 0; i < timeitm; i++)            for (i = 0; i < timeitm; i++)
4683              {              {
4684              PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,              PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
4685                (options | g_notempty), use_offsets, use_size_offsets, workspace,                (options | g_notempty), use_offsets, use_size_offsets,
4686                (sizeof(workspace)/sizeof(int)));                dfa_workspace, DFA_WS_DIMENSION);
4687              }              }
4688            }            }
4689          else          else
# Line 3900  while (!done) Line 4709  while (!done)
4709    
4710        if (find_match_limit)        if (find_match_limit)
4711          {          {
4712          if (extra == NULL)          if (extra != NULL) { PCRE_FREE_STUDY(extra); }
4713            {          extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4714            extra = (pcre_extra *)malloc(sizeof(pcre_extra));          extra->flags = 0;
           extra->flags = 0;  
           }  
         else extra->flags &= ~PCRE_EXTRA_EXECUTABLE_JIT;  
4715    
4716          (void)check_match_limit(re, extra, bptr, len, start_offset,          (void)check_match_limit(re, extra, bptr, len, start_offset,
4717            options|g_notempty, use_offsets, use_size_offsets,            options|g_notempty, use_offsets, use_size_offsets,
# Line 3940  while (!done) Line 4746  while (!done)
4746  #if !defined NODFA  #if !defined NODFA
4747        else if (all_use_dfa || use_dfa)        else if (all_use_dfa || use_dfa)
4748          {          {
4749          int workspace[1000];          if (dfa_workspace == NULL)
4750              dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
4751            if (dfa_matched++ == 0)
4752              dfa_workspace[0] = -1;  /* To catch bad restart */
4753          PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,          PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
4754            (options | g_notempty), use_offsets, use_size_offsets, workspace,            (options | g_notempty), use_offsets, use_size_offsets, dfa_workspace,
4755            (sizeof(workspace)/sizeof(int)));            DFA_WS_DIMENSION);
4756          if (count == 0)          if (count == 0)
4757            {            {
4758            fprintf(outfile, "Matched, but too many subsidiary matches\n");            fprintf(outfile, "Matched, but too many subsidiary matches\n");
# Line 4020  while (!done) Line 4829  while (!done)
4829              fprintf(outfile, "%2d: ", i/2);              fprintf(outfile, "%2d: ", i/2);
4830              PCHARSV(bptr, use_offsets[i],              PCHARSV(bptr, use_offsets[i],
4831                use_offsets[i+1] - use_offsets[i], outfile);                use_offsets[i+1] - use_offsets[i], outfile);
4832                if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
4833              fprintf(outfile, "\n");              fprintf(outfile, "\n");
4834              if (do_showcaprest || (i == 0 && do_showrest))              if (do_showcaprest || (i == 0 && do_showrest))
4835                {                {
# Line 4063  while (!done) Line 4873  while (!done)
4873            int rc;            int rc;
4874            char copybuffer[256];            char copybuffer[256];
4875    
4876            if (use_pcre16)  #ifdef SUPPORT_PCRE32
4877              if (pcre_mode == PCRE32_MODE)
4878                {
4879                if (*(pcre_uint32 *)cnptr == 0) break;
4880                }
4881    #endif
4882    #ifdef SUPPORT_PCRE16
4883              if (pcre_mode == PCRE16_MODE)
4884              {              {
4885              if (*(pcre_uint16 *)cnptr == 0) break;              if (*(pcre_uint16 *)cnptr == 0) break;
4886              }              }
4887            else  #endif
4888    #ifdef SUPPORT_PCRE8
4889              if (pcre_mode == PCRE8_MODE)
4890              {              {
4891              if (*(pcre_uint8 *)cnptr == 0) break;              if (*(pcre_uint8 *)cnptr == 0) break;
4892              }              }
4893    #endif
4894    
4895            PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,            PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
4896              cnptr, copybuffer, sizeof(copybuffer));              cnptr, copybuffer, sizeof(copybuffer));
# Line 4118  while (!done) Line 4938  while (!done)
4938            int rc;            int rc;
4939            const char *substring;            const char *substring;
4940    
4941            if (use_pcre16)  #ifdef SUPPORT_PCRE32
4942              if (pcre_mode == PCRE32_MODE)
4943                {
4944                if (*(pcre_uint32 *)gnptr == 0) break;
4945                }
4946    #endif
4947    #ifdef SUPPORT_PCRE16
4948              if (pcre_mode == PCRE16_MODE)
4949              {              {
4950              if (*(pcre_uint16 *)gnptr == 0) break;              if (*(pcre_uint16 *)gnptr == 0) break;
4951              }              }
4952            else  #endif
4953    #ifdef SUPPORT_PCRE8
4954              if (pcre_mode == PCRE8_MODE)
4955              {              {
4956              if (*(pcre_uint8 *)gnptr == 0) break;              if (*(pcre_uint8 *)gnptr == 0) break;
4957              }              }
4958    #endif
4959    
4960            PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,            PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
4961              gnptr, &substring);              gnptr, &substring);
# Line 4186  while (!done) Line 5016  while (!done)
5016            PCHARSV(bptr, use_offsets[0], use_offsets[1] - use_offsets[0],            PCHARSV(bptr, use_offsets[0], use_offsets[1] - use_offsets[0],
5017              outfile);              outfile);
5018            }            }
5019            if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
5020          fprintf(outfile, "\n");          fprintf(outfile, "\n");
5021          break;  /* Out of the /g loop */          break;  /* Out of the /g loop */
5022          }          }
# Line 4211  while (!done) Line 5042  while (!done)
5042          if (g_notempty != 0)          if (g_notempty != 0)
5043            {            {
5044            int onechar = 1;            int onechar = 1;
5045            unsigned int obits = ((REAL_PCRE *)re)->options;            unsigned int obits = REAL_PCRE_OPTIONS(re);
5046            use_offsets[0] = start_offset;            use_offsets[0] = start_offset;
5047            if ((obits & PCRE_NEWLINE_BITS) == 0)            if ((obits & PCRE_NEWLINE_BITS) == 0)
5048              {              {
# Line 4229  while (!done) Line 5060  while (!done)
5060                 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||                 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
5061                 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)                 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
5062                &&                &&
5063                start_offset < len - 1 &&                start_offset < len - 1 && (
5064  #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16  #ifdef SUPPORT_PCRE8
5065                (use_pcre16?                (pcre_mode == PCRE8_MODE &&
5066                     ((PCRE_SPTR16)bptr)[start_offset] == '\r'                 bptr[start_offset] == '\r' &&
5067                  && ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n'                 bptr[start_offset + 1] == '\n') ||
5068                :  #endif
5069                     bptr[start_offset] == '\r'  #ifdef SUPPORT_PCRE16
5070                  && bptr[start_offset + 1] == '\n')                (pcre_mode == PCRE16_MODE &&
5071  #elif defined SUPPORT_PCRE16                 ((PCRE_SPTR16)bptr)[start_offset] == '\r' &&
5072                   ((PCRE_SPTR16)bptr)[start_offset] == '\r'                 ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n') ||
5073                && ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n'  #endif
5074  #else  #ifdef SUPPORT_PCRE32
5075                   bptr[start_offset] == '\r'                (pcre_mode == PCRE32_MODE &&
5076                && bptr[start_offset + 1] == '\n'                 ((PCRE_SPTR32)bptr)[start_offset] == '\r' &&
5077                   ((PCRE_SPTR32)bptr)[start_offset + 1] == '\n') ||
5078  #endif  #endif
5079                )                0))
5080              onechar++;              onechar++;
5081            else if (use_utf)            else if (use_utf)
5082              {              {
# Line 4265  while (!done) Line 5097  while (!done)
5097                {                {
5098                if (markptr == NULL)                if (markptr == NULL)
5099                  {                  {
5100                  fprintf(outfile, "No match\n");                  fprintf(outfile, "No match");
5101                  }                  }
5102                else                else
5103                  {                  {
5104                  fprintf(outfile, "No match, mark = ");                  fprintf(outfile, "No match, mark = ");
5105                  PCHARSV(markptr, 0, -1, outfile);                  PCHARSV(markptr, 0, -1, outfile);
                 putc('\n', outfile);  
5106                  }                  }
5107                  if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
5108                  putc('\n', outfile);
5109                }                }
5110              break;              break;
5111    
5112              case PCRE_ERROR_BADUTF8:              case PCRE_ERROR_BADUTF8:
5113              case PCRE_ERROR_SHORTUTF8:              case PCRE_ERROR_SHORTUTF8:
5114              fprintf(outfile, "Error %d (%s UTF-%s string)", count,              fprintf(outfile, "Error %d (%s UTF-%d string)", count,
5115                (count == PCRE_ERROR_BADUTF8)? "bad" : "short",                (count == PCRE_ERROR_BADUTF8)? "bad" : "short",
5116                use_pcre16? "16" : "8");                8 * CHAR_SIZE);
5117              if (use_size_offsets >= 2)              if (use_size_offsets >= 2)
5118                fprintf(outfile, " offset=%d reason=%d", use_offsets[0],                fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
5119                  use_offsets[1]);                  use_offsets[1]);
# Line 4288  while (!done) Line 5121  while (!done)
5121              break;              break;
5122    
5123              case PCRE_ERROR_BADUTF8_OFFSET:              case PCRE_ERROR_BADUTF8_OFFSET:
5124              fprintf(outfile, "Error %d (bad UTF-%s offset)\n", count,              fprintf(outfile, "Error %d (bad UTF-%d offset)\n", count,
5125                use_pcre16? "16" : "8");                8 * CHAR_SIZE);
5126              break;              break;
5127    
5128              default:              default:
5129              if (count < 0 && (-count) < sizeof(errtexts)/sizeof(const char *))              if (count < 0 &&
5130                    (-count) < (int)(sizeof(errtexts)/sizeof(const char *)))
5131                fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);                fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
5132              else              else
5133                fprintf(outfile, "Error %d (Unexpected value)\n", count);                fprintf(outfile, "Error %d (Unexpected value)\n", count);
# Line 4378  free(offsets); Line 5212  free(offsets);
5212  #ifdef SUPPORT_PCRE16  #ifdef SUPPORT_PCRE16
5213  if (buffer16 != NULL) free(buffer16);  if (buffer16 != NULL) free(buffer16);
5214  #endif  #endif
5215    #ifdef SUPPORT_PCRE32
5216    if (buffer32 != NULL) free(buffer32);
5217    #endif
5218    
5219    #if !defined NODFA
5220    if (dfa_workspace != NULL)
5221      free(dfa_workspace);
5222    #endif
5223    
5224  return yield;  return yield;
5225  }  }

Legend:
Removed from v.895  
changed lines
  Added in v.1101

  ViewVC Help
Powered by ViewVC 1.1.5