/[pcre]/code/trunk/pcre_internal.h
ViewVC logotype

Diff of /code/trunk/pcre_internal.h

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 932 by ph10, Fri Feb 24 18:54:43 2012 UTC revision 1187 by zherczeg, Mon Oct 29 11:30:45 2012 UTC
# Line 40  POSSIBILITY OF SUCH DAMAGE. Line 40  POSSIBILITY OF SUCH DAMAGE.
40    
41  /* This header contains definitions that are shared between the different  /* This header contains definitions that are shared between the different
42  modules, but which are not relevant to the exported API. This includes some  modules, but which are not relevant to the exported API. This includes some
43  functions whose names all begin with "_pcre_" or "_pcre16_" depending on  functions whose names all begin with "_pcre_", "_pcre16_" or "_pcre32_"
44  the PRIV macro. */  depending on the PRIV macro. */
45    
46  #ifndef PCRE_INTERNAL_H  #ifndef PCRE_INTERNAL_H
47  #define PCRE_INTERNAL_H  #define PCRE_INTERNAL_H
# Line 53  the PRIV macro. */ Line 53  the PRIV macro. */
53  #endif  #endif
54    
55  /* PCRE is compiled as an 8 bit library if it is not requested otherwise. */  /* PCRE is compiled as an 8 bit library if it is not requested otherwise. */
56  #ifndef COMPILE_PCRE16  
57    #if !defined COMPILE_PCRE16 && !defined COMPILE_PCRE32
58  #define COMPILE_PCRE8  #define COMPILE_PCRE8
59  #endif  #endif
60    
# Line 78  Until then we define it if SUPPORT_UTF i Line 79  Until then we define it if SUPPORT_UTF i
79  #define SUPPORT_UTF8 1  #define SUPPORT_UTF8 1
80  #endif  #endif
81    
82  /* We do not support both EBCDIC and UTF-8/16 at the same time. The "configure"  /* We do not support both EBCDIC and UTF-8/16/32 at the same time. The "configure"
83  script prevents both being selected, but not everybody uses "configure". */  script prevents both being selected, but not everybody uses "configure". */
84    
85  #if defined EBCDIC && defined SUPPORT_UTF  #if defined EBCDIC && defined SUPPORT_UTF
86  #error The use of both EBCDIC and SUPPORT_UTF8/16 is not supported.  #error The use of both EBCDIC and SUPPORT_UTF is not supported.
87  #endif  #endif
88    
89  /* Use a macro for debugging printing, 'cause that eliminates the use of #ifdef  /* Use a macro for debugging printing, 'cause that eliminates the use of #ifdef
# Line 111  setjmp and stdarg are used is when NO_RE Line 112  setjmp and stdarg are used is when NO_RE
112  #include <stdlib.h>  #include <stdlib.h>
113  #include <string.h>  #include <string.h>
114    
115    /* Valgrind (memcheck) support */
116    
117    #ifdef SUPPORT_VALGRIND
118    #include <valgrind/memcheck.h>
119    #endif
120    
121  /* When compiling a DLL for Windows, the exported symbols have to be declared  /* When compiling a DLL for Windows, the exported symbols have to be declared
122  using some MS magic. I found some useful information on this web page:  using some MS magic. I found some useful information on this web page:
123  http://msdn2.microsoft.com/en-us/library/y4h7bcy6(VS.80).aspx. According to the  http://msdn2.microsoft.com/en-us/library/y4h7bcy6(VS.80).aspx. According to the
# Line 214  stdint.h is available, include it; it ma Line 221  stdint.h is available, include it; it ma
221  have stdint.h (e.g. Solaris) may have inttypes.h. The macro int64_t may be set  have stdint.h (e.g. Solaris) may have inttypes.h. The macro int64_t may be set
222  by "configure". */  by "configure". */
223    
224  #if HAVE_STDINT_H  #if defined HAVE_STDINT_H
225  #include <stdint.h>  #include <stdint.h>
226  #elif HAVE_INTTYPES_H  #elif defined HAVE_INTTYPES_H
227  #include <inttypes.h>  #include <inttypes.h>
228  #endif  #endif
229    
# Line 243  exactly 256 items. When the character is Line 250  exactly 256 items. When the character is
250  items, some check is needed before accessing these tables.  items, some check is needed before accessing these tables.
251  */  */
252    
253  #ifdef COMPILE_PCRE8  #if defined COMPILE_PCRE8
254    
255  typedef unsigned char pcre_uchar;  typedef unsigned char pcre_uchar;
256  #define IN_UCHARS(x) (x)  #define IN_UCHARS(x) (x)
257  #define MAX_255(c) 1  #define MAX_255(c) 1
258  #define TABLE_GET(c, table, default) ((table)[c])  #define TABLE_GET(c, table, default) ((table)[c])
259    
260  #else  #elif defined COMPILE_PCRE16
261    
 #ifdef COMPILE_PCRE16  
262  #if USHRT_MAX != 65535  #if USHRT_MAX != 65535
263  /* This is a warning message. Change PCRE_UCHAR16 to a 16 bit data type in  /* This is a warning message. Change PCRE_UCHAR16 to a 16 bit data type in
264  pcre.h(.in) and disable (comment out) this message. */  pcre.h(.in) and disable (comment out) this message. */
# Line 260  pcre.h(.in) and disable (comment out) th Line 266  pcre.h(.in) and disable (comment out) th
266  #endif  #endif
267    
268  typedef pcre_uint16 pcre_uchar;  typedef pcre_uint16 pcre_uchar;
269  #define IN_UCHARS(x) ((x) << 1)  #define UCHAR_SHIFT (1)
270    #define IN_UCHARS(x) ((x) << UCHAR_SHIFT)
271    #define MAX_255(c) ((c) <= 255u)
272    #define TABLE_GET(c, table, default) (MAX_255(c)? ((table)[c]):(default))
273    
274    #elif defined COMPILE_PCRE32
275    
276    typedef pcre_uint32 pcre_uchar;
277    #define UCHAR_SHIFT (2)
278    #define IN_UCHARS(x) ((x) << UCHAR_SHIFT)
279  #define MAX_255(c) ((c) <= 255u)  #define MAX_255(c) ((c) <= 255u)
280  #define TABLE_GET(c, table, default) (MAX_255(c)? ((table)[c]):(default))  #define TABLE_GET(c, table, default) (MAX_255(c)? ((table)[c]):(default))
281    
282    /* Assert that pcre_uchar is a 4-byte (32-bit) type in this mode */
283    typedef int __assert_pcre_uchar32_size[sizeof(pcre_uchar) == 4 ? 1 : -1];
284    
285  #else  #else
286  #error Unsupported compiling mode  #error Unsupported compiling mode
287  #endif /* COMPILE_PCRE16 */  #endif /* COMPILE_PCRE[8|16|32] */
   
 #endif /* COMPILE_PCRE8 */  
288    
289  /* This is an unsigned int value that no character can ever have. UTF-8  /* This is an unsigned int value that no character can ever have. UTF-8
290  characters only go up to 0x7fffffff (though Unicode doesn't go beyond  characters only go up to 0x7fffffff (though Unicode doesn't go beyond
# Line 295  start/end of string field names are. */ Line 311  start/end of string field names are. */
311         &(NLBLOCK->nllen), utf)) \         &(NLBLOCK->nllen), utf)) \
312      : \      : \
313      ((p) <= NLBLOCK->PSEND - NLBLOCK->nllen && \      ((p) <= NLBLOCK->PSEND - NLBLOCK->nllen && \
314       (p)[0] == NLBLOCK->nl[0] && \       RAWUCHARTEST(p) == NLBLOCK->nl[0] && \
315       (NLBLOCK->nllen == 1 || (p)[1] == NLBLOCK->nl[1]) \       (NLBLOCK->nllen == 1 || RAWUCHARTEST(p+1) == NLBLOCK->nl[1])       \
316      ) \      ) \
317    )    )
318    
# Line 309  start/end of string field names are. */ Line 325  start/end of string field names are. */
325         &(NLBLOCK->nllen), utf)) \         &(NLBLOCK->nllen), utf)) \
326      : \      : \
327      ((p) >= NLBLOCK->PSSTART + NLBLOCK->nllen && \      ((p) >= NLBLOCK->PSSTART + NLBLOCK->nllen && \
328       (p)[-NLBLOCK->nllen] == NLBLOCK->nl[0] && \       RAWUCHARTEST(p - NLBLOCK->nllen) == NLBLOCK->nl[0] &&              \
329       (NLBLOCK->nllen == 1 || (p)[-NLBLOCK->nllen+1] == NLBLOCK->nl[1]) \       (NLBLOCK->nllen == 1 || RAWUCHARTEST(p - NLBLOCK->nllen + 1) == NLBLOCK->nl[1]) \
330      ) \      ) \
331    )    )
332    
# Line 396  The macros are controlled by the value o Line 412  The macros are controlled by the value o
412  the config.h file, but can be overridden by using -D on the command line. This  the config.h file, but can be overridden by using -D on the command line. This
413  is automated on Unix systems via the "configure" command. */  is automated on Unix systems via the "configure" command. */
414    
415  #ifdef COMPILE_PCRE8  #if defined COMPILE_PCRE8
416    
417  #if LINK_SIZE == 2  #if LINK_SIZE == 2
418    
# Line 441  is automated on Unix systems via the "co Line 457  is automated on Unix systems via the "co
457  #error LINK_SIZE must be either 2, 3, or 4  #error LINK_SIZE must be either 2, 3, or 4
458  #endif  #endif
459    
460  #else /* COMPILE_PCRE8 */  #elif defined COMPILE_PCRE16
   
 #ifdef COMPILE_PCRE16  
461    
462  #if LINK_SIZE == 2  #if LINK_SIZE == 2
463    
464    /* Redefine LINK_SIZE as a multiple of sizeof(pcre_uchar) */
465  #undef LINK_SIZE  #undef LINK_SIZE
466  #define LINK_SIZE 1  #define LINK_SIZE 1
467    
# Line 460  is automated on Unix systems via the "co Line 475  is automated on Unix systems via the "co
475    
476  #elif LINK_SIZE == 3 || LINK_SIZE == 4  #elif LINK_SIZE == 3 || LINK_SIZE == 4
477    
478    /* Redefine LINK_SIZE as a multiple of sizeof(pcre_uchar) */
479  #undef LINK_SIZE  #undef LINK_SIZE
480  #define LINK_SIZE 2  #define LINK_SIZE 2
481    
# Line 477  is automated on Unix systems via the "co Line 493  is automated on Unix systems via the "co
493  #error LINK_SIZE must be either 2, 3, or 4  #error LINK_SIZE must be either 2, 3, or 4
494  #endif  #endif
495    
496    #elif defined COMPILE_PCRE32
497    
498    /* Only supported LINK_SIZE is 4 */
499    /* Redefine LINK_SIZE as a multiple of sizeof(pcre_uchar) */
500    #undef LINK_SIZE
501    #define LINK_SIZE 1
502    
503    #define PUT(a,n,d)   \
504      (a[n] = (d))
505    
506    #define GET(a,n) \
507      (a[n])
508    
509    /* Keep it positive */
510    #define MAX_PATTERN_SIZE (1 << 30)
511    
512  #else  #else
513  #error Unsupported compiling mode  #error Unsupported compiling mode
514  #endif /* COMPILE_PCRE16 */  #endif /* COMPILE_PCRE[8|16|32] */
   
 #endif /* COMPILE_PCRE8 */  
515    
516  /* Convenience macro defined in terms of the others */  /* Convenience macro defined in terms of the others */
517    
# Line 492  is automated on Unix systems via the "co Line 522  is automated on Unix systems via the "co
522  offsets changes. These are used for repeat counts and for other things such as  offsets changes. These are used for repeat counts and for other things such as
523  capturing parenthesis numbers in back references. */  capturing parenthesis numbers in back references. */
524    
525  #ifdef COMPILE_PCRE8  #if defined COMPILE_PCRE8
526    
527  #define IMM2_SIZE 2  #define IMM2_SIZE 2
528    
529  #define PUT2(a,n,d)   \  #define PUT2(a,n,d)   \
530    a[n] = (d) >> 8; \    a[n] = (d) >> 8; \
531    a[(n)+1] = (d) & 255    a[(n)+1] = (d) & 255
532    
533    /* For reasons that I do not understand, the expression in this GET2 macro is
534    treated by gcc as a signed expression, even when a is declared as unsigned. It
535    seems that any kind of arithmetic results in a signed value. */
536    
537  #define GET2(a,n) \  #define GET2(a,n) \
538    (((a)[n] << 8) | (a)[(n)+1])    (unsigned int)(((a)[n] << 8) | (a)[(n)+1])
539    
540  #else /* COMPILE_PCRE8 */  #elif defined COMPILE_PCRE16
541    
542  #ifdef COMPILE_PCRE16  #define IMM2_SIZE 1
543    
544    #define PUT2(a,n,d)   \
545       a[n] = d
546    
547    #define GET2(a,n) \
548       a[n]
549    
550    #elif defined COMPILE_PCRE32
551    
552  #define IMM2_SIZE 1  #define IMM2_SIZE 1
553    
# Line 517  capturing parenthesis numbers in back re Line 559  capturing parenthesis numbers in back re
559    
560  #else  #else
561  #error Unsupported compiling mode  #error Unsupported compiling mode
562  #endif /* COMPILE_PCRE16 */  #endif /* COMPILE_PCRE[8|16|32] */
   
 #endif /* COMPILE_PCRE8 */  
563    
564  #define PUT2INC(a,n,d)  PUT2(a,n,d), a += IMM2_SIZE  #define PUT2INC(a,n,d)  PUT2(a,n,d), a += IMM2_SIZE
565    
566    /* The maximum length of a MARK name is currently one data unit; it may be
567    changed in future to be a fixed number of bytes or to depend on LINK_SIZE. */
568    
569    #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
570    #define MAX_MARK ((1u << 16) - 1)
571    #else
572    #define MAX_MARK ((1u << 8) - 1)
573    #endif
574    
575  /* When UTF encoding is being used, a character is no longer just a single  /* When UTF encoding is being used, a character is no longer just a single
576  character. The macros for character handling generate simple sequences when  byte. The macros for character handling generate simple sequences when used in
577  used in character-mode, and more complicated ones for UTF characters.  character-mode, and more complicated ones for UTF characters. GETCHARLENTEST
578  GETCHARLENTEST and other macros are not used when UTF is not supported,  and other macros are not used when UTF is not supported, so they are not
579  so they are not defined. To make sure they can never even appear when  defined. To make sure they can never even appear when UTF support is omitted,
580  UTF support is omitted, we don't even define them. */  we don't even define them. */
581    
582  #ifndef SUPPORT_UTF  #ifndef SUPPORT_UTF
583    
# Line 541  UTF support is omitted, we don't even de Line 590  UTF support is omitted, we don't even de
590  #define GETCHARINC(c, eptr) c = *eptr++;  #define GETCHARINC(c, eptr) c = *eptr++;
591  #define GETCHARINCTEST(c, eptr) c = *eptr++;  #define GETCHARINCTEST(c, eptr) c = *eptr++;
592  #define GETCHARLEN(c, eptr, len) c = *eptr;  #define GETCHARLEN(c, eptr, len) c = *eptr;
593    #define RAWUCHAR(eptr) (*(eptr))
594    #define RAWUCHARINC(eptr) (*(eptr)++)
595    #define RAWUCHARTEST(eptr) (*(eptr))
596    #define RAWUCHARINCTEST(eptr) (*(eptr)++)
597  /* #define GETCHARLENTEST(c, eptr, len) */  /* #define GETCHARLENTEST(c, eptr, len) */
598  /* #define BACKCHAR(eptr) */  /* #define BACKCHAR(eptr) */
599  /* #define FORWARDCHAR(eptr) */  /* #define FORWARDCHAR(eptr) */
# Line 548  UTF support is omitted, we don't even de Line 601  UTF support is omitted, we don't even de
601    
602  #else   /* SUPPORT_UTF */  #else   /* SUPPORT_UTF */
603    
 #ifdef COMPILE_PCRE8  
   
 /* These macros were originally written in the form of loops that used data  
 from the tables whose names start with PRIV(utf8_table). They were rewritten by  
 a user so as not to use loops, because in some environments this gives a  
 significant performance advantage, and it seems never to do any harm. */  
   
 /* Tells the biggest code point which can be encoded as a single character. */  
   
 #define MAX_VALUE_FOR_SINGLE_CHAR 127  
   
604  /* Tests whether the code point needs extra characters to decode. */  /* Tests whether the code point needs extra characters to decode. */
605    
606  #define HAS_EXTRALEN(c) ((c) >= 0xc0)  #define HASUTF8EXTRALEN(c) ((c) >= 0xc0)
   
 /* Returns with the additional number of characters if IS_MULTICHAR(c) is TRUE.  
 Otherwise it has an undefined behaviour. */  
   
 #define GET_EXTRALEN(c) (PRIV(utf8_table4)[(c) & 0x3f])  
   
 /* Returns TRUE, if the given character is not the first character  
 of a UTF sequence. */  
   
 #define NOT_FIRSTCHAR(c) (((c) & 0xc0) == 0x80)  
607    
608  /* Base macro to pick up the remaining bytes of a UTF-8 character, not  /* Base macro to pick up the remaining bytes of a UTF-8 character, not
609  advancing the pointer. */  advancing the pointer. */
# Line 595  advancing the pointer. */ Line 627  advancing the pointer. */
627            ((eptr[4] & 0x3f) << 6) | (eptr[5] & 0x3f); \            ((eptr[4] & 0x3f) << 6) | (eptr[5] & 0x3f); \
628      }      }
629    
 /* Get the next UTF-8 character, not advancing the pointer. This is called when  
 we know we are in UTF-8 mode. */  
   
 #define GETCHAR(c, eptr) \  
   c = *eptr; \  
   if (c >= 0xc0) GETUTF8(c, eptr);  
   
 /* Get the next UTF-8 character, testing for UTF-8 mode, and not advancing the  
 pointer. */  
   
 #define GETCHARTEST(c, eptr) \  
   c = *eptr; \  
   if (utf && c >= 0xc0) GETUTF8(c, eptr);  
   
630  /* Base macro to pick up the remaining bytes of a UTF-8 character, advancing  /* Base macro to pick up the remaining bytes of a UTF-8 character, advancing
631  the pointer. */  the pointer. */
632    
# Line 643  the pointer. */ Line 661  the pointer. */
661        } \        } \
662      }      }
663    
664    #if defined COMPILE_PCRE8
665    
666    /* These macros were originally written in the form of loops that used data
667    from the tables whose names start with PRIV(utf8_table). They were rewritten by
668    a user so as not to use loops, because in some environments this gives a
669    significant performance advantage, and it seems never to do any harm. */
670    
671    /* Tells the biggest code point which can be encoded as a single character. */
672    
673    #define MAX_VALUE_FOR_SINGLE_CHAR 127
674    
675    /* Tests whether the code point needs extra characters to decode. */
676    
677    #define HAS_EXTRALEN(c) ((c) >= 0xc0)
678    
679    /* Returns the number of additional characters if HAS_EXTRALEN(c) is TRUE.
680    Otherwise the behaviour is undefined. */
681    
682    #define GET_EXTRALEN(c) (PRIV(utf8_table4)[(c) & 0x3f])
683    
684    /* Returns TRUE, if the given character is not the first character
685    of a UTF sequence. */
686    
687    #define NOT_FIRSTCHAR(c) (((c) & 0xc0) == 0x80)
688    
689    /* Get the next UTF-8 character, not advancing the pointer. This is called when
690    we know we are in UTF-8 mode. */
691    
692    #define GETCHAR(c, eptr) \
693      c = *eptr; \
694      if (c >= 0xc0) GETUTF8(c, eptr);
695    
696    /* Get the next UTF-8 character, testing for UTF-8 mode, and not advancing the
697    pointer. */
698    
699    #define GETCHARTEST(c, eptr) \
700      c = *eptr; \
701      if (utf && c >= 0xc0) GETUTF8(c, eptr);
702    
703  /* Get the next UTF-8 character, advancing the pointer. This is called when we  /* Get the next UTF-8 character, advancing the pointer. This is called when we
704  know we are in UTF-8 mode. */  know we are in UTF-8 mode. */
705    
# Line 709  do not know if we are in UTF-8 mode. */ Line 766  do not know if we are in UTF-8 mode. */
766    c = *eptr; \    c = *eptr; \
767    if (utf && c >= 0xc0) GETUTF8LEN(c, eptr, len);    if (utf && c >= 0xc0) GETUTF8LEN(c, eptr, len);
768    
769    /* Returns the next uchar, not advancing the pointer. This is called when
770    we know we are in UTF mode. */
771    
772    #define RAWUCHAR(eptr) \
773      (*(eptr))
774    
775    /* Returns the next uchar, advancing the pointer. This is called when
776    we know we are in UTF mode. */
777    
778    #define RAWUCHARINC(eptr) \
779      (*(eptr)++)
780    
781    /* Returns the next uchar, testing for UTF mode, and not advancing the
782    pointer. */
783    
784    #define RAWUCHARTEST(eptr) \
785      (*(eptr))
786    
787    /* Returns the next uchar, testing for UTF mode, advancing the
788    pointer. */
789    
790    #define RAWUCHARINCTEST(eptr) \
791      (*(eptr)++)
792    
793  /* If the pointer is not at the start of a character, move it back until  /* If the pointer is not at the start of a character, move it back until
794  it is. This is called only in UTF-8 mode - we don't put a test within the macro  it is. This is called only in UTF-8 mode - we don't put a test within the macro
795  because almost all calls are already within a block of UTF-8 only code. */  because almost all calls are already within a block of UTF-8 only code. */
# Line 722  because almost all calls are already wit Line 803  because almost all calls are already wit
803  #define ACROSSCHAR(condition, eptr, action) \  #define ACROSSCHAR(condition, eptr, action) \
804    while((condition) && ((eptr) & 0xc0) == 0x80) action    while((condition) && ((eptr) & 0xc0) == 0x80) action
805    
806  #else /* COMPILE_PCRE8 */  #elif defined COMPILE_PCRE16
   
 #ifdef COMPILE_PCRE16  
807    
808  /* Tells the biggest code point which can be encoded as a single character. */  /* Tells the biggest code point which can be encoded as a single character. */
809    
# Line 806  we do not know if we are in UTF-16 mode. Line 885  we do not know if we are in UTF-16 mode.
885    c = *eptr; \    c = *eptr; \
886    if (utf && (c & 0xfc00) == 0xd800) GETUTF16LEN(c, eptr, len);    if (utf && (c & 0xfc00) == 0xd800) GETUTF16LEN(c, eptr, len);
887    
888    /* Returns the next uchar, not advancing the pointer. This is called when
889    we know we are in UTF mode. */
890    
891    #define RAWUCHAR(eptr) \
892      (*(eptr))
893    
894    /* Returns the next uchar, advancing the pointer. This is called when
895    we know we are in UTF mode. */
896    
897    #define RAWUCHARINC(eptr) \
898      (*(eptr)++)
899    
900    /* Returns the next uchar, testing for UTF mode, and not advancing the
901    pointer. */
902    
903    #define RAWUCHARTEST(eptr) \
904      (*(eptr))
905    
906    /* Returns the next uchar, testing for UTF mode, advancing the
907    pointer. */
908    
909    #define RAWUCHARINCTEST(eptr) \
910      (*(eptr)++)
911    
912  /* If the pointer is not at the start of a character, move it back until  /* If the pointer is not at the start of a character, move it back until
913  it is. This is called only in UTF-16 mode - we don't put a test within the  it is. This is called only in UTF-16 mode - we don't put a test within the
914  macro because almost all calls are already within a block of UTF-16 only  macro because almost all calls are already within a block of UTF-16 only
# Line 820  code. */ Line 923  code. */
923  #define ACROSSCHAR(condition, eptr, action) \  #define ACROSSCHAR(condition, eptr, action) \
924    if ((condition) && ((eptr) & 0xfc00) == 0xdc00) action    if ((condition) && ((eptr) & 0xfc00) == 0xdc00) action
925    
926  #endif  #elif defined COMPILE_PCRE32
927    
928  #endif /* COMPILE_PCRE8 */  /* These are trivial for the 32-bit library, since all UTF-32 characters fit
929    into one pcre_uchar unit. */
930    #define MAX_VALUE_FOR_SINGLE_CHAR (0x10ffffu)
931    #define HAS_EXTRALEN(c) (0)
932    #define GET_EXTRALEN(c) (0)
933    #define NOT_FIRSTCHAR(c) (0)
934    
935    #define UTF32_MASK (0x1fffffu)
936    
937    /* Get the next UTF-32 character, not advancing the pointer. This is called when
938    we know we are in UTF-32 mode. */
939    
940    #define GETCHAR(c, eptr) \
941      c = (*eptr) & UTF32_MASK;
942    
943    /* Get the next UTF-32 character, testing for UTF-32 mode, and not advancing the
944    pointer. */
945    
946    #define GETCHARTEST(c, eptr) \
947      c = *eptr; \
948      if (utf) c &= UTF32_MASK;
949    
950    /* Get the next UTF-32 character, advancing the pointer. This is called when we
951    know we are in UTF-32 mode. */
952    
953    #define GETCHARINC(c, eptr) \
954      c = (*eptr++) & UTF32_MASK;
955    
956    /* Get the next character, testing for UTF-32 mode, and advancing the pointer.
957    This is called when we don't know if we are in UTF-32 mode. */
958    
959    #define GETCHARINCTEST(c, eptr) \
960      c = *eptr++; \
961      if (utf) c &= UTF32_MASK;
962    
963    /* Get the next UTF-32 character, not advancing the pointer, not incrementing
964    length (since all UTF-32 is of length 1). This is called when we know we are in
965    UTF-32 mode. */
966    
967    #define GETCHARLEN(c, eptr, len) \
968      GETCHAR(c, eptr)
969    
970    /* Get the next UTF-32 character, testing for UTF-32 mode, not advancing the
971    pointer, not incrementing the length (since all UTF-32 is of length 1).
972    This is called when we do not know if we are in UTF-32 mode. */
973    
974    #define GETCHARLENTEST(c, eptr, len) \
975      GETCHARTEST(c, eptr)
976    
977    /* Returns the next uchar, not advancing the pointer. This is called when
978    we know we are in UTF mode. */
979    
980    #define RAWUCHAR(eptr) \
981      (*(eptr) & UTF32_MASK)
982    
983    /* Returns the next uchar, advancing the pointer. This is called when
984    we know we are in UTF mode. */
985    
986    #define RAWUCHARINC(eptr) \
987      (*(eptr)++ & UTF32_MASK)
988    
989    /* Returns the next uchar, testing for UTF mode, and not advancing the
990    pointer. */
991    
992    #define RAWUCHARTEST(eptr) \
993      (utf ? (*(eptr) & UTF32_MASK) : *(eptr))
994    
995    /* Returns the next uchar, testing for UTF mode, advancing the
996    pointer. */
997    
998    #define RAWUCHARINCTEST(eptr) \
999      (utf ? (*(eptr)++ & UTF32_MASK) : *(eptr)++)
1000    
1001    /* If the pointer is not at the start of a character, move it back until
1002    it is. This is called only in UTF-32 mode - we don't put a test within the
1003    macro because almost all calls are already within a block of UTF-32 only
1004    code.
1005    These are all no-ops since all UTF-32 characters fit into one pcre_uchar. */
1006    
1007    #define BACKCHAR(eptr) do { } while (0)
1008    
1009    /* Same as above, just in the other direction. */
1010    #define FORWARDCHAR(eptr) do { } while (0)
1011    
1012    /* Same as above, but it allows a fully customizable form. */
1013    #define ACROSSCHAR(condition, eptr, action) do { } while (0)
1014    
1015    #else
1016    #error Unsupported compiling mode
1017    #endif /* COMPILE_PCRE[8|16|32] */
1018    
1019  #endif  /* SUPPORT_UTF */  #endif  /* SUPPORT_UTF */
1020    
1021    /* Tests for Unicode horizontal and vertical whitespace characters must check a
1022    number of different values. Using a switch statement for this generates the
1023    fastest code (no loop, no memory access), and there are several places in the
1024    interpreter code where this happens. In order to ensure that all the case lists
1025    remain in step, we use macros so that there is only one place where the lists
1026    are defined.
1027    
1028    These values are also required as lists in pcre_compile.c when processing \h,
1029    \H, \v and \V in a character class. The lists are defined in pcre_tables.c, but
1030    macros that define the values are here so that all the definitions are
1031    together. The lists must be in ascending character order, terminated by
1032    NOTACHAR (which is 0xffffffff).
1033    
1034    Any changes should ensure that the various macros are kept in step with each
1035    other. NOTE: The values also appear in pcre_jit_compile.c. */
1036    
1037    /* ------ ASCII/Unicode environments ------ */
1038    
1039    #ifndef EBCDIC
1040    
1041    #define HSPACE_LIST \
1042      CHAR_HT, CHAR_SPACE, 0xa0, \
1043      0x1680, 0x180e, 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, \
1044      0x2006, 0x2007, 0x2008, 0x2009, 0x200A, 0x202f, 0x205f, 0x3000, \
1045      NOTACHAR
1046    
1047    #define HSPACE_MULTIBYTE_CASES \
1048      case 0x1680:  /* OGHAM SPACE MARK */ \
1049      case 0x180e:  /* MONGOLIAN VOWEL SEPARATOR */ \
1050      case 0x2000:  /* EN QUAD */ \
1051      case 0x2001:  /* EM QUAD */ \
1052      case 0x2002:  /* EN SPACE */ \
1053      case 0x2003:  /* EM SPACE */ \
1054      case 0x2004:  /* THREE-PER-EM SPACE */ \
1055      case 0x2005:  /* FOUR-PER-EM SPACE */ \
1056      case 0x2006:  /* SIX-PER-EM SPACE */ \
1057      case 0x2007:  /* FIGURE SPACE */ \
1058      case 0x2008:  /* PUNCTUATION SPACE */ \
1059      case 0x2009:  /* THIN SPACE */ \
1060      case 0x200A:  /* HAIR SPACE */ \
1061      case 0x202f:  /* NARROW NO-BREAK SPACE */ \
1062      case 0x205f:  /* MEDIUM MATHEMATICAL SPACE */ \
1063      case 0x3000   /* IDEOGRAPHIC SPACE */
1064    
1065    #define HSPACE_BYTE_CASES \
1066      case CHAR_HT: \
1067      case CHAR_SPACE: \
1068      case 0xa0     /* NBSP */
1069    
1070    #define HSPACE_CASES \
1071      HSPACE_BYTE_CASES: \
1072      HSPACE_MULTIBYTE_CASES
1073    
1074    #define VSPACE_LIST \
1075      CHAR_LF, CHAR_VT, CHAR_FF, CHAR_CR, CHAR_NEL, 0x2028, 0x2029, NOTACHAR
1076    
1077    #define VSPACE_MULTIBYTE_CASES \
1078      case 0x2028:    /* LINE SEPARATOR */ \
1079      case 0x2029     /* PARAGRAPH SEPARATOR */
1080    
1081    #define VSPACE_BYTE_CASES \
1082      case CHAR_LF: \
1083      case CHAR_VT: \
1084      case CHAR_FF: \
1085      case CHAR_CR: \
1086      case CHAR_NEL
1087    
1088    #define VSPACE_CASES \
1089      VSPACE_BYTE_CASES: \
1090      VSPACE_MULTIBYTE_CASES
1091    
1092  /* In case there is no definition of offsetof() provided - though any proper  /* ------ EBCDIC environments ------ */
1093  Standard C system should have one. */  
1094    #else
1095    #define HSPACE_LIST CHAR_HT, CHAR_SPACE
1096    
1097    #define HSPACE_BYTE_CASES \
1098      case CHAR_HT: \
1099      case CHAR_SPACE
1100    
1101    #define HSPACE_CASES HSPACE_BYTE_CASES
1102    
1103    #ifdef EBCDIC_NL25
1104    #define VSPACE_LIST \
1105      CHAR_VT, CHAR_FF, CHAR_CR, CHAR_NEL, CHAR_LF, NOTACHAR
1106    #else
1107    #define VSPACE_LIST \
1108      CHAR_VT, CHAR_FF, CHAR_CR, CHAR_LF, CHAR_NEL, NOTACHAR
1109    #endif
1110    
1111    #define VSPACE_BYTE_CASES \
1112      case CHAR_LF: \
1113      case CHAR_VT: \
1114      case CHAR_FF: \
1115      case CHAR_CR: \
1116      case CHAR_NEL
1117    
1118    #define VSPACE_CASES VSPACE_BYTE_CASES
1119    #endif  /* EBCDIC */
1120    
1121    /* ------ End of whitespace macros ------ */
1122    
 #ifndef offsetof  
 #define offsetof(p_type,field) ((size_t)&(((p_type *)0)->field))  
 #endif  
1123    
1124    
1125  /* Private flags containing information about the compiled regex. They used to  /* Private flags containing information about the compiled regex. They used to
# Line 841  are in a 16-bit flags word. From release Line 1128  are in a 16-bit flags word. From release
1128  the restrictions on partial matching have been lifted. It remains for backwards  the restrictions on partial matching have been lifted. It remains for backwards
1129  compatibility. */  compatibility. */
1130    
1131  #ifdef COMPILE_PCRE8  #define PCRE_MODE8         0x0001  /* compiled in 8 bit mode */
1132  #define PCRE_MODE          0x0001  /* compiled in 8 bit mode */  #define PCRE_MODE16        0x0002  /* compiled in 16 bit mode */
1133  #endif  #define PCRE_MODE32        0x0004  /* compiled in 32 bit mode */
 #ifdef COMPILE_PCRE16  
 #define PCRE_MODE          0x0002  /* compiled in 16 bit mode */  
 #endif  
1134  #define PCRE_FIRSTSET      0x0010  /* first_char is set */  #define PCRE_FIRSTSET      0x0010  /* first_char is set */
1135  #define PCRE_FCH_CASELESS  0x0020  /* caseless first char */  #define PCRE_FCH_CASELESS  0x0020  /* caseless first char */
1136  #define PCRE_REQCHSET      0x0040  /* req_byte is set */  #define PCRE_REQCHSET      0x0040  /* req_byte is set */
# Line 857  compatibility. */ Line 1141  compatibility. */
1141  #define PCRE_HASCRORLF     0x0800  /* explicit \r or \n in pattern */  #define PCRE_HASCRORLF     0x0800  /* explicit \r or \n in pattern */
1142  #define PCRE_HASTHEN       0x1000  /* pattern contains (*THEN) */  #define PCRE_HASTHEN       0x1000  /* pattern contains (*THEN) */
1143    
1144    #if defined COMPILE_PCRE8
1145    #define PCRE_MODE          PCRE_MODE8
1146    #elif defined COMPILE_PCRE16
1147    #define PCRE_MODE          PCRE_MODE16
1148    #elif defined COMPILE_PCRE32
1149    #define PCRE_MODE          PCRE_MODE32
1150    #endif
1151    #define PCRE_MODE_MASK     (PCRE_MODE8 | PCRE_MODE16 | PCRE_MODE32)
1152    
1153  /* Flags for the "extra" block produced by pcre_study(). */  /* Flags for the "extra" block produced by pcre_study(). */
1154    
1155  #define PCRE_STUDY_MAPPED  0x0001  /* a map of starting chars exists */  #define PCRE_STUDY_MAPPED  0x0001  /* a map of starting chars exists */
# Line 888  time, run time, or study time, respectiv Line 1181  time, run time, or study time, respectiv
1181    
1182  #define PUBLIC_STUDY_OPTIONS \  #define PUBLIC_STUDY_OPTIONS \
1183     (PCRE_STUDY_JIT_COMPILE|PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE| \     (PCRE_STUDY_JIT_COMPILE|PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE| \
1184      PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE)      PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE|PCRE_STUDY_EXTRA_NEEDED)
1185    
1186    #define PUBLIC_JIT_EXEC_OPTIONS \
1187       (PCRE_NO_UTF8_CHECK|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|\
1188        PCRE_NOTEMPTY_ATSTART|PCRE_PARTIAL_SOFT|PCRE_PARTIAL_HARD)
1189    
1190  /* Magic number to provide a small check against being handed junk. */  /* Magic number to provide a small check against being handed junk. */
1191    
# Line 899  in different endianness. */ Line 1196  in different endianness. */
1196    
1197  #define REVERSED_MAGIC_NUMBER  0x45524350UL   /* 'ERCP' */  #define REVERSED_MAGIC_NUMBER  0x45524350UL   /* 'ERCP' */
1198    
 /* Negative values for the firstchar and reqchar variables */  
   
 #define REQ_UNSET (-2)  
 #define REQ_NONE  (-1)  
   
1199  /* The maximum remaining length of subject we are prepared to search for a  /* The maximum remaining length of subject we are prepared to search for a
1200  req_byte match. */  req_byte match. */
1201    
# Line 940  macros to give the functions distinct na Line 1232  macros to give the functions distinct na
1232  #ifndef SUPPORT_UTF  #ifndef SUPPORT_UTF
1233    
1234  /* UTF support is not enabled; use the platform-dependent character literals  /* UTF support is not enabled; use the platform-dependent character literals
1235  so that PCRE works on both ASCII and EBCDIC platforms, in non-UTF-mode only. */  so that PCRE works in both ASCII and EBCDIC environments, but only in non-UTF
1236    mode. Newline characters are problematic in EBCDIC. Though it has CR and LF
1237    characters, a common practice has been to use its NL (0x15) character as the
1238    line terminator in C-like processing environments. However, sometimes the LF
1239    (0x25) character is used instead, according to this Unicode document:
1240    
1241    http://unicode.org/standard/reports/tr13/tr13-5.html
1242    
1243    PCRE defaults EBCDIC NL to 0x15, but has a build-time option to select 0x25
1244    instead. Whichever is *not* chosen is defined as NEL.
1245    
1246    In both ASCII and EBCDIC environments, CHAR_NL and CHAR_LF are synonyms for the
1247    same code point. */
1248    
1249    #ifdef EBCDIC
1250    
1251    #ifndef EBCDIC_NL25
1252    #define CHAR_NL                     '\x15'
1253    #define CHAR_NEL                    '\x25'
1254    #define STR_NL                      "\x15"
1255    #define STR_NEL                     "\x25"
1256    #else
1257    #define CHAR_NL                     '\x25'
1258    #define CHAR_NEL                    '\x15'
1259    #define STR_NL                      "\x25"
1260    #define STR_NEL                     "\x15"
1261    #endif
1262    
1263    #define CHAR_LF                     CHAR_NL
1264    #define STR_LF                      STR_NL
1265    
1266    #define CHAR_ESC                    '\047'
1267    #define CHAR_DEL                    '\007'
1268    #define STR_ESC                     "\047"
1269    #define STR_DEL                     "\007"
1270    
1271    #else  /* Not EBCDIC */
1272    
1273    /* In ASCII/Unicode, linefeed is '\n' and we equate this to NL for
1274    compatibility. NEL is the Unicode newline character; make sure it is
1275    a positive value. */
1276    
1277    #define CHAR_LF                     '\n'
1278    #define CHAR_NL                     CHAR_LF
1279    #define CHAR_NEL                    ((unsigned char)'\x85')
1280    #define CHAR_ESC                    '\033'
1281    #define CHAR_DEL                    '\177'
1282    
1283    #define STR_LF                      "\n"
1284    #define STR_NL                      STR_LF
1285    #define STR_NEL                     "\x85"
1286    #define STR_ESC                     "\033"
1287    #define STR_DEL                     "\177"
1288    
1289    #endif  /* EBCDIC */
1290    
1291    /* The remaining definitions work in both environments. */
1292    
1293    #define CHAR_NULL                   '\0'
1294  #define CHAR_HT                     '\t'  #define CHAR_HT                     '\t'
1295  #define CHAR_VT                     '\v'  #define CHAR_VT                     '\v'
1296  #define CHAR_FF                     '\f'  #define CHAR_FF                     '\f'
1297  #define CHAR_CR                     '\r'  #define CHAR_CR                     '\r'
 #define CHAR_NL                     '\n'  
1298  #define CHAR_BS                     '\b'  #define CHAR_BS                     '\b'
1299  #define CHAR_BEL                    '\a'  #define CHAR_BEL                    '\a'
 #ifdef EBCDIC  
 #define CHAR_ESC                    '\047'  
 #define CHAR_DEL                    '\007'  
 #else  
 #define CHAR_ESC                    '\033'  
 #define CHAR_DEL                    '\177'  
 #endif  
1300    
1301  #define CHAR_SPACE                  ' '  #define CHAR_SPACE                  ' '
1302  #define CHAR_EXCLAMATION_MARK       '!'  #define CHAR_EXCLAMATION_MARK       '!'
# Line 1057  so that PCRE works on both ASCII and EBC Line 1398  so that PCRE works on both ASCII and EBC
1398  #define STR_VT                      "\v"  #define STR_VT                      "\v"
1399  #define STR_FF                      "\f"  #define STR_FF                      "\f"
1400  #define STR_CR                      "\r"  #define STR_CR                      "\r"
 #define STR_NL                      "\n"  
1401  #define STR_BS                      "\b"  #define STR_BS                      "\b"
1402  #define STR_BEL                     "\a"  #define STR_BEL                     "\a"
 #ifdef EBCDIC  
 #define STR_ESC                     "\047"  
 #define STR_DEL                     "\007"  
 #else  
 #define STR_ESC                     "\033"  
 #define STR_DEL                     "\177"  
 #endif  
1403    
1404  #define STR_SPACE                   " "  #define STR_SPACE                   " "
1405  #define STR_EXCLAMATION_MARK        "!"  #define STR_EXCLAMATION_MARK        "!"
# Line 1203  so that PCRE works on both ASCII and EBC Line 1536  so that PCRE works on both ASCII and EBC
1536  #ifdef COMPILE_PCRE16  #ifdef COMPILE_PCRE16
1537  #define STRING_UTF_RIGHTPAR            "UTF16)"  #define STRING_UTF_RIGHTPAR            "UTF16)"
1538  #endif  #endif
1539    #ifdef COMPILE_PCRE32
1540    #define STRING_UTF_RIGHTPAR            "UTF32)"
1541    #endif
1542  #define STRING_UCP_RIGHTPAR            "UCP)"  #define STRING_UCP_RIGHTPAR            "UCP)"
1543  #define STRING_NO_START_OPT_RIGHTPAR   "NO_START_OPT)"  #define STRING_NO_START_OPT_RIGHTPAR   "NO_START_OPT)"
1544    
# Line 1216  only. */ Line 1552  only. */
1552  #define CHAR_VT                     '\013'  #define CHAR_VT                     '\013'
1553  #define CHAR_FF                     '\014'  #define CHAR_FF                     '\014'
1554  #define CHAR_CR                     '\015'  #define CHAR_CR                     '\015'
1555  #define CHAR_NL                     '\012'  #define CHAR_LF                     '\012'
1556    #define CHAR_NL                     CHAR_LF
1557    #define CHAR_NEL                    ((unsigned char)'\x85')
1558  #define CHAR_BS                     '\010'  #define CHAR_BS                     '\010'
1559  #define CHAR_BEL                    '\007'  #define CHAR_BEL                    '\007'
1560  #define CHAR_ESC                    '\033'  #define CHAR_ESC                    '\033'
1561  #define CHAR_DEL                    '\177'  #define CHAR_DEL                    '\177'
1562    
1563    #define CHAR_NULL                   '\0'
1564  #define CHAR_SPACE                  '\040'  #define CHAR_SPACE                  '\040'
1565  #define CHAR_EXCLAMATION_MARK       '\041'  #define CHAR_EXCLAMATION_MARK       '\041'
1566  #define CHAR_QUOTATION_MARK         '\042'  #define CHAR_QUOTATION_MARK         '\042'
# Line 1463  only. */ Line 1802  only. */
1802  #ifdef COMPILE_PCRE16  #ifdef COMPILE_PCRE16
1803  #define STRING_UTF_RIGHTPAR            STR_U STR_T STR_F STR_1 STR_6 STR_RIGHT_PARENTHESIS  #define STRING_UTF_RIGHTPAR            STR_U STR_T STR_F STR_1 STR_6 STR_RIGHT_PARENTHESIS
1804  #endif  #endif
1805    #ifdef COMPILE_PCRE32
1806    #define STRING_UTF_RIGHTPAR            STR_U STR_T STR_F STR_3 STR_2 STR_RIGHT_PARENTHESIS
1807    #endif
1808  #define STRING_UCP_RIGHTPAR            STR_U STR_C STR_P STR_RIGHT_PARENTHESIS  #define STRING_UCP_RIGHTPAR            STR_U STR_C STR_P STR_RIGHT_PARENTHESIS
1809  #define STRING_NO_START_OPT_RIGHTPAR   STR_N STR_O STR_UNDERSCORE STR_S STR_T STR_A STR_R STR_T STR_UNDERSCORE STR_O STR_P STR_T STR_RIGHT_PARENTHESIS  #define STRING_NO_START_OPT_RIGHTPAR   STR_N STR_O STR_UNDERSCORE STR_S STR_T STR_A STR_R STR_T STR_UNDERSCORE STR_O STR_P STR_T STR_RIGHT_PARENTHESIS
1810    
# Line 1479  only. */ Line 1821  only. */
1821  #endif  #endif
1822    
1823  #ifndef ESC_n  #ifndef ESC_n
1824  #define ESC_n CHAR_NL  #define ESC_n CHAR_LF
1825  #endif  #endif
1826    
1827  #ifndef ESC_r  #ifndef ESC_r
# Line 1504  only. */ Line 1846  only. */
1846  #define PT_SPACE      6    /* Perl space - Z plus 9,10,12,13 */  #define PT_SPACE      6    /* Perl space - Z plus 9,10,12,13 */
1847  #define PT_PXSPACE    7    /* POSIX space - Z plus 9,10,11,12,13 */  #define PT_PXSPACE    7    /* POSIX space - Z plus 9,10,11,12,13 */
1848  #define PT_WORD       8    /* Word - L plus N plus underscore */  #define PT_WORD       8    /* Word - L plus N plus underscore */
1849    #define PT_CLIST      9    /* Pseudo-property: match character list */
1850    
1851  /* Flag bits and data types for the extended class (OP_XCLASS) for classes that  /* Flag bits and data types for the extended class (OP_XCLASS) for classes that
1852  contain characters with values greater than 255. */  contain characters with values greater than 255. */
# Line 1519  contain characters with values greater t Line 1862  contain characters with values greater t
1862    
1863  /* These are escaped items that aren't just an encoding of a particular data  /* These are escaped items that aren't just an encoding of a particular data
1864  value such as \n. They must have non-zero values, as check_escape() returns  value such as \n. They must have non-zero values, as check_escape() returns
1865  their negation. Also, they must appear in the same order as in the opcode  0 for a data character.  Also, they must appear in the same order as in the opcode
1866  definitions below, up to ESC_z. There's a dummy for OP_ALLANY because it  definitions below, up to ESC_z. There's a dummy for OP_ALLANY because it
1867  corresponds to "." in DOTALL mode rather than an escape sequence. It is also  corresponds to "." in DOTALL mode rather than an escape sequence. It is also
1868  used for [^] in JavaScript compatibility mode, and for \C in non-utf mode. In  used for [^] in JavaScript compatibility mode, and for \C in non-utf mode. In
1869  non-DOTALL mode, "." behaves like \N.  non-DOTALL mode, "." behaves like \N.
1870    
1871  The special values ESC_DU, ESC_du, etc. are used instead of ESC_D, ESC_d, etc.  The special values ESC_DU, ESC_du, etc. are used instead of ESC_D, ESC_d, etc.
1872  when PCRE_UCP is set, when replacement of \d etc by \p sequences is required.  when PCRE_UCP is set and replacement of \d etc by \p sequences is required.
1873  They must be contiguous, and remain in order so that the replacements can be  They must be contiguous, and remain in order so that the replacements can be
1874  looked up from a table.  looked up from a table.
1875    
1876  The final escape must be ESC_REF as subsequent values are used for  Negative numbers are used to encode a backreference (\1, \2, \3, etc.) in
1877  backreferences (\1, \2, \3, etc). There are two tests in the code for an escape  check_escape(). There are two tests in the code for an escape
1878  greater than ESC_b and less than ESC_Z to detect the types that may be  greater than ESC_b and less than ESC_Z to detect the types that may be
1879  repeated. These are the types that consume characters. If any new escapes are  repeated. These are the types that consume characters. If any new escapes are
1880  put in between that don't consume a character, that code will have to change.  put in between that don't consume a character, that code will have to change.
# Line 1541  enum { ESC_A = 1, ESC_G, ESC_K, ESC_B, E Line 1884  enum { ESC_A = 1, ESC_G, ESC_K, ESC_B, E
1884         ESC_W, ESC_w, ESC_N, ESC_dum, ESC_C, ESC_P, ESC_p, ESC_R, ESC_H,         ESC_W, ESC_w, ESC_N, ESC_dum, ESC_C, ESC_P, ESC_p, ESC_R, ESC_H,
1885         ESC_h, ESC_V, ESC_v, ESC_X, ESC_Z, ESC_z,         ESC_h, ESC_V, ESC_v, ESC_X, ESC_Z, ESC_z,
1886         ESC_E, ESC_Q, ESC_g, ESC_k,         ESC_E, ESC_Q, ESC_g, ESC_k,
1887         ESC_DU, ESC_du, ESC_SU, ESC_su, ESC_WU, ESC_wu,         ESC_DU, ESC_du, ESC_SU, ESC_su, ESC_WU, ESC_wu };
        ESC_REF };  
1888    
1889  /* Opcode table: Starting from 1 (i.e. after OP_END), the values up to  /* Opcode table: Starting from 1 (i.e. after OP_END), the values up to
1890  OP_EOD must correspond in order to the list of escapes immediately above.  OP_EOD must correspond in order to the list of escapes immediately above.
# Line 1568  enum { Line 1910  enum {
1910    OP_NOT_WORDCHAR,       /* 10 \W */    OP_NOT_WORDCHAR,       /* 10 \W */
1911    OP_WORDCHAR,           /* 11 \w */    OP_WORDCHAR,           /* 11 \w */
1912    
1913    OP_ANY,            /* 12 Match any character except newline */    OP_ANY,            /* 12 Match any character except newline (\N) */
1914    OP_ALLANY,         /* 13 Match any character */    OP_ALLANY,         /* 13 Match any character */
1915    OP_ANYBYTE,        /* 14 Match any byte (\C); different to OP_ANY for UTF-8 */    OP_ANYBYTE,        /* 14 Match any byte (\C); different to OP_ANY for UTF-8 */
1916    OP_NOTPROP,        /* 15 \P (not Unicode property) */    OP_NOTPROP,        /* 15 \P (not Unicode property) */
# Line 1579  enum { Line 1921  enum {
1921    OP_NOT_VSPACE,     /* 20 \V (not vertical whitespace) */    OP_NOT_VSPACE,     /* 20 \V (not vertical whitespace) */
1922    OP_VSPACE,         /* 21 \v (vertical whitespace) */    OP_VSPACE,         /* 21 \v (vertical whitespace) */
1923    OP_EXTUNI,         /* 22 \X (extended Unicode sequence) */    OP_EXTUNI,         /* 22 \X (extended Unicode sequence) */
1924    OP_EODN,           /* 23 End of data or \n at end of data: \Z. */    OP_EODN,           /* 23 End of data or \n at end of data (\Z) */
1925    OP_EOD,            /* 24 End of data: \z */    OP_EOD,            /* 24 End of data (\z) */
1926    
1927    OP_CIRC,           /* 25 Start of line - not multiline */    OP_CIRC,           /* 25 Start of line - not multiline */
1928    OP_CIRCM,          /* 26 Start of line - multiline */    OP_CIRCM,          /* 26 Start of line - multiline */
# Line 1940  enum { ERR0,  ERR1,  ERR2,  ERR3,  ERR4, Line 2282  enum { ERR0,  ERR1,  ERR2,  ERR3,  ERR4,
2282         ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49,         ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49,
2283         ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59,         ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59,
2284         ERR60, ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69,         ERR60, ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69,
2285         ERR70, ERR71, ERR72, ERR73, ERR74, ERRCOUNT };         ERR70, ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERRCOUNT };
2286    
2287  /* JIT compiling modes. The function list is indexed by them. */  /* JIT compiling modes. The function list is indexed by them. */
2288  enum { JIT_COMPILE, JIT_PARTIAL_SOFT_COMPILE, JIT_PARTIAL_HARD_COMPILE,  enum { JIT_COMPILE, JIT_PARTIAL_SOFT_COMPILE, JIT_PARTIAL_HARD_COMPILE,
# Line 1963  fields are present. Currently PCRE alway Line 2305  fields are present. Currently PCRE alway
2305  NOTE NOTE NOTE  NOTE NOTE NOTE
2306  */  */
2307    
2308  #ifdef COMPILE_PCRE8  #if defined COMPILE_PCRE8
2309  #define REAL_PCRE real_pcre  #define REAL_PCRE real_pcre
2310  #else  #elif defined COMPILE_PCRE16
2311  #define REAL_PCRE real_pcre16  #define REAL_PCRE real_pcre16
2312    #elif defined COMPILE_PCRE32
2313    #define REAL_PCRE real_pcre32
2314  #endif  #endif
2315    
2316  typedef struct REAL_PCRE {  /* It is necessary to fork the struct for 32 bit, since it needs to use
2317     * pcre_uchar for first_char and req_char. Can't put an ifdef inside the
2318     * typedef since pcretest needs access to the struct of the 8-, 16-
2319     * and 32-bit variants. */
2320    
2321    typedef struct real_pcre8_or_16 {
2322    pcre_uint32 magic_number;    pcre_uint32 magic_number;
2323    pcre_uint32 size;               /* Total that was malloced */    pcre_uint32 size;               /* Total that was malloced */
2324    pcre_uint32 options;            /* Public options */    pcre_uint32 options;            /* Public options */
# Line 1985  typedef struct REAL_PCRE { Line 2334  typedef struct REAL_PCRE {
2334    pcre_uint16 ref_count;          /* Reference count */    pcre_uint16 ref_count;          /* Reference count */
2335    const pcre_uint8 *tables;       /* Pointer to tables or NULL for std */    const pcre_uint8 *tables;       /* Pointer to tables or NULL for std */
2336    const pcre_uint8 *nullpad;      /* NULL padding */    const pcre_uint8 *nullpad;      /* NULL padding */
2337  } REAL_PCRE;  } real_pcre8_or_16;
2338    
2339    typedef struct real_pcre8_or_16 real_pcre;
2340    typedef struct real_pcre8_or_16 real_pcre16;
2341    
2342    typedef struct real_pcre32 {
2343      pcre_uint32 magic_number;
2344      pcre_uint32 size;               /* Total that was malloced */
2345      pcre_uint32 options;            /* Public options */
2346      pcre_uint16 flags;              /* Private flags */
2347      pcre_uint16 max_lookbehind;     /* Longest lookbehind (characters) */
2348      pcre_uint16 top_bracket;        /* Highest numbered group */
2349      pcre_uint16 top_backref;        /* Highest numbered back reference */
2350      pcre_uint32 first_char;         /* Starting character */
2351      pcre_uint32 req_char;           /* This character must be seen */
2352      pcre_uint16 name_table_offset;  /* Offset to name table that follows */
2353      pcre_uint16 name_entry_size;    /* Size of any name items */
2354      pcre_uint16 name_count;         /* Number of name items */
2355      pcre_uint16 ref_count;          /* Reference count */
2356      pcre_uint16 dummy1;             /* for later expansion */
2357      pcre_uint16 dummy2;             /* for later expansion */
2358      const pcre_uint8 *tables;       /* Pointer to tables or NULL for std */
2359      void *nullpad;                  /* for later expansion */
2360    } real_pcre32;
2361    
2362    /* Assert that the size of REAL_PCRE is divisible by 8 */
2363    typedef int __assert_real_pcre_size_divisible_8[(sizeof(REAL_PCRE) % 8) == 0 ? 1 : -1];
2364    
2365    /* Needed in pcretest to access some fields in the real_pcre* structures
2366     * directly. They're unified for 8/16/32 bits since the structs only differ
2367     * after these fields; if that ever changes, need to fork those defines into
2368     * 8/16 and 32 bit versions. */
2369    #define REAL_PCRE_MAGIC(re)     (((REAL_PCRE*)re)->magic_number)
2370    #define REAL_PCRE_SIZE(re)      (((REAL_PCRE*)re)->size)
2371    #define REAL_PCRE_OPTIONS(re)   (((REAL_PCRE*)re)->options)
2372    #define REAL_PCRE_FLAGS(re)     (((REAL_PCRE*)re)->flags)
2373    
2374  /* The format of the block used to store data from pcre_study(). The same  /* The format of the block used to store data from pcre_study(). The same
2375  remark (see NOTE above) about extending this structure applies. */  remark (see NOTE above) about extending this structure applies. */
# Line 2026  typedef struct compile_data { Line 2410  typedef struct compile_data {
2410    int  names_found;                 /* Number of entries so far */    int  names_found;                 /* Number of entries so far */
2411    int  name_entry_size;             /* Size of each entry */    int  name_entry_size;             /* Size of each entry */
2412    int  workspace_size;              /* Size of workspace */    int  workspace_size;              /* Size of workspace */
2413    int  bracount;                    /* Count of capturing parens as we compile */    unsigned int  bracount;           /* Count of capturing parens as we compile */
2414    int  final_bracount;              /* Saved value after first pass */    int  final_bracount;              /* Saved value after first pass */
2415    int  max_lookbehind;              /* Maximum lookbehind (characters) */    int  max_lookbehind;              /* Maximum lookbehind (characters) */
2416    int  top_backref;                 /* Maximum back reference */    int  top_backref;                 /* Maximum back reference */
# Line 2036  typedef struct compile_data { Line 2420  typedef struct compile_data {
2420    int  external_flags;              /* External flag bits to be set */    int  external_flags;              /* External flag bits to be set */
2421    int  req_varyopt;                 /* "After variable item" flag for reqbyte */    int  req_varyopt;                 /* "After variable item" flag for reqbyte */
2422    BOOL had_accept;                  /* (*ACCEPT) encountered */    BOOL had_accept;                  /* (*ACCEPT) encountered */
2423      BOOL had_pruneorskip;             /* (*PRUNE) or (*SKIP) encountered */
2424    BOOL check_lookbehind;            /* Lookbehinds need later checking */    BOOL check_lookbehind;            /* Lookbehinds need later checking */
2425    int  nltype;                      /* Newline type */    int  nltype;                      /* Newline type */
2426    int  nllen;                       /* Newline string length */    int  nllen;                       /* Newline string length */
# Line 2055  call within the pattern; used by pcre_ex Line 2440  call within the pattern; used by pcre_ex
2440    
2441  typedef struct recursion_info {  typedef struct recursion_info {
2442    struct recursion_info *prevrec; /* Previous recursion record (or NULL) */    struct recursion_info *prevrec; /* Previous recursion record (or NULL) */
2443    int group_num;                  /* Number of group that was called */    unsigned int group_num;         /* Number of group that was called */
2444    int *offset_save;               /* Pointer to start of saved offsets */    int *offset_save;               /* Pointer to start of saved offsets */
2445    int saved_max;                  /* Number of saved offsets */    int saved_max;                  /* Number of saved offsets */
2446    PCRE_PUCHAR subject_position;   /* Position at start of recursion */    PCRE_PUCHAR subject_position;   /* Position at start of recursion */
# Line 2130  typedef struct match_data { Line 2515  typedef struct match_data {
2515    const  pcre_uchar *mark;        /* Mark pointer to pass back on success */    const  pcre_uchar *mark;        /* Mark pointer to pass back on success */
2516    const  pcre_uchar *nomatch_mark;/* Mark pointer to pass back on failure */    const  pcre_uchar *nomatch_mark;/* Mark pointer to pass back on failure */
2517    const  pcre_uchar *once_target; /* Where to back up to for atomic groups */    const  pcre_uchar *once_target; /* Where to back up to for atomic groups */
2518    #ifdef NO_RECURSE
2519      void  *match_frames_base;       /* For remembering malloc'd frames */
2520    #endif
2521  } match_data;  } match_data;
2522    
2523  /* A similar structure is used for the same purpose by the DFA matching  /* A similar structure is used for the same purpose by the DFA matching
# Line 2186  total length. */ Line 2574  total length. */
2574    
2575  /* Internal function and data prefixes. */  /* Internal function and data prefixes. */
2576    
2577  #ifdef COMPILE_PCRE8  #if defined COMPILE_PCRE8
2578  #ifndef PUBL  #ifndef PUBL
2579  #define PUBL(name) pcre_##name  #define PUBL(name) pcre_##name
2580  #endif  #endif
2581  #ifndef PRIV  #ifndef PRIV
2582  #define PRIV(name) _pcre_##name  #define PRIV(name) _pcre_##name
2583  #endif  #endif
2584  #else /* COMPILE_PCRE8 */  #elif defined COMPILE_PCRE16
 #ifdef COMPILE_PCRE16  
2585  #ifndef PUBL  #ifndef PUBL
2586  #define PUBL(name) pcre16_##name  #define PUBL(name) pcre16_##name
2587  #endif  #endif
2588  #ifndef PRIV  #ifndef PRIV
2589  #define PRIV(name) _pcre16_##name  #define PRIV(name) _pcre16_##name
2590  #endif  #endif
2591    #elif defined COMPILE_PCRE32
2592    #ifndef PUBL
2593    #define PUBL(name) pcre32_##name
2594    #endif
2595    #ifndef PRIV
2596    #define PRIV(name) _pcre32_##name
2597    #endif
2598  #else  #else
2599  #error Unsupported compiling mode  #error Unsupported compiling mode
2600  #endif /* COMPILE_PCRE16 */  #endif /* COMPILE_PCRE[8|16|32] */
 #endif /* COMPILE_PCRE8 */  
2601    
2602  /* Layout of the UCP type table that translates property names into types and  /* Layout of the UCP type table that translates property names into types and
2603  codes. Each entry used to point directly to a name, but to reduce the number of  codes. Each entry used to point directly to a name, but to reduce the number of
# Line 2224  but are not part of the PCRE public API. Line 2617  but are not part of the PCRE public API.
2617  pcre_tables.c module. */  pcre_tables.c module. */
2618    
2619  #ifdef COMPILE_PCRE8  #ifdef COMPILE_PCRE8
   
2620  extern const int            PRIV(utf8_table1)[];  extern const int            PRIV(utf8_table1)[];
2621  extern const int            PRIV(utf8_table1_size);  extern const int            PRIV(utf8_table1_size);
2622  extern const int            PRIV(utf8_table2)[];  extern const int            PRIV(utf8_table2)[];
2623  extern const int            PRIV(utf8_table3)[];  extern const int            PRIV(utf8_table3)[];
2624  extern const pcre_uint8     PRIV(utf8_table4)[];  extern const pcre_uint8     PRIV(utf8_table4)[];
   
2625  #endif /* COMPILE_PCRE8 */  #endif /* COMPILE_PCRE8 */
2626    
2627  extern const char           PRIV(utt_names)[];  extern const char           PRIV(utt_names)[];
2628  extern const ucp_type_table PRIV(utt)[];  extern const ucp_type_table PRIV(utt)[];
2629  extern const int            PRIV(utt_size);  extern const int            PRIV(utt_size);
2630    
2631    extern const pcre_uint8     PRIV(OP_lengths)[];
2632  extern const pcre_uint8     PRIV(default_tables)[];  extern const pcre_uint8     PRIV(default_tables)[];
2633    
2634  extern const pcre_uint8     PRIV(OP_lengths)[];  extern const pcre_uint32    PRIV(hspace_list)[];
2635    extern const pcre_uint32    PRIV(vspace_list)[];
2636    
2637    
2638  /* Internal shared functions. These are functions that are used by more than  /* Internal shared functions. These are functions that are used by more than
# Line 2247  one of the exported public functions. Th Line 2640  one of the exported public functions. Th
2640  sense, but are not part of the PCRE public API. */  sense, but are not part of the PCRE public API. */
2641    
2642  /* String comparison functions. */  /* String comparison functions. */
2643  #ifdef COMPILE_PCRE8  #if defined COMPILE_PCRE8
2644    
2645  #define STRCMP_UC_UC(str1, str2) \  #define STRCMP_UC_UC(str1, str2) \
2646    strcmp((char *)(str1), (char *)(str2))    strcmp((char *)(str1), (char *)(str2))
# Line 2259  sense, but are not part of the PCRE publ Line 2652  sense, but are not part of the PCRE publ
2652    strncmp((char *)(str1), (str2), (num))    strncmp((char *)(str1), (str2), (num))
2653  #define STRLEN_UC(str) strlen((const char *)str)  #define STRLEN_UC(str) strlen((const char *)str)
2654    
2655  #else  #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2656    
2657  extern int               PRIV(strcmp_uc_uc)(const pcre_uchar *,  extern int               PRIV(strcmp_uc_uc)(const pcre_uchar *,
2658                             const pcre_uchar *);                             const pcre_uchar *);
# Line 2281  extern unsigned int      PRIV(strlen_uc) Line 2674  extern unsigned int      PRIV(strlen_uc)
2674    PRIV(strncmp_uc_c8)((str1), (str2), (num))    PRIV(strncmp_uc_c8)((str1), (str2), (num))
2675  #define STRLEN_UC(str) PRIV(strlen_uc)(str)  #define STRLEN_UC(str) PRIV(strlen_uc)(str)
2676    
2677  #endif /* COMPILE_PCRE8 */  #endif /* COMPILE_PCRE[8|16|32] */
2678    
2679    #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
2680    
2681    #define STRCMP_UC_UC_TEST(str1, str2) STRCMP_UC_UC(str1, str2)
2682    #define STRCMP_UC_C8_TEST(str1, str2) STRCMP_UC_C8(str1, str2)
2683    
2684    #elif defined COMPILE_PCRE32
2685    
2686    extern int               PRIV(strcmp_uc_uc_utf)(const pcre_uchar *,
2687                               const pcre_uchar *);
2688    extern int               PRIV(strcmp_uc_c8_utf)(const pcre_uchar *,
2689                               const char *);
2690    
2691    #define STRCMP_UC_UC_TEST(str1, str2) \
2692      (utf ? PRIV(strcmp_uc_uc_utf)((str1), (str2)) : PRIV(strcmp_uc_uc)((str1), (str2)))
2693    #define STRCMP_UC_C8_TEST(str1, str2) \
2694      (utf ? PRIV(strcmp_uc_c8_utf)((str1), (str2)) : PRIV(strcmp_uc_c8)((str1), (str2)))
2695    
2696    #endif /* COMPILE_PCRE[8|16|32] */
2697    
2698  extern const pcre_uchar *PRIV(find_bracket)(const pcre_uchar *, BOOL, int);  extern const pcre_uchar *PRIV(find_bracket)(const pcre_uchar *, BOOL, int);
2699  extern BOOL              PRIV(is_newline)(PCRE_PUCHAR, int, PCRE_PUCHAR,  extern BOOL              PRIV(is_newline)(PCRE_PUCHAR, int, PCRE_PUCHAR,
2700                             int *, BOOL);                             int *, BOOL);
2701  extern int               PRIV(ord2utf)(pcre_uint32, pcre_uchar *);  extern unsigned int      PRIV(ord2utf)(pcre_uint32, pcre_uchar *);
2702  extern int               PRIV(valid_utf)(PCRE_PUCHAR, int, int *);  extern int               PRIV(valid_utf)(PCRE_PUCHAR, int, int *);
2703  extern BOOL              PRIV(was_newline)(PCRE_PUCHAR, int, PCRE_PUCHAR,  extern BOOL              PRIV(was_newline)(PCRE_PUCHAR, int, PCRE_PUCHAR,
2704                             int *, BOOL);                             int *, BOOL);
2705  extern BOOL              PRIV(xclass)(int, const pcre_uchar *, BOOL);  extern BOOL              PRIV(xclass)(pcre_uint32, const pcre_uchar *, BOOL);
2706    
2707  #ifdef SUPPORT_JIT  #ifdef SUPPORT_JIT
2708  extern void              PRIV(jit_compile)(const REAL_PCRE *, PUBL(extra) *, int);  extern void              PRIV(jit_compile)(const REAL_PCRE *,
2709  extern int               PRIV(jit_exec)(const REAL_PCRE *, void *,                             PUBL(extra) *, int);
2710                             const pcre_uchar *, int, int, int, int, int *, int,  extern int               PRIV(jit_exec)(const PUBL(extra) *,
2711                             pcre_uchar **);                             const pcre_uchar *, int, int, int, int *, int);
2712  extern void              PRIV(jit_free)(void *);  extern void              PRIV(jit_free)(void *);
2713  extern int               PRIV(jit_get_size)(void *);  extern int               PRIV(jit_get_size)(void *);
2714  extern const char*       PRIV(jit_get_target)(void);  extern const char*       PRIV(jit_get_target)(void);
# Line 2305  extern const char*       PRIV(jit_get_ta Line 2717  extern const char*       PRIV(jit_get_ta
2717  /* Unicode character database (UCD) */  /* Unicode character database (UCD) */
2718    
2719  typedef struct {  typedef struct {
2720    pcre_uint8 script;    pcre_uint8 script;     /* ucp_Arabic, etc. */
2721    pcre_uint8 chartype;    pcre_uint8 chartype;   /* ucp_Cc, etc. (general categories) */
2722    pcre_int32 other_case;    pcre_uint8 gbprop;     /* ucp_gbControl, etc. (grapheme break property) */
2723      pcre_uint8 caseset;    /* offset to multichar other cases or zero */
2724      pcre_int32 other_case; /* offset to other case, or zero if none */
2725  } ucd_record;  } ucd_record;
2726    
2727    extern const pcre_uint32 PRIV(ucd_caseless_sets)[];
2728  extern const ucd_record  PRIV(ucd_records)[];  extern const ucd_record  PRIV(ucd_records)[];
2729  extern const pcre_uint8  PRIV(ucd_stage1)[];  extern const pcre_uint8  PRIV(ucd_stage1)[];
2730  extern const pcre_uint16 PRIV(ucd_stage2)[];  extern const pcre_uint16 PRIV(ucd_stage2)[];
2731  extern const int         PRIV(ucp_gentype)[];  extern const pcre_uint32 PRIV(ucp_gentype)[];
2732    extern const pcre_uint32 PRIV(ucp_gbtable)[];
2733  #ifdef SUPPORT_JIT  #ifdef SUPPORT_JIT
2734  extern const int         PRIV(ucp_typerange)[];  extern const int         PRIV(ucp_typerange)[];
2735  #endif  #endif
# Line 2323  extern const int         PRIV(ucp_typera Line 2739  extern const int         PRIV(ucp_typera
2739    
2740  #define UCD_BLOCK_SIZE 128  #define UCD_BLOCK_SIZE 128
2741  #define GET_UCD(ch) (PRIV(ucd_records) + \  #define GET_UCD(ch) (PRIV(ucd_records) + \
2742          PRIV(ucd_stage2)[PRIV(ucd_stage1)[(ch) / UCD_BLOCK_SIZE] * \          PRIV(ucd_stage2)[PRIV(ucd_stage1)[(int)(ch) / UCD_BLOCK_SIZE] * \
2743          UCD_BLOCK_SIZE + (ch) % UCD_BLOCK_SIZE])          UCD_BLOCK_SIZE + (int)(ch) % UCD_BLOCK_SIZE])
2744    
2745  #define UCD_CHARTYPE(ch)  GET_UCD(ch)->chartype  #define UCD_CHARTYPE(ch)    GET_UCD(ch)->chartype
2746  #define UCD_SCRIPT(ch)    GET_UCD(ch)->script  #define UCD_SCRIPT(ch)      GET_UCD(ch)->script
2747  #define UCD_CATEGORY(ch)  PRIV(ucp_gentype)[UCD_CHARTYPE(ch)]  #define UCD_CATEGORY(ch)    PRIV(ucp_gentype)[UCD_CHARTYPE(ch)]
2748  #define UCD_OTHERCASE(ch) (ch + GET_UCD(ch)->other_case)  #define UCD_GRAPHBREAK(ch)  GET_UCD(ch)->gbprop
2749    #define UCD_CASESET(ch)     GET_UCD(ch)->caseset
2750    #define UCD_OTHERCASE(ch)   ((pcre_uint32)((int)ch + (int)(GET_UCD(ch)->other_case)))
2751    
2752  #endif /* SUPPORT_UCP */  #endif /* SUPPORT_UCP */
2753    

Legend:
Removed from v.932  
changed lines
  Added in v.1187

  ViewVC Help
Powered by ViewVC 1.1.5