/[pcre]/code/trunk/pcre_internal.h
ViewVC logotype

Diff of /code/trunk/pcre_internal.h

Parent Directory | Revision Log | View Patch

revision 1028 by ph10, Thu Sep 6 16:55:38 2012 UTC revision 1160 by chpe, Mon Oct 22 22:05:03 2012 UTC
# Line 40  POSSIBILITY OF SUCH DAMAGE. Line 40  POSSIBILITY OF SUCH DAMAGE.
40    
41  /* This header contains definitions that are shared between the different  /* This header contains definitions that are shared between the different
42  modules, but which are not relevant to the exported API. This includes some  modules, but which are not relevant to the exported API. This includes some
43  functions whose names all begin with "_pcre_" or "_pcre16_" depending on  functions whose names all begin with "_pcre_", "_pcre16_" or "_pcre32_"
44  the PRIV macro. */  depending on the PRIV macro. */
45    
46  #ifndef PCRE_INTERNAL_H  #ifndef PCRE_INTERNAL_H
47  #define PCRE_INTERNAL_H  #define PCRE_INTERNAL_H
# Line 53  the PRIV macro. */ Line 53  the PRIV macro. */
53  #endif  #endif
54    
55  /* PCRE is compiled as an 8 bit library if it is not requested otherwise. */  /* PCRE is compiled as an 8 bit library if it is not requested otherwise. */
56  #ifndef COMPILE_PCRE16  
57    #if !defined COMPILE_PCRE16 && !defined COMPILE_PCRE32
58  #define COMPILE_PCRE8  #define COMPILE_PCRE8
59  #endif  #endif
60    
# Line 78  Until then we define it if SUPPORT_UTF i Line 79  Until then we define it if SUPPORT_UTF i
79  #define SUPPORT_UTF8 1  #define SUPPORT_UTF8 1
80  #endif  #endif
81    
82  /* We do not support both EBCDIC and UTF-8/16 at the same time. The "configure"  /* We do not support both EBCDIC and UTF-8/16/32 at the same time. The "configure"
83  script prevents both being selected, but not everybody uses "configure". */  script prevents both being selected, but not everybody uses "configure". */
84    
85  #if defined EBCDIC && defined SUPPORT_UTF  #if defined EBCDIC && defined SUPPORT_UTF
86  #error The use of both EBCDIC and SUPPORT_UTF8/16 is not supported.  #error The use of both EBCDIC and SUPPORT_UTF is not supported.
87  #endif  #endif
88    
89  /* Use a macro for debugging printing, 'cause that eliminates the use of #ifdef  /* Use a macro for debugging printing, 'cause that eliminates the use of #ifdef
# Line 111  setjmp and stdarg are used is when NO_RE Line 112  setjmp and stdarg are used is when NO_RE
112  #include <stdlib.h>  #include <stdlib.h>
113  #include <string.h>  #include <string.h>
114    
115    /* Valgrind (memcheck) support */
116    
117    #ifdef SUPPORT_VALGRIND
118    #include <valgrind/memcheck.h>
119    #endif
120    
121  /* When compiling a DLL for Windows, the exported symbols have to be declared  /* When compiling a DLL for Windows, the exported symbols have to be declared
122  using some MS magic. I found some useful information on this web page:  using some MS magic. I found some useful information on this web page:
123  http://msdn2.microsoft.com/en-us/library/y4h7bcy6(VS.80).aspx. According to the  http://msdn2.microsoft.com/en-us/library/y4h7bcy6(VS.80).aspx. According to the
# Line 214  stdint.h is available, include it; it ma Line 221  stdint.h is available, include it; it ma
221  have stdint.h (e.g. Solaris) may have inttypes.h. The macro int64_t may be set  have stdint.h (e.g. Solaris) may have inttypes.h. The macro int64_t may be set
222  by "configure". */  by "configure". */
223    
224  #if HAVE_STDINT_H  #if defined HAVE_STDINT_H
225  #include <stdint.h>  #include <stdint.h>
226  #elif HAVE_INTTYPES_H  #elif defined HAVE_INTTYPES_H
227  #include <inttypes.h>  #include <inttypes.h>
228  #endif  #endif
229    
# Line 243  exactly 256 items. When the character is Line 250  exactly 256 items. When the character is
250  items, some check is needed before accessing these tables.  items, some check is needed before accessing these tables.
251  */  */
252    
253  #ifdef COMPILE_PCRE8  #if defined COMPILE_PCRE8
254    
255  typedef unsigned char pcre_uchar;  typedef unsigned char pcre_uchar;
256  #define IN_UCHARS(x) (x)  #define IN_UCHARS(x) (x)
257  #define MAX_255(c) 1  #define MAX_255(c) 1
258  #define TABLE_GET(c, table, default) ((table)[c])  #define TABLE_GET(c, table, default) ((table)[c])
259    
260  #else  #elif defined COMPILE_PCRE16
261    
 #ifdef COMPILE_PCRE16  
262  #if USHRT_MAX != 65535  #if USHRT_MAX != 65535
263  /* This is a warning message. Change PCRE_UCHAR16 to a 16 bit data type in  /* This is a warning message. Change PCRE_UCHAR16 to a 16 bit data type in
264  pcre.h(.in) and disable (comment out) this message. */  pcre.h(.in) and disable (comment out) this message. */
# Line 260  pcre.h(.in) and disable (comment out) th Line 266  pcre.h(.in) and disable (comment out) th
266  #endif  #endif
267    
268  typedef pcre_uint16 pcre_uchar;  typedef pcre_uint16 pcre_uchar;
269  #define IN_UCHARS(x) ((x) << 1)  #define UCHAR_SHIFT (1)
270    #define IN_UCHARS(x) ((x) << UCHAR_SHIFT)
271    #define MAX_255(c) ((c) <= 255u)
272    #define TABLE_GET(c, table, default) (MAX_255(c)? ((table)[c]):(default))
273    
274    #elif defined COMPILE_PCRE32
275    
276    typedef pcre_uint32 pcre_uchar;
277    #define UCHAR_SHIFT (2)
278    #define IN_UCHARS(x) ((x) << UCHAR_SHIFT)
279  #define MAX_255(c) ((c) <= 255u)  #define MAX_255(c) ((c) <= 255u)
280  #define TABLE_GET(c, table, default) (MAX_255(c)? ((table)[c]):(default))  #define TABLE_GET(c, table, default) (MAX_255(c)? ((table)[c]):(default))
281    
282    /* Assert that pcre_uchar32 is a 32-bit type */
283    typedef int __assert_pcre_uchar32_size[sizeof(pcre_uchar) == 4 ? 1 : -1];
284    
285  #else  #else
286  #error Unsupported compiling mode  #error Unsupported compiling mode
287  #endif /* COMPILE_PCRE16 */  #endif /* COMPILE_PCRE[8|16|32] */
   
 #endif /* COMPILE_PCRE8 */  
288    
289  /* This is an unsigned int value that no character can ever have. UTF-8  /* This is an unsigned int value that no character can ever have. UTF-8
290  characters only go up to 0x7fffffff (though Unicode doesn't go beyond  characters only go up to 0x7fffffff (though Unicode doesn't go beyond
# Line 295  start/end of string field names are. */ Line 311  start/end of string field names are. */
311         &(NLBLOCK->nllen), utf)) \         &(NLBLOCK->nllen), utf)) \
312      : \      : \
313      ((p) <= NLBLOCK->PSEND - NLBLOCK->nllen && \      ((p) <= NLBLOCK->PSEND - NLBLOCK->nllen && \
314       (p)[0] == NLBLOCK->nl[0] && \       RAWUCHARTEST(p) == NLBLOCK->nl[0] && \
315       (NLBLOCK->nllen == 1 || (p)[1] == NLBLOCK->nl[1]) \       (NLBLOCK->nllen == 1 || RAWUCHARTEST(p+1) == NLBLOCK->nl[1])       \
316      ) \      ) \
317    )    )
318    
# Line 309  start/end of string field names are. */ Line 325  start/end of string field names are. */
325         &(NLBLOCK->nllen), utf)) \         &(NLBLOCK->nllen), utf)) \
326      : \      : \
327      ((p) >= NLBLOCK->PSSTART + NLBLOCK->nllen && \      ((p) >= NLBLOCK->PSSTART + NLBLOCK->nllen && \
328       (p)[-NLBLOCK->nllen] == NLBLOCK->nl[0] && \       RAWUCHARTEST(p - NLBLOCK->nllen) == NLBLOCK->nl[0] &&              \
329       (NLBLOCK->nllen == 1 || (p)[-NLBLOCK->nllen+1] == NLBLOCK->nl[1]) \       (NLBLOCK->nllen == 1 || RAWUCHARTEST(p - NLBLOCK->nllen + 1) == NLBLOCK->nl[1]) \
330      ) \      ) \
331    )    )
332    
# Line 396  The macros are controlled by the value o Line 412  The macros are controlled by the value o
412  the config.h file, but can be overridden by using -D on the command line. This  the config.h file, but can be overridden by using -D on the command line. This
413  is automated on Unix systems via the "configure" command. */  is automated on Unix systems via the "configure" command. */
414    
415  #ifdef COMPILE_PCRE8  #if defined COMPILE_PCRE8
416    
417  #if LINK_SIZE == 2  #if LINK_SIZE == 2
418    
# Line 441  is automated on Unix systems via the "co Line 457  is automated on Unix systems via the "co
457  #error LINK_SIZE must be either 2, 3, or 4  #error LINK_SIZE must be either 2, 3, or 4
458  #endif  #endif
459    
460  #else /* COMPILE_PCRE8 */  #elif defined COMPILE_PCRE16
   
 #ifdef COMPILE_PCRE16  
461    
462  #if LINK_SIZE == 2  #if LINK_SIZE == 2
463    
464    /* Redefine LINK_SIZE as a multiple of sizeof(pcre_uchar) */
465  #undef LINK_SIZE  #undef LINK_SIZE
466  #define LINK_SIZE 1  #define LINK_SIZE 1
467    
# Line 460  is automated on Unix systems via the "co Line 475  is automated on Unix systems via the "co
475    
476  #elif LINK_SIZE == 3 || LINK_SIZE == 4  #elif LINK_SIZE == 3 || LINK_SIZE == 4
477    
478    /* Redefine LINK_SIZE as a multiple of sizeof(pcre_uchar) */
479  #undef LINK_SIZE  #undef LINK_SIZE
480  #define LINK_SIZE 2  #define LINK_SIZE 2
481    
# Line 477  is automated on Unix systems via the "co Line 493  is automated on Unix systems via the "co
493  #error LINK_SIZE must be either 2, 3, or 4  #error LINK_SIZE must be either 2, 3, or 4
494  #endif  #endif
495    
496    #elif defined COMPILE_PCRE32
497    
498    /* Only supported LINK_SIZE is 4 */
499    /* Redefine LINK_SIZE as a multiple of sizeof(pcre_uchar) */
500    #undef LINK_SIZE
501    #define LINK_SIZE 1
502    
503    #define PUT(a,n,d)   \
504      (a[n] = (d))
505    
506    #define GET(a,n) \
507      (a[n])
508    
509    /* Keep it positive */
510    #define MAX_PATTERN_SIZE (1 << 30)
511    
512  #else  #else
513  #error Unsupported compiling mode  #error Unsupported compiling mode
514  #endif /* COMPILE_PCRE16 */  #endif /* COMPILE_PCRE[8|16|32] */
   
 #endif /* COMPILE_PCRE8 */  
515    
516  /* Convenience macro defined in terms of the others */  /* Convenience macro defined in terms of the others */
517    
# Line 492  is automated on Unix systems via the "co Line 522  is automated on Unix systems via the "co
522  offsets changes. These are used for repeat counts and for other things such as  offsets changes. These are used for repeat counts and for other things such as
523  capturing parenthesis numbers in back references. */  capturing parenthesis numbers in back references. */
524    
525  #ifdef COMPILE_PCRE8  #if defined COMPILE_PCRE8
526    
527  #define IMM2_SIZE 2  #define IMM2_SIZE 2
528    
529  #define PUT2(a,n,d)   \  #define PUT2(a,n,d)   \
530    a[n] = (d) >> 8; \    a[n] = (d) >> 8; \
531    a[(n)+1] = (d) & 255    a[(n)+1] = (d) & 255
532    
533    /* For reasons that I do not understand, the expression in this GET2 macro is
534    treated by gcc as a signed expression, even when a is declared as unsigned. It
535    seems that any kind of arithmetic results in a signed value. */
536    
537  #define GET2(a,n) \  #define GET2(a,n) \
538    (((a)[n] << 8) | (a)[(n)+1])    (unsigned int)(((a)[n] << 8) | (a)[(n)+1])
539    
540  #else /* COMPILE_PCRE8 */  #elif defined COMPILE_PCRE16
541    
542  #ifdef COMPILE_PCRE16  #define IMM2_SIZE 1
543    
544    #define PUT2(a,n,d)   \
545       a[n] = d
546    
547    #define GET2(a,n) \
548       a[n]
549    
550    #elif defined COMPILE_PCRE32
551    
552  #define IMM2_SIZE 1  #define IMM2_SIZE 1
553    
# Line 517  capturing parenthesis numbers in back re Line 559  capturing parenthesis numbers in back re
559    
560  #else  #else
561  #error Unsupported compiling mode  #error Unsupported compiling mode
562  #endif /* COMPILE_PCRE16 */  #endif /* COMPILE_PCRE[8|16|32] */
   
 #endif /* COMPILE_PCRE8 */  
563    
564  #define PUT2INC(a,n,d)  PUT2(a,n,d), a += IMM2_SIZE  #define PUT2INC(a,n,d)  PUT2(a,n,d), a += IMM2_SIZE
565    
566  /* The maximum length of a MARK name is currently one data unit; it may be  /* The maximum length of a MARK name is currently one data unit; it may be
567  changed in future to be a fixed number of bytes or to depend on LINK_SIZE. */  changed in future to be a fixed number of bytes or to depend on LINK_SIZE. */
568    
569  #define MAX_MARK ((1 << (sizeof(pcre_uchar)*8)) - 1)  #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
570    #define MAX_MARK ((1u << 16) - 1)
571    #else
572    #define MAX_MARK ((1u << 8) - 1)
573    #endif
574    
575  /* When UTF encoding is being used, a character is no longer just a single  /* When UTF encoding is being used, a character is no longer just a single
576  character. The macros for character handling generate simple sequences when  byte. The macros for character handling generate simple sequences when used in
577  used in character-mode, and more complicated ones for UTF characters.  character-mode, and more complicated ones for UTF characters. GETCHARLENTEST
578  GETCHARLENTEST and other macros are not used when UTF is not supported,  and other macros are not used when UTF is not supported, so they are not
579  so they are not defined. To make sure they can never even appear when  defined. To make sure they can never even appear when UTF support is omitted,
580  UTF support is omitted, we don't even define them. */  we don't even define them. */
581    
582  #ifndef SUPPORT_UTF  #ifndef SUPPORT_UTF
583    
# Line 546  UTF support is omitted, we don't even de Line 590  UTF support is omitted, we don't even de
590  #define GETCHARINC(c, eptr) c = *eptr++;  #define GETCHARINC(c, eptr) c = *eptr++;
591  #define GETCHARINCTEST(c, eptr) c = *eptr++;  #define GETCHARINCTEST(c, eptr) c = *eptr++;
592  #define GETCHARLEN(c, eptr, len) c = *eptr;  #define GETCHARLEN(c, eptr, len) c = *eptr;
593    #define RAWUCHAR(eptr) (*(eptr))
594    #define RAWUCHARINC(eptr) (*(eptr)++)
595    #define RAWUCHARTEST(eptr) (*(eptr))
596    #define RAWUCHARINCTEST(eptr) (*(eptr)++)
597  /* #define GETCHARLENTEST(c, eptr, len) */  /* #define GETCHARLENTEST(c, eptr, len) */
598  /* #define BACKCHAR(eptr) */  /* #define BACKCHAR(eptr) */
599  /* #define FORWARDCHAR(eptr) */  /* #define FORWARDCHAR(eptr) */
# Line 553  UTF support is omitted, we don't even de Line 601  UTF support is omitted, we don't even de
601    
602  #else   /* SUPPORT_UTF */  #else   /* SUPPORT_UTF */
603    
 #ifdef COMPILE_PCRE8  
   
 /* These macros were originally written in the form of loops that used data  
 from the tables whose names start with PRIV(utf8_table). They were rewritten by  
 a user so as not to use loops, because in some environments this gives a  
 significant performance advantage, and it seems never to do any harm. */  
   
 /* Tells the biggest code point which can be encoded as a single character. */  
   
 #define MAX_VALUE_FOR_SINGLE_CHAR 127  
   
604  /* Tests whether the code point needs extra characters to decode. */  /* Tests whether the code point needs extra characters to decode. */
605    
606  #define HAS_EXTRALEN(c) ((c) >= 0xc0)  #define HASUTF8EXTRALEN(c) ((c) >= 0xc0)
   
 /* Returns with the additional number of characters if IS_MULTICHAR(c) is TRUE.  
 Otherwise it has an undefined behaviour. */  
   
 #define GET_EXTRALEN(c) (PRIV(utf8_table4)[(c) & 0x3f])  
   
 /* Returns TRUE, if the given character is not the first character  
 of a UTF sequence. */  
   
 #define NOT_FIRSTCHAR(c) (((c) & 0xc0) == 0x80)  
607    
608  /* Base macro to pick up the remaining bytes of a UTF-8 character, not  /* Base macro to pick up the remaining bytes of a UTF-8 character, not
609  advancing the pointer. */  advancing the pointer. */
# Line 600  advancing the pointer. */ Line 627  advancing the pointer. */
627            ((eptr[4] & 0x3f) << 6) | (eptr[5] & 0x3f); \            ((eptr[4] & 0x3f) << 6) | (eptr[5] & 0x3f); \
628      }      }
629    
 /* Get the next UTF-8 character, not advancing the pointer. This is called when  
 we know we are in UTF-8 mode. */  
   
 #define GETCHAR(c, eptr) \  
   c = *eptr; \  
   if (c >= 0xc0) GETUTF8(c, eptr);  
   
 /* Get the next UTF-8 character, testing for UTF-8 mode, and not advancing the  
 pointer. */  
   
 #define GETCHARTEST(c, eptr) \  
   c = *eptr; \  
   if (utf && c >= 0xc0) GETUTF8(c, eptr);  
   
630  /* Base macro to pick up the remaining bytes of a UTF-8 character, advancing  /* Base macro to pick up the remaining bytes of a UTF-8 character, advancing
631  the pointer. */  the pointer. */
632    
# Line 648  the pointer. */ Line 661  the pointer. */
661        } \        } \
662      }      }
663    
664    #if defined COMPILE_PCRE8
665    
666    /* These macros were originally written in the form of loops that used data
667    from the tables whose names start with PRIV(utf8_table). They were rewritten by
668    a user so as not to use loops, because in some environments this gives a
669    significant performance advantage, and it seems never to do any harm. */
670    
671    /* Tells the biggest code point which can be encoded as a single character. */
672    
673    #define MAX_VALUE_FOR_SINGLE_CHAR 127
674    
675    /* Tests whether the code point needs extra characters to decode. */
676    
677    #define HAS_EXTRALEN(c) ((c) >= 0xc0)
678    
679    /* Returns with the additional number of characters if IS_MULTICHAR(c) is TRUE.
680    Otherwise it has an undefined behaviour. */
681    
682    #define GET_EXTRALEN(c) (PRIV(utf8_table4)[(c) & 0x3f])
683    
684    /* Returns TRUE, if the given character is not the first character
685    of a UTF sequence. */
686    
687    #define NOT_FIRSTCHAR(c) (((c) & 0xc0) == 0x80)
688    
689    /* Get the next UTF-8 character, not advancing the pointer. This is called when
690    we know we are in UTF-8 mode. */
691    
692    #define GETCHAR(c, eptr) \
693      c = *eptr; \
694      if (c >= 0xc0) GETUTF8(c, eptr);
695    
696    /* Get the next UTF-8 character, testing for UTF-8 mode, and not advancing the
697    pointer. */
698    
699    #define GETCHARTEST(c, eptr) \
700      c = *eptr; \
701      if (utf && c >= 0xc0) GETUTF8(c, eptr);
702    
703  /* Get the next UTF-8 character, advancing the pointer. This is called when we  /* Get the next UTF-8 character, advancing the pointer. This is called when we
704  know we are in UTF-8 mode. */  know we are in UTF-8 mode. */
705    
# Line 714  do not know if we are in UTF-8 mode. */ Line 766  do not know if we are in UTF-8 mode. */
766    c = *eptr; \    c = *eptr; \
767    if (utf && c >= 0xc0) GETUTF8LEN(c, eptr, len);    if (utf && c >= 0xc0) GETUTF8LEN(c, eptr, len);
768    
769    /* Returns the next uchar, not advancing the pointer. This is called when
770    we know we are in UTF mode. */
771    
772    #define RAWUCHAR(eptr) \
773      (*(eptr))
774    
775    /* Returns the next uchar, advancing the pointer. This is called when
776    we know we are in UTF mode. */
777    
778    #define RAWUCHARINC(eptr) \
779      (*(eptr)++)
780    
781    /* Returns the next uchar, testing for UTF mode, and not advancing the
782    pointer. */
783    
784    #define RAWUCHARTEST(eptr) \
785      (*(eptr))
786    
787    /* Returns the next uchar, testing for UTF mode, advancing the
788    pointer. */
789    
790    #define RAWUCHARINCTEST(eptr) \
791      (*(eptr)++)
792    
793  /* If the pointer is not at the start of a character, move it back until  /* If the pointer is not at the start of a character, move it back until
794  it is. This is called only in UTF-8 mode - we don't put a test within the macro  it is. This is called only in UTF-8 mode - we don't put a test within the macro
795  because almost all calls are already within a block of UTF-8 only code. */  because almost all calls are already within a block of UTF-8 only code. */
# Line 727  because almost all calls are already wit Line 803  because almost all calls are already wit
803  #define ACROSSCHAR(condition, eptr, action) \  #define ACROSSCHAR(condition, eptr, action) \
804    while((condition) && ((eptr) & 0xc0) == 0x80) action    while((condition) && ((eptr) & 0xc0) == 0x80) action
805    
806  #else /* COMPILE_PCRE8 */  #elif defined COMPILE_PCRE16
   
 #ifdef COMPILE_PCRE16  
807    
808  /* Tells the biggest code point which can be encoded as a single character. */  /* Tells the biggest code point which can be encoded as a single character. */
809    
# Line 811  we do not know if we are in UTF-16 mode. Line 885  we do not know if we are in UTF-16 mode.
885    c = *eptr; \    c = *eptr; \
886    if (utf && (c & 0xfc00) == 0xd800) GETUTF16LEN(c, eptr, len);    if (utf && (c & 0xfc00) == 0xd800) GETUTF16LEN(c, eptr, len);
887    
888    /* Returns the next uchar, not advancing the pointer. This is called when
889    we know we are in UTF mode. */
890    
891    #define RAWUCHAR(eptr) \
892      (*(eptr))
893    
894    /* Returns the next uchar, advancing the pointer. This is called when
895    we know we are in UTF mode. */
896    
897    #define RAWUCHARINC(eptr) \
898      (*(eptr)++)
899    
900    /* Returns the next uchar, testing for UTF mode, and not advancing the
901    pointer. */
902    
903    #define RAWUCHARTEST(eptr) \
904      (*(eptr))
905    
906    /* Returns the next uchar, testing for UTF mode, advancing the
907    pointer. */
908    
909    #define RAWUCHARINCTEST(eptr) \
910      (*(eptr)++)
911    
912  /* If the pointer is not at the start of a character, move it back until  /* If the pointer is not at the start of a character, move it back until
913  it is. This is called only in UTF-16 mode - we don't put a test within the  it is. This is called only in UTF-16 mode - we don't put a test within the
914  macro because almost all calls are already within a block of UTF-16 only  macro because almost all calls are already within a block of UTF-16 only
# Line 825  code. */ Line 923  code. */
923  #define ACROSSCHAR(condition, eptr, action) \  #define ACROSSCHAR(condition, eptr, action) \
924    if ((condition) && ((eptr) & 0xfc00) == 0xdc00) action    if ((condition) && ((eptr) & 0xfc00) == 0xdc00) action
925    
926  #endif  #elif defined COMPILE_PCRE32
927    
928  #endif /* COMPILE_PCRE8 */  /* These are trivial for the 32-bit library, since all UTF-32 characters fit
929    into one pcre_uchar unit. */
930    #define MAX_VALUE_FOR_SINGLE_CHAR (0x10ffffu)
931    #define HAS_EXTRALEN(c) (0)
932    #define GET_EXTRALEN(c) (0)
933    #define NOT_FIRSTCHAR(c) (0)
934    
935    #define UTF32_MASK (0x1fffffu)
936    
937    /* Get the next UTF-32 character, not advancing the pointer. This is called when
938    we know we are in UTF-32 mode. */
939    
940    #define GETCHAR(c, eptr) \
941      c = (*eptr) & UTF32_MASK;
942    
943    /* Get the next UTF-32 character, testing for UTF-32 mode, and not advancing the
944    pointer. */
945    
946    #define GETCHARTEST(c, eptr) \
947      c = *eptr; \
948      if (utf) c &= UTF32_MASK;
949    
950    /* Get the next UTF-32 character, advancing the pointer. This is called when we
951    know we are in UTF-32 mode. */
952    
953    #define GETCHARINC(c, eptr) \
954      c = (*eptr++) & UTF32_MASK;
955    
956    /* Get the next character, testing for UTF-32 mode, and advancing the pointer.
957    This is called when we don't know if we are in UTF-32 mode. */
958    
959    #define GETCHARINCTEST(c, eptr) \
960      c = *eptr++; \
961      if (utf) c &= UTF32_MASK;
962    
963    /* Get the next UTF-32 character, not advancing the pointer, not incrementing
964    length (since all UTF-32 is of length 1). This is called when we know we are in
965    UTF-32 mode. */
966    
967    #define GETCHARLEN(c, eptr, len) \
968      GETCHAR(c, eptr)
969    
970    /* Get the next UTF-32 character, testing for UTF-32 mode, not advancing the
971    pointer, not incrementing the length (since all UTF-32 is of length 1).
972    This is called when we do not know if we are in UTF-32 mode. */
973    
974    #define GETCHARLENTEST(c, eptr, len) \
975      GETCHARTEST(c, eptr)
976    
977    /* Returns the next uchar, not advancing the pointer. This is called when
978    we know we are in UTF mode. */
979    
980    #define RAWUCHAR(eptr) \
981      (*(eptr) & UTF32_MASK)
982    
983    /* Returns the next uchar, advancing the pointer. This is called when
984    we know we are in UTF mode. */
985    
986    #define RAWUCHARINC(eptr) \
987      (*(eptr)++ & UTF32_MASK)
988    
989    /* Returns the next uchar, testing for UTF mode, and not advancing the
990    pointer. */
991    
992    #define RAWUCHARTEST(eptr) \
993      (utf ? (*(eptr) & UTF32_MASK) : *(eptr))
994    
995    /* Returns the next uchar, testing for UTF mode, advancing the
996    pointer. */
997    
998    #define RAWUCHARINCTEST(eptr) \
999      (utf ? (*(eptr)++ & UTF32_MASK) : *(eptr)++)
1000    
1001    /* If the pointer is not at the start of a character, move it back until
1002    it is. This is called only in UTF-32 mode - we don't put a test within the
1003    macro because almost all calls are already within a block of UTF-32 only
1004    code.
1005    These are all no-ops since all UTF-32 characters fit into one pcre_uchar. */
1006    
1007    #define BACKCHAR(eptr) do { } while (0)
1008    
1009    /* Same as above, just in the other direction. */
1010    #define FORWARDCHAR(eptr) do { } while (0)
1011    
1012    /* Same as above, but it allows a fully customizable form. */
1013    #define ACROSSCHAR(condition, eptr, action) do { } while (0)
1014    
1015    #else
1016    #error Unsupported compiling mode
1017    #endif /* COMPILE_PCRE[8|16|32] */
1018    
1019  #endif  /* SUPPORT_UTF */  #endif  /* SUPPORT_UTF */
1020    
1021    /* Tests for Unicode horizontal and vertical whitespace characters must check a
1022    number of different values. Using a switch statement for this generates the
1023    fastest code (no loop, no memory access), and there are several places in the
1024    interpreter code where this happens. In order to ensure that all the case lists
1025    remain in step, we use macros so that there is only one place where the lists
1026    are defined.
1027    
1028    These values are also required as lists in pcre_compile.c when processing \h,
1029    \H, \v and \V in a character class. The lists are defined in pcre_tables.c, but
1030    macros that define the values are here so that all the definitions are
1031    together. The lists must be in ascending character order, terminated by
1032    NOTACHAR (which is 0xffffffff).
1033    
1034    Any changes should ensure that the various macros are kept in step with each
1035    other. NOTE: The values also appear in pcre_jit_compile.c. */
1036    
1037    /* ------ ASCII/Unicode environments ------ */
1038    
1039    #ifndef EBCDIC
1040    
1041    #define HSPACE_LIST \
1042      CHAR_HT, CHAR_SPACE, 0xa0, \
1043      0x1680, 0x180e, 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, \
1044      0x2006, 0x2007, 0x2008, 0x2009, 0x200A, 0x202f, 0x205f, 0x3000, \
1045      NOTACHAR
1046    
1047    #define HSPACE_MULTIBYTE_CASES \
1048      case 0x1680:  /* OGHAM SPACE MARK */ \
1049      case 0x180e:  /* MONGOLIAN VOWEL SEPARATOR */ \
1050      case 0x2000:  /* EN QUAD */ \
1051      case 0x2001:  /* EM QUAD */ \
1052      case 0x2002:  /* EN SPACE */ \
1053      case 0x2003:  /* EM SPACE */ \
1054      case 0x2004:  /* THREE-PER-EM SPACE */ \
1055      case 0x2005:  /* FOUR-PER-EM SPACE */ \
1056      case 0x2006:  /* SIX-PER-EM SPACE */ \
1057      case 0x2007:  /* FIGURE SPACE */ \
1058      case 0x2008:  /* PUNCTUATION SPACE */ \
1059      case 0x2009:  /* THIN SPACE */ \
1060      case 0x200A:  /* HAIR SPACE */ \
1061      case 0x202f:  /* NARROW NO-BREAK SPACE */ \
1062      case 0x205f:  /* MEDIUM MATHEMATICAL SPACE */ \
1063      case 0x3000   /* IDEOGRAPHIC SPACE */
1064    
1065    #define HSPACE_BYTE_CASES \
1066      case CHAR_HT: \
1067      case CHAR_SPACE: \
1068      case 0xa0     /* NBSP */
1069    
1070    #define HSPACE_CASES \
1071      HSPACE_BYTE_CASES: \
1072      HSPACE_MULTIBYTE_CASES
1073    
1074    #define VSPACE_LIST \
1075      CHAR_LF, CHAR_VT, CHAR_FF, CHAR_CR, CHAR_NEL, 0x2028, 0x2029, NOTACHAR
1076    
1077    #define VSPACE_MULTIBYTE_CASES \
1078      case 0x2028:    /* LINE SEPARATOR */ \
1079      case 0x2029     /* PARAGRAPH SEPARATOR */
1080    
1081    #define VSPACE_BYTE_CASES \
1082      case CHAR_LF: \
1083      case CHAR_VT: \
1084      case CHAR_FF: \
1085      case CHAR_CR: \
1086      case CHAR_NEL
1087    
1088    #define VSPACE_CASES \
1089      VSPACE_BYTE_CASES: \
1090      VSPACE_MULTIBYTE_CASES
1091    
1092  /* In case there is no definition of offsetof() provided - though any proper  /* ------ EBCDIC environments ------ */
1093  Standard C system should have one. */  
1094    #else
1095    #define HSPACE_LIST CHAR_HT, CHAR_SPACE
1096    
1097    #define HSPACE_BYTE_CASES \
1098      case CHAR_HT: \
1099      case CHAR_SPACE
1100    
1101    #define HSPACE_CASES HSPACE_BYTE_CASES
1102    
1103    #ifdef EBCDIC_NL25
1104    #define VSPACE_LIST \
1105      CHAR_VT, CHAR_FF, CHAR_CR, CHAR_NEL, CHAR_LF, NOTACHAR
1106    #else
1107    #define VSPACE_LIST \
1108      CHAR_VT, CHAR_FF, CHAR_CR, CHAR_LF, CHAR_NEL, NOTACHAR
1109    #endif
1110    
1111    #define VSPACE_BYTE_CASES \
1112      case CHAR_LF: \
1113      case CHAR_VT: \
1114      case CHAR_FF: \
1115      case CHAR_CR: \
1116      case CHAR_NEL
1117    
1118    #define VSPACE_CASES VSPACE_BYTE_CASES
1119    #endif  /* EBCDIC */
1120    
1121    /* ------ End of whitespace macros ------ */
1122    
 #ifndef offsetof  
 #define offsetof(p_type,field) ((size_t)&(((p_type *)0)->field))  
 #endif  
1123    
1124    
1125  /* Private flags containing information about the compiled regex. They used to  /* Private flags containing information about the compiled regex. They used to
# Line 846  are in a 16-bit flags word. From release Line 1128  are in a 16-bit flags word. From release
1128  the restrictions on partial matching have been lifted. It remains for backwards  the restrictions on partial matching have been lifted. It remains for backwards
1129  compatibility. */  compatibility. */
1130    
1131  #ifdef COMPILE_PCRE8  #define PCRE_MODE8         0x0001  /* compiled in 8 bit mode */
1132  #define PCRE_MODE          0x0001  /* compiled in 8 bit mode */  #define PCRE_MODE16        0x0002  /* compiled in 16 bit mode */
1133  #endif  #define PCRE_MODE32        0x0004  /* compiled in 32 bit mode */
 #ifdef COMPILE_PCRE16  
 #define PCRE_MODE          0x0002  /* compiled in 16 bit mode */  
 #endif  
1134  #define PCRE_FIRSTSET      0x0010  /* first_char is set */  #define PCRE_FIRSTSET      0x0010  /* first_char is set */
1135  #define PCRE_FCH_CASELESS  0x0020  /* caseless first char */  #define PCRE_FCH_CASELESS  0x0020  /* caseless first char */
1136  #define PCRE_REQCHSET      0x0040  /* req_byte is set */  #define PCRE_REQCHSET      0x0040  /* req_byte is set */
# Line 862  compatibility. */ Line 1141  compatibility. */
1141  #define PCRE_HASCRORLF     0x0800  /* explicit \r or \n in pattern */  #define PCRE_HASCRORLF     0x0800  /* explicit \r or \n in pattern */
1142  #define PCRE_HASTHEN       0x1000  /* pattern contains (*THEN) */  #define PCRE_HASTHEN       0x1000  /* pattern contains (*THEN) */
1143    
1144    #if defined COMPILE_PCRE8
1145    #define PCRE_MODE          PCRE_MODE8
1146    #elif defined COMPILE_PCRE16
1147    #define PCRE_MODE          PCRE_MODE16
1148    #elif defined COMPILE_PCRE32
1149    #define PCRE_MODE          PCRE_MODE32
1150    #endif
1151    #define PCRE_MODE_MASK     (PCRE_MODE8 | PCRE_MODE16 | PCRE_MODE32)
1152    
1153  /* Flags for the "extra" block produced by pcre_study(). */  /* Flags for the "extra" block produced by pcre_study(). */
1154    
1155  #define PCRE_STUDY_MAPPED  0x0001  /* a map of starting chars exists */  #define PCRE_STUDY_MAPPED  0x0001  /* a map of starting chars exists */
# Line 904  in different endianness. */ Line 1192  in different endianness. */
1192    
1193  #define REVERSED_MAGIC_NUMBER  0x45524350UL   /* 'ERCP' */  #define REVERSED_MAGIC_NUMBER  0x45524350UL   /* 'ERCP' */
1194    
 /* Negative values for the firstchar and reqchar variables */  
   
 #define REQ_UNSET (-2)  
 #define REQ_NONE  (-1)  
   
1195  /* The maximum remaining length of subject we are prepared to search for a  /* The maximum remaining length of subject we are prepared to search for a
1196  req_byte match. */  req_byte match. */
1197    
# Line 946  macros to give the functions distinct na Line 1229  macros to give the functions distinct na
1229    
1230  /* UTF-8 support is not enabled; use the platform-dependent character literals  /* UTF-8 support is not enabled; use the platform-dependent character literals
1231  so that PCRE works in both ASCII and EBCDIC environments, but only in non-UTF  so that PCRE works in both ASCII and EBCDIC environments, but only in non-UTF
1232  mode. Newline characters are problematic in EBCDIC. Though it has CR and LF  mode. Newline characters are problematic in EBCDIC. Though it has CR and LF
1233  characters, a common practice has been to use its NL (0x15) character as the  characters, a common practice has been to use its NL (0x15) character as the
1234  line terminator in C-like processing environments. However, sometimes the LF  line terminator in C-like processing environments. However, sometimes the LF
1235  (0x25) character is used instead, according to this Unicode document:  (0x25) character is used instead, according to this Unicode document:
1236    
1237  http://unicode.org/standard/reports/tr13/tr13-5.html  http://unicode.org/standard/reports/tr13/tr13-5.html
1238    
1239  PCRE defaults EBCDIC NL to 0x15, but has a build-time option to select 0x25  PCRE defaults EBCDIC NL to 0x15, but has a build-time option to select 0x25
1240  instead. Whichever is *not* chosen is defined as NEL.  instead. Whichever is *not* chosen is defined as NEL.
1241    
1242  In both ASCII and EBCDIC environments, CHAR_NL and CHAR_LF are synonyms for the  In both ASCII and EBCDIC environments, CHAR_NL and CHAR_LF are synonyms for the
1243  same code point. */  same code point. */
# Line 983  same code point. */ Line 1266  same code point. */
1266    
1267  #else  /* Not EBCDIC */  #else  /* Not EBCDIC */
1268    
1269  /* In ASCII/Unicode, linefeed is '\n' and we equate this to NL for  /* In ASCII/Unicode, linefeed is '\n' and we equate this to NL for
1270  compatibility. NEL is the Unicode newline character. */  compatibility. NEL is the Unicode newline character; make sure it is
1271    a positive value. */
1272    
1273  #define CHAR_LF                     '\n'  #define CHAR_LF                     '\n'
1274  #define CHAR_NL                     CHAR_LF  #define CHAR_NL                     CHAR_LF
1275  #define CHAR_NEL                    '\x85'  #define CHAR_NEL                    ((unsigned char)'\x85')
1276  #define CHAR_ESC                    '\033'  #define CHAR_ESC                    '\033'
1277  #define CHAR_DEL                    '\177'  #define CHAR_DEL                    '\177'
1278    
# Line 1247  compatibility. NEL is the Unicode newlin Line 1531  compatibility. NEL is the Unicode newlin
1531  #ifdef COMPILE_PCRE16  #ifdef COMPILE_PCRE16
1532  #define STRING_UTF_RIGHTPAR            "UTF16)"  #define STRING_UTF_RIGHTPAR            "UTF16)"
1533  #endif  #endif
1534    #ifdef COMPILE_PCRE32
1535    #define STRING_UTF_RIGHTPAR            "UTF32)"
1536    #endif
1537  #define STRING_UCP_RIGHTPAR            "UCP)"  #define STRING_UCP_RIGHTPAR            "UCP)"
1538  #define STRING_NO_START_OPT_RIGHTPAR   "NO_START_OPT)"  #define STRING_NO_START_OPT_RIGHTPAR   "NO_START_OPT)"
1539    
# Line 1262  only. */ Line 1549  only. */
1549  #define CHAR_CR                     '\015'  #define CHAR_CR                     '\015'
1550  #define CHAR_LF                     '\012'  #define CHAR_LF                     '\012'
1551  #define CHAR_NL                     CHAR_LF  #define CHAR_NL                     CHAR_LF
1552  #define CHAR_NEL                    '\x85'  #define CHAR_NEL                    ((unsigned char)'\x85')
1553  #define CHAR_BS                     '\010'  #define CHAR_BS                     '\010'
1554  #define CHAR_BEL                    '\007'  #define CHAR_BEL                    '\007'
1555  #define CHAR_ESC                    '\033'  #define CHAR_ESC                    '\033'
# Line 1509  only. */ Line 1796  only. */
1796  #ifdef COMPILE_PCRE16  #ifdef COMPILE_PCRE16
1797  #define STRING_UTF_RIGHTPAR            STR_U STR_T STR_F STR_1 STR_6 STR_RIGHT_PARENTHESIS  #define STRING_UTF_RIGHTPAR            STR_U STR_T STR_F STR_1 STR_6 STR_RIGHT_PARENTHESIS
1798  #endif  #endif
1799    #ifdef COMPILE_PCRE32
1800    #define STRING_UTF_RIGHTPAR            STR_U STR_T STR_F STR_3 STR_2 STR_RIGHT_PARENTHESIS
1801    #endif
1802  #define STRING_UCP_RIGHTPAR            STR_U STR_C STR_P STR_RIGHT_PARENTHESIS  #define STRING_UCP_RIGHTPAR            STR_U STR_C STR_P STR_RIGHT_PARENTHESIS
1803  #define STRING_NO_START_OPT_RIGHTPAR   STR_N STR_O STR_UNDERSCORE STR_S STR_T STR_A STR_R STR_T STR_UNDERSCORE STR_O STR_P STR_T STR_RIGHT_PARENTHESIS  #define STRING_NO_START_OPT_RIGHTPAR   STR_N STR_O STR_UNDERSCORE STR_S STR_T STR_A STR_R STR_T STR_UNDERSCORE STR_O STR_P STR_T STR_RIGHT_PARENTHESIS
1804    
# Line 1550  only. */ Line 1840  only. */
1840  #define PT_SPACE      6    /* Perl space - Z plus 9,10,12,13 */  #define PT_SPACE      6    /* Perl space - Z plus 9,10,12,13 */
1841  #define PT_PXSPACE    7    /* POSIX space - Z plus 9,10,11,12,13 */  #define PT_PXSPACE    7    /* POSIX space - Z plus 9,10,11,12,13 */
1842  #define PT_WORD       8    /* Word - L plus N plus underscore */  #define PT_WORD       8    /* Word - L plus N plus underscore */
1843    #define PT_CLIST      9    /* Pseudo-property: match character list */
1844    
1845  /* Flag bits and data types for the extended class (OP_XCLASS) for classes that  /* Flag bits and data types for the extended class (OP_XCLASS) for classes that
1846  contain characters with values greater than 255. */  contain characters with values greater than 255. */
# Line 1565  contain characters with values greater t Line 1856  contain characters with values greater t
1856    
1857  /* These are escaped items that aren't just an encoding of a particular data  /* These are escaped items that aren't just an encoding of a particular data
1858  value such as \n. They must have non-zero values, as check_escape() returns  value such as \n. They must have non-zero values, as check_escape() returns
1859  their negation. Also, they must appear in the same order as in the opcode  0 for a data character.  Also, they must appear in the same order as in the opcode
1860  definitions below, up to ESC_z. There's a dummy for OP_ALLANY because it  definitions below, up to ESC_z. There's a dummy for OP_ALLANY because it
1861  corresponds to "." in DOTALL mode rather than an escape sequence. It is also  corresponds to "." in DOTALL mode rather than an escape sequence. It is also
1862  used for [^] in JavaScript compatibility mode, and for \C in non-utf mode. In  used for [^] in JavaScript compatibility mode, and for \C in non-utf mode. In
1863  non-DOTALL mode, "." behaves like \N.  non-DOTALL mode, "." behaves like \N.
1864    
1865  The special values ESC_DU, ESC_du, etc. are used instead of ESC_D, ESC_d, etc.  The special values ESC_DU, ESC_du, etc. are used instead of ESC_D, ESC_d, etc.
1866  when PCRE_UCP is set, when replacement of \d etc by \p sequences is required.  when PCRE_UCP is set and replacement of \d etc by \p sequences is required.
1867  They must be contiguous, and remain in order so that the replacements can be  They must be contiguous, and remain in order so that the replacements can be
1868  looked up from a table.  looked up from a table.
1869    
1870  The final escape must be ESC_REF as subsequent values are used for  Negative numbers are used to encode a backreference (\1, \2, \3, etc.) in
1871  backreferences (\1, \2, \3, etc). There are two tests in the code for an escape  check_escape(). There are two tests in the code for an escape
1872  greater than ESC_b and less than ESC_Z to detect the types that may be  greater than ESC_b and less than ESC_Z to detect the types that may be
1873  repeated. These are the types that consume characters. If any new escapes are  repeated. These are the types that consume characters. If any new escapes are
1874  put in between that don't consume a character, that code will have to change.  put in between that don't consume a character, that code will have to change.
# Line 1587  enum { ESC_A = 1, ESC_G, ESC_K, ESC_B, E Line 1878  enum { ESC_A = 1, ESC_G, ESC_K, ESC_B, E
1878         ESC_W, ESC_w, ESC_N, ESC_dum, ESC_C, ESC_P, ESC_p, ESC_R, ESC_H,         ESC_W, ESC_w, ESC_N, ESC_dum, ESC_C, ESC_P, ESC_p, ESC_R, ESC_H,
1879         ESC_h, ESC_V, ESC_v, ESC_X, ESC_Z, ESC_z,         ESC_h, ESC_V, ESC_v, ESC_X, ESC_Z, ESC_z,
1880         ESC_E, ESC_Q, ESC_g, ESC_k,         ESC_E, ESC_Q, ESC_g, ESC_k,
1881         ESC_DU, ESC_du, ESC_SU, ESC_su, ESC_WU, ESC_wu,         ESC_DU, ESC_du, ESC_SU, ESC_su, ESC_WU, ESC_wu };
        ESC_REF };  
1882    
1883  /* Opcode table: Starting from 1 (i.e. after OP_END), the values up to  /* Opcode table: Starting from 1 (i.e. after OP_END), the values up to
1884  OP_EOD must correspond in order to the list of escapes immediately above.  OP_EOD must correspond in order to the list of escapes immediately above.
# Line 1614  enum { Line 1904  enum {
1904    OP_NOT_WORDCHAR,       /* 10 \W */    OP_NOT_WORDCHAR,       /* 10 \W */
1905    OP_WORDCHAR,           /* 11 \w */    OP_WORDCHAR,           /* 11 \w */
1906    
1907    OP_ANY,            /* 12 Match any character except newline */    OP_ANY,            /* 12 Match any character except newline (\N) */
1908    OP_ALLANY,         /* 13 Match any character */    OP_ALLANY,         /* 13 Match any character */
1909    OP_ANYBYTE,        /* 14 Match any byte (\C); different to OP_ANY for UTF-8 */    OP_ANYBYTE,        /* 14 Match any byte (\C); different to OP_ANY for UTF-8 */
1910    OP_NOTPROP,        /* 15 \P (not Unicode property) */    OP_NOTPROP,        /* 15 \P (not Unicode property) */
# Line 1625  enum { Line 1915  enum {
1915    OP_NOT_VSPACE,     /* 20 \V (not vertical whitespace) */    OP_NOT_VSPACE,     /* 20 \V (not vertical whitespace) */
1916    OP_VSPACE,         /* 21 \v (vertical whitespace) */    OP_VSPACE,         /* 21 \v (vertical whitespace) */
1917    OP_EXTUNI,         /* 22 \X (extended Unicode sequence) */    OP_EXTUNI,         /* 22 \X (extended Unicode sequence) */
1918    OP_EODN,           /* 23 End of data or \n at end of data: \Z. */    OP_EODN,           /* 23 End of data or \n at end of data (\Z) */
1919    OP_EOD,            /* 24 End of data: \z */    OP_EOD,            /* 24 End of data (\z) */
1920    
1921    OP_CIRC,           /* 25 Start of line - not multiline */    OP_CIRC,           /* 25 Start of line - not multiline */
1922    OP_CIRCM,          /* 26 Start of line - multiline */    OP_CIRCM,          /* 26 Start of line - multiline */
# Line 1986  enum { ERR0,  ERR1,  ERR2,  ERR3,  ERR4, Line 2276  enum { ERR0,  ERR1,  ERR2,  ERR3,  ERR4,
2276         ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49,         ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49,
2277         ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59,         ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59,
2278         ERR60, ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69,         ERR60, ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69,
2279         ERR70, ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERRCOUNT };         ERR70, ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERRCOUNT };
2280    
2281  /* JIT compiling modes. The function list is indexed by them. */  /* JIT compiling modes. The function list is indexed by them. */
2282  enum { JIT_COMPILE, JIT_PARTIAL_SOFT_COMPILE, JIT_PARTIAL_HARD_COMPILE,  enum { JIT_COMPILE, JIT_PARTIAL_SOFT_COMPILE, JIT_PARTIAL_HARD_COMPILE,
# Line 2009  fields are present. Currently PCRE alway Line 2299  fields are present. Currently PCRE alway
2299  NOTE NOTE NOTE  NOTE NOTE NOTE
2300  */  */
2301    
2302  #ifdef COMPILE_PCRE8  #if defined COMPILE_PCRE8
2303  #define REAL_PCRE real_pcre  #define REAL_PCRE real_pcre
2304  #else  #elif defined COMPILE_PCRE16
2305  #define REAL_PCRE real_pcre16  #define REAL_PCRE real_pcre16
2306    #elif defined COMPILE_PCRE32
2307    #define REAL_PCRE real_pcre32
2308  #endif  #endif
2309    
2310  typedef struct REAL_PCRE {  /* It is necessary to fork the struct for 32 bit, since it needs to use
2311     * pcre_uchar for first_char and req_char. Can't put an ifdef inside the
2312     * typedef since pcretest needs access to the struct of the 8-, 16-
2313     * and 32-bit variants. */
2314    
2315    typedef struct real_pcre8_or_16 {
2316    pcre_uint32 magic_number;    pcre_uint32 magic_number;
2317    pcre_uint32 size;               /* Total that was malloced */    pcre_uint32 size;               /* Total that was malloced */
2318    pcre_uint32 options;            /* Public options */    pcre_uint32 options;            /* Public options */
# Line 2031  typedef struct REAL_PCRE { Line 2328  typedef struct REAL_PCRE {
2328    pcre_uint16 ref_count;          /* Reference count */    pcre_uint16 ref_count;          /* Reference count */
2329    const pcre_uint8 *tables;       /* Pointer to tables or NULL for std */    const pcre_uint8 *tables;       /* Pointer to tables or NULL for std */
2330    const pcre_uint8 *nullpad;      /* NULL padding */    const pcre_uint8 *nullpad;      /* NULL padding */
2331  } REAL_PCRE;  } real_pcre8_or_16;
2332    
2333    typedef struct real_pcre8_or_16 real_pcre;
2334    typedef struct real_pcre8_or_16 real_pcre16;
2335    
2336    typedef struct real_pcre32 {
2337      pcre_uint32 magic_number;
2338      pcre_uint32 size;               /* Total that was malloced */
2339      pcre_uint32 options;            /* Public options */
2340      pcre_uint16 flags;              /* Private flags */
2341      pcre_uint16 max_lookbehind;     /* Longest lookbehind (characters) */
2342      pcre_uint16 top_bracket;        /* Highest numbered group */
2343      pcre_uint16 top_backref;        /* Highest numbered back reference */
2344      pcre_uint32 first_char;         /* Starting character */
2345      pcre_uint32 req_char;           /* This character must be seen */
2346      pcre_uint16 name_table_offset;  /* Offset to name table that follows */
2347      pcre_uint16 name_entry_size;    /* Size of any name items */
2348      pcre_uint16 name_count;         /* Number of name items */
2349      pcre_uint16 ref_count;          /* Reference count */
2350      pcre_uint16 dummy1;             /* for later expansion */
2351      pcre_uint16 dummy2;             /* for later expansion */
2352      const pcre_uint8 *tables;       /* Pointer to tables or NULL for std */
2353      void *nullpad;                  /* for later expansion */
2354    } real_pcre32;
2355    
2356    /* Assert that the size of REAL_PCRE is divisible by 8 */
2357    typedef int __assert_real_pcre_size_divisible_8[(sizeof(REAL_PCRE) % 8) == 0 ? 1 : -1];
2358    
2359    /* Needed in pcretest to access some fields in the real_pcre* structures
2360     * directly. They're unified for 8/16/32 bits since the structs only differ
2361     * after these fields; if that ever changes, these defines will need to be
2362     * forked into 8/16 and 32 bit versions. */
2363    #define REAL_PCRE_MAGIC(re)     (((REAL_PCRE*)re)->magic_number)
2364    #define REAL_PCRE_SIZE(re)      (((REAL_PCRE*)re)->size)
2365    #define REAL_PCRE_OPTIONS(re)   (((REAL_PCRE*)re)->options)
2366    #define REAL_PCRE_FLAGS(re)     (((REAL_PCRE*)re)->flags)
2367    
2368  /* The format of the block used to store data from pcre_study(). The same  /* The format of the block used to store data from pcre_study(). The same
2369  remark (see NOTE above) about extending this structure applies. */  remark (see NOTE above) about extending this structure applies. */
# Line 2072  typedef struct compile_data { Line 2404  typedef struct compile_data {
2404    int  names_found;                 /* Number of entries so far */    int  names_found;                 /* Number of entries so far */
2405    int  name_entry_size;             /* Size of each entry */    int  name_entry_size;             /* Size of each entry */
2406    int  workspace_size;              /* Size of workspace */    int  workspace_size;              /* Size of workspace */
2407    int  bracount;                    /* Count of capturing parens as we compile */    unsigned int  bracount;           /* Count of capturing parens as we compile */
2408    int  final_bracount;              /* Saved value after first pass */    int  final_bracount;              /* Saved value after first pass */
2409    int  max_lookbehind;              /* Maximum lookbehind (characters) */    int  max_lookbehind;              /* Maximum lookbehind (characters) */
2410    int  top_backref;                 /* Maximum back reference */    int  top_backref;                 /* Maximum back reference */
# Line 2082  typedef struct compile_data { Line 2414  typedef struct compile_data {
2414    int  external_flags;              /* External flag bits to be set */    int  external_flags;              /* External flag bits to be set */
2415    int  req_varyopt;                 /* "After variable item" flag for reqbyte */    int  req_varyopt;                 /* "After variable item" flag for reqbyte */
2416    BOOL had_accept;                  /* (*ACCEPT) encountered */    BOOL had_accept;                  /* (*ACCEPT) encountered */
2417    BOOL had_pruneorskip;             /* (*PRUNE) or (*SKIP) encountered */    BOOL had_pruneorskip;             /* (*PRUNE) or (*SKIP) encountered */
2418    BOOL check_lookbehind;            /* Lookbehinds need later checking */    BOOL check_lookbehind;            /* Lookbehinds need later checking */
2419    int  nltype;                      /* Newline type */    int  nltype;                      /* Newline type */
2420    int  nllen;                       /* Newline string length */    int  nllen;                       /* Newline string length */
# Line 2102  call within the pattern; used by pcre_ex Line 2434  call within the pattern; used by pcre_ex
2434    
2435  typedef struct recursion_info {  typedef struct recursion_info {
2436    struct recursion_info *prevrec; /* Previous recursion record (or NULL) */    struct recursion_info *prevrec; /* Previous recursion record (or NULL) */
2437    int group_num;                  /* Number of group that was called */    unsigned int group_num;         /* Number of group that was called */
2438    int *offset_save;               /* Pointer to start of saved offsets */    int *offset_save;               /* Pointer to start of saved offsets */
2439    int saved_max;                  /* Number of saved offsets */    int saved_max;                  /* Number of saved offsets */
2440    PCRE_PUCHAR subject_position;   /* Position at start of recursion */    PCRE_PUCHAR subject_position;   /* Position at start of recursion */
# Line 2236  total length. */ Line 2568  total length. */
2568    
2569  /* Internal function and data prefixes. */  /* Internal function and data prefixes. */
2570    
2571  #ifdef COMPILE_PCRE8  #if defined COMPILE_PCRE8
2572  #ifndef PUBL  #ifndef PUBL
2573  #define PUBL(name) pcre_##name  #define PUBL(name) pcre_##name
2574  #endif  #endif
2575  #ifndef PRIV  #ifndef PRIV
2576  #define PRIV(name) _pcre_##name  #define PRIV(name) _pcre_##name
2577  #endif  #endif
2578  #else /* COMPILE_PCRE8 */  #elif defined COMPILE_PCRE16
 #ifdef COMPILE_PCRE16  
2579  #ifndef PUBL  #ifndef PUBL
2580  #define PUBL(name) pcre16_##name  #define PUBL(name) pcre16_##name
2581  #endif  #endif
2582  #ifndef PRIV  #ifndef PRIV
2583  #define PRIV(name) _pcre16_##name  #define PRIV(name) _pcre16_##name
2584  #endif  #endif
2585    #elif defined COMPILE_PCRE32
2586    #ifndef PUBL
2587    #define PUBL(name) pcre32_##name
2588    #endif
2589    #ifndef PRIV
2590    #define PRIV(name) _pcre32_##name
2591    #endif
2592  #else  #else
2593  #error Unsupported compiling mode  #error Unsupported compiling mode
2594  #endif /* COMPILE_PCRE16 */  #endif /* COMPILE_PCRE[8|16|32] */
 #endif /* COMPILE_PCRE8 */  
2595    
2596  /* Layout of the UCP type table that translates property names into types and  /* Layout of the UCP type table that translates property names into types and
2597  codes. Each entry used to point directly to a name, but to reduce the number of  codes. Each entry used to point directly to a name, but to reduce the number of
# Line 2274  but are not part of the PCRE public API. Line 2611  but are not part of the PCRE public API.
2611  pcre_tables.c module. */  pcre_tables.c module. */
2612    
2613  #ifdef COMPILE_PCRE8  #ifdef COMPILE_PCRE8
   
2614  extern const int            PRIV(utf8_table1)[];  extern const int            PRIV(utf8_table1)[];
2615  extern const int            PRIV(utf8_table1_size);  extern const int            PRIV(utf8_table1_size);
2616  extern const int            PRIV(utf8_table2)[];  extern const int            PRIV(utf8_table2)[];
2617  extern const int            PRIV(utf8_table3)[];  extern const int            PRIV(utf8_table3)[];
2618  extern const pcre_uint8     PRIV(utf8_table4)[];  extern const pcre_uint8     PRIV(utf8_table4)[];
   
2619  #endif /* COMPILE_PCRE8 */  #endif /* COMPILE_PCRE8 */
2620    
2621  extern const char           PRIV(utt_names)[];  extern const char           PRIV(utt_names)[];
2622  extern const ucp_type_table PRIV(utt)[];  extern const ucp_type_table PRIV(utt)[];
2623  extern const int            PRIV(utt_size);  extern const int            PRIV(utt_size);
2624    
2625    extern const pcre_uint8     PRIV(OP_lengths)[];
2626  extern const pcre_uint8     PRIV(default_tables)[];  extern const pcre_uint8     PRIV(default_tables)[];
2627    
2628  extern const pcre_uint8     PRIV(OP_lengths)[];  extern const pcre_uint32    PRIV(hspace_list)[];
2629    extern const pcre_uint32    PRIV(vspace_list)[];
2630    
2631    
2632  /* Internal shared functions. These are functions that are used by more than  /* Internal shared functions. These are functions that are used by more than
# Line 2297  one of the exported public functions. Th Line 2634  one of the exported public functions. Th
2634  sense, but are not part of the PCRE public API. */  sense, but are not part of the PCRE public API. */
2635    
2636  /* String comparison functions. */  /* String comparison functions. */
2637  #ifdef COMPILE_PCRE8  #if defined COMPILE_PCRE8
2638    
2639  #define STRCMP_UC_UC(str1, str2) \  #define STRCMP_UC_UC(str1, str2) \
2640    strcmp((char *)(str1), (char *)(str2))    strcmp((char *)(str1), (char *)(str2))
# Line 2309  sense, but are not part of the PCRE publ Line 2646  sense, but are not part of the PCRE publ
2646    strncmp((char *)(str1), (str2), (num))    strncmp((char *)(str1), (str2), (num))
2647  #define STRLEN_UC(str) strlen((const char *)str)  #define STRLEN_UC(str) strlen((const char *)str)
2648    
2649  #else  #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2650    
2651  extern int               PRIV(strcmp_uc_uc)(const pcre_uchar *,  extern int               PRIV(strcmp_uc_uc)(const pcre_uchar *,
2652                             const pcre_uchar *);                             const pcre_uchar *);
# Line 2331  extern unsigned int      PRIV(strlen_uc) Line 2668  extern unsigned int      PRIV(strlen_uc)
2668    PRIV(strncmp_uc_c8)((str1), (str2), (num))    PRIV(strncmp_uc_c8)((str1), (str2), (num))
2669  #define STRLEN_UC(str) PRIV(strlen_uc)(str)  #define STRLEN_UC(str) PRIV(strlen_uc)(str)
2670    
2671  #endif /* COMPILE_PCRE8 */  #endif /* COMPILE_PCRE[8|16|32] */
2672    
2673    #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
2674    
2675    #define STRCMP_UC_UC_TEST(str1, str2) STRCMP_UC_UC(str1, str2)
2676    #define STRCMP_UC_C8_TEST(str1, str2) STRCMP_UC_C8(str1, str2)
2677    
2678    #elif defined COMPILE_PCRE32
2679    
2680    extern int               PRIV(strcmp_uc_uc_utf)(const pcre_uchar *,
2681                               const pcre_uchar *);
2682    extern int               PRIV(strcmp_uc_c8_utf)(const pcre_uchar *,
2683                               const char *);
2684    
2685    #define STRCMP_UC_UC_TEST(str1, str2) \
2686      (utf ? PRIV(strcmp_uc_uc_utf)((str1), (str2)) : PRIV(strcmp_uc_uc)((str1), (str2)))
2687    #define STRCMP_UC_C8_TEST(str1, str2) \
2688      (utf ? PRIV(strcmp_uc_c8_utf)((str1), (str2)) : PRIV(strcmp_uc_c8)((str1), (str2)))
2689    
2690    #endif /* COMPILE_PCRE[8|16|32] */
2691    
2692  extern const pcre_uchar *PRIV(find_bracket)(const pcre_uchar *, BOOL, int);  extern const pcre_uchar *PRIV(find_bracket)(const pcre_uchar *, BOOL, int);
2693  extern BOOL              PRIV(is_newline)(PCRE_PUCHAR, int, PCRE_PUCHAR,  extern BOOL              PRIV(is_newline)(PCRE_PUCHAR, int, PCRE_PUCHAR,
2694                             int *, BOOL);                             int *, BOOL);
2695  extern int               PRIV(ord2utf)(pcre_uint32, pcre_uchar *);  extern unsigned int      PRIV(ord2utf)(pcre_uint32, pcre_uchar *);
2696  extern int               PRIV(valid_utf)(PCRE_PUCHAR, int, int *);  extern int               PRIV(valid_utf)(PCRE_PUCHAR, int, int *);
2697  extern BOOL              PRIV(was_newline)(PCRE_PUCHAR, int, PCRE_PUCHAR,  extern BOOL              PRIV(was_newline)(PCRE_PUCHAR, int, PCRE_PUCHAR,
2698                             int *, BOOL);                             int *, BOOL);
2699  extern BOOL              PRIV(xclass)(int, const pcre_uchar *, BOOL);  extern BOOL              PRIV(xclass)(pcre_uint32, const pcre_uchar *, BOOL);
2700    
2701  #ifdef SUPPORT_JIT  #ifdef SUPPORT_JIT
2702  extern void              PRIV(jit_compile)(const REAL_PCRE *,  extern void              PRIV(jit_compile)(const REAL_PCRE *,
# Line 2358  typedef struct { Line 2714  typedef struct {
2714    pcre_uint8 script;     /* ucp_Arabic, etc. */    pcre_uint8 script;     /* ucp_Arabic, etc. */
2715    pcre_uint8 chartype;   /* ucp_Cc, etc. (general categories) */    pcre_uint8 chartype;   /* ucp_Cc, etc. (general categories) */
2716    pcre_uint8 gbprop;     /* ucp_gbControl, etc. (grapheme break property) */    pcre_uint8 gbprop;     /* ucp_gbControl, etc. (grapheme break property) */
2717      pcre_uint8 caseset;    /* offset to multichar other cases or zero */
2718    pcre_int32 other_case; /* offset to other case, or zero if none */    pcre_int32 other_case; /* offset to other case, or zero if none */
2719  } ucd_record;  } ucd_record;
2720    
2721    extern const pcre_uint32 PRIV(ucd_caseless_sets)[];
2722  extern const ucd_record  PRIV(ucd_records)[];  extern const ucd_record  PRIV(ucd_records)[];
2723  extern const pcre_uint8  PRIV(ucd_stage1)[];  extern const pcre_uint8  PRIV(ucd_stage1)[];
2724  extern const pcre_uint16 PRIV(ucd_stage2)[];  extern const pcre_uint16 PRIV(ucd_stage2)[];
2725  extern const int         PRIV(ucp_gentype)[];  extern const pcre_uint32 PRIV(ucp_gentype)[];
2726  extern const pcre_uint32 PRIV(ucp_gbtable)[];  extern const pcre_uint32 PRIV(ucp_gbtable)[];
2727  #ifdef SUPPORT_JIT  #ifdef SUPPORT_JIT
2728  extern const int         PRIV(ucp_typerange)[];  extern const int         PRIV(ucp_typerange)[];
# Line 2375  extern const int         PRIV(ucp_typera Line 2733  extern const int         PRIV(ucp_typera
2733    
2734  #define UCD_BLOCK_SIZE 128  #define UCD_BLOCK_SIZE 128
2735  #define GET_UCD(ch) (PRIV(ucd_records) + \  #define GET_UCD(ch) (PRIV(ucd_records) + \
2736          PRIV(ucd_stage2)[PRIV(ucd_stage1)[(ch) / UCD_BLOCK_SIZE] * \          PRIV(ucd_stage2)[PRIV(ucd_stage1)[(int)(ch) / UCD_BLOCK_SIZE] * \
2737          UCD_BLOCK_SIZE + (ch) % UCD_BLOCK_SIZE])          UCD_BLOCK_SIZE + (int)(ch) % UCD_BLOCK_SIZE])
2738    
2739  #define UCD_CHARTYPE(ch)    GET_UCD(ch)->chartype  #define UCD_CHARTYPE(ch)    GET_UCD(ch)->chartype
2740  #define UCD_SCRIPT(ch)      GET_UCD(ch)->script  #define UCD_SCRIPT(ch)      GET_UCD(ch)->script
2741  #define UCD_CATEGORY(ch)    PRIV(ucp_gentype)[UCD_CHARTYPE(ch)]  #define UCD_CATEGORY(ch)    PRIV(ucp_gentype)[UCD_CHARTYPE(ch)]
2742  #define UCD_GRAPHBREAK(ch)  GET_UCD(ch)->gbprop  #define UCD_GRAPHBREAK(ch)  GET_UCD(ch)->gbprop
2743  #define UCD_OTHERCASE(ch)   (ch + GET_UCD(ch)->other_case)  #define UCD_CASESET(ch)     GET_UCD(ch)->caseset
2744    #define UCD_OTHERCASE(ch)   ((pcre_uint32)((int)ch + (int)(GET_UCD(ch)->other_case)))
2745    
2746  #endif /* SUPPORT_UCP */  #endif /* SUPPORT_UCP */
2747    

Legend:
Removed from v.1028  
changed lines
  Added in v.1160

  ViewVC Help
Powered by ViewVC 1.1.5