/[pcre]/code/trunk/pcre_internal.h
ViewVC logotype

Diff of /code/trunk/pcre_internal.h

Parent Directory | Revision Log | View Patch

revision 1045 by ph10, Sun Sep 23 16:50:00 2012 UTC revision 1198 by chpe, Thu Nov 1 19:23:35 2012 UTC
# Line 40  POSSIBILITY OF SUCH DAMAGE. Line 40  POSSIBILITY OF SUCH DAMAGE.
40    
41  /* This header contains definitions that are shared between the different  /* This header contains definitions that are shared between the different
42  modules, but which are not relevant to the exported API. This includes some  modules, but which are not relevant to the exported API. This includes some
43  functions whose names all begin with "_pcre_" or "_pcre16_" depending on  functions whose names all begin with "_pcre_", "_pcre16_" or "_pcre32_"
44  the PRIV macro. */  depending on the PRIV macro. */
45    
46  #ifndef PCRE_INTERNAL_H  #ifndef PCRE_INTERNAL_H
47  #define PCRE_INTERNAL_H  #define PCRE_INTERNAL_H
# Line 53  the PRIV macro. */ Line 53  the PRIV macro. */
53  #endif  #endif
54    
55  /* PCRE is compiled as an 8 bit library if it is not requested otherwise. */  /* PCRE is compiled as an 8 bit library if it is not requested otherwise. */
56  #ifndef COMPILE_PCRE16  
57    #if !defined COMPILE_PCRE16 && !defined COMPILE_PCRE32
58  #define COMPILE_PCRE8  #define COMPILE_PCRE8
59  #endif  #endif
60    
# Line 78  Until then we define it if SUPPORT_UTF i Line 79  Until then we define it if SUPPORT_UTF i
79  #define SUPPORT_UTF8 1  #define SUPPORT_UTF8 1
80  #endif  #endif
81    
82  /* We do not support both EBCDIC and UTF-8/16 at the same time. The "configure"  /* We do not support both EBCDIC and UTF-8/16/32 at the same time. The "configure"
83  script prevents both being selected, but not everybody uses "configure". */  script prevents both being selected, but not everybody uses "configure". */
84    
85  #if defined EBCDIC && defined SUPPORT_UTF  #if defined EBCDIC && defined SUPPORT_UTF
86  #error The use of both EBCDIC and SUPPORT_UTF8/16 is not supported.  #error The use of both EBCDIC and SUPPORT_UTF is not supported.
87  #endif  #endif
88    
89  /* Use a macro for debugging printing, 'cause that eliminates the use of #ifdef  /* Use a macro for debugging printing, 'cause that eliminates the use of #ifdef
# Line 111  setjmp and stdarg are used is when NO_RE Line 112  setjmp and stdarg are used is when NO_RE
112  #include <stdlib.h>  #include <stdlib.h>
113  #include <string.h>  #include <string.h>
114    
115    /* Valgrind (memcheck) support */
116    
117    #ifdef SUPPORT_VALGRIND
118    #include <valgrind/memcheck.h>
119    #endif
120    
121  /* When compiling a DLL for Windows, the exported symbols have to be declared  /* When compiling a DLL for Windows, the exported symbols have to be declared
122  using some MS magic. I found some useful information on this web page:  using some MS magic. I found some useful information on this web page:
123  http://msdn2.microsoft.com/en-us/library/y4h7bcy6(VS.80).aspx. According to the  http://msdn2.microsoft.com/en-us/library/y4h7bcy6(VS.80).aspx. According to the
# Line 214  stdint.h is available, include it; it ma Line 221  stdint.h is available, include it; it ma
221  have stdint.h (e.g. Solaris) may have inttypes.h. The macro int64_t may be set  have stdint.h (e.g. Solaris) may have inttypes.h. The macro int64_t may be set
222  by "configure". */  by "configure". */
223    
224  #if HAVE_STDINT_H  #if defined HAVE_STDINT_H
225  #include <stdint.h>  #include <stdint.h>
226  #elif HAVE_INTTYPES_H  #elif defined HAVE_INTTYPES_H
227  #include <inttypes.h>  #include <inttypes.h>
228  #endif  #endif
229    
# Line 243  exactly 256 items. When the character is Line 250  exactly 256 items. When the character is
250  items, some check is needed before accessing these tables.  items, some check is needed before accessing these tables.
251  */  */
252    
253  #ifdef COMPILE_PCRE8  #if defined COMPILE_PCRE8
254    
255  typedef unsigned char pcre_uchar;  typedef unsigned char pcre_uchar;
256  #define IN_UCHARS(x) (x)  #define IN_UCHARS(x) (x)
257  #define MAX_255(c) 1  #define MAX_255(c) 1
258  #define TABLE_GET(c, table, default) ((table)[c])  #define TABLE_GET(c, table, default) ((table)[c])
259    
260  #else  #elif defined COMPILE_PCRE16
261    
 #ifdef COMPILE_PCRE16  
262  #if USHRT_MAX != 65535  #if USHRT_MAX != 65535
263  /* This is a warning message. Change PCRE_UCHAR16 to a 16 bit data type in  /* This is a warning message. Change PCRE_UCHAR16 to a 16 bit data type in
264  pcre.h(.in) and disable (comment out) this message. */  pcre.h(.in) and disable (comment out) this message. */
# Line 260  pcre.h(.in) and disable (comment out) th Line 266  pcre.h(.in) and disable (comment out) th
266  #endif  #endif
267    
268  typedef pcre_uint16 pcre_uchar;  typedef pcre_uint16 pcre_uchar;
269  #define IN_UCHARS(x) ((x) << 1)  #define UCHAR_SHIFT (1)
270    #define IN_UCHARS(x) ((x) << UCHAR_SHIFT)
271    #define MAX_255(c) ((c) <= 255u)
272    #define TABLE_GET(c, table, default) (MAX_255(c)? ((table)[c]):(default))
273    
274    #elif defined COMPILE_PCRE32
275    
276    typedef pcre_uint32 pcre_uchar;
277    #define UCHAR_SHIFT (2)
278    #define IN_UCHARS(x) ((x) << UCHAR_SHIFT)
279  #define MAX_255(c) ((c) <= 255u)  #define MAX_255(c) ((c) <= 255u)
280  #define TABLE_GET(c, table, default) (MAX_255(c)? ((table)[c]):(default))  #define TABLE_GET(c, table, default) (MAX_255(c)? ((table)[c]):(default))
281    
282    /* Assert that pcre_uchar32 is a 32-bit type */
283    typedef int __assert_pcre_uchar32_size[sizeof(pcre_uchar) == 4 ? 1 : -1];
284    
285  #else  #else
286  #error Unsupported compiling mode  #error Unsupported compiling mode
287  #endif /* COMPILE_PCRE16 */  #endif /* COMPILE_PCRE[8|16|32] */
   
 #endif /* COMPILE_PCRE8 */  
288    
289  /* This is an unsigned int value that no character can ever have. UTF-8  /* This is an unsigned int value that no character can ever have. UTF-8
290  characters only go up to 0x7fffffff (though Unicode doesn't go beyond  characters only go up to 0x7fffffff (though Unicode doesn't go beyond
# Line 295  start/end of string field names are. */ Line 311  start/end of string field names are. */
311         &(NLBLOCK->nllen), utf)) \         &(NLBLOCK->nllen), utf)) \
312      : \      : \
313      ((p) <= NLBLOCK->PSEND - NLBLOCK->nllen && \      ((p) <= NLBLOCK->PSEND - NLBLOCK->nllen && \
314       (p)[0] == NLBLOCK->nl[0] && \       RAWUCHARTEST(p) == NLBLOCK->nl[0] && \
315       (NLBLOCK->nllen == 1 || (p)[1] == NLBLOCK->nl[1]) \       (NLBLOCK->nllen == 1 || RAWUCHARTEST(p+1) == NLBLOCK->nl[1])       \
316      ) \      ) \
317    )    )
318    
# Line 309  start/end of string field names are. */ Line 325  start/end of string field names are. */
325         &(NLBLOCK->nllen), utf)) \         &(NLBLOCK->nllen), utf)) \
326      : \      : \
327      ((p) >= NLBLOCK->PSSTART + NLBLOCK->nllen && \      ((p) >= NLBLOCK->PSSTART + NLBLOCK->nllen && \
328       (p)[-NLBLOCK->nllen] == NLBLOCK->nl[0] && \       RAWUCHARTEST(p - NLBLOCK->nllen) == NLBLOCK->nl[0] &&              \
329       (NLBLOCK->nllen == 1 || (p)[-NLBLOCK->nllen+1] == NLBLOCK->nl[1]) \       (NLBLOCK->nllen == 1 || RAWUCHARTEST(p - NLBLOCK->nllen + 1) == NLBLOCK->nl[1]) \
330      ) \      ) \
331    )    )
332    
# Line 396  The macros are controlled by the value o Line 412  The macros are controlled by the value o
412  the config.h file, but can be overridden by using -D on the command line. This  the config.h file, but can be overridden by using -D on the command line. This
413  is automated on Unix systems via the "configure" command. */  is automated on Unix systems via the "configure" command. */
414    
415  #ifdef COMPILE_PCRE8  #if defined COMPILE_PCRE8
416    
417  #if LINK_SIZE == 2  #if LINK_SIZE == 2
418    
# Line 441  is automated on Unix systems via the "co Line 457  is automated on Unix systems via the "co
457  #error LINK_SIZE must be either 2, 3, or 4  #error LINK_SIZE must be either 2, 3, or 4
458  #endif  #endif
459    
460  #else /* COMPILE_PCRE8 */  #elif defined COMPILE_PCRE16
   
 #ifdef COMPILE_PCRE16  
461    
462  #if LINK_SIZE == 2  #if LINK_SIZE == 2
463    
464    /* Redefine LINK_SIZE as a multiple of sizeof(pcre_uchar) */
465  #undef LINK_SIZE  #undef LINK_SIZE
466  #define LINK_SIZE 1  #define LINK_SIZE 1
467    
# Line 460  is automated on Unix systems via the "co Line 475  is automated on Unix systems via the "co
475    
476  #elif LINK_SIZE == 3 || LINK_SIZE == 4  #elif LINK_SIZE == 3 || LINK_SIZE == 4
477    
478    /* Redefine LINK_SIZE as a multiple of sizeof(pcre_uchar) */
479  #undef LINK_SIZE  #undef LINK_SIZE
480  #define LINK_SIZE 2  #define LINK_SIZE 2
481    
# Line 477  is automated on Unix systems via the "co Line 493  is automated on Unix systems via the "co
493  #error LINK_SIZE must be either 2, 3, or 4  #error LINK_SIZE must be either 2, 3, or 4
494  #endif  #endif
495    
496    #elif defined COMPILE_PCRE32
497    
498    /* Only supported LINK_SIZE is 4 */
499    /* Redefine LINK_SIZE as a multiple of sizeof(pcre_uchar) */
500    #undef LINK_SIZE
501    #define LINK_SIZE 1
502    
503    #define PUT(a,n,d)   \
504      (a[n] = (d))
505    
506    #define GET(a,n) \
507      (a[n])
508    
509    /* Keep it positive */
510    #define MAX_PATTERN_SIZE (1 << 30)
511    
512  #else  #else
513  #error Unsupported compiling mode  #error Unsupported compiling mode
514  #endif /* COMPILE_PCRE16 */  #endif /* COMPILE_PCRE[8|16|32] */
   
 #endif /* COMPILE_PCRE8 */  
515    
516  /* Convenience macro defined in terms of the others */  /* Convenience macro defined in terms of the others */
517    
# Line 492  is automated on Unix systems via the "co Line 522  is automated on Unix systems via the "co
522  offsets changes. These are used for repeat counts and for other things such as  offsets changes. These are used for repeat counts and for other things such as
523  capturing parenthesis numbers in back references. */  capturing parenthesis numbers in back references. */
524    
525  #ifdef COMPILE_PCRE8  #if defined COMPILE_PCRE8
526    
527  #define IMM2_SIZE 2  #define IMM2_SIZE 2
528    
529  #define PUT2(a,n,d)   \  #define PUT2(a,n,d)   \
530    a[n] = (d) >> 8; \    a[n] = (d) >> 8; \
531    a[(n)+1] = (d) & 255    a[(n)+1] = (d) & 255
532    
533    /* For reasons that I do not understand, the expression in this GET2 macro is
534    treated by gcc as a signed expression, even when a is declared as unsigned. It
535    seems that any kind of arithmetic results in a signed value. */
536    
537  #define GET2(a,n) \  #define GET2(a,n) \
538    (((a)[n] << 8) | (a)[(n)+1])    (unsigned int)(((a)[n] << 8) | (a)[(n)+1])
539    
540  #else /* COMPILE_PCRE8 */  #elif defined COMPILE_PCRE16
541    
542  #ifdef COMPILE_PCRE16  #define IMM2_SIZE 1
543    
544    #define PUT2(a,n,d)   \
545       a[n] = d
546    
547    #define GET2(a,n) \
548       a[n]
549    
550    #elif defined COMPILE_PCRE32
551    
552  #define IMM2_SIZE 1  #define IMM2_SIZE 1
553    
# Line 517  capturing parenthesis numbers in back re Line 559  capturing parenthesis numbers in back re
559    
560  #else  #else
561  #error Unsupported compiling mode  #error Unsupported compiling mode
562  #endif /* COMPILE_PCRE16 */  #endif /* COMPILE_PCRE[8|16|32] */
   
 #endif /* COMPILE_PCRE8 */  
563    
564  #define PUT2INC(a,n,d)  PUT2(a,n,d), a += IMM2_SIZE  #define PUT2INC(a,n,d)  PUT2(a,n,d), a += IMM2_SIZE
565    
566  /* The maximum length of a MARK name is currently one data unit; it may be  /* The maximum length of a MARK name is currently one data unit; it may be
567  changed in future to be a fixed number of bytes or to depend on LINK_SIZE. */  changed in future to be a fixed number of bytes or to depend on LINK_SIZE. */
568    
569  #define MAX_MARK ((1 << (sizeof(pcre_uchar)*8)) - 1)  #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
570    #define MAX_MARK ((1u << 16) - 1)
571    #else
572    #define MAX_MARK ((1u << 8) - 1)
573    #endif
574    
575  /* When UTF encoding is being used, a character is no longer just a single  /* When UTF encoding is being used, a character is no longer just a single
576  byte. The macros for character handling generate simple sequences when used in  byte. The macros for character handling generate simple sequences when used in
# Line 546  we don't even define them. */ Line 590  we don't even define them. */
590  #define GETCHARINC(c, eptr) c = *eptr++;  #define GETCHARINC(c, eptr) c = *eptr++;
591  #define GETCHARINCTEST(c, eptr) c = *eptr++;  #define GETCHARINCTEST(c, eptr) c = *eptr++;
592  #define GETCHARLEN(c, eptr, len) c = *eptr;  #define GETCHARLEN(c, eptr, len) c = *eptr;
593    #define RAWUCHAR(eptr) (*(eptr))
594    #define RAWUCHARINC(eptr) (*(eptr)++)
595    #define RAWUCHARTEST(eptr) (*(eptr))
596    #define RAWUCHARINCTEST(eptr) (*(eptr)++)
597  /* #define GETCHARLENTEST(c, eptr, len) */  /* #define GETCHARLENTEST(c, eptr, len) */
598  /* #define BACKCHAR(eptr) */  /* #define BACKCHAR(eptr) */
599  /* #define FORWARDCHAR(eptr) */  /* #define FORWARDCHAR(eptr) */
# Line 553  we don't even define them. */ Line 601  we don't even define them. */
601    
602  #else   /* SUPPORT_UTF */  #else   /* SUPPORT_UTF */
603    
 #ifdef COMPILE_PCRE8  
   
 /* These macros were originally written in the form of loops that used data  
 from the tables whose names start with PRIV(utf8_table). They were rewritten by  
 a user so as not to use loops, because in some environments this gives a  
 significant performance advantage, and it seems never to do any harm. */  
   
 /* Tells the biggest code point which can be encoded as a single character. */  
   
 #define MAX_VALUE_FOR_SINGLE_CHAR 127  
   
604  /* Tests whether the code point needs extra characters to decode. */  /* Tests whether the code point needs extra characters to decode. */
605    
606  #define HAS_EXTRALEN(c) ((c) >= 0xc0)  #define HASUTF8EXTRALEN(c) ((c) >= 0xc0)
   
 /* Returns with the additional number of characters if IS_MULTICHAR(c) is TRUE.  
 Otherwise it has an undefined behaviour. */  
   
 #define GET_EXTRALEN(c) (PRIV(utf8_table4)[(c) & 0x3f])  
   
 /* Returns TRUE, if the given character is not the first character  
 of a UTF sequence. */  
   
 #define NOT_FIRSTCHAR(c) (((c) & 0xc0) == 0x80)  
607    
608  /* Base macro to pick up the remaining bytes of a UTF-8 character, not  /* Base macro to pick up the remaining bytes of a UTF-8 character, not
609  advancing the pointer. */  advancing the pointer. */
# Line 600  advancing the pointer. */ Line 627  advancing the pointer. */
627            ((eptr[4] & 0x3f) << 6) | (eptr[5] & 0x3f); \            ((eptr[4] & 0x3f) << 6) | (eptr[5] & 0x3f); \
628      }      }
629    
 /* Get the next UTF-8 character, not advancing the pointer. This is called when  
 we know we are in UTF-8 mode. */  
   
 #define GETCHAR(c, eptr) \  
   c = *eptr; \  
   if (c >= 0xc0) GETUTF8(c, eptr);  
   
 /* Get the next UTF-8 character, testing for UTF-8 mode, and not advancing the  
 pointer. */  
   
 #define GETCHARTEST(c, eptr) \  
   c = *eptr; \  
   if (utf && c >= 0xc0) GETUTF8(c, eptr);  
   
630  /* Base macro to pick up the remaining bytes of a UTF-8 character, advancing  /* Base macro to pick up the remaining bytes of a UTF-8 character, advancing
631  the pointer. */  the pointer. */
632    
# Line 648  the pointer. */ Line 661  the pointer. */
661        } \        } \
662      }      }
663    
664    #if defined COMPILE_PCRE8
665    
666    /* These macros were originally written in the form of loops that used data
667    from the tables whose names start with PRIV(utf8_table). They were rewritten by
668    a user so as not to use loops, because in some environments this gives a
669    significant performance advantage, and it seems never to do any harm. */
670    
671    /* Tells the biggest code point which can be encoded as a single character. */
672    
673    #define MAX_VALUE_FOR_SINGLE_CHAR 127
674    
675    /* Tests whether the code point needs extra characters to decode. */
676    
677    #define HAS_EXTRALEN(c) ((c) >= 0xc0)
678    
679    /* Returns with the additional number of characters if HAS_EXTRALEN(c) is TRUE.
680    Otherwise it has an undefined behaviour. */
681    
682    #define GET_EXTRALEN(c) (PRIV(utf8_table4)[(c) & 0x3f])
683    
684    /* Returns TRUE, if the given character is not the first character
685    of a UTF sequence. */
686    
687    #define NOT_FIRSTCHAR(c) (((c) & 0xc0) == 0x80)
688    
689    /* Get the next UTF-8 character, not advancing the pointer. This is called when
690    we know we are in UTF-8 mode. */
691    
692    #define GETCHAR(c, eptr) \
693      c = *eptr; \
694      if (c >= 0xc0) GETUTF8(c, eptr);
695    
696    /* Get the next UTF-8 character, testing for UTF-8 mode, and not advancing the
697    pointer. */
698    
699    #define GETCHARTEST(c, eptr) \
700      c = *eptr; \
701      if (utf && c >= 0xc0) GETUTF8(c, eptr);
702    
703  /* Get the next UTF-8 character, advancing the pointer. This is called when we  /* Get the next UTF-8 character, advancing the pointer. This is called when we
704  know we are in UTF-8 mode. */  know we are in UTF-8 mode. */
705    
# Line 714  do not know if we are in UTF-8 mode. */ Line 766  do not know if we are in UTF-8 mode. */
766    c = *eptr; \    c = *eptr; \
767    if (utf && c >= 0xc0) GETUTF8LEN(c, eptr, len);    if (utf && c >= 0xc0) GETUTF8LEN(c, eptr, len);
768    
769    /* Returns the next uchar, not advancing the pointer. This is called when
770    we know we are in UTF mode. */
771    
772    #define RAWUCHAR(eptr) \
773      (*(eptr))
774    
775    /* Returns the next uchar, advancing the pointer. This is called when
776    we know we are in UTF mode. */
777    
778    #define RAWUCHARINC(eptr) \
779      (*((eptr)++))
780    
781    /* Returns the next uchar, testing for UTF mode, and not advancing the
782    pointer. */
783    
784    #define RAWUCHARTEST(eptr) \
785      (*(eptr))
786    
787    /* Returns the next uchar, testing for UTF mode, advancing the
788    pointer. */
789    
790    #define RAWUCHARINCTEST(eptr) \
791      (*((eptr)++))
792    
793  /* If the pointer is not at the start of a character, move it back until  /* If the pointer is not at the start of a character, move it back until
794  it is. This is called only in UTF-8 mode - we don't put a test within the macro  it is. This is called only in UTF-8 mode - we don't put a test within the macro
795  because almost all calls are already within a block of UTF-8 only code. */  because almost all calls are already within a block of UTF-8 only code. */
# Line 727  because almost all calls are already wit Line 803  because almost all calls are already wit
803  #define ACROSSCHAR(condition, eptr, action) \  #define ACROSSCHAR(condition, eptr, action) \
804    while((condition) && ((eptr) & 0xc0) == 0x80) action    while((condition) && ((eptr) & 0xc0) == 0x80) action
805    
806  #else /* COMPILE_PCRE8 */  #elif defined COMPILE_PCRE16
   
 #ifdef COMPILE_PCRE16  
807    
808  /* Tells the biggest code point which can be encoded as a single character. */  /* Tells the biggest code point which can be encoded as a single character. */
809    
# Line 811  we do not know if we are in UTF-16 mode. Line 885  we do not know if we are in UTF-16 mode.
885    c = *eptr; \    c = *eptr; \
886    if (utf && (c & 0xfc00) == 0xd800) GETUTF16LEN(c, eptr, len);    if (utf && (c & 0xfc00) == 0xd800) GETUTF16LEN(c, eptr, len);
887    
888    /* Returns the next uchar, not advancing the pointer. This is called when
889    we know we are in UTF mode. */
890    
891    #define RAWUCHAR(eptr) \
892      (*(eptr))
893    
894    /* Returns the next uchar, advancing the pointer. This is called when
895    we know we are in UTF mode. */
896    
897    #define RAWUCHARINC(eptr) \
898      (*((eptr)++))
899    
900    /* Returns the next uchar, testing for UTF mode, and not advancing the
901    pointer. */
902    
903    #define RAWUCHARTEST(eptr) \
904      (*(eptr))
905    
906    /* Returns the next uchar, testing for UTF mode, advancing the
907    pointer. */
908    
909    #define RAWUCHARINCTEST(eptr) \
910      (*((eptr)++))
911    
912  /* If the pointer is not at the start of a character, move it back until  /* If the pointer is not at the start of a character, move it back until
913  it is. This is called only in UTF-16 mode - we don't put a test within the  it is. This is called only in UTF-16 mode - we don't put a test within the
914  macro because almost all calls are already within a block of UTF-16 only  macro because almost all calls are already within a block of UTF-16 only
# Line 825  code. */ Line 923  code. */
923  #define ACROSSCHAR(condition, eptr, action) \  #define ACROSSCHAR(condition, eptr, action) \
924    if ((condition) && ((eptr) & 0xfc00) == 0xdc00) action    if ((condition) && ((eptr) & 0xfc00) == 0xdc00) action
925    
926  #endif  #elif defined COMPILE_PCRE32
927    
928  #endif /* COMPILE_PCRE8 */  /* These are trivial for the 32-bit library, since all UTF-32 characters fit
929    into one pcre_uchar unit. */
930    #define MAX_VALUE_FOR_SINGLE_CHAR (0x10ffffu)
931    #define HAS_EXTRALEN(c) (0)
932    #define GET_EXTRALEN(c) (0)
933    #define NOT_FIRSTCHAR(c) (0)
934    
935  #endif  /* SUPPORT_UTF */  #define UTF32_MASK (0x1fffffu)
936    
937    /* Base macro to pick up an UTF-32 character out of a uint32 */
938    
939    #define MASKHIGHBITS(c) ((c) & UTF32_MASK)
940    
941    /* Base macro to pick up an UTF-32 character, not advancing the pointer */
942    
943    #define GETUTF32(eptr) (MASKHIGHBITS(*(eptr)))
944    
945    /* Base macro to pick up an UTF-32 character, advancing the pointer */
946    
947    #define GETUTF32INC(eptr) (MASKHIGHBITS(*((eptr)++)))
948    
949    /* Get the next UTF-32 character, not advancing the pointer. This is called when
950    we know we are in UTF-32 mode. */
951    
952    #define GETCHAR(c, eptr) \
953      c = GETUTF32(eptr);
954    
955    /* Get the next UTF-32 character, testing for UTF-32 mode, and not advancing the
956    pointer. */
957    
958    #define GETCHARTEST(c, eptr) \
959      c = (utf ? GETUTF32(eptr) : *(eptr));
960    
961    /* Get the next UTF-32 character, advancing the pointer. This is called when we
962    know we are in UTF-32 mode. */
963    
964    #define GETCHARINC(c, eptr) \
965      c = GETUTF32INC(eptr);
966    
967    /* Get the next character, testing for UTF-32 mode, and advancing the pointer.
968    This is called when we don't know if we are in UTF-32 mode. */
969    
970    #define GETCHARINCTEST(c, eptr) \
971      c = (utf ? GETUTF32INC(eptr) : *((eptr)++));
972    
973    /* Get the next UTF-32 character, not advancing the pointer, not incrementing
974    length (since all UTF-32 is of length 1). This is called when we know we are in
975    UTF-32 mode. */
976    
977    #define GETCHARLEN(c, eptr, len) \
978      GETCHAR(c, eptr)
979    
980    /* Get the next UTF-32 character, testing for UTF-32 mode, not advancing the
981    pointer, not incrementing the length (since all UTF-32 is of length 1).
982    This is called when we do not know if we are in UTF-32 mode. */
983    
984    #define GETCHARLENTEST(c, eptr, len) \
985      GETCHARTEST(c, eptr)
986    
987    /* Returns the next uchar, not advancing the pointer. This is called when
988    we know we are in UTF mode. */
989    
990    #define RAWUCHAR(eptr) \
991      (MASKHIGHBITS(*(eptr)))
992    
993    /* Returns the next uchar, advancing the pointer. This is called when
994    we know we are in UTF mode. */
995    
996    #define RAWUCHARINC(eptr) \
997      (MASKHIGHBITS(*((eptr)++)))
998    
999    /* Returns the next uchar, testing for UTF mode, and not advancing the
1000    pointer. */
1001    
1002    #define RAWUCHARTEST(eptr) \
1003      (utf ? (MASKHIGHBITS(*(eptr))) : *(eptr))
1004    
1005    /* Returns the next uchar, testing for UTF mode, advancing the
1006    pointer. */
1007    
1008    #define RAWUCHARINCTEST(eptr) \
1009      (utf ? (MASKHIGHBITS(*((eptr)++))) : *((eptr)++))
1010    
1011    /* If the pointer is not at the start of a character, move it back until
1012    it is. This is called only in UTF-32 mode - we don't put a test within the
1013    macro because almost all calls are already within a block of UTF-32 only
1014    code.
1015    These are all no-ops since all UTF-32 characters fit into one pcre_uchar. */
1016    
1017    #define BACKCHAR(eptr) do { } while (0)
1018    
1019    /* Same as above, just in the other direction. */
1020    #define FORWARDCHAR(eptr) do { } while (0)
1021    
1022    /* Same as above, but it allows a fully customizable form. */
1023    #define ACROSSCHAR(condition, eptr, action) do { } while (0)
1024    
1025    #else
1026    #error Unsupported compiling mode
1027    #endif /* COMPILE_PCRE[8|16|32] */
1028    
1029    #endif  /* SUPPORT_UTF */
1030    
1031  /* Tests for Unicode horizontal and vertical whitespace characters must check a  /* Tests for Unicode horizontal and vertical whitespace characters must check a
1032  number of different values. Using a switch statement for this generates the  number of different values. Using a switch statement for this generates the
# Line 935  other. NOTE: The values also appear in p Line 1131  other. NOTE: The values also appear in p
1131  /* ------ End of whitespace macros ------ */  /* ------ End of whitespace macros ------ */
1132    
1133    
 /* In case there is no definition of offsetof() provided - though any proper  
 Standard C system should have one. */  
   
 #ifndef offsetof  
 #define offsetof(p_type,field) ((size_t)&(((p_type *)0)->field))  
 #endif  
   
1134    
1135  /* Private flags containing information about the compiled regex. They used to  /* Private flags containing information about the compiled regex. They used to
1136  live at the top end of the options word, but that got almost full, so now they  live at the top end of the options word, but that got almost full, so now they
# Line 949  are in a 16-bit flags word. From release Line 1138  are in a 16-bit flags word. From release
1138  the restrictions on partial matching have been lifted. It remains for backwards  the restrictions on partial matching have been lifted. It remains for backwards
1139  compatibility. */  compatibility. */
1140    
1141  #ifdef COMPILE_PCRE8  #define PCRE_MODE8         0x0001  /* compiled in 8 bit mode */
1142  #define PCRE_MODE          0x0001  /* compiled in 8 bit mode */  #define PCRE_MODE16        0x0002  /* compiled in 16 bit mode */
1143  #endif  #define PCRE_MODE32        0x0004  /* compiled in 32 bit mode */
 #ifdef COMPILE_PCRE16  
 #define PCRE_MODE          0x0002  /* compiled in 16 bit mode */  
 #endif  
1144  #define PCRE_FIRSTSET      0x0010  /* first_char is set */  #define PCRE_FIRSTSET      0x0010  /* first_char is set */
1145  #define PCRE_FCH_CASELESS  0x0020  /* caseless first char */  #define PCRE_FCH_CASELESS  0x0020  /* caseless first char */
1146  #define PCRE_REQCHSET      0x0040  /* req_byte is set */  #define PCRE_REQCHSET      0x0040  /* req_byte is set */
# Line 965  compatibility. */ Line 1151  compatibility. */
1151  #define PCRE_HASCRORLF     0x0800  /* explicit \r or \n in pattern */  #define PCRE_HASCRORLF     0x0800  /* explicit \r or \n in pattern */
1152  #define PCRE_HASTHEN       0x1000  /* pattern contains (*THEN) */  #define PCRE_HASTHEN       0x1000  /* pattern contains (*THEN) */
1153    
1154    #if defined COMPILE_PCRE8
1155    #define PCRE_MODE          PCRE_MODE8
1156    #elif defined COMPILE_PCRE16
1157    #define PCRE_MODE          PCRE_MODE16
1158    #elif defined COMPILE_PCRE32
1159    #define PCRE_MODE          PCRE_MODE32
1160    #endif
1161    #define PCRE_MODE_MASK     (PCRE_MODE8 | PCRE_MODE16 | PCRE_MODE32)
1162    
1163  /* Flags for the "extra" block produced by pcre_study(). */  /* Flags for the "extra" block produced by pcre_study(). */
1164    
1165  #define PCRE_STUDY_MAPPED  0x0001  /* a map of starting chars exists */  #define PCRE_STUDY_MAPPED  0x0001  /* a map of starting chars exists */
# Line 998  time, run time, or study time, respectiv Line 1193  time, run time, or study time, respectiv
1193     (PCRE_STUDY_JIT_COMPILE|PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE| \     (PCRE_STUDY_JIT_COMPILE|PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE| \
1194      PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE|PCRE_STUDY_EXTRA_NEEDED)      PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE|PCRE_STUDY_EXTRA_NEEDED)
1195    
1196    #define PUBLIC_JIT_EXEC_OPTIONS \
1197       (PCRE_NO_UTF8_CHECK|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|\
1198        PCRE_NOTEMPTY_ATSTART|PCRE_PARTIAL_SOFT|PCRE_PARTIAL_HARD)
1199    
1200  /* Magic number to provide a small check against being handed junk. */  /* Magic number to provide a small check against being handed junk. */
1201    
1202  #define MAGIC_NUMBER  0x50435245UL   /* 'PCRE' */  #define MAGIC_NUMBER  0x50435245UL   /* 'PCRE' */
# Line 1007  in different endianness. */ Line 1206  in different endianness. */
1206    
1207  #define REVERSED_MAGIC_NUMBER  0x45524350UL   /* 'ERCP' */  #define REVERSED_MAGIC_NUMBER  0x45524350UL   /* 'ERCP' */
1208    
 /* Negative values for the firstchar and reqchar variables */  
   
 #define REQ_UNSET (-2)  
 #define REQ_NONE  (-1)  
   
1209  /* The maximum remaining length of subject we are prepared to search for a  /* The maximum remaining length of subject we are prepared to search for a
1210  req_byte match. */  req_byte match. */
1211    
# Line 1106  a positive value. */ Line 1300  a positive value. */
1300    
1301  /* The remaining definitions work in both environments. */  /* The remaining definitions work in both environments. */
1302    
1303    #define CHAR_NULL                   '\0'
1304  #define CHAR_HT                     '\t'  #define CHAR_HT                     '\t'
1305  #define CHAR_VT                     '\v'  #define CHAR_VT                     '\v'
1306  #define CHAR_FF                     '\f'  #define CHAR_FF                     '\f'
# Line 1351  a positive value. */ Line 1546  a positive value. */
1546  #ifdef COMPILE_PCRE16  #ifdef COMPILE_PCRE16
1547  #define STRING_UTF_RIGHTPAR            "UTF16)"  #define STRING_UTF_RIGHTPAR            "UTF16)"
1548  #endif  #endif
1549    #ifdef COMPILE_PCRE32
1550    #define STRING_UTF_RIGHTPAR            "UTF32)"
1551    #endif
1552  #define STRING_UCP_RIGHTPAR            "UCP)"  #define STRING_UCP_RIGHTPAR            "UCP)"
1553  #define STRING_NO_START_OPT_RIGHTPAR   "NO_START_OPT)"  #define STRING_NO_START_OPT_RIGHTPAR   "NO_START_OPT)"
1554    
# Line 1372  only. */ Line 1570  only. */
1570  #define CHAR_ESC                    '\033'  #define CHAR_ESC                    '\033'
1571  #define CHAR_DEL                    '\177'  #define CHAR_DEL                    '\177'
1572    
1573    #define CHAR_NULL                   '\0'
1574  #define CHAR_SPACE                  '\040'  #define CHAR_SPACE                  '\040'
1575  #define CHAR_EXCLAMATION_MARK       '\041'  #define CHAR_EXCLAMATION_MARK       '\041'
1576  #define CHAR_QUOTATION_MARK         '\042'  #define CHAR_QUOTATION_MARK         '\042'
# Line 1613  only. */ Line 1812  only. */
1812  #ifdef COMPILE_PCRE16  #ifdef COMPILE_PCRE16
1813  #define STRING_UTF_RIGHTPAR            STR_U STR_T STR_F STR_1 STR_6 STR_RIGHT_PARENTHESIS  #define STRING_UTF_RIGHTPAR            STR_U STR_T STR_F STR_1 STR_6 STR_RIGHT_PARENTHESIS
1814  #endif  #endif
1815    #ifdef COMPILE_PCRE32
1816    #define STRING_UTF_RIGHTPAR            STR_U STR_T STR_F STR_3 STR_2 STR_RIGHT_PARENTHESIS
1817    #endif
1818  #define STRING_UCP_RIGHTPAR            STR_U STR_C STR_P STR_RIGHT_PARENTHESIS  #define STRING_UCP_RIGHTPAR            STR_U STR_C STR_P STR_RIGHT_PARENTHESIS
1819  #define STRING_NO_START_OPT_RIGHTPAR   STR_N STR_O STR_UNDERSCORE STR_S STR_T STR_A STR_R STR_T STR_UNDERSCORE STR_O STR_P STR_T STR_RIGHT_PARENTHESIS  #define STRING_NO_START_OPT_RIGHTPAR   STR_N STR_O STR_UNDERSCORE STR_S STR_T STR_A STR_R STR_T STR_UNDERSCORE STR_O STR_P STR_T STR_RIGHT_PARENTHESIS
1820    
# Line 1654  only. */ Line 1856  only. */
1856  #define PT_SPACE      6    /* Perl space - Z plus 9,10,12,13 */  #define PT_SPACE      6    /* Perl space - Z plus 9,10,12,13 */
1857  #define PT_PXSPACE    7    /* POSIX space - Z plus 9,10,11,12,13 */  #define PT_PXSPACE    7    /* POSIX space - Z plus 9,10,11,12,13 */
1858  #define PT_WORD       8    /* Word - L plus N plus underscore */  #define PT_WORD       8    /* Word - L plus N plus underscore */
1859    #define PT_CLIST      9    /* Pseudo-property: match character list */
1860    
1861  /* Flag bits and data types for the extended class (OP_XCLASS) for classes that  /* Flag bits and data types for the extended class (OP_XCLASS) for classes that
1862  contain characters with values greater than 255. */  contain characters with values greater than 255. */
# Line 1669  contain characters with values greater t Line 1872  contain characters with values greater t
1872    
1873  /* These are escaped items that aren't just an encoding of a particular data  /* These are escaped items that aren't just an encoding of a particular data
1874  value such as \n. They must have non-zero values, as check_escape() returns  value such as \n. They must have non-zero values, as check_escape() returns
1875  their negation. Also, they must appear in the same order as in the opcode  0 for a data character.  Also, they must appear in the same order as in the opcode
1876  definitions below, up to ESC_z. There's a dummy for OP_ALLANY because it  definitions below, up to ESC_z. There's a dummy for OP_ALLANY because it
1877  corresponds to "." in DOTALL mode rather than an escape sequence. It is also  corresponds to "." in DOTALL mode rather than an escape sequence. It is also
1878  used for [^] in JavaScript compatibility mode, and for \C in non-utf mode. In  used for [^] in JavaScript compatibility mode, and for \C in non-utf mode. In
1879  non-DOTALL mode, "." behaves like \N.  non-DOTALL mode, "." behaves like \N.
1880    
1881  The special values ESC_DU, ESC_du, etc. are used instead of ESC_D, ESC_d, etc.  The special values ESC_DU, ESC_du, etc. are used instead of ESC_D, ESC_d, etc.
1882  when PCRE_UCP is set, when replacement of \d etc by \p sequences is required.  when PCRE_UCP is set and replacement of \d etc by \p sequences is required.
1883  They must be contiguous, and remain in order so that the replacements can be  They must be contiguous, and remain in order so that the replacements can be
1884  looked up from a table.  looked up from a table.
1885    
1886  The final escape must be ESC_REF as subsequent values are used for  Negative numbers are used to encode a backreference (\1, \2, \3, etc.) in
1887  backreferences (\1, \2, \3, etc). There are two tests in the code for an escape  check_escape(). There are two tests in the code for an escape
1888  greater than ESC_b and less than ESC_Z to detect the types that may be  greater than ESC_b and less than ESC_Z to detect the types that may be
1889  repeated. These are the types that consume characters. If any new escapes are  repeated. These are the types that consume characters. If any new escapes are
1890  put in between that don't consume a character, that code will have to change.  put in between that don't consume a character, that code will have to change.
# Line 1691  enum { ESC_A = 1, ESC_G, ESC_K, ESC_B, E Line 1894  enum { ESC_A = 1, ESC_G, ESC_K, ESC_B, E
1894         ESC_W, ESC_w, ESC_N, ESC_dum, ESC_C, ESC_P, ESC_p, ESC_R, ESC_H,         ESC_W, ESC_w, ESC_N, ESC_dum, ESC_C, ESC_P, ESC_p, ESC_R, ESC_H,
1895         ESC_h, ESC_V, ESC_v, ESC_X, ESC_Z, ESC_z,         ESC_h, ESC_V, ESC_v, ESC_X, ESC_Z, ESC_z,
1896         ESC_E, ESC_Q, ESC_g, ESC_k,         ESC_E, ESC_Q, ESC_g, ESC_k,
1897         ESC_DU, ESC_du, ESC_SU, ESC_su, ESC_WU, ESC_wu,         ESC_DU, ESC_du, ESC_SU, ESC_su, ESC_WU, ESC_wu };
        ESC_REF };  
1898    
1899  /* Opcode table: Starting from 1 (i.e. after OP_END), the values up to  /* Opcode table: Starting from 1 (i.e. after OP_END), the values up to
1900  OP_EOD must correspond in order to the list of escapes immediately above.  OP_EOD must correspond in order to the list of escapes immediately above.
# Line 1718  enum { Line 1920  enum {
1920    OP_NOT_WORDCHAR,       /* 10 \W */    OP_NOT_WORDCHAR,       /* 10 \W */
1921    OP_WORDCHAR,           /* 11 \w */    OP_WORDCHAR,           /* 11 \w */
1922    
1923    OP_ANY,            /* 12 Match any character except newline */    OP_ANY,            /* 12 Match any character except newline (\N) */
1924    OP_ALLANY,         /* 13 Match any character */    OP_ALLANY,         /* 13 Match any character */
1925    OP_ANYBYTE,        /* 14 Match any byte (\C); different to OP_ANY for UTF-8 */    OP_ANYBYTE,        /* 14 Match any byte (\C); different to OP_ANY for UTF-8 */
1926    OP_NOTPROP,        /* 15 \P (not Unicode property) */    OP_NOTPROP,        /* 15 \P (not Unicode property) */
# Line 1729  enum { Line 1931  enum {
1931    OP_NOT_VSPACE,     /* 20 \V (not vertical whitespace) */    OP_NOT_VSPACE,     /* 20 \V (not vertical whitespace) */
1932    OP_VSPACE,         /* 21 \v (vertical whitespace) */    OP_VSPACE,         /* 21 \v (vertical whitespace) */
1933    OP_EXTUNI,         /* 22 \X (extended Unicode sequence) */    OP_EXTUNI,         /* 22 \X (extended Unicode sequence) */
1934    OP_EODN,           /* 23 End of data or \n at end of data: \Z. */    OP_EODN,           /* 23 End of data or \n at end of data (\Z) */
1935    OP_EOD,            /* 24 End of data: \z */    OP_EOD,            /* 24 End of data (\z) */
1936    
1937    OP_CIRC,           /* 25 Start of line - not multiline */    OP_CIRC,           /* 25 Start of line - not multiline */
1938    OP_CIRCM,          /* 26 Start of line - multiline */    OP_CIRCM,          /* 26 Start of line - multiline */
# Line 2090  enum { ERR0,  ERR1,  ERR2,  ERR3,  ERR4, Line 2292  enum { ERR0,  ERR1,  ERR2,  ERR3,  ERR4,
2292         ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49,         ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49,
2293         ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59,         ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59,
2294         ERR60, ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69,         ERR60, ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69,
2295         ERR70, ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERRCOUNT };         ERR70, ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERRCOUNT };
2296    
2297  /* JIT compiling modes. The function list is indexed by them. */  /* JIT compiling modes. The function list is indexed by them. */
2298  enum { JIT_COMPILE, JIT_PARTIAL_SOFT_COMPILE, JIT_PARTIAL_HARD_COMPILE,  enum { JIT_COMPILE, JIT_PARTIAL_SOFT_COMPILE, JIT_PARTIAL_HARD_COMPILE,
# Line 2113  fields are present. Currently PCRE alway Line 2315  fields are present. Currently PCRE alway
2315  NOTE NOTE NOTE  NOTE NOTE NOTE
2316  */  */
2317    
2318  #ifdef COMPILE_PCRE8  #if defined COMPILE_PCRE8
2319  #define REAL_PCRE real_pcre  #define REAL_PCRE real_pcre
2320  #else  #elif defined COMPILE_PCRE16
2321  #define REAL_PCRE real_pcre16  #define REAL_PCRE real_pcre16
2322    #elif defined COMPILE_PCRE32
2323    #define REAL_PCRE real_pcre32
2324  #endif  #endif
2325    
2326  typedef struct REAL_PCRE {  /* It is necessary to fork the struct for 32 bit, since it needs to use
2327     * pcre_uchar for first_char and req_char. Can't put an ifdef inside the
2328     * typedef since pcretest needs access to  the struct of the 8-, 16-
2329     * and 32-bit variants. */
2330    
2331    typedef struct real_pcre8_or_16 {
2332    pcre_uint32 magic_number;    pcre_uint32 magic_number;
2333    pcre_uint32 size;               /* Total that was malloced */    pcre_uint32 size;               /* Total that was malloced */
2334    pcre_uint32 options;            /* Public options */    pcre_uint32 options;            /* Public options */
# Line 2135  typedef struct REAL_PCRE { Line 2344  typedef struct REAL_PCRE {
2344    pcre_uint16 ref_count;          /* Reference count */    pcre_uint16 ref_count;          /* Reference count */
2345    const pcre_uint8 *tables;       /* Pointer to tables or NULL for std */    const pcre_uint8 *tables;       /* Pointer to tables or NULL for std */
2346    const pcre_uint8 *nullpad;      /* NULL padding */    const pcre_uint8 *nullpad;      /* NULL padding */
2347  } REAL_PCRE;  } real_pcre8_or_16;
2348    
2349    typedef struct real_pcre8_or_16 real_pcre;
2350    typedef struct real_pcre8_or_16 real_pcre16;
2351    
2352    typedef struct real_pcre32 {
2353      pcre_uint32 magic_number;
2354      pcre_uint32 size;               /* Total that was malloced */
2355      pcre_uint32 options;            /* Public options */
2356      pcre_uint16 flags;              /* Private flags */
2357      pcre_uint16 max_lookbehind;     /* Longest lookbehind (characters) */
2358      pcre_uint16 top_bracket;        /* Highest numbered group */
2359      pcre_uint16 top_backref;        /* Highest numbered back reference */
2360      pcre_uint32 first_char;         /* Starting character */
2361      pcre_uint32 req_char;           /* This character must be seen */
2362      pcre_uint16 name_table_offset;  /* Offset to name table that follows */
2363      pcre_uint16 name_entry_size;    /* Size of any name items */
2364      pcre_uint16 name_count;         /* Number of name items */
2365      pcre_uint16 ref_count;          /* Reference count */
2366      pcre_uint16 dummy1;             /* for later expansion */
2367      pcre_uint16 dummy2;             /* for later expansion */
2368      const pcre_uint8 *tables;       /* Pointer to tables or NULL for std */
2369      void *nullpad;                  /* for later expansion */
2370    } real_pcre32;
2371    
2372    /* Assert that the size of REAL_PCRE is divisible by 8 */
2373    typedef int __assert_real_pcre_size_divisible_8[(sizeof(REAL_PCRE) % 8) == 0 ? 1 : -1];
2374    
2375    /* Needed in pcretest to access some fields in the real_pcre* structures
2376     * directly. They're unified for 8/16/32 bits since the structs only differ
2377     * after these fields; if that ever changes, these defines must be forked into
2378     * separate 8/16 and 32 bit versions. */
2379    #define REAL_PCRE_MAGIC(re)     (((REAL_PCRE*)re)->magic_number)
2380    #define REAL_PCRE_SIZE(re)      (((REAL_PCRE*)re)->size)
2381    #define REAL_PCRE_OPTIONS(re)   (((REAL_PCRE*)re)->options)
2382    #define REAL_PCRE_FLAGS(re)     (((REAL_PCRE*)re)->flags)
2383    
2384  /* The format of the block used to store data from pcre_study(). The same  /* The format of the block used to store data from pcre_study(). The same
2385  remark (see NOTE above) about extending this structure applies. */  remark (see NOTE above) about extending this structure applies. */
# Line 2176  typedef struct compile_data { Line 2420  typedef struct compile_data {
2420    int  names_found;                 /* Number of entries so far */    int  names_found;                 /* Number of entries so far */
2421    int  name_entry_size;             /* Size of each entry */    int  name_entry_size;             /* Size of each entry */
2422    int  workspace_size;              /* Size of workspace */    int  workspace_size;              /* Size of workspace */
2423    int  bracount;                    /* Count of capturing parens as we compile */    unsigned int  bracount;           /* Count of capturing parens as we compile */
2424    int  final_bracount;              /* Saved value after first pass */    int  final_bracount;              /* Saved value after first pass */
2425    int  max_lookbehind;              /* Maximum lookbehind (characters) */    int  max_lookbehind;              /* Maximum lookbehind (characters) */
2426    int  top_backref;                 /* Maximum back reference */    int  top_backref;                 /* Maximum back reference */
# Line 2206  call within the pattern; used by pcre_ex Line 2450  call within the pattern; used by pcre_ex
2450    
2451  typedef struct recursion_info {  typedef struct recursion_info {
2452    struct recursion_info *prevrec; /* Previous recursion record (or NULL) */    struct recursion_info *prevrec; /* Previous recursion record (or NULL) */
2453    int group_num;                  /* Number of group that was called */    unsigned int group_num;         /* Number of group that was called */
2454    int *offset_save;               /* Pointer to start of saved offsets */    int *offset_save;               /* Pointer to start of saved offsets */
2455    int saved_max;                  /* Number of saved offsets */    int saved_max;                  /* Number of saved offsets */
2456    PCRE_PUCHAR subject_position;   /* Position at start of recursion */    PCRE_PUCHAR subject_position;   /* Position at start of recursion */
# Line 2340  total length. */ Line 2584  total length. */
2584    
2585  /* Internal function and data prefixes. */  /* Internal function and data prefixes. */
2586    
2587  #ifdef COMPILE_PCRE8  #if defined COMPILE_PCRE8
2588  #ifndef PUBL  #ifndef PUBL
2589  #define PUBL(name) pcre_##name  #define PUBL(name) pcre_##name
2590  #endif  #endif
2591  #ifndef PRIV  #ifndef PRIV
2592  #define PRIV(name) _pcre_##name  #define PRIV(name) _pcre_##name
2593  #endif  #endif
2594  #else /* COMPILE_PCRE8 */  #elif defined COMPILE_PCRE16
 #ifdef COMPILE_PCRE16  
2595  #ifndef PUBL  #ifndef PUBL
2596  #define PUBL(name) pcre16_##name  #define PUBL(name) pcre16_##name
2597  #endif  #endif
2598  #ifndef PRIV  #ifndef PRIV
2599  #define PRIV(name) _pcre16_##name  #define PRIV(name) _pcre16_##name
2600  #endif  #endif
2601    #elif defined COMPILE_PCRE32
2602    #ifndef PUBL
2603    #define PUBL(name) pcre32_##name
2604    #endif
2605    #ifndef PRIV
2606    #define PRIV(name) _pcre32_##name
2607    #endif
2608  #else  #else
2609  #error Unsupported compiling mode  #error Unsupported compiling mode
2610  #endif /* COMPILE_PCRE16 */  #endif /* COMPILE_PCRE[8|16|32] */
 #endif /* COMPILE_PCRE8 */  
2611    
2612  /* Layout of the UCP type table that translates property names into types and  /* Layout of the UCP type table that translates property names into types and
2613  codes. Each entry used to point directly to a name, but to reduce the number of  codes. Each entry used to point directly to a name, but to reduce the number of
# Line 2401  one of the exported public functions. Th Line 2650  one of the exported public functions. Th
2650  sense, but are not part of the PCRE public API. */  sense, but are not part of the PCRE public API. */
2651    
2652  /* String comparison functions. */  /* String comparison functions. */
2653  #ifdef COMPILE_PCRE8  #if defined COMPILE_PCRE8
2654    
2655  #define STRCMP_UC_UC(str1, str2) \  #define STRCMP_UC_UC(str1, str2) \
2656    strcmp((char *)(str1), (char *)(str2))    strcmp((char *)(str1), (char *)(str2))
# Line 2413  sense, but are not part of the PCRE publ Line 2662  sense, but are not part of the PCRE publ
2662    strncmp((char *)(str1), (str2), (num))    strncmp((char *)(str1), (str2), (num))
2663  #define STRLEN_UC(str) strlen((const char *)str)  #define STRLEN_UC(str) strlen((const char *)str)
2664    
2665  #else  #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2666    
2667  extern int               PRIV(strcmp_uc_uc)(const pcre_uchar *,  extern int               PRIV(strcmp_uc_uc)(const pcre_uchar *,
2668                             const pcre_uchar *);                             const pcre_uchar *);
# Line 2435  extern unsigned int      PRIV(strlen_uc) Line 2684  extern unsigned int      PRIV(strlen_uc)
2684    PRIV(strncmp_uc_c8)((str1), (str2), (num))    PRIV(strncmp_uc_c8)((str1), (str2), (num))
2685  #define STRLEN_UC(str) PRIV(strlen_uc)(str)  #define STRLEN_UC(str) PRIV(strlen_uc)(str)
2686    
2687  #endif /* COMPILE_PCRE8 */  #endif /* COMPILE_PCRE[8|16|32] */
2688    
2689    #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
2690    
2691    #define STRCMP_UC_UC_TEST(str1, str2) STRCMP_UC_UC(str1, str2)
2692    #define STRCMP_UC_C8_TEST(str1, str2) STRCMP_UC_C8(str1, str2)
2693    
2694    #elif defined COMPILE_PCRE32
2695    
2696    extern int               PRIV(strcmp_uc_uc_utf)(const pcre_uchar *,
2697                               const pcre_uchar *);
2698    extern int               PRIV(strcmp_uc_c8_utf)(const pcre_uchar *,
2699                               const char *);
2700    
2701    #define STRCMP_UC_UC_TEST(str1, str2) \
2702      (utf ? PRIV(strcmp_uc_uc_utf)((str1), (str2)) : PRIV(strcmp_uc_uc)((str1), (str2)))
2703    #define STRCMP_UC_C8_TEST(str1, str2) \
2704      (utf ? PRIV(strcmp_uc_c8_utf)((str1), (str2)) : PRIV(strcmp_uc_c8)((str1), (str2)))
2705    
2706    #endif /* COMPILE_PCRE[8|16|32] */
2707    
2708  extern const pcre_uchar *PRIV(find_bracket)(const pcre_uchar *, BOOL, int);  extern const pcre_uchar *PRIV(find_bracket)(const pcre_uchar *, BOOL, int);
2709  extern BOOL              PRIV(is_newline)(PCRE_PUCHAR, int, PCRE_PUCHAR,  extern BOOL              PRIV(is_newline)(PCRE_PUCHAR, int, PCRE_PUCHAR,
2710                             int *, BOOL);                             int *, BOOL);
2711  extern int               PRIV(ord2utf)(pcre_uint32, pcre_uchar *);  extern unsigned int      PRIV(ord2utf)(pcre_uint32, pcre_uchar *);
2712  extern int               PRIV(valid_utf)(PCRE_PUCHAR, int, int *);  extern int               PRIV(valid_utf)(PCRE_PUCHAR, int, int *);
2713  extern BOOL              PRIV(was_newline)(PCRE_PUCHAR, int, PCRE_PUCHAR,  extern BOOL              PRIV(was_newline)(PCRE_PUCHAR, int, PCRE_PUCHAR,
2714                             int *, BOOL);                             int *, BOOL);
2715  extern BOOL              PRIV(xclass)(int, const pcre_uchar *, BOOL);  extern BOOL              PRIV(xclass)(pcre_uint32, const pcre_uchar *, BOOL);
2716    
2717  #ifdef SUPPORT_JIT  #ifdef SUPPORT_JIT
2718  extern void              PRIV(jit_compile)(const REAL_PCRE *,  extern void              PRIV(jit_compile)(const REAL_PCRE *,
2719                             PUBL(extra) *, int);                             PUBL(extra) *, int);
2720  extern int               PRIV(jit_exec)(const REAL_PCRE *, const PUBL(extra) *,  extern int               PRIV(jit_exec)(const PUBL(extra) *,
2721                             const pcre_uchar *, int, int, int, int *, int);                             const pcre_uchar *, int, int, int, int *, int);
2722  extern void              PRIV(jit_free)(void *);  extern void              PRIV(jit_free)(void *);
2723  extern int               PRIV(jit_get_size)(void *);  extern int               PRIV(jit_get_size)(void *);
# Line 2470  extern const pcre_uint32 PRIV(ucd_casele Line 2738  extern const pcre_uint32 PRIV(ucd_casele
2738  extern const ucd_record  PRIV(ucd_records)[];  extern const ucd_record  PRIV(ucd_records)[];
2739  extern const pcre_uint8  PRIV(ucd_stage1)[];  extern const pcre_uint8  PRIV(ucd_stage1)[];
2740  extern const pcre_uint16 PRIV(ucd_stage2)[];  extern const pcre_uint16 PRIV(ucd_stage2)[];
2741  extern const int         PRIV(ucp_gentype)[];  extern const pcre_uint32 PRIV(ucp_gentype)[];
2742  extern const pcre_uint32 PRIV(ucp_gbtable)[];  extern const pcre_uint32 PRIV(ucp_gbtable)[];
2743  #ifdef SUPPORT_JIT  #ifdef SUPPORT_JIT
2744  extern const int         PRIV(ucp_typerange)[];  extern const int         PRIV(ucp_typerange)[];
# Line 2481  extern const int         PRIV(ucp_typera Line 2749  extern const int         PRIV(ucp_typera
2749    
2750  #define UCD_BLOCK_SIZE 128  #define UCD_BLOCK_SIZE 128
2751  #define GET_UCD(ch) (PRIV(ucd_records) + \  #define GET_UCD(ch) (PRIV(ucd_records) + \
2752          PRIV(ucd_stage2)[PRIV(ucd_stage1)[(ch) / UCD_BLOCK_SIZE] * \          PRIV(ucd_stage2)[PRIV(ucd_stage1)[(int)(ch) / UCD_BLOCK_SIZE] * \
2753          UCD_BLOCK_SIZE + (ch) % UCD_BLOCK_SIZE])          UCD_BLOCK_SIZE + (int)(ch) % UCD_BLOCK_SIZE])
2754    
2755  #define UCD_CHARTYPE(ch)    GET_UCD(ch)->chartype  #define UCD_CHARTYPE(ch)    GET_UCD(ch)->chartype
2756  #define UCD_SCRIPT(ch)      GET_UCD(ch)->script  #define UCD_SCRIPT(ch)      GET_UCD(ch)->script
2757  #define UCD_CATEGORY(ch)    PRIV(ucp_gentype)[UCD_CHARTYPE(ch)]  #define UCD_CATEGORY(ch)    PRIV(ucp_gentype)[UCD_CHARTYPE(ch)]
2758  #define UCD_GRAPHBREAK(ch)  GET_UCD(ch)->gbprop  #define UCD_GRAPHBREAK(ch)  GET_UCD(ch)->gbprop
2759  #define UCD_CASESET(ch)     GET_UCD(ch)->caseset  #define UCD_CASESET(ch)     GET_UCD(ch)->caseset
2760  #define UCD_OTHERCASE(ch)   (ch + GET_UCD(ch)->other_case)  #define UCD_OTHERCASE(ch)   ((pcre_uint32)((int)ch + (int)(GET_UCD(ch)->other_case)))
2761    
2762  #endif /* SUPPORT_UCP */  #endif /* SUPPORT_UCP */
2763    

Legend:
Removed from v.1045  
changed lines
  Added in v.1198

  ViewVC Help
Powered by ViewVC 1.1.5