/[pcre]/code/trunk/pcre_internal.h
ViewVC logotype

Diff of /code/trunk/pcre_internal.h

Parent Directory | Revision Log | View Patch

revision 1083 by chpe, Tue Oct 16 15:55:24 2012 UTC revision 1151 by chpe, Sun Oct 21 16:53:46 2012 UTC
# Line 53  depending on the PRIV macro. */ Line 53  depending on the PRIV macro. */
53  #endif  #endif
54    
55  /* PCRE is compiled as an 8 bit library if it is not requested otherwise. */  /* PCRE is compiled as an 8 bit library if it is not requested otherwise. */
56  #if !defined COMPILE_PCRE16 && ! defined COMPILE_PCRE32  
57    #if !defined COMPILE_PCRE16 && !defined COMPILE_PCRE32
58  #define COMPILE_PCRE8  #define COMPILE_PCRE8
59  #endif  #endif
60    
# Line 111  setjmp and stdarg are used is when NO_RE Line 112  setjmp and stdarg are used is when NO_RE
112  #include <stdlib.h>  #include <stdlib.h>
113  #include <string.h>  #include <string.h>
114    
115    /* Valgrind (memcheck) support */
116    
117    #ifdef SUPPORT_VALGRIND
118    #include <valgrind/memcheck.h>
119    #endif
120    
121  /* When compiling a DLL for Windows, the exported symbols have to be declared  /* When compiling a DLL for Windows, the exported symbols have to be declared
122  using some MS magic. I found some useful information on this web page:  using some MS magic. I found some useful information on this web page:
123  http://msdn2.microsoft.com/en-us/library/y4h7bcy6(VS.80).aspx. According to the  http://msdn2.microsoft.com/en-us/library/y4h7bcy6(VS.80).aspx. According to the
# Line 304  start/end of string field names are. */ Line 311  start/end of string field names are. */
311         &(NLBLOCK->nllen), utf)) \         &(NLBLOCK->nllen), utf)) \
312      : \      : \
313      ((p) <= NLBLOCK->PSEND - NLBLOCK->nllen && \      ((p) <= NLBLOCK->PSEND - NLBLOCK->nllen && \
314       (p)[0] == NLBLOCK->nl[0] && \       RAWUCHARTEST(p) == NLBLOCK->nl[0] && \
315       (NLBLOCK->nllen == 1 || (p)[1] == NLBLOCK->nl[1]) \       (NLBLOCK->nllen == 1 || RAWUCHARTEST(p+1) == NLBLOCK->nl[1])       \
316      ) \      ) \
317    )    )
318    
# Line 318  start/end of string field names are. */ Line 325  start/end of string field names are. */
325         &(NLBLOCK->nllen), utf)) \         &(NLBLOCK->nllen), utf)) \
326      : \      : \
327      ((p) >= NLBLOCK->PSSTART + NLBLOCK->nllen && \      ((p) >= NLBLOCK->PSSTART + NLBLOCK->nllen && \
328       (p)[-NLBLOCK->nllen] == NLBLOCK->nl[0] && \       RAWUCHARTEST(p - NLBLOCK->nllen) == NLBLOCK->nl[0] &&              \
329       (NLBLOCK->nllen == 1 || (p)[-NLBLOCK->nllen+1] == NLBLOCK->nl[1]) \       (NLBLOCK->nllen == 1 || RAWUCHARTEST(p - NLBLOCK->nllen + 1) == NLBLOCK->nl[1]) \
330      ) \      ) \
331    )    )
332    
# Line 522  capturing parenthesis numbers in back re Line 529  capturing parenthesis numbers in back re
529  #define PUT2(a,n,d)   \  #define PUT2(a,n,d)   \
530    a[n] = (d) >> 8; \    a[n] = (d) >> 8; \
531    a[(n)+1] = (d) & 255    a[(n)+1] = (d) & 255
532    
533    /* The expression in this GET2 macro is treated by gcc as a signed expression,
534    even when a is declared as unsigned: C's integer promotions convert the narrow
535    unsigned operands to int before the shift and OR, so the result is cast back. */
536    
537  #define GET2(a,n) \  #define GET2(a,n) \
538    (((a)[n] << 8) | (a)[(n)+1])    (unsigned int)(((a)[n] << 8) | (a)[(n)+1])
539    
540  #elif defined COMPILE_PCRE16  #elif defined COMPILE_PCRE16
541    
# Line 579  we don't even define them. */ Line 590  we don't even define them. */
590  #define GETCHARINC(c, eptr) c = *eptr++;  #define GETCHARINC(c, eptr) c = *eptr++;
591  #define GETCHARINCTEST(c, eptr) c = *eptr++;  #define GETCHARINCTEST(c, eptr) c = *eptr++;
592  #define GETCHARLEN(c, eptr, len) c = *eptr;  #define GETCHARLEN(c, eptr, len) c = *eptr;
593    #define RAWUCHAR(eptr) (*(eptr))
594    #define RAWUCHARINC(eptr) (*(eptr)++)
595    #define RAWUCHARTEST(eptr) (*(eptr))
596    #define RAWUCHARINCTEST(eptr) (*(eptr)++)
597  /* #define GETCHARLENTEST(c, eptr, len) */  /* #define GETCHARLENTEST(c, eptr, len) */
598  /* #define BACKCHAR(eptr) */  /* #define BACKCHAR(eptr) */
599  /* #define FORWARDCHAR(eptr) */  /* #define FORWARDCHAR(eptr) */
# Line 586  we don't even define them. */ Line 601  we don't even define them. */
601    
602  #else   /* SUPPORT_UTF */  #else   /* SUPPORT_UTF */
603    
 #if defined COMPILE_PCRE8  
   
 /* These macros were originally written in the form of loops that used data  
 from the tables whose names start with PRIV(utf8_table). They were rewritten by  
 a user so as not to use loops, because in some environments this gives a  
 significant performance advantage, and it seems never to do any harm. */  
   
 /* Tells the biggest code point which can be encoded as a single character. */  
   
 #define MAX_VALUE_FOR_SINGLE_CHAR 127  
   
604  /* Tests whether the code point needs extra characters to decode. */  /* Tests whether the code point needs extra characters to decode. */
605    
606  #define HAS_EXTRALEN(c) ((c) >= 0xc0)  #define HASUTF8EXTRALEN(c) ((c) >= 0xc0)
   
 /* Returns with the additional number of characters if IS_MULTICHAR(c) is TRUE.  
 Otherwise it has an undefined behaviour. */  
   
 #define GET_EXTRALEN(c) (PRIV(utf8_table4)[(c) & 0x3f])  
   
 /* Returns TRUE, if the given character is not the first character  
 of a UTF sequence. */  
   
 #define NOT_FIRSTCHAR(c) (((c) & 0xc0) == 0x80)  
607    
608  /* Base macro to pick up the remaining bytes of a UTF-8 character, not  /* Base macro to pick up the remaining bytes of a UTF-8 character, not
609  advancing the pointer. */  advancing the pointer. */
# Line 633  advancing the pointer. */ Line 627  advancing the pointer. */
627            ((eptr[4] & 0x3f) << 6) | (eptr[5] & 0x3f); \            ((eptr[4] & 0x3f) << 6) | (eptr[5] & 0x3f); \
628      }      }
629    
 /* Get the next UTF-8 character, not advancing the pointer. This is called when  
 we know we are in UTF-8 mode. */  
   
 #define GETCHAR(c, eptr) \  
   c = *eptr; \  
   if (c >= 0xc0) GETUTF8(c, eptr);  
   
 /* Get the next UTF-8 character, testing for UTF-8 mode, and not advancing the  
 pointer. */  
   
 #define GETCHARTEST(c, eptr) \  
   c = *eptr; \  
   if (utf && c >= 0xc0) GETUTF8(c, eptr);  
   
630  /* Base macro to pick up the remaining bytes of a UTF-8 character, advancing  /* Base macro to pick up the remaining bytes of a UTF-8 character, advancing
631  the pointer. */  the pointer. */
632    
# Line 681  the pointer. */ Line 661  the pointer. */
661        } \        } \
662      }      }
663    
664    #if defined COMPILE_PCRE8
665    
666    /* These macros were originally written in the form of loops that used data
667    from the tables whose names start with PRIV(utf8_table). They were rewritten by
668    a user so as not to use loops, because in some environments this gives a
669    significant performance advantage, and it seems never to do any harm. */
670    
671    /* Tells the biggest code point which can be encoded as a single character. */
672    
673    #define MAX_VALUE_FOR_SINGLE_CHAR 127
674    
675    /* Tests whether the code point needs extra characters to decode. */
676    
677    #define HAS_EXTRALEN(c) ((c) >= 0xc0)
678    
679    /* Returns the number of additional characters if HAS_EXTRALEN(c) is TRUE.
680    Otherwise the behaviour is undefined. */
681    
682    #define GET_EXTRALEN(c) (PRIV(utf8_table4)[(c) & 0x3f])
683    
684    /* Returns TRUE, if the given character is not the first character
685    of a UTF sequence. */
686    
687    #define NOT_FIRSTCHAR(c) (((c) & 0xc0) == 0x80)
688    
689    /* Get the next UTF-8 character, not advancing the pointer. This is called when
690    we know we are in UTF-8 mode. */
691    
692    #define GETCHAR(c, eptr) \
693      c = *eptr; \
694      if (c >= 0xc0) GETUTF8(c, eptr);
695    
696    /* Get the next UTF-8 character, testing for UTF-8 mode, and not advancing the
697    pointer. */
698    
699    #define GETCHARTEST(c, eptr) \
700      c = *eptr; \
701      if (utf && c >= 0xc0) GETUTF8(c, eptr);
702    
703  /* Get the next UTF-8 character, advancing the pointer. This is called when we  /* Get the next UTF-8 character, advancing the pointer. This is called when we
704  know we are in UTF-8 mode. */  know we are in UTF-8 mode. */
705    
# Line 747  do not know if we are in UTF-8 mode. */ Line 766  do not know if we are in UTF-8 mode. */
766    c = *eptr; \    c = *eptr; \
767    if (utf && c >= 0xc0) GETUTF8LEN(c, eptr, len);    if (utf && c >= 0xc0) GETUTF8LEN(c, eptr, len);
768    
769    /* Returns the next uchar, not advancing the pointer. This is called when
770    we know we are in UTF mode. */
771    
772    #define RAWUCHAR(eptr) \
773      (*(eptr))
774    
775    /* Returns the next uchar, advancing the pointer. This is called when
776    we know we are in UTF mode. */
777    
778    #define RAWUCHARINC(eptr) \
779      (*(eptr)++)
780    
781    /* Returns the next uchar, testing for UTF mode, and not advancing the
782    pointer. */
783    
784    #define RAWUCHARTEST(eptr) \
785      (*(eptr))
786    
787    /* Returns the next uchar, testing for UTF mode, advancing the
788    pointer. */
789    
790    #define RAWUCHARINCTEST(eptr) \
791      (*(eptr)++)
792    
793  /* If the pointer is not at the start of a character, move it back until  /* If the pointer is not at the start of a character, move it back until
794  it is. This is called only in UTF-8 mode - we don't put a test within the macro  it is. This is called only in UTF-8 mode - we don't put a test within the macro
795  because almost all calls are already within a block of UTF-8 only code. */  because almost all calls are already within a block of UTF-8 only code. */
# Line 842  we do not know if we are in UTF-16 mode. Line 885  we do not know if we are in UTF-16 mode.
885    c = *eptr; \    c = *eptr; \
886    if (utf && (c & 0xfc00) == 0xd800) GETUTF16LEN(c, eptr, len);    if (utf && (c & 0xfc00) == 0xd800) GETUTF16LEN(c, eptr, len);
887    
888    /* Returns the next uchar, not advancing the pointer. This is called when
889    we know we are in UTF mode. */
890    
891    #define RAWUCHAR(eptr) \
892      (*(eptr))
893    
894    /* Returns the next uchar, advancing the pointer. This is called when
895    we know we are in UTF mode. */
896    
897    #define RAWUCHARINC(eptr) \
898      (*(eptr)++)
899    
900    /* Returns the next uchar, testing for UTF mode, and not advancing the
901    pointer. */
902    
903    #define RAWUCHARTEST(eptr) \
904      (*(eptr))
905    
906    /* Returns the next uchar, testing for UTF mode, advancing the
907    pointer. */
908    
909    #define RAWUCHARINCTEST(eptr) \
910      (*(eptr)++)
911    
912  /* If the pointer is not at the start of a character, move it back until  /* If the pointer is not at the start of a character, move it back until
913  it is. This is called only in UTF-16 mode - we don't put a test within the  it is. This is called only in UTF-16 mode - we don't put a test within the
914  macro because almost all calls are already within a block of UTF-16 only  macro because almost all calls are already within a block of UTF-16 only
# Line 858  code. */ Line 925  code. */
925    
926  #elif defined COMPILE_PCRE32  #elif defined COMPILE_PCRE32
927    
928  /* These are unnecessary for the 32-bit library */  /* These are trivial for the 32-bit library, since all UTF-32 characters fit
929  #undef MAX_VALUE_FOR_SINGLE_CHAR  into one pcre_uchar unit. */
930  #undef HAS_EXTRALEN  #define MAX_VALUE_FOR_SINGLE_CHAR (0x10ffffu)
931  #undef GET_EXTRALEN  #define HAS_EXTRALEN(c) (0)
932  #undef NOT_FIRSTCHAR  #define GET_EXTRALEN(c) (0)
933    #define NOT_FIRSTCHAR(c) (0)
934    
935  #define UTF32_MASK (0x1ffffful)  #define UTF32_MASK (0x1fffffu)
936    
937  /* Get the next UTF-32 character, not advancing the pointer. This is called when  /* Get the next UTF-32 character, not advancing the pointer. This is called when
938  we know we are in UTF-32 mode. */  we know we are in UTF-32 mode. */
# Line 906  This is called when we do not know if we Line 974  This is called when we do not know if we
974  #define GETCHARLENTEST(c, eptr, len) \  #define GETCHARLENTEST(c, eptr, len) \
975    GETCHARTEST(c, eptr)    GETCHARTEST(c, eptr)
976    
977    /* Returns the next uchar, not advancing the pointer. This is called when
978    we know we are in UTF mode. */
979    
980    #define RAWUCHAR(eptr) \
981      (*(eptr) & UTF32_MASK)
982    
983    /* Returns the next uchar, advancing the pointer. This is called when
984    we know we are in UTF mode. */
985    
986    #define RAWUCHARINC(eptr) \
987      (*(eptr)++ & UTF32_MASK)
988    
989    /* Returns the next uchar, testing for UTF mode, and not advancing the
990    pointer. */
991    
992    #define RAWUCHARTEST(eptr) \
993      (utf ? (*(eptr) & UTF32_MASK) : *(eptr))
994    
995    /* Returns the next uchar, testing for UTF mode, advancing the
996    pointer. */
997    
998    #define RAWUCHARINCTEST(eptr) \
999      (utf ? (*(eptr)++ & UTF32_MASK) : *(eptr)++)
1000    
1001  /* If the pointer is not at the start of a character, move it back until  /* If the pointer is not at the start of a character, move it back until
1002  it is. This is called only in UTF-32 mode - we don't put a test within the  it is. This is called only in UTF-32 mode - we don't put a test within the
1003  macro because almost all calls are already within a block of UTF-32 only  macro because almost all calls are already within a block of UTF-32 only
1004  code. */  code.
1005    These are all no-ops since all UTF-32 characters fit into one pcre_uchar. */
1006    
1007  #define BACKCHAR(eptr) do { } while (0)  #define BACKCHAR(eptr) do { } while (0)
1008    
# Line 2311  typedef struct compile_data { Line 2404  typedef struct compile_data {
2404    int  names_found;                 /* Number of entries so far */    int  names_found;                 /* Number of entries so far */
2405    int  name_entry_size;             /* Size of each entry */    int  name_entry_size;             /* Size of each entry */
2406    int  workspace_size;              /* Size of workspace */    int  workspace_size;              /* Size of workspace */
2407    int  bracount;                    /* Count of capturing parens as we compile */    unsigned int  bracount;           /* Count of capturing parens as we compile */
2408    int  final_bracount;              /* Saved value after first pass */    int  final_bracount;              /* Saved value after first pass */
2409    int  max_lookbehind;              /* Maximum lookbehind (characters) */    int  max_lookbehind;              /* Maximum lookbehind (characters) */
2410    int  top_backref;                 /* Maximum back reference */    int  top_backref;                 /* Maximum back reference */
# Line 2341  call within the pattern; used by pcre_ex Line 2434  call within the pattern; used by pcre_ex
2434    
2435  typedef struct recursion_info {  typedef struct recursion_info {
2436    struct recursion_info *prevrec; /* Previous recursion record (or NULL) */    struct recursion_info *prevrec; /* Previous recursion record (or NULL) */
2437    int group_num;                  /* Number of group that was called */    unsigned int group_num;         /* Number of group that was called */
2438    int *offset_save;               /* Pointer to start of saved offsets */    int *offset_save;               /* Pointer to start of saved offsets */
2439    int saved_max;                  /* Number of saved offsets */    int saved_max;                  /* Number of saved offsets */
2440    PCRE_PUCHAR subject_position;   /* Position at start of recursion */    PCRE_PUCHAR subject_position;   /* Position at start of recursion */
# Line 2577  extern unsigned int      PRIV(strlen_uc) Line 2670  extern unsigned int      PRIV(strlen_uc)
2670    
2671  #endif /* COMPILE_PCRE[8|16|32] */  #endif /* COMPILE_PCRE[8|16|32] */
2672    
2673    #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
2674    
2675    #define STRCMP_UC_UC_TEST(str1, str2) STRCMP_UC_UC(str1, str2)
2676    #define STRCMP_UC_C8_TEST(str1, str2) STRCMP_UC_C8(str1, str2)
2677    
2678    #elif defined COMPILE_PCRE32
2679    
2680    extern int               PRIV(strcmp_uc_uc_utf)(const pcre_uchar *,
2681                               const pcre_uchar *);
2682    extern int               PRIV(strcmp_uc_c8_utf)(const pcre_uchar *,
2683                               const char *);
2684    
2685    #define STRCMP_UC_UC_TEST(str1, str2) \
2686      (utf ? PRIV(strcmp_uc_uc_utf)((str1), (str2)) : PRIV(strcmp_uc_uc)((str1), (str2)))
2687    #define STRCMP_UC_C8_TEST(str1, str2) \
2688      (utf ? PRIV(strcmp_uc_c8_utf)((str1), (str2)) : PRIV(strcmp_uc_c8)((str1), (str2)))
2689    
2690    #endif /* COMPILE_PCRE[8|16|32] */
2691    
2692  extern const pcre_uchar *PRIV(find_bracket)(const pcre_uchar *, BOOL, int);  extern const pcre_uchar *PRIV(find_bracket)(const pcre_uchar *, BOOL, int);
2693  extern BOOL              PRIV(is_newline)(PCRE_PUCHAR, int, PCRE_PUCHAR,  extern BOOL              PRIV(is_newline)(PCRE_PUCHAR, int, PCRE_PUCHAR,
2694                             int *, BOOL);                             int *, BOOL);
2695  extern int               PRIV(ord2utf)(pcre_uint32, pcre_uchar *);  extern unsigned int      PRIV(ord2utf)(pcre_uint32, pcre_uchar *);
2696  extern int               PRIV(valid_utf)(PCRE_PUCHAR, int, int *);  extern int               PRIV(valid_utf)(PCRE_PUCHAR, int, int *);
2697  extern BOOL              PRIV(was_newline)(PCRE_PUCHAR, int, PCRE_PUCHAR,  extern BOOL              PRIV(was_newline)(PCRE_PUCHAR, int, PCRE_PUCHAR,
2698                             int *, BOOL);                             int *, BOOL);
# Line 2610  extern const pcre_uint32 PRIV(ucd_casele Line 2722  extern const pcre_uint32 PRIV(ucd_casele
2722  extern const ucd_record  PRIV(ucd_records)[];  extern const ucd_record  PRIV(ucd_records)[];
2723  extern const pcre_uint8  PRIV(ucd_stage1)[];  extern const pcre_uint8  PRIV(ucd_stage1)[];
2724  extern const pcre_uint16 PRIV(ucd_stage2)[];  extern const pcre_uint16 PRIV(ucd_stage2)[];
2725  extern const int         PRIV(ucp_gentype)[];  extern const pcre_uint32 PRIV(ucp_gentype)[];
2726  extern const pcre_uint32 PRIV(ucp_gbtable)[];  extern const pcre_uint32 PRIV(ucp_gbtable)[];
2727  #ifdef SUPPORT_JIT  #ifdef SUPPORT_JIT
2728  extern const int         PRIV(ucp_typerange)[];  extern const int         PRIV(ucp_typerange)[];

Legend:
Removed from v.1083  
changed lines
  Added in v.1151

  ViewVC Help
Powered by ViewVC 1.1.5