/[pcre]/code/trunk/pcre_internal.h
ViewVC logotype

Diff of /code/trunk/pcre_internal.h

Parent Directory | Revision Log | Patch

revision 1083 by chpe, Tue Oct 16 15:55:24 2012 UTC revision 1114 by chpe, Tue Oct 16 15:57:16 2012 UTC
# Line 53  depending on the PRIV macro. */ Line 53  depending on the PRIV macro. */
53  #endif  #endif
54    
55  /* PCRE is compiled as an 8 bit library if it is not requested otherwise. */  /* PCRE is compiled as an 8 bit library if it is not requested otherwise. */
56  #if !defined COMPILE_PCRE16 && ! defined COMPILE_PCRE32  #if !defined COMPILE_PCRE16 && !defined COMPILE_PCRE32
57  #define COMPILE_PCRE8  #define COMPILE_PCRE8
58  #endif  #endif
59    
# Line 304  start/end of string field names are. */ Line 304  start/end of string field names are. */
304         &(NLBLOCK->nllen), utf)) \         &(NLBLOCK->nllen), utf)) \
305      : \      : \
306      ((p) <= NLBLOCK->PSEND - NLBLOCK->nllen && \      ((p) <= NLBLOCK->PSEND - NLBLOCK->nllen && \
307       (p)[0] == NLBLOCK->nl[0] && \       RAWUCHARTEST(p) == NLBLOCK->nl[0] && \
308       (NLBLOCK->nllen == 1 || (p)[1] == NLBLOCK->nl[1]) \       (NLBLOCK->nllen == 1 || RAWUCHARTEST(p+1) == NLBLOCK->nl[1])       \
309      ) \      ) \
310    )    )
311    
# Line 318  start/end of string field names are. */ Line 318  start/end of string field names are. */
318         &(NLBLOCK->nllen), utf)) \         &(NLBLOCK->nllen), utf)) \
319      : \      : \
320      ((p) >= NLBLOCK->PSSTART + NLBLOCK->nllen && \      ((p) >= NLBLOCK->PSSTART + NLBLOCK->nllen && \
321       (p)[-NLBLOCK->nllen] == NLBLOCK->nl[0] && \       RAWUCHARTEST(p - NLBLOCK->nllen) == NLBLOCK->nl[0] &&              \
322       (NLBLOCK->nllen == 1 || (p)[-NLBLOCK->nllen+1] == NLBLOCK->nl[1]) \       (NLBLOCK->nllen == 1 || RAWUCHARTEST(p - NLBLOCK->nllen + 1) == NLBLOCK->nl[1]) \
323      ) \      ) \
324    )    )
325    
# Line 579  we don't even define them. */ Line 579  we don't even define them. */
579  #define GETCHARINC(c, eptr) c = *eptr++;  #define GETCHARINC(c, eptr) c = *eptr++;
580  #define GETCHARINCTEST(c, eptr) c = *eptr++;  #define GETCHARINCTEST(c, eptr) c = *eptr++;
581  #define GETCHARLEN(c, eptr, len) c = *eptr;  #define GETCHARLEN(c, eptr, len) c = *eptr;
582    #define RAWUCHAR(eptr) (*(eptr))
583    #define RAWUCHARINC(eptr) (*(eptr)++)
584    #define RAWUCHARTEST(eptr) (*(eptr))
585    #define RAWUCHARINCTEST(eptr) (*(eptr)++)
586  /* #define GETCHARLENTEST(c, eptr, len) */  /* #define GETCHARLENTEST(c, eptr, len) */
587  /* #define BACKCHAR(eptr) */  /* #define BACKCHAR(eptr) */
588  /* #define FORWARDCHAR(eptr) */  /* #define FORWARDCHAR(eptr) */
# Line 586  we don't even define them. */ Line 590  we don't even define them. */
590    
591  #else   /* SUPPORT_UTF */  #else   /* SUPPORT_UTF */
592    
 #if defined COMPILE_PCRE8  
   
 /* These macros were originally written in the form of loops that used data  
 from the tables whose names start with PRIV(utf8_table). They were rewritten by  
 a user so as not to use loops, because in some environments this gives a  
 significant performance advantage, and it seems never to do any harm. */  
   
 /* Tells the biggest code point which can be encoded as a single character. */  
   
 #define MAX_VALUE_FOR_SINGLE_CHAR 127  
   
593  /* Tests whether the code point needs extra characters to decode. */  /* Tests whether the code point needs extra characters to decode. */
594    
595  #define HAS_EXTRALEN(c) ((c) >= 0xc0)  #define HASUTF8EXTRALEN(c) ((c) >= 0xc0)
   
 /* Returns the number of additional characters if HAS_EXTRALEN(c) is TRUE.  
 Otherwise the behaviour is undefined. */  
   
 #define GET_EXTRALEN(c) (PRIV(utf8_table4)[(c) & 0x3f])  
   
 /* Returns TRUE, if the given character is not the first character  
 of a UTF sequence. */  
   
 #define NOT_FIRSTCHAR(c) (((c) & 0xc0) == 0x80)  
596    
597  /* Base macro to pick up the remaining bytes of a UTF-8 character, not  /* Base macro to pick up the remaining bytes of a UTF-8 character, not
598  advancing the pointer. */  advancing the pointer. */
# Line 633  advancing the pointer. */ Line 616  advancing the pointer. */
616            ((eptr[4] & 0x3f) << 6) | (eptr[5] & 0x3f); \            ((eptr[4] & 0x3f) << 6) | (eptr[5] & 0x3f); \
617      }      }
618    
 /* Get the next UTF-8 character, not advancing the pointer. This is called when  
 we know we are in UTF-8 mode. */  
   
 #define GETCHAR(c, eptr) \  
   c = *eptr; \  
   if (c >= 0xc0) GETUTF8(c, eptr);  
   
 /* Get the next UTF-8 character, testing for UTF-8 mode, and not advancing the  
 pointer. */  
   
 #define GETCHARTEST(c, eptr) \  
   c = *eptr; \  
   if (utf && c >= 0xc0) GETUTF8(c, eptr);  
   
619  /* Base macro to pick up the remaining bytes of a UTF-8 character, advancing  /* Base macro to pick up the remaining bytes of a UTF-8 character, advancing
620  the pointer. */  the pointer. */
621    
# Line 681  the pointer. */ Line 650  the pointer. */
650        } \        } \
651      }      }
652    
653    #if defined COMPILE_PCRE8
654    
655    /* These macros were originally written in the form of loops that used data
656    from the tables whose names start with PRIV(utf8_table). They were rewritten by
657    a user so as not to use loops, because in some environments this gives a
658    significant performance advantage, and it seems never to do any harm. */
659    
660    /* Tells the biggest code point which can be encoded as a single character. */
661    
662    #define MAX_VALUE_FOR_SINGLE_CHAR 127
663    
664    /* Tests whether the code point needs extra characters to decode. */
665    
666    #define HAS_EXTRALEN(c) ((c) >= 0xc0)
667    
668    /* Returns the number of additional characters if HAS_EXTRALEN(c) is TRUE.
669    Otherwise the behaviour is undefined. */
670    
671    #define GET_EXTRALEN(c) (PRIV(utf8_table4)[(c) & 0x3f])
672    
673    /* Returns TRUE, if the given character is not the first character
674    of a UTF sequence. */
675    
676    #define NOT_FIRSTCHAR(c) (((c) & 0xc0) == 0x80)
677    
678    /* Get the next UTF-8 character, not advancing the pointer. This is called when
679    we know we are in UTF-8 mode. */
680    
681    #define GETCHAR(c, eptr) \
682      c = *eptr; \
683      if (c >= 0xc0) GETUTF8(c, eptr);
684    
685    /* Get the next UTF-8 character, testing for UTF-8 mode, and not advancing the
686    pointer. */
687    
688    #define GETCHARTEST(c, eptr) \
689      c = *eptr; \
690      if (utf && c >= 0xc0) GETUTF8(c, eptr);
691    
692  /* Get the next UTF-8 character, advancing the pointer. This is called when we  /* Get the next UTF-8 character, advancing the pointer. This is called when we
693  know we are in UTF-8 mode. */  know we are in UTF-8 mode. */
694    
# Line 747  do not know if we are in UTF-8 mode. */ Line 755  do not know if we are in UTF-8 mode. */
755    c = *eptr; \    c = *eptr; \
756    if (utf && c >= 0xc0) GETUTF8LEN(c, eptr, len);    if (utf && c >= 0xc0) GETUTF8LEN(c, eptr, len);
757    
758    /* Returns the next uchar, not advancing the pointer. This is called when
759    we know we are in UTF mode. */
760    
761    #define RAWUCHAR(eptr) \
762      (*(eptr))
763    
764    /* Returns the next uchar, advancing the pointer. This is called when
765    we know we are in UTF mode. */
766    
767    #define RAWUCHARINC(eptr) \
768      (*(eptr)++)
769    
770    /* Returns the next uchar, testing for UTF mode, and not advancing the
771    pointer. */
772    
773    #define RAWUCHARTEST(eptr) \
774      (*(eptr))
775    
776    /* Returns the next uchar, testing for UTF mode, advancing the
777    pointer. */
778    
779    #define RAWUCHARINCTEST(eptr) \
780      (*(eptr)++)
781    
782  /* If the pointer is not at the start of a character, move it back until  /* If the pointer is not at the start of a character, move it back until
783  it is. This is called only in UTF-8 mode - we don't put a test within the macro  it is. This is called only in UTF-8 mode - we don't put a test within the macro
784  because almost all calls are already within a block of UTF-8 only code. */  because almost all calls are already within a block of UTF-8 only code. */
# Line 842  we do not know if we are in UTF-16 mode. Line 874  we do not know if we are in UTF-16 mode.
874    c = *eptr; \    c = *eptr; \
875    if (utf && (c & 0xfc00) == 0xd800) GETUTF16LEN(c, eptr, len);    if (utf && (c & 0xfc00) == 0xd800) GETUTF16LEN(c, eptr, len);
876    
877    /* Returns the next uchar, not advancing the pointer. This is called when
878    we know we are in UTF mode. */
879    
880    #define RAWUCHAR(eptr) \
881      (*(eptr))
882    
883    /* Returns the next uchar, advancing the pointer. This is called when
884    we know we are in UTF mode. */
885    
886    #define RAWUCHARINC(eptr) \
887      (*(eptr)++)
888    
889    /* Returns the next uchar, testing for UTF mode, and not advancing the
890    pointer. */
891    
892    #define RAWUCHARTEST(eptr) \
893      (*(eptr))
894    
895    /* Returns the next uchar, testing for UTF mode, advancing the
896    pointer. */
897    
898    #define RAWUCHARINCTEST(eptr) \
899      (*(eptr)++)
900    
901  /* If the pointer is not at the start of a character, move it back until  /* If the pointer is not at the start of a character, move it back until
902  it is. This is called only in UTF-16 mode - we don't put a test within the  it is. This is called only in UTF-16 mode - we don't put a test within the
903  macro because almost all calls are already within a block of UTF-16 only  macro because almost all calls are already within a block of UTF-16 only
# Line 858  code. */ Line 914  code. */
914    
915  #elif defined COMPILE_PCRE32  #elif defined COMPILE_PCRE32
916    
917  /* These are unnecessary for the 32-bit library */  /* These are trivial for the 32-bit library, since all UTF-32 characters fit
918  #undef MAX_VALUE_FOR_SINGLE_CHAR  into one pcre_uchar unit. */
919  #undef HAS_EXTRALEN  #define MAX_VALUE_FOR_SINGLE_CHAR (0x10ffffu)
920  #undef GET_EXTRALEN  #define HAS_EXTRALEN(c) (0)
921  #undef NOT_FIRSTCHAR  #define GET_EXTRALEN(c) (0)
922    #define NOT_FIRSTCHAR(c) (0)
923    
924  #define UTF32_MASK (0x1ffffful)  #define UTF32_MASK (0x1fffffu)
925    
926  /* Get the next UTF-32 character, not advancing the pointer. This is called when  /* Get the next UTF-32 character, not advancing the pointer. This is called when
927  we know we are in UTF-32 mode. */  we know we are in UTF-32 mode. */
# Line 906  This is called when we do not know if we Line 963  This is called when we do not know if we
963  #define GETCHARLENTEST(c, eptr, len) \  #define GETCHARLENTEST(c, eptr, len) \
964    GETCHARTEST(c, eptr)    GETCHARTEST(c, eptr)
965    
966    /* Returns the next uchar, not advancing the pointer. This is called when
967    we know we are in UTF mode. */
968    
969    #define RAWUCHAR(eptr) \
970      (*(eptr) & UTF32_MASK)
971    
972    /* Returns the next uchar, advancing the pointer. This is called when
973    we know we are in UTF mode. */
974    
975    #define RAWUCHARINC(eptr) \
976      (*(eptr)++ & UTF32_MASK)
977    
978    /* Returns the next uchar, testing for UTF mode, and not advancing the
979    pointer. */
980    
981    #define RAWUCHARTEST(eptr) \
982      (utf ? (*(eptr) & UTF32_MASK) : *(eptr))
983    
984    /* Returns the next uchar, testing for UTF mode, advancing the
985    pointer. */
986    
987    #define RAWUCHARINCTEST(eptr) \
988      (utf ? (*(eptr)++ & UTF32_MASK) : *(eptr)++)
989    
990  /* If the pointer is not at the start of a character, move it back until  /* If the pointer is not at the start of a character, move it back until
991  it is. This is called only in UTF-32 mode - we don't put a test within the  it is. This is called only in UTF-32 mode - we don't put a test within the
992  macro because almost all calls are already within a block of UTF-32 only  macro because almost all calls are already within a block of UTF-32 only
993  code. */  code.
994    These are all no-ops since all UTF-32 characters fit into one pcre_uchar. */
995    
996  #define BACKCHAR(eptr) do { } while (0)  #define BACKCHAR(eptr) do { } while (0)
997    
# Line 2577  extern unsigned int      PRIV(strlen_uc) Line 2659  extern unsigned int      PRIV(strlen_uc)
2659    
2660  #endif /* COMPILE_PCRE[8|16|32] */  #endif /* COMPILE_PCRE[8|16|32] */
2661    
2662    #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
2663    
2664    #define STRCMP_UC_UC_TEST(str1, str2) STRCMP_UC_UC(str1, str2)
2665    #define STRCMP_UC_C8_TEST(str1, str2) STRCMP_UC_C8(str1, str2)
2666    
2667    #elif defined COMPILE_PCRE32
2668    
2669    extern int               PRIV(strcmp_uc_uc_utf)(const pcre_uchar *,
2670                               const pcre_uchar *);
2671    extern int               PRIV(strcmp_uc_c8_utf)(const pcre_uchar *,
2672                               const char *);
2673    
2674    #define STRCMP_UC_UC_TEST(str1, str2) \
2675      (utf ? PRIV(strcmp_uc_uc_utf)((str1), (str2)) : PRIV(strcmp_uc_uc)((str1), (str2)))
2676    #define STRCMP_UC_C8_TEST(str1, str2) \
2677      (utf ? PRIV(strcmp_uc_c8_utf)((str1), (str2)) : PRIV(strcmp_uc_c8)((str1), (str2)))
2678    
2679    #endif /* COMPILE_PCRE[8|16|32] */
2680    
2681  extern const pcre_uchar *PRIV(find_bracket)(const pcre_uchar *, BOOL, int);  extern const pcre_uchar *PRIV(find_bracket)(const pcre_uchar *, BOOL, int);
2682  extern BOOL              PRIV(is_newline)(PCRE_PUCHAR, int, PCRE_PUCHAR,  extern BOOL              PRIV(is_newline)(PCRE_PUCHAR, int, PCRE_PUCHAR,
2683                             int *, BOOL);                             int *, BOOL);

Legend:
Removed from v.1083  
changed lines
  Added in v.1114

  ViewVC Help
Powered by ViewVC 1.1.5