/[pcre]/code/trunk/pcre_internal.h
ViewVC logotype

Diff of /code/trunk/pcre_internal.h

Parent Directory | Revision Log | View Patch

revision 1061 by chpe, Tue Oct 16 15:54:02 2012 UTC | revision 1100 by chpe, Tue Oct 16 15:56:26 2012 UTC
# Line 304  start/end of string field names are. */ Line 304  start/end of string field names are. */
304         &(NLBLOCK->nllen), utf)) \         &(NLBLOCK->nllen), utf)) \
305      : \      : \
306      ((p) <= NLBLOCK->PSEND - NLBLOCK->nllen && \      ((p) <= NLBLOCK->PSEND - NLBLOCK->nllen && \
307       (p)[0] == NLBLOCK->nl[0] && \       RAWUCHARTEST(p) == NLBLOCK->nl[0] && \
308       (NLBLOCK->nllen == 1 || (p)[1] == NLBLOCK->nl[1]) \       (NLBLOCK->nllen == 1 || RAWUCHARTEST(p+1) == NLBLOCK->nl[1])       \
309      ) \      ) \
310    )    )
311    
# Line 318  start/end of string field names are. */ Line 318  start/end of string field names are. */
318         &(NLBLOCK->nllen), utf)) \         &(NLBLOCK->nllen), utf)) \
319      : \      : \
320      ((p) >= NLBLOCK->PSSTART + NLBLOCK->nllen && \      ((p) >= NLBLOCK->PSSTART + NLBLOCK->nllen && \
321       (p)[-NLBLOCK->nllen] == NLBLOCK->nl[0] && \       RAWUCHARTEST(p - NLBLOCK->nllen) == NLBLOCK->nl[0] &&              \
322       (NLBLOCK->nllen == 1 || (p)[-NLBLOCK->nllen+1] == NLBLOCK->nl[1]) \       (NLBLOCK->nllen == 1 || RAWUCHARTEST(p - NLBLOCK->nllen + 1) == NLBLOCK->nl[1]) \
323      ) \      ) \
324    )    )
325    
# Line 579  we don't even define them. */ Line 579  we don't even define them. */
579  #define GETCHARINC(c, eptr) c = *eptr++;  #define GETCHARINC(c, eptr) c = *eptr++;
580  #define GETCHARINCTEST(c, eptr) c = *eptr++;  #define GETCHARINCTEST(c, eptr) c = *eptr++;
581  #define GETCHARLEN(c, eptr, len) c = *eptr;  #define GETCHARLEN(c, eptr, len) c = *eptr;
582    #define RAWUCHAR(eptr) (*(eptr))
583    #define RAWUCHARINC(eptr) (*(eptr)++)
584    #define RAWUCHARTEST(eptr) (*(eptr))
585    #define RAWUCHARINCTEST(eptr) (*(eptr)++)
586  /* #define GETCHARLENTEST(c, eptr, len) */  /* #define GETCHARLENTEST(c, eptr, len) */
587  /* #define BACKCHAR(eptr) */  /* #define BACKCHAR(eptr) */
588  /* #define FORWARDCHAR(eptr) */  /* #define FORWARDCHAR(eptr) */
# Line 586  we don't even define them. */ Line 590  we don't even define them. */
590    
591  #else   /* SUPPORT_UTF */  #else   /* SUPPORT_UTF */
592    
 #if defined COMPILE_PCRE8  
   
 /* These macros were originally written in the form of loops that used data  
 from the tables whose names start with PRIV(utf8_table). They were rewritten by  
 a user so as not to use loops, because in some environments this gives a  
 significant performance advantage, and it seems never to do any harm. */  
   
 /* Tells the biggest code point which can be encoded as a single character. */  
   
 #define MAX_VALUE_FOR_SINGLE_CHAR 127  
   
593  /* Tests whether the code point needs extra characters to decode. */  /* Tests whether the code point needs extra characters to decode. */
594    
595  #define HAS_EXTRALEN(c) ((c) >= 0xc0)  #define HASUTF8EXTRALEN(c) ((c) >= 0xc0)
   
 /* Returns with the additional number of characters if IS_MULTICHAR(c) is TRUE.  
 Otherwise it has an undefined behaviour. */  
   
 #define GET_EXTRALEN(c) (PRIV(utf8_table4)[(c) & 0x3f])  
   
 /* Returns TRUE, if the given character is not the first character  
 of a UTF sequence. */  
   
 #define NOT_FIRSTCHAR(c) (((c) & 0xc0) == 0x80)  
596    
597  /* Base macro to pick up the remaining bytes of a UTF-8 character, not  /* Base macro to pick up the remaining bytes of a UTF-8 character, not
598  advancing the pointer. */  advancing the pointer. */
# Line 633  advancing the pointer. */ Line 616  advancing the pointer. */
616            ((eptr[4] & 0x3f) << 6) | (eptr[5] & 0x3f); \            ((eptr[4] & 0x3f) << 6) | (eptr[5] & 0x3f); \
617      }      }
618    
 /* Get the next UTF-8 character, not advancing the pointer. This is called when  
 we know we are in UTF-8 mode. */  
   
 #define GETCHAR(c, eptr) \  
   c = *eptr; \  
   if (c >= 0xc0) GETUTF8(c, eptr);  
   
 /* Get the next UTF-8 character, testing for UTF-8 mode, and not advancing the  
 pointer. */  
   
 #define GETCHARTEST(c, eptr) \  
   c = *eptr; \  
   if (utf && c >= 0xc0) GETUTF8(c, eptr);  
   
619  /* Base macro to pick up the remaining bytes of a UTF-8 character, advancing  /* Base macro to pick up the remaining bytes of a UTF-8 character, advancing
620  the pointer. */  the pointer. */
621    
# Line 681  the pointer. */ Line 650  the pointer. */
650        } \        } \
651      }      }
652    
653    #if defined COMPILE_PCRE8
654    
655    /* These macros were originally written in the form of loops that used data
656    from the tables whose names start with PRIV(utf8_table). They were rewritten by
657    a user so as not to use loops, because in some environments this gives a
658    significant performance advantage, and it seems never to do any harm. */
659    
660    /* Tells the biggest code point which can be encoded as a single character. */
661    
662    #define MAX_VALUE_FOR_SINGLE_CHAR 127
663    
664    /* Tests whether the code point needs extra characters to decode. */
665    
666    #define HAS_EXTRALEN(c) ((c) >= 0xc0)
667    
668    /* Returns the number of additional characters if IS_MULTICHAR(c) is TRUE.
669    Otherwise the behaviour is undefined. */
670    
671    #define GET_EXTRALEN(c) (PRIV(utf8_table4)[(c) & 0x3f])
672    
673    /* Returns TRUE, if the given character is not the first character
674    of a UTF sequence. */
675    
676    #define NOT_FIRSTCHAR(c) (((c) & 0xc0) == 0x80)
677    
678    /* Get the next UTF-8 character, not advancing the pointer. This is called when
679    we know we are in UTF-8 mode. */
680    
681    #define GETCHAR(c, eptr) \
682      c = *eptr; \
683      if (c >= 0xc0) GETUTF8(c, eptr);
684    
685    /* Get the next UTF-8 character, testing for UTF-8 mode, and not advancing the
686    pointer. */
687    
688    #define GETCHARTEST(c, eptr) \
689      c = *eptr; \
690      if (utf && c >= 0xc0) GETUTF8(c, eptr);
691    
692  /* Get the next UTF-8 character, advancing the pointer. This is called when we  /* Get the next UTF-8 character, advancing the pointer. This is called when we
693  know we are in UTF-8 mode. */  know we are in UTF-8 mode. */
694    
# Line 747  do not know if we are in UTF-8 mode. */ Line 755  do not know if we are in UTF-8 mode. */
755    c = *eptr; \    c = *eptr; \
756    if (utf && c >= 0xc0) GETUTF8LEN(c, eptr, len);    if (utf && c >= 0xc0) GETUTF8LEN(c, eptr, len);
757    
758    /* Returns the next uchar, not advancing the pointer. This is called when
759    we know we are in UTF mode. */
760    
761    #define RAWUCHAR(eptr) \
762      (*(eptr))
763    
764    /* Returns the next uchar, advancing the pointer. This is called when
765    we know we are in UTF mode. */
766    
767    #define RAWUCHARINC(eptr) \
768      (*(eptr)++)
769    
770    /* Returns the next uchar, testing for UTF mode, and not advancing the
771    pointer. */
772    
773    #define RAWUCHARTEST(eptr) \
774      (*(eptr))
775    
776    /* Returns the next uchar, testing for UTF mode, advancing the
777    pointer. */
778    
779    #define RAWUCHARINCTEST(eptr) \
780      (*(eptr)++)
781    
782  /* If the pointer is not at the start of a character, move it back until  /* If the pointer is not at the start of a character, move it back until
783  it is. This is called only in UTF-8 mode - we don't put a test within the macro  it is. This is called only in UTF-8 mode - we don't put a test within the macro
784  because almost all calls are already within a block of UTF-8 only code. */  because almost all calls are already within a block of UTF-8 only code. */
# Line 842  we do not know if we are in UTF-16 mode. Line 874  we do not know if we are in UTF-16 mode.
874    c = *eptr; \    c = *eptr; \
875    if (utf && (c & 0xfc00) == 0xd800) GETUTF16LEN(c, eptr, len);    if (utf && (c & 0xfc00) == 0xd800) GETUTF16LEN(c, eptr, len);
876    
877    /* Returns the next uchar, not advancing the pointer. This is called when
878    we know we are in UTF mode. */
879    
880    #define RAWUCHAR(eptr) \
881      (*(eptr))
882    
883    /* Returns the next uchar, advancing the pointer. This is called when
884    we know we are in UTF mode. */
885    
886    #define RAWUCHARINC(eptr) \
887      (*(eptr)++)
888    
889    /* Returns the next uchar, testing for UTF mode, and not advancing the
890    pointer. */
891    
892    #define RAWUCHARTEST(eptr) \
893      (*(eptr))
894    
895    /* Returns the next uchar, testing for UTF mode, advancing the
896    pointer. */
897    
898    #define RAWUCHARINCTEST(eptr) \
899      (*(eptr)++)
900    
901  /* If the pointer is not at the start of a character, move it back until  /* If the pointer is not at the start of a character, move it back until
902  it is. This is called only in UTF-16 mode - we don't put a test within the  it is. This is called only in UTF-16 mode - we don't put a test within the
903  macro because almost all calls are already within a block of UTF-16 only  macro because almost all calls are already within a block of UTF-16 only
# Line 864  code. */ Line 920  code. */
920  #undef GET_EXTRALEN  #undef GET_EXTRALEN
921  #undef NOT_FIRSTCHAR  #undef NOT_FIRSTCHAR
922    
923    #define UTF32_MASK (0x1ffffful)
924    
925  /* Get the next UTF-32 character, not advancing the pointer. This is called when  /* Get the next UTF-32 character, not advancing the pointer. This is called when
926  we know we are in UTF-32 mode. */  we know we are in UTF-32 mode. */
927    
928  #define GETCHAR(c, eptr) \  #define GETCHAR(c, eptr) \
929    c = *eptr;    c = (*eptr) & UTF32_MASK;
930    
931  /* Get the next UTF-32 character, testing for UTF-32 mode, and not advancing the  /* Get the next UTF-32 character, testing for UTF-32 mode, and not advancing the
932  pointer. */  pointer. */
933    
934  #define GETCHARTEST(c, eptr) \  #define GETCHARTEST(c, eptr) \
935    c = *eptr;    c = *eptr; \
936      if (utf) c &= UTF32_MASK;
937    
938  /* Get the next UTF-32 character, advancing the pointer. This is called when we  /* Get the next UTF-32 character, advancing the pointer. This is called when we
939  know we are in UTF-32 mode. */  know we are in UTF-32 mode. */
940    
941  #define GETCHARINC(c, eptr) \  #define GETCHARINC(c, eptr) \
942    c = *eptr++;    c = (*eptr++) & UTF32_MASK;
943    
944  /* Get the next character, testing for UTF-32 mode, and advancing the pointer.  /* Get the next character, testing for UTF-32 mode, and advancing the pointer.
945  This is called when we don't know if we are in UTF-32 mode. */  This is called when we don't know if we are in UTF-32 mode. */
946    
947  #define GETCHARINCTEST(c, eptr) \  #define GETCHARINCTEST(c, eptr) \
948    c = *eptr++;    c = *eptr++; \
949      if (utf) c &= UTF32_MASK;
950    
951  /* Get the next UTF-32 character, not advancing the pointer, not incrementing  /* Get the next UTF-32 character, not advancing the pointer, not incrementing
952  length (since all UTF-32 is of length 1). This is called when we know we are in  length (since all UTF-32 is of length 1). This is called when we know we are in
953  UTF-32 mode. */  UTF-32 mode. */
954    
955  #define GETCHARLEN(c, eptr, len) \  #define GETCHARLEN(c, eptr, len) \
956    c = *eptr;    GETCHAR(c, eptr)
957    
958  /* Get the next UTF-832character, testing for UTF-32 mode, not advancing the  /* Get the next UTF-32 character, testing for UTF-32 mode, not advancing the
959  pointer, not incrementing the length (since all UTF-32 is of length 1).  pointer, not incrementing the length (since all UTF-32 is of length 1).
960  This is called when we do not know if we are in UTF-32 mode. */  This is called when we do not know if we are in UTF-32 mode. */
961    
962  #define GETCHARLENTEST(c, eptr, len) \  #define GETCHARLENTEST(c, eptr, len) \
963    c = *eptr;    GETCHARTEST(c, eptr)
964    
965    /* Returns the next uchar, not advancing the pointer. This is called when
966    we know we are in UTF mode. */
967    
968    #define RAWUCHAR(eptr) \
969      (*(eptr) & UTF32_MASK)
970    
971    /* Returns the next uchar, advancing the pointer. This is called when
972    we know we are in UTF mode. */
973    
974    #define RAWUCHARINC(eptr) \
975      (*(eptr)++ & UTF32_MASK)
976    
977    /* Returns the next uchar, testing for UTF mode, and not advancing the
978    pointer. */
979    
980    #define RAWUCHARTEST(eptr) \
981      (utf ? (*(eptr) & UTF32_MASK) : *(eptr))
982    
983    /* Returns the next uchar, testing for UTF mode, advancing the
984    pointer. */
985    
986    #define RAWUCHARINCTEST(eptr) \
987      (utf ? (*(eptr)++ & UTF32_MASK) : *(eptr)++)
988    
989  /* If the pointer is not at the start of a character, move it back until  /* If the pointer is not at the start of a character, move it back until
990  it is. This is called only in UTF-32 mode - we don't put a test within the  it is. This is called only in UTF-32 mode - we don't put a test within the
# Line 1095  in different endianness. */ Line 1179  in different endianness. */
1179    
1180  #define REVERSED_MAGIC_NUMBER  0x45524350UL   /* 'ERCP' */  #define REVERSED_MAGIC_NUMBER  0x45524350UL   /* 'ERCP' */
1181    
 /* Negative values for the firstchar and reqchar variables */  
   
 #define REQ_UNSET (-2)  
 #define REQ_NONE  (-1)  
   
1182  /* The maximum remaining length of subject we are prepared to search for a  /* The maximum remaining length of subject we are prepared to search for a
1183  req_byte match. */  req_byte match. */
1184    
# Line 2578  extern unsigned int      PRIV(strlen_uc) Line 2657  extern unsigned int      PRIV(strlen_uc)
2657    
2658  #endif /* COMPILE_PCRE[8|16|32] */  #endif /* COMPILE_PCRE[8|16|32] */
2659    
2660    #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
2661    
2662    #define STRCMP_UC_UC_TEST(str1, str2) STRCMP_UC_UC(str1, str2)
2663    #define STRCMP_UC_C8_TEST(str1, str2) STRCMP_UC_C8(str1, str2)
2664    
2665    #elif defined COMPILE_PCRE32
2666    
2667    extern int               PRIV(strcmp_uc_uc_utf)(const pcre_uchar *,
2668                               const pcre_uchar *);
2669    extern int               PRIV(strcmp_uc_c8_utf)(const pcre_uchar *,
2670                               const char *);
2671    
2672    #define STRCMP_UC_UC_TEST(str1, str2) \
2673      (utf ? PRIV(strcmp_uc_uc_utf)((str1), (str2)) : PRIV(strcmp_uc_uc)((str1), (str2)))
2674    #define STRCMP_UC_C8_TEST(str1, str2) \
2675      (utf ? PRIV(strcmp_uc_c8_utf)((str1), (str2)) : PRIV(strcmp_uc_c8)((str1), (str2)))
2676    
2677    #endif /* COMPILE_PCRE[8|16|32] */
2678    
2679  extern const pcre_uchar *PRIV(find_bracket)(const pcre_uchar *, BOOL, int);  extern const pcre_uchar *PRIV(find_bracket)(const pcre_uchar *, BOOL, int);
2680  extern BOOL              PRIV(is_newline)(PCRE_PUCHAR, int, PCRE_PUCHAR,  extern BOOL              PRIV(is_newline)(PCRE_PUCHAR, int, PCRE_PUCHAR,
2681                             int *, BOOL);                             int *, BOOL);
# Line 2622  extern const int         PRIV(ucp_typera Line 2720  extern const int         PRIV(ucp_typera
2720    
2721  #define UCD_BLOCK_SIZE 128  #define UCD_BLOCK_SIZE 128
2722  #define GET_UCD(ch) (PRIV(ucd_records) + \  #define GET_UCD(ch) (PRIV(ucd_records) + \
2723          PRIV(ucd_stage2)[PRIV(ucd_stage1)[(ch) / UCD_BLOCK_SIZE] * \          PRIV(ucd_stage2)[PRIV(ucd_stage1)[(int)(ch) / UCD_BLOCK_SIZE] * \
2724          UCD_BLOCK_SIZE + (ch) % UCD_BLOCK_SIZE])          UCD_BLOCK_SIZE + (int)(ch) % UCD_BLOCK_SIZE])
2725    
2726  #define UCD_CHARTYPE(ch)    GET_UCD(ch)->chartype  #define UCD_CHARTYPE(ch)    GET_UCD(ch)->chartype
2727  #define UCD_SCRIPT(ch)      GET_UCD(ch)->script  #define UCD_SCRIPT(ch)      GET_UCD(ch)->script
2728  #define UCD_CATEGORY(ch)    PRIV(ucp_gentype)[UCD_CHARTYPE(ch)]  #define UCD_CATEGORY(ch)    PRIV(ucp_gentype)[UCD_CHARTYPE(ch)]
2729  #define UCD_GRAPHBREAK(ch)  GET_UCD(ch)->gbprop  #define UCD_GRAPHBREAK(ch)  GET_UCD(ch)->gbprop
2730  #define UCD_CASESET(ch)     GET_UCD(ch)->caseset  #define UCD_CASESET(ch)     GET_UCD(ch)->caseset
2731  #define UCD_OTHERCASE(ch)   (ch + GET_UCD(ch)->other_case)  #define UCD_OTHERCASE(ch)   ((pcre_uint32)((int)ch + (int)(GET_UCD(ch)->other_case)))
2732    
2733  #endif /* SUPPORT_UCP */  #endif /* SUPPORT_UCP */
2734    

Legend:
Removed from v.1061  
changed lines
  Added in v.1100

  ViewVC Help
Powered by ViewVC 1.1.5