/[pcre]/code/trunk/pcre_internal.h
ViewVC logotype

Diff of /code/trunk/pcre_internal.h

Parent Directory | Revision Log | View Patch

revision 1199 by chpe, Sat Nov 3 19:21:37 2012 UTC | revision 1248 by ph10, Wed Feb 13 17:36:38 2013 UTC
# Line 200  typedef unsigned char pcre_uint8; Line 200  typedef unsigned char pcre_uint8;
200    typedef unsigned int pcre_uint16;    typedef unsigned int pcre_uint16;
201    typedef int pcre_int16;    typedef int pcre_int16;
202  #else  #else
203    #error Cannot determine a type for 16-bit unsigned integers  # error Cannot determine a type for 16-bit unsigned integers
204  #endif  #endif
205    
206  #if UINT_MAX == 4294967295  #if UINT_MAX == 4294967295
# Line 210  typedef unsigned char pcre_uint8; Line 210  typedef unsigned char pcre_uint8;
210    typedef unsigned long int pcre_uint32;    typedef unsigned long int pcre_uint32;
211    typedef long int pcre_int32;    typedef long int pcre_int32;
212  #else  #else
213    #error Cannot determine a type for 32-bit unsigned integers  # error Cannot determine a type for 32-bit unsigned integers
214  #endif  #endif
215    
216  /* When checking for integer overflow in pcre_compile(), we need to handle  /* When checking for integer overflow in pcre_compile(), we need to handle
# Line 531  capturing parenthesis numbers in back re Line 531  capturing parenthesis numbers in back re
531  #define PUT2(a,n,d)   \  #define PUT2(a,n,d)   \
532    a[n] = (d) >> 8; \    a[n] = (d) >> 8; \
533    a[(n)+1] = (d) & 255    a[(n)+1] = (d) & 255
534    
535  /* For reasons that I do not understand, the expression in this GET2 macro is  /* For reasons that I do not understand, the expression in this GET2 macro is
536  treated by gcc as a signed expression, even when a is declared as unsigned. It  treated by gcc as a signed expression, even when a is declared as unsigned. It
537  seems that any kind of arithmetic results in a signed value. */  seems that any kind of arithmetic results in a signed value. */
538    
539  #define GET2(a,n) \  #define GET2(a,n) \
# Line 934  into one pcre_uchar unit. */ Line 934  into one pcre_uchar unit. */
934  #define GET_EXTRALEN(c) (0)  #define GET_EXTRALEN(c) (0)
935  #define NOT_FIRSTCHAR(c) (0)  #define NOT_FIRSTCHAR(c) (0)
936    
 #define UTF32_MASK (0x1fffffu)  
   
 /* Base macro to pick up an UTF-32 character out of a uint32 */  
   
 #define MASKHIGHBITS(c) ((c) & UTF32_MASK)  
   
 /* Base macro to pick up an UTF-32 character, not advancing the pointer */  
   
 #define GETUTF32(eptr) (MASKHIGHBITS(*(eptr)))  
   
 /* Base macro to pick up an UTF-32 character, advancing the pointer */  
   
 #define GETUTF32INC(eptr) (MASKHIGHBITS(*((eptr)++)))  
   
937  /* Get the next UTF-32 character, not advancing the pointer. This is called when  /* Get the next UTF-32 character, not advancing the pointer. This is called when
938  we know we are in UTF-32 mode. */  we know we are in UTF-32 mode. */
939    
940  #define GETCHAR(c, eptr) \  #define GETCHAR(c, eptr) \
941    c = GETUTF32(eptr);    c = *(eptr);
942    
943  /* Get the next UTF-32 character, testing for UTF-32 mode, and not advancing the  /* Get the next UTF-32 character, testing for UTF-32 mode, and not advancing the
944  pointer. */  pointer. */
945    
946  #define GETCHARTEST(c, eptr) \  #define GETCHARTEST(c, eptr) \
947    c = (utf ? GETUTF32(eptr) : *(eptr));    c = *(eptr);
948    
949  /* Get the next UTF-32 character, advancing the pointer. This is called when we  /* Get the next UTF-32 character, advancing the pointer. This is called when we
950  know we are in UTF-32 mode. */  know we are in UTF-32 mode. */
951    
952  #define GETCHARINC(c, eptr) \  #define GETCHARINC(c, eptr) \
953    c = GETUTF32INC(eptr);    c = *((eptr)++);
954    
955  /* Get the next character, testing for UTF-32 mode, and advancing the pointer.  /* Get the next character, testing for UTF-32 mode, and advancing the pointer.
956  This is called when we don't know if we are in UTF-32 mode. */  This is called when we don't know if we are in UTF-32 mode. */
957    
958  #define GETCHARINCTEST(c, eptr) \  #define GETCHARINCTEST(c, eptr) \
959    c = (utf ? GETUTF32INC(eptr) : *((eptr)++));    c = *((eptr)++);
960    
961  /* Get the next UTF-32 character, not advancing the pointer, not incrementing  /* Get the next UTF-32 character, not advancing the pointer, not incrementing
962  length (since all UTF-32 is of length 1). This is called when we know we are in  length (since all UTF-32 is of length 1). This is called when we know we are in
# Line 990  This is called when we do not know if we Line 976  This is called when we do not know if we
976  we know we are in UTF mode. */  we know we are in UTF mode. */
977    
978  #define RAWUCHAR(eptr) \  #define RAWUCHAR(eptr) \
979    (MASKHIGHBITS(*(eptr)))    (*(eptr))
980    
981  /* Returns the next uchar, advancing the pointer. This is called when  /* Returns the next uchar, advancing the pointer. This is called when
982  we know we are in UTF mode. */  we know we are in UTF mode. */
983    
984  #define RAWUCHARINC(eptr) \  #define RAWUCHARINC(eptr) \
985    (MASKHIGHBITS(*((eptr)++)))    (*((eptr)++))
986    
987  /* Returns the next uchar, testing for UTF mode, and not advancing the  /* Returns the next uchar, testing for UTF mode, and not advancing the
988  pointer. */  pointer. */
989    
990  #define RAWUCHARTEST(eptr) \  #define RAWUCHARTEST(eptr) \
991    (utf ? (MASKHIGHBITS(*(eptr))) : *(eptr))    (*(eptr))
992    
993  /* Returns the next uchar, testing for UTF mode, advancing the  /* Returns the next uchar, testing for UTF mode, advancing the
994  pointer. */  pointer. */
995    
996  #define RAWUCHARINCTEST(eptr) \  #define RAWUCHARINCTEST(eptr) \
997    (utf ? (MASKHIGHBITS(*((eptr)++))) : *((eptr)++))    (*((eptr)++))
998    
999  /* If the pointer is not at the start of a character, move it back until  /* If the pointer is not at the start of a character, move it back until
1000  it is. This is called only in UTF-32 mode - we don't put a test within the  it is. This is called only in UTF-32 mode - we don't put a test within the
1001  macro because almost all calls are already within a block of UTF-32 only  macro because almost all calls are already within a block of UTF-32 only
1002  code.  code.
1003  These are all no-ops since all UTF-32 characters fit into one pcre_uchar. */  These are all no-ops since all UTF-32 characters fit into one pcre_uchar. */
1004    
1005  #define BACKCHAR(eptr) do { } while (0)  #define BACKCHAR(eptr) do { } while (0)
# Line 1040  are defined. Line 1026  are defined.
1026  These values are also required as lists in pcre_compile.c when processing \h,  These values are also required as lists in pcre_compile.c when processing \h,
1027  \H, \v and \V in a character class. The lists are defined in pcre_tables.c, but  \H, \v and \V in a character class. The lists are defined in pcre_tables.c, but
1028  macros that define the values are here so that all the definitions are  macros that define the values are here so that all the definitions are
1029  together. The lists must be in ascending character order, terminated by  together. The lists must be in ascending character order, terminated by
1030  NOTACHAR (which is 0xffffffff).  NOTACHAR (which is 0xffffffff).
1031    
1032  Any changes should ensure that the various macros are kept in step with each  Any changes should ensure that the various macros are kept in step with each
# Line 1054  other. NOTE: The values also appear in p Line 1040  other. NOTE: The values also appear in p
1040    CHAR_HT, CHAR_SPACE, 0xa0, \    CHAR_HT, CHAR_SPACE, 0xa0, \
1041    0x1680, 0x180e, 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, \    0x1680, 0x180e, 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, \
1042    0x2006, 0x2007, 0x2008, 0x2009, 0x200A, 0x202f, 0x205f, 0x3000, \    0x2006, 0x2007, 0x2008, 0x2009, 0x200A, 0x202f, 0x205f, 0x3000, \
1043    NOTACHAR    NOTACHAR
1044    
1045  #define HSPACE_MULTIBYTE_CASES \  #define HSPACE_MULTIBYTE_CASES \
1046    case 0x1680:  /* OGHAM SPACE MARK */ \    case 0x1680:  /* OGHAM SPACE MARK */ \
# Line 1078  other. NOTE: The values also appear in p Line 1064  other. NOTE: The values also appear in p
1064    case CHAR_HT: \    case CHAR_HT: \
1065    case CHAR_SPACE: \    case CHAR_SPACE: \
1066    case 0xa0     /* NBSP */    case 0xa0     /* NBSP */
1067    
1068  #define HSPACE_CASES \  #define HSPACE_CASES \
1069    HSPACE_BYTE_CASES: \    HSPACE_BYTE_CASES: \
1070    HSPACE_MULTIBYTE_CASES    HSPACE_MULTIBYTE_CASES
1071    
1072  #define VSPACE_LIST \  #define VSPACE_LIST \
1073    CHAR_LF, CHAR_VT, CHAR_FF, CHAR_CR, CHAR_NEL, 0x2028, 0x2029, NOTACHAR    CHAR_LF, CHAR_VT, CHAR_FF, CHAR_CR, CHAR_NEL, 0x2028, 0x2029, NOTACHAR
1074    
1075  #define VSPACE_MULTIBYTE_CASES \  #define VSPACE_MULTIBYTE_CASES \
1076    case 0x2028:    /* LINE SEPARATOR */ \    case 0x2028:    /* LINE SEPARATOR */ \
# Line 1114  other. NOTE: The values also appear in p Line 1100  other. NOTE: The values also appear in p
1100    
1101  #ifdef EBCDIC_NL25  #ifdef EBCDIC_NL25
1102  #define VSPACE_LIST \  #define VSPACE_LIST \
1103    CHAR_VT, CHAR_FF, CHAR_CR, CHAR_NEL, CHAR_LF, NOTACHAR    CHAR_VT, CHAR_FF, CHAR_CR, CHAR_NEL, CHAR_LF, NOTACHAR
1104  #else  #else
1105  #define VSPACE_LIST \  #define VSPACE_LIST \
1106    CHAR_VT, CHAR_FF, CHAR_CR, CHAR_LF, CHAR_NEL, NOTACHAR    CHAR_VT, CHAR_FF, CHAR_CR, CHAR_LF, CHAR_NEL, NOTACHAR
1107  #endif  #endif
1108    
1109  #define VSPACE_BYTE_CASES \  #define VSPACE_BYTE_CASES \
1110    case CHAR_LF: \    case CHAR_LF: \
# Line 1542  a positive value. */ Line 1528  a positive value. */
1528  #define STRING_ANYCRLF_RIGHTPAR        "ANYCRLF)"  #define STRING_ANYCRLF_RIGHTPAR        "ANYCRLF)"
1529  #define STRING_BSR_ANYCRLF_RIGHTPAR    "BSR_ANYCRLF)"  #define STRING_BSR_ANYCRLF_RIGHTPAR    "BSR_ANYCRLF)"
1530  #define STRING_BSR_UNICODE_RIGHTPAR    "BSR_UNICODE)"  #define STRING_BSR_UNICODE_RIGHTPAR    "BSR_UNICODE)"
1531  #ifdef COMPILE_PCRE8  #define STRING_UTF8_RIGHTPAR           "UTF8)"
1532  #define STRING_UTF_RIGHTPAR            "UTF8)"  #define STRING_UTF16_RIGHTPAR          "UTF16)"
1533  #endif  #define STRING_UTF32_RIGHTPAR          "UTF32)"
1534  #ifdef COMPILE_PCRE16  #define STRING_UTF_RIGHTPAR            "UTF)"
 #define STRING_UTF_RIGHTPAR            "UTF16)"  
 #endif  
 #ifdef COMPILE_PCRE32  
 #define STRING_UTF_RIGHTPAR            "UTF32)"  
 #endif  
1535  #define STRING_UCP_RIGHTPAR            "UCP)"  #define STRING_UCP_RIGHTPAR            "UCP)"
1536  #define STRING_NO_START_OPT_RIGHTPAR   "NO_START_OPT)"  #define STRING_NO_START_OPT_RIGHTPAR   "NO_START_OPT)"
1537    
# Line 1808  only. */ Line 1789  only. */
1789  #define STRING_ANYCRLF_RIGHTPAR        STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS  #define STRING_ANYCRLF_RIGHTPAR        STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
1790  #define STRING_BSR_ANYCRLF_RIGHTPAR    STR_B STR_S STR_R STR_UNDERSCORE STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS  #define STRING_BSR_ANYCRLF_RIGHTPAR    STR_B STR_S STR_R STR_UNDERSCORE STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
1791  #define STRING_BSR_UNICODE_RIGHTPAR    STR_B STR_S STR_R STR_UNDERSCORE STR_U STR_N STR_I STR_C STR_O STR_D STR_E STR_RIGHT_PARENTHESIS  #define STRING_BSR_UNICODE_RIGHTPAR    STR_B STR_S STR_R STR_UNDERSCORE STR_U STR_N STR_I STR_C STR_O STR_D STR_E STR_RIGHT_PARENTHESIS
1792  #ifdef COMPILE_PCRE8  #define STRING_UTF8_RIGHTPAR           STR_U STR_T STR_F STR_8 STR_RIGHT_PARENTHESIS
1793  #define STRING_UTF_RIGHTPAR            STR_U STR_T STR_F STR_8 STR_RIGHT_PARENTHESIS  #define STRING_UTF16_RIGHTPAR          STR_U STR_T STR_F STR_1 STR_6 STR_RIGHT_PARENTHESIS
1794  #endif  #define STRING_UTF32_RIGHTPAR          STR_U STR_T STR_F STR_3 STR_2 STR_RIGHT_PARENTHESIS
1795  #ifdef COMPILE_PCRE16  #define STRING_UTF_RIGHTPAR            STR_U STR_T STR_F STR_RIGHT_PARENTHESIS
 #define STRING_UTF_RIGHTPAR            STR_U STR_T STR_F STR_1 STR_6 STR_RIGHT_PARENTHESIS  
 #endif  
 #ifdef COMPILE_PCRE32  
 #define STRING_UTF_RIGHTPAR            STR_U STR_T STR_F STR_3 STR_2 STR_RIGHT_PARENTHESIS  
 #endif  
1796  #define STRING_UCP_RIGHTPAR            STR_U STR_C STR_P STR_RIGHT_PARENTHESIS  #define STRING_UCP_RIGHTPAR            STR_U STR_C STR_P STR_RIGHT_PARENTHESIS
1797  #define STRING_NO_START_OPT_RIGHTPAR   STR_N STR_O STR_UNDERSCORE STR_S STR_T STR_A STR_R STR_T STR_UNDERSCORE STR_O STR_P STR_T STR_RIGHT_PARENTHESIS  #define STRING_NO_START_OPT_RIGHTPAR   STR_N STR_O STR_UNDERSCORE STR_S STR_T STR_A STR_R STR_T STR_UNDERSCORE STR_O STR_P STR_T STR_RIGHT_PARENTHESIS
1798    
# Line 2455  typedef struct recursion_info { Line 2431  typedef struct recursion_info {
2431    unsigned int group_num;         /* Number of group that was called */    unsigned int group_num;         /* Number of group that was called */
2432    int *offset_save;               /* Pointer to start of saved offsets */    int *offset_save;               /* Pointer to start of saved offsets */
2433    int saved_max;                  /* Number of saved offsets */    int saved_max;                  /* Number of saved offsets */
2434      int saved_capture_last;         /* Last capture number */
2435    PCRE_PUCHAR subject_position;   /* Position at start of recursion */    PCRE_PUCHAR subject_position;   /* Position at start of recursion */
2436  } recursion_info;  } recursion_info;
2437    
# Line 2496  typedef struct match_data { Line 2473  typedef struct match_data {
2473    const  pcre_uint8 *lcc;         /* Points to lower casing table */    const  pcre_uint8 *lcc;         /* Points to lower casing table */
2474    const  pcre_uint8 *fcc;         /* Points to case-flipping table */    const  pcre_uint8 *fcc;         /* Points to case-flipping table */
2475    const  pcre_uint8 *ctypes;      /* Points to table of type maps */    const  pcre_uint8 *ctypes;      /* Points to table of type maps */
   BOOL   offset_overflow;         /* Set if too many extractions */  
2476    BOOL   notbol;                  /* NOTBOL flag */    BOOL   notbol;                  /* NOTBOL flag */
2477    BOOL   noteol;                  /* NOTEOL flag */    BOOL   noteol;                  /* NOTEOL flag */
2478    BOOL   utf;                     /* UTF-8 / UTF-16 flag */    BOOL   utf;                     /* UTF-8 / UTF-16 flag */
# Line 2517  typedef struct match_data { Line 2493  typedef struct match_data {
2493    PCRE_PUCHAR start_used_ptr;     /* Earliest consulted character */    PCRE_PUCHAR start_used_ptr;     /* Earliest consulted character */
2494    int    partial;                 /* PARTIAL options */    int    partial;                 /* PARTIAL options */
2495    int    end_offset_top;          /* Highwater mark at end of match */    int    end_offset_top;          /* Highwater mark at end of match */
2496    int    capture_last;            /* Most recent capture number */    pcre_int32 capture_last;        /* Most recent capture number + overflow flag */
2497    int    start_offset;            /* The start offset value */    int    start_offset;            /* The start offset value */
2498    int    match_function_type;     /* Set for certain special calls of MATCH() */    int    match_function_type;     /* Set for certain special calls of MATCH() */
2499    eptrblock *eptrchain;           /* Chain of eptrblocks for tail recursions */    eptrblock *eptrchain;           /* Chain of eptrblocks for tail recursions */

Legend:
Removed from v.1199  
changed lines
  Added in v.1248

  ViewVC Help
Powered by ViewVC 1.1.5