/[pcre]/code/trunk/pcre_internal.h
ViewVC logotype

Diff of /code/trunk/pcre_internal.h

Parent Directory | Revision Log | View Patch

revision 1380 by ph10, Tue Oct 15 16:49:12 2013 UTC | revision 1631 by ph10, Wed Feb 10 19:13:17 2016 UTC
# Line 7  Line 7 
7  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
8    
9                         Written by Philip Hazel                         Written by Philip Hazel
10             Copyright (c) 1997-2013 University of Cambridge             Copyright (c) 1997-2016 University of Cambridge
11    
12  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
13  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 316  start/end of string field names are. */ Line 316  start/end of string field names are. */
316         &(NLBLOCK->nllen), utf)) \         &(NLBLOCK->nllen), utf)) \
317      : \      : \
318      ((p) <= NLBLOCK->PSEND - NLBLOCK->nllen && \      ((p) <= NLBLOCK->PSEND - NLBLOCK->nllen && \
319       RAWUCHARTEST(p) == NLBLOCK->nl[0] && \       UCHAR21TEST(p) == NLBLOCK->nl[0] && \
320       (NLBLOCK->nllen == 1 || RAWUCHARTEST(p+1) == NLBLOCK->nl[1])       \       (NLBLOCK->nllen == 1 || UCHAR21TEST(p+1) == NLBLOCK->nl[1])       \
321      ) \      ) \
322    )    )
323    
# Line 330  start/end of string field names are. */ Line 330  start/end of string field names are. */
330         &(NLBLOCK->nllen), utf)) \         &(NLBLOCK->nllen), utf)) \
331      : \      : \
332      ((p) >= NLBLOCK->PSSTART + NLBLOCK->nllen && \      ((p) >= NLBLOCK->PSSTART + NLBLOCK->nllen && \
333       RAWUCHARTEST(p - NLBLOCK->nllen) == NLBLOCK->nl[0] &&              \       UCHAR21TEST(p - NLBLOCK->nllen) == NLBLOCK->nl[0] &&              \
334       (NLBLOCK->nllen == 1 || RAWUCHARTEST(p - NLBLOCK->nllen + 1) == NLBLOCK->nl[1]) \       (NLBLOCK->nllen == 1 || UCHAR21TEST(p - NLBLOCK->nllen + 1) == NLBLOCK->nl[1]) \
335      ) \      ) \
336    )    )
337    
# Line 582  changed in future to be a fixed number o Line 582  changed in future to be a fixed number o
582  #define MAX_MARK ((1u << 8) - 1)  #define MAX_MARK ((1u << 8) - 1)
583  #endif  #endif
584    
585    /* There is a proposed future special "UTF-21" mode, in which only the lowest
586    21 bits of a 32-bit character are interpreted as UTF, with the remaining 11
587    high-order bits available to the application for other uses. In preparation for
588    the future implementation of this mode, there are macros that load a data item
589    and, if in this special mode, mask it to 21 bits. These macros all have names
590    starting with UCHAR21. In all other modes, including the normal 32-bit
591    library, the macros all have the same simple definitions. When the new mode is
592    implemented, it is expected that these definitions will be varied appropriately
593    using #ifdef when compiling the library that supports the special mode. */
594    
595    #define UCHAR21(eptr)        (*(eptr))
596    #define UCHAR21TEST(eptr)    (*(eptr))
597    #define UCHAR21INC(eptr)     (*(eptr)++)
598    #define UCHAR21INCTEST(eptr) (*(eptr)++)
599    
600  /* When UTF encoding is being used, a character is no longer just a single  /* When UTF encoding is being used, a character is no longer just a single
601  byte. The macros for character handling generate simple sequences when used in  byte in 8-bit mode or a single short in 16-bit mode. The macros for character
602  character-mode, and more complicated ones for UTF characters. GETCHARLENTEST  handling generate simple sequences when used in the basic mode, and more
603  and other macros are not used when UTF is not supported, so they are not  complicated ones for UTF characters. GETCHARLENTEST and other macros are not
604  defined. To make sure they can never even appear when UTF support is omitted,  used when UTF is not supported. To make sure they can never even appear when
605  we don't even define them. */  UTF support is omitted, we don't even define them. */
606    
607  #ifndef SUPPORT_UTF  #ifndef SUPPORT_UTF
608    
# Line 600  we don't even define them. */ Line 615  we don't even define them. */
615  #define GETCHARINC(c, eptr) c = *eptr++;  #define GETCHARINC(c, eptr) c = *eptr++;
616  #define GETCHARINCTEST(c, eptr) c = *eptr++;  #define GETCHARINCTEST(c, eptr) c = *eptr++;
617  #define GETCHARLEN(c, eptr, len) c = *eptr;  #define GETCHARLEN(c, eptr, len) c = *eptr;
 #define RAWUCHAR(eptr) (*(eptr))  
 #define RAWUCHARINC(eptr) (*(eptr)++)  
 #define RAWUCHARTEST(eptr) (*(eptr))  
 #define RAWUCHARINCTEST(eptr) (*(eptr)++)  
618  /* #define GETCHARLENTEST(c, eptr, len) */  /* #define GETCHARLENTEST(c, eptr, len) */
619  /* #define BACKCHAR(eptr) */  /* #define BACKCHAR(eptr) */
620  /* #define FORWARDCHAR(eptr) */  /* #define FORWARDCHAR(eptr) */
# Line 776  do not know if we are in UTF-8 mode. */ Line 787  do not know if we are in UTF-8 mode. */
787    c = *eptr; \    c = *eptr; \
788    if (utf && c >= 0xc0) GETUTF8LEN(c, eptr, len);    if (utf && c >= 0xc0) GETUTF8LEN(c, eptr, len);
789    
 /* Returns the next uchar, not advancing the pointer. This is called when  
 we know we are in UTF mode. */  
   
 #define RAWUCHAR(eptr) \  
   (*(eptr))  
   
 /* Returns the next uchar, advancing the pointer. This is called when  
 we know we are in UTF mode. */  
   
 #define RAWUCHARINC(eptr) \  
   (*((eptr)++))  
   
 /* Returns the next uchar, testing for UTF mode, and not advancing the  
 pointer. */  
   
 #define RAWUCHARTEST(eptr) \  
   (*(eptr))  
   
 /* Returns the next uchar, testing for UTF mode, advancing the  
 pointer. */  
   
 #define RAWUCHARINCTEST(eptr) \  
   (*((eptr)++))  
   
790  /* If the pointer is not at the start of a character, move it back until  /* If the pointer is not at the start of a character, move it back until
791  it is. This is called only in UTF-8 mode - we don't put a test within the macro  it is. This is called only in UTF-8 mode - we don't put a test within the macro
792  because almost all calls are already within a block of UTF-8 only code. */  because almost all calls are already within a block of UTF-8 only code. */
# Line 895  we do not know if we are in UTF-16 mode. Line 882  we do not know if we are in UTF-16 mode.
882    c = *eptr; \    c = *eptr; \
883    if (utf && (c & 0xfc00) == 0xd800) GETUTF16LEN(c, eptr, len);    if (utf && (c & 0xfc00) == 0xd800) GETUTF16LEN(c, eptr, len);
884    
 /* Returns the next uchar, not advancing the pointer. This is called when  
 we know we are in UTF mode. */  
   
 #define RAWUCHAR(eptr) \  
   (*(eptr))  
   
 /* Returns the next uchar, advancing the pointer. This is called when  
 we know we are in UTF mode. */  
   
 #define RAWUCHARINC(eptr) \  
   (*((eptr)++))  
   
 /* Returns the next uchar, testing for UTF mode, and not advancing the  
 pointer. */  
   
 #define RAWUCHARTEST(eptr) \  
   (*(eptr))  
   
 /* Returns the next uchar, testing for UTF mode, advancing the  
 pointer. */  
   
 #define RAWUCHARINCTEST(eptr) \  
   (*((eptr)++))  
   
885  /* If the pointer is not at the start of a character, move it back until  /* If the pointer is not at the start of a character, move it back until
886  it is. This is called only in UTF-16 mode - we don't put a test within the  it is. This is called only in UTF-16 mode - we don't put a test within the
887  macro because almost all calls are already within a block of UTF-16 only  macro because almost all calls are already within a block of UTF-16 only
# Line 980  This is called when we do not know if we Line 943  This is called when we do not know if we
943  #define GETCHARLENTEST(c, eptr, len) \  #define GETCHARLENTEST(c, eptr, len) \
944    GETCHARTEST(c, eptr)    GETCHARTEST(c, eptr)
945    
 /* Returns the next uchar, not advancing the pointer. This is called when  
 we know we are in UTF mode. */  
   
 #define RAWUCHAR(eptr) \  
   (*(eptr))  
   
 /* Returns the next uchar, advancing the pointer. This is called when  
 we know we are in UTF mode. */  
   
 #define RAWUCHARINC(eptr) \  
   (*((eptr)++))  
   
 /* Returns the next uchar, testing for UTF mode, and not advancing the  
 pointer. */  
   
 #define RAWUCHARTEST(eptr) \  
   (*(eptr))  
   
 /* Returns the next uchar, testing for UTF mode, advancing the  
 pointer. */  
   
 #define RAWUCHARINCTEST(eptr) \  
   (*((eptr)++))  
   
946  /* If the pointer is not at the start of a character, move it back until  /* If the pointer is not at the start of a character, move it back until
947  it is. This is called only in UTF-32 mode - we don't put a test within the  it is. This is called only in UTF-32 mode - we don't put a test within the
948  macro because almost all calls are already within a block of UTF-32 only  macro because almost all calls are already within a block of UTF-32 only
# Line 1045  other. NOTE: The values also appear in p Line 984  other. NOTE: The values also appear in p
984  #ifndef EBCDIC  #ifndef EBCDIC
985    
986  #define HSPACE_LIST \  #define HSPACE_LIST \
987    CHAR_HT, CHAR_SPACE, 0xa0, \    CHAR_HT, CHAR_SPACE, CHAR_NBSP, \
988    0x1680, 0x180e, 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, \    0x1680, 0x180e, 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, \
989    0x2006, 0x2007, 0x2008, 0x2009, 0x200A, 0x202f, 0x205f, 0x3000, \    0x2006, 0x2007, 0x2008, 0x2009, 0x200A, 0x202f, 0x205f, 0x3000, \
990    NOTACHAR    NOTACHAR
# Line 1071  other. NOTE: The values also appear in p Line 1010  other. NOTE: The values also appear in p
1010  #define HSPACE_BYTE_CASES \  #define HSPACE_BYTE_CASES \
1011    case CHAR_HT: \    case CHAR_HT: \
1012    case CHAR_SPACE: \    case CHAR_SPACE: \
1013    case 0xa0     /* NBSP */    case CHAR_NBSP
1014    
1015  #define HSPACE_CASES \  #define HSPACE_CASES \
1016    HSPACE_BYTE_CASES: \    HSPACE_BYTE_CASES: \
# Line 1098  other. NOTE: The values also appear in p Line 1037  other. NOTE: The values also appear in p
1037  /* ------ EBCDIC environments ------ */  /* ------ EBCDIC environments ------ */
1038    
1039  #else  #else
1040  #define HSPACE_LIST CHAR_HT, CHAR_SPACE  #define HSPACE_LIST CHAR_HT, CHAR_SPACE, CHAR_NBSP, NOTACHAR
1041    
1042  #define HSPACE_BYTE_CASES \  #define HSPACE_BYTE_CASES \
1043    case CHAR_HT: \    case CHAR_HT: \
1044    case CHAR_SPACE    case CHAR_SPACE: \
1045      case CHAR_NBSP
1046    
1047  #define HSPACE_CASES HSPACE_BYTE_CASES  #define HSPACE_CASES HSPACE_BYTE_CASES
1048    
# Line 1174  time, run time, or study time, respectiv Line 1114  time, run time, or study time, respectiv
1114  #define PUBLIC_COMPILE_OPTIONS \  #define PUBLIC_COMPILE_OPTIONS \
1115    (PCRE_CASELESS|PCRE_EXTENDED|PCRE_ANCHORED|PCRE_MULTILINE| \    (PCRE_CASELESS|PCRE_EXTENDED|PCRE_ANCHORED|PCRE_MULTILINE| \
1116     PCRE_DOTALL|PCRE_DOLLAR_ENDONLY|PCRE_EXTRA|PCRE_UNGREEDY|PCRE_UTF8| \     PCRE_DOTALL|PCRE_DOLLAR_ENDONLY|PCRE_EXTRA|PCRE_UNGREEDY|PCRE_UTF8| \
1117     PCRE_NO_AUTO_CAPTURE|PCRE_NO_AUTO_POSSESSIFY| \     PCRE_NO_AUTO_CAPTURE|PCRE_NO_AUTO_POSSESS| \
1118     PCRE_NO_UTF8_CHECK|PCRE_AUTO_CALLOUT|PCRE_FIRSTLINE| \     PCRE_NO_UTF8_CHECK|PCRE_AUTO_CALLOUT|PCRE_FIRSTLINE| \
1119     PCRE_DUPNAMES|PCRE_NEWLINE_BITS|PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE| \     PCRE_DUPNAMES|PCRE_NEWLINE_BITS|PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE| \
1120     PCRE_JAVASCRIPT_COMPAT|PCRE_UCP|PCRE_NO_START_OPTIMIZE|PCRE_NEVER_UTF)     PCRE_JAVASCRIPT_COMPAT|PCRE_UCP|PCRE_NO_START_OPTIMIZE|PCRE_NEVER_UTF)
# Line 1276  same code point. */ Line 1216  same code point. */
1216    
1217  #define CHAR_ESC                    '\047'  #define CHAR_ESC                    '\047'
1218  #define CHAR_DEL                    '\007'  #define CHAR_DEL                    '\007'
1219    #define CHAR_NBSP                   '\x41'
1220  #define STR_ESC                     "\047"  #define STR_ESC                     "\047"
1221  #define STR_DEL                     "\007"  #define STR_DEL                     "\007"
1222    
# Line 1290  a positive value. */ Line 1231  a positive value. */
1231  #define CHAR_NEL                    ((unsigned char)'\x85')  #define CHAR_NEL                    ((unsigned char)'\x85')
1232  #define CHAR_ESC                    '\033'  #define CHAR_ESC                    '\033'
1233  #define CHAR_DEL                    '\177'  #define CHAR_DEL                    '\177'
1234    #define CHAR_NBSP                   ((unsigned char)'\xa0')
1235    
1236  #define STR_LF                      "\n"  #define STR_LF                      "\n"
1237  #define STR_NL                      STR_LF  #define STR_NL                      STR_LF
# Line 1533  a positive value. */ Line 1475  a positive value. */
1475  #define STRING_xdigit               "xdigit"  #define STRING_xdigit               "xdigit"
1476    
1477  #define STRING_DEFINE               "DEFINE"  #define STRING_DEFINE               "DEFINE"
1478    #define STRING_WEIRD_STARTWORD      "[:<:]]"
1479    #define STRING_WEIRD_ENDWORD        "[:>:]]"
1480    
1481  #define STRING_CR_RIGHTPAR             "CR)"  #define STRING_CR_RIGHTPAR              "CR)"
1482  #define STRING_LF_RIGHTPAR             "LF)"  #define STRING_LF_RIGHTPAR              "LF)"
1483  #define STRING_CRLF_RIGHTPAR           "CRLF)"  #define STRING_CRLF_RIGHTPAR            "CRLF)"
1484  #define STRING_ANY_RIGHTPAR            "ANY)"  #define STRING_ANY_RIGHTPAR             "ANY)"
1485  #define STRING_ANYCRLF_RIGHTPAR        "ANYCRLF)"  #define STRING_ANYCRLF_RIGHTPAR         "ANYCRLF)"
1486  #define STRING_BSR_ANYCRLF_RIGHTPAR    "BSR_ANYCRLF)"  #define STRING_BSR_ANYCRLF_RIGHTPAR     "BSR_ANYCRLF)"
1487  #define STRING_BSR_UNICODE_RIGHTPAR    "BSR_UNICODE)"  #define STRING_BSR_UNICODE_RIGHTPAR     "BSR_UNICODE)"
1488  #define STRING_UTF8_RIGHTPAR           "UTF8)"  #define STRING_UTF8_RIGHTPAR            "UTF8)"
1489  #define STRING_UTF16_RIGHTPAR          "UTF16)"  #define STRING_UTF16_RIGHTPAR           "UTF16)"
1490  #define STRING_UTF32_RIGHTPAR          "UTF32)"  #define STRING_UTF32_RIGHTPAR           "UTF32)"
1491  #define STRING_UTF_RIGHTPAR            "UTF)"  #define STRING_UTF_RIGHTPAR             "UTF)"
1492  #define STRING_UCP_RIGHTPAR            "UCP)"  #define STRING_UCP_RIGHTPAR             "UCP)"
1493  #define STRING_NO_START_OPT_RIGHTPAR   "NO_START_OPT)"  #define STRING_NO_AUTO_POSSESS_RIGHTPAR "NO_AUTO_POSSESS)"
1494  #define STRING_LIMIT_MATCH_EQ          "LIMIT_MATCH="  #define STRING_NO_START_OPT_RIGHTPAR    "NO_START_OPT)"
1495  #define STRING_LIMIT_RECURSION_EQ      "LIMIT_RECURSION="  #define STRING_LIMIT_MATCH_EQ           "LIMIT_MATCH="
1496    #define STRING_LIMIT_RECURSION_EQ       "LIMIT_RECURSION="
1497    
1498  #else  /* SUPPORT_UTF */  #else  /* SUPPORT_UTF */
1499    
# Line 1664  only. */ Line 1609  only. */
1609  #define CHAR_VERTICAL_LINE          '\174'  #define CHAR_VERTICAL_LINE          '\174'
1610  #define CHAR_RIGHT_CURLY_BRACKET    '\175'  #define CHAR_RIGHT_CURLY_BRACKET    '\175'
1611  #define CHAR_TILDE                  '\176'  #define CHAR_TILDE                  '\176'
1612    #define CHAR_NBSP                   ((unsigned char)'\xa0')
1613    
1614  #define STR_HT                      "\011"  #define STR_HT                      "\011"
1615  #define STR_VT                      "\013"  #define STR_VT                      "\013"
# Line 1796  only. */ Line 1742  only. */
1742  #define STRING_xdigit               STR_x STR_d STR_i STR_g STR_i STR_t  #define STRING_xdigit               STR_x STR_d STR_i STR_g STR_i STR_t
1743    
1744  #define STRING_DEFINE               STR_D STR_E STR_F STR_I STR_N STR_E  #define STRING_DEFINE               STR_D STR_E STR_F STR_I STR_N STR_E
1745    #define STRING_WEIRD_STARTWORD      STR_LEFT_SQUARE_BRACKET STR_COLON STR_LESS_THAN_SIGN STR_COLON STR_RIGHT_SQUARE_BRACKET STR_RIGHT_SQUARE_BRACKET
1746    #define STRING_WEIRD_ENDWORD        STR_LEFT_SQUARE_BRACKET STR_COLON STR_GREATER_THAN_SIGN STR_COLON STR_RIGHT_SQUARE_BRACKET STR_RIGHT_SQUARE_BRACKET
1747    
1748  #define STRING_CR_RIGHTPAR             STR_C STR_R STR_RIGHT_PARENTHESIS  #define STRING_CR_RIGHTPAR              STR_C STR_R STR_RIGHT_PARENTHESIS
1749  #define STRING_LF_RIGHTPAR             STR_L STR_F STR_RIGHT_PARENTHESIS  #define STRING_LF_RIGHTPAR              STR_L STR_F STR_RIGHT_PARENTHESIS
1750  #define STRING_CRLF_RIGHTPAR           STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS  #define STRING_CRLF_RIGHTPAR            STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
1751  #define STRING_ANY_RIGHTPAR            STR_A STR_N STR_Y STR_RIGHT_PARENTHESIS  #define STRING_ANY_RIGHTPAR             STR_A STR_N STR_Y STR_RIGHT_PARENTHESIS
1752  #define STRING_ANYCRLF_RIGHTPAR        STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS  #define STRING_ANYCRLF_RIGHTPAR         STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
1753  #define STRING_BSR_ANYCRLF_RIGHTPAR    STR_B STR_S STR_R STR_UNDERSCORE STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS  #define STRING_BSR_ANYCRLF_RIGHTPAR     STR_B STR_S STR_R STR_UNDERSCORE STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
1754  #define STRING_BSR_UNICODE_RIGHTPAR    STR_B STR_S STR_R STR_UNDERSCORE STR_U STR_N STR_I STR_C STR_O STR_D STR_E STR_RIGHT_PARENTHESIS  #define STRING_BSR_UNICODE_RIGHTPAR     STR_B STR_S STR_R STR_UNDERSCORE STR_U STR_N STR_I STR_C STR_O STR_D STR_E STR_RIGHT_PARENTHESIS
1755  #define STRING_UTF8_RIGHTPAR           STR_U STR_T STR_F STR_8 STR_RIGHT_PARENTHESIS  #define STRING_UTF8_RIGHTPAR            STR_U STR_T STR_F STR_8 STR_RIGHT_PARENTHESIS
1756  #define STRING_UTF16_RIGHTPAR          STR_U STR_T STR_F STR_1 STR_6 STR_RIGHT_PARENTHESIS  #define STRING_UTF16_RIGHTPAR           STR_U STR_T STR_F STR_1 STR_6 STR_RIGHT_PARENTHESIS
1757  #define STRING_UTF32_RIGHTPAR          STR_U STR_T STR_F STR_3 STR_2 STR_RIGHT_PARENTHESIS  #define STRING_UTF32_RIGHTPAR           STR_U STR_T STR_F STR_3 STR_2 STR_RIGHT_PARENTHESIS
1758  #define STRING_UTF_RIGHTPAR            STR_U STR_T STR_F STR_RIGHT_PARENTHESIS  #define STRING_UTF_RIGHTPAR             STR_U STR_T STR_F STR_RIGHT_PARENTHESIS
1759  #define STRING_UCP_RIGHTPAR            STR_U STR_C STR_P STR_RIGHT_PARENTHESIS  #define STRING_UCP_RIGHTPAR             STR_U STR_C STR_P STR_RIGHT_PARENTHESIS
1760  #define STRING_NO_START_OPT_RIGHTPAR   STR_N STR_O STR_UNDERSCORE STR_S STR_T STR_A STR_R STR_T STR_UNDERSCORE STR_O STR_P STR_T STR_RIGHT_PARENTHESIS  #define STRING_NO_AUTO_POSSESS_RIGHTPAR STR_N STR_O STR_UNDERSCORE STR_A STR_U STR_T STR_O STR_UNDERSCORE STR_P STR_O STR_S STR_S STR_E STR_S STR_S STR_RIGHT_PARENTHESIS
1761  #define STRING_LIMIT_MATCH_EQ          STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_M STR_A STR_T STR_C STR_H STR_EQUALS_SIGN  #define STRING_NO_START_OPT_RIGHTPAR    STR_N STR_O STR_UNDERSCORE STR_S STR_T STR_A STR_R STR_T STR_UNDERSCORE STR_O STR_P STR_T STR_RIGHT_PARENTHESIS
1762  #define STRING_LIMIT_RECURSION_EQ      STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_R STR_E STR_C STR_U STR_R STR_S STR_I STR_O STR_N STR_EQUALS_SIGN  #define STRING_LIMIT_MATCH_EQ           STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_M STR_A STR_T STR_C STR_H STR_EQUALS_SIGN
1763    #define STRING_LIMIT_RECURSION_EQ       STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_R STR_E STR_C STR_U STR_R STR_S STR_I STR_O STR_N STR_EQUALS_SIGN
1764    
1765  #endif  /* SUPPORT_UTF */  #endif  /* SUPPORT_UTF */
1766    
1767  /* Escape items that are just an encoding of a particular data value. */  /* Escape items that are just an encoding of a particular data value. */
1768    
1769    #ifndef ESC_a
1770    #define ESC_a CHAR_BEL
1771    #endif
1772    
1773  #ifndef ESC_e  #ifndef ESC_e
1774  #define ESC_e CHAR_ESC  #define ESC_e CHAR_ESC
1775  #endif  #endif
# Line 1855  only. */ Line 1808  only. */
1808  #define PT_UCNC      10    /* Universal Character nameable character */  #define PT_UCNC      10    /* Universal Character nameable character */
1809  #define PT_TABSIZE   11    /* Size of square table for autopossessify tests */  #define PT_TABSIZE   11    /* Size of square table for autopossessify tests */
1810    
1811    /* The following special properties are used only in XCLASS items, when POSIX
1812    classes are specified and PCRE_UCP is set - in other words, for Unicode
1813    handling of these classes. They are not available via the \p or \P escapes like
1814    those in the above list, and so they do not take part in the autopossessifying
1815    table. */
1816    
1817    #define PT_PXGRAPH   11    /* [:graph:] - characters that mark the paper */
1818    #define PT_PXPRINT   12    /* [:print:] - [:graph:] plus non-control spaces */
1819    #define PT_PXPUNCT   13    /* [:punct:] - punctuation characters */
1820    
1821  /* Flag bits and data types for the extended class (OP_XCLASS) for classes that  /* Flag bits and data types for the extended class (OP_XCLASS) for classes that
1822  contain characters with values greater than 255. */  contain characters with values greater than 255. */
1823    
1824  #define XCL_NOT    0x01    /* Flag: this is a negative class */  #define XCL_NOT       0x01    /* Flag: this is a negative class */
1825  #define XCL_MAP    0x02    /* Flag: a 32-byte map is present */  #define XCL_MAP       0x02    /* Flag: a 32-byte map is present */
1826    #define XCL_HASPROP   0x04    /* Flag: property checks are present. */
1827    
1828  #define XCL_END       0    /* Marks end of individual items */  #define XCL_END       0    /* Marks end of individual items */
1829  #define XCL_SINGLE    1    /* Single item (one multibyte char) follows */  #define XCL_SINGLE    1    /* Single item (one multibyte char) follows */
# Line 1868  contain characters with values greater t Line 1832  contain characters with values greater t
1832  #define XCL_NOTPROP   4    /* Unicode inverted property (ditto) */  #define XCL_NOTPROP   4    /* Unicode inverted property (ditto) */
1833    
1834  /* These are escaped items that aren't just an encoding of a particular data  /* These are escaped items that aren't just an encoding of a particular data
1835  value such as \n. They must have non-zero values, as check_escape() returns  value such as \n. They must have non-zero values, as check_escape() returns 0
1836  0 for a data character.  Also, they must appear in the same order as in the opcode  for a data character.  Also, they must appear in the same order as in the
1837  definitions below, up to ESC_z. There's a dummy for OP_ALLANY because it  opcode definitions below, up to ESC_z. There's a dummy for OP_ALLANY because it
1838  corresponds to "." in DOTALL mode rather than an escape sequence. It is also  corresponds to "." in DOTALL mode rather than an escape sequence. It is also
1839  used for [^] in JavaScript compatibility mode, and for \C in non-utf mode. In  used for [^] in JavaScript compatibility mode, and for \C in non-utf mode. In
1840  non-DOTALL mode, "." behaves like \N.  non-DOTALL mode, "." behaves like \N.
# Line 1896  enum { ESC_A = 1, ESC_G, ESC_K, ESC_B, E Line 1860  enum { ESC_A = 1, ESC_G, ESC_K, ESC_B, E
1860    
1861  /********************** Opcode definitions ******************/  /********************** Opcode definitions ******************/
1862    
1863  /****** NOTE NOTE NOTE ******  /****** NOTE NOTE NOTE ******
1864    
1865  Starting from 1 (i.e. after OP_END), the values up to OP_EOD must correspond in  Starting from 1 (i.e. after OP_END), the values up to OP_EOD must correspond in
1866  order to the list of escapes immediately above. Furthermore, values up to  order to the list of escapes immediately above. Furthermore, values up to
# Line 1949  enum { Line 1913  enum {
1913    OP_EXTUNI,         /* 22 \X (extended Unicode sequence */    OP_EXTUNI,         /* 22 \X (extended Unicode sequence */
1914    OP_EODN,           /* 23 End of data or \n at end of data (\Z) */    OP_EODN,           /* 23 End of data or \n at end of data (\Z) */
1915    OP_EOD,            /* 24 End of data (\z) */    OP_EOD,            /* 24 End of data (\z) */
1916    
1917    /* Line end assertions */    /* Line end assertions */
1918    
1919    OP_DOLL,           /* 25 End of line - not multiline */    OP_DOLL,           /* 25 End of line - not multiline */
1920    OP_DOLLM,          /* 26 End of line - multiline */    OP_DOLLM,          /* 26 End of line - multiline */
1921    OP_CIRC,           /* 27 Start of line - not multiline */    OP_CIRC,           /* 27 Start of line - not multiline */
1922    OP_CIRCM,          /* 28 Start of line - multiline */    OP_CIRCM,          /* 28 Start of line - multiline */
1923    
1924    /* Single characters; caseful must precede the caseless ones */    /* Single characters; caseful must precede the caseless ones */
1925    
1926    OP_CHAR,           /* 29 Match one character, casefully */    OP_CHAR,           /* 29 Match one character, casefully */
1927    OP_CHARI,          /* 30 Match one character, caselessly */    OP_CHARI,          /* 30 Match one character, caselessly */
1928    OP_NOT,            /* 31 Match one character, not the given one, casefully */    OP_NOT,            /* 31 Match one character, not the given one, casefully */
# Line 2325  enum { ERR0,  ERR1,  ERR2,  ERR3,  ERR4, Line 2289  enum { ERR0,  ERR1,  ERR2,  ERR3,  ERR4,
2289         ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59,         ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59,
2290         ERR60, ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69,         ERR60, ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69,
2291         ERR70, ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78, ERR79,         ERR70, ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78, ERR79,
2292         ERR80, ERR81, ERRCOUNT };         ERR80, ERR81, ERR82, ERR83, ERR84, ERR85, ERR86, ERR87, ERRCOUNT };
2293    
2294  /* JIT compiling modes. The function list is indexed by them. */  /* JIT compiling modes. The function list is indexed by them. */
2295    
2296  enum { JIT_COMPILE, JIT_PARTIAL_SOFT_COMPILE, JIT_PARTIAL_HARD_COMPILE,  enum { JIT_COMPILE, JIT_PARTIAL_SOFT_COMPILE, JIT_PARTIAL_HARD_COMPILE,
2297         JIT_NUMBER_OF_COMPILE_MODES };         JIT_NUMBER_OF_COMPILE_MODES };
2298    
# Line 2480  typedef struct compile_data { Line 2445  typedef struct compile_data {
2445    int  top_backref;                 /* Maximum back reference */    int  top_backref;                 /* Maximum back reference */
2446    unsigned int backref_map;         /* Bitmap of low back refs */    unsigned int backref_map;         /* Bitmap of low back refs */
2447    unsigned int namedrefcount;       /* Number of backreferences by name */    unsigned int namedrefcount;       /* Number of backreferences by name */
2448      int  parens_depth;                /* Depth of nested parentheses */
2449    int  assert_depth;                /* Depth of nested assertions */    int  assert_depth;                /* Depth of nested assertions */
2450    pcre_uint32 external_options;     /* External (initial) options */    pcre_uint32 external_options;     /* External (initial) options */
2451    pcre_uint32 external_flags;       /* External flag bits to be set */    pcre_uint32 external_flags;       /* External flag bits to be set */
# Line 2488  typedef struct compile_data { Line 2454  typedef struct compile_data {
2454    BOOL had_pruneorskip;             /* (*PRUNE) or (*SKIP) encountered */    BOOL had_pruneorskip;             /* (*PRUNE) or (*SKIP) encountered */
2455    BOOL check_lookbehind;            /* Lookbehinds need later checking */    BOOL check_lookbehind;            /* Lookbehinds need later checking */
2456    BOOL dupnames;                    /* Duplicate names exist */    BOOL dupnames;                    /* Duplicate names exist */
2457      BOOL dupgroups;                   /* Duplicate groups exist: (?| found */
2458      BOOL iscondassert;                /* Next assert is a condition */
2459    int  nltype;                      /* Newline type */    int  nltype;                      /* Newline type */
2460    int  nllen;                       /* Newline string length */    int  nllen;                       /* Newline string length */
2461    pcre_uchar nl[4];                 /* Newline string when fixed length */    pcre_uchar nl[4];                 /* Newline string when fixed length */
# Line 2501  typedef struct branch_chain { Line 2469  typedef struct branch_chain {
2469    pcre_uchar *current_branch;    pcre_uchar *current_branch;
2470  } branch_chain;  } branch_chain;
2471    
2472    /* Structure for mutual recursion detection. */
2473    
2474    typedef struct recurse_check {
2475      struct recurse_check *prev;
2476      const pcre_uchar *group;
2477    } recurse_check;
2478    
2479  /* Structure for items in a linked list that represents an explicit recursive  /* Structure for items in a linked list that represents an explicit recursive
2480  call within the pattern; used by pcre_exec(). */  call within the pattern; used by pcre_exec(). */
2481    

Legend:
- Removed from v.1380
- Changed lines
- Added in v.1631

  ViewVC Help
Powered by ViewVC 1.1.5