/[pcre]/code/trunk/pcre_internal.h
ViewVC logotype

Diff of /code/trunk/pcre_internal.h

Parent Directory | Revision Log | View Patch

revision 507 by ph10, Wed Mar 10 16:08:01 2010 UTC | revision 550 by ph10, Sun Oct 10 16:24:11 2010 UTC
# Line 475  know we are in UTF-8 mode. */ Line 475  know we are in UTF-8 mode. */
475        } \        } \
476      }      }
477    
478  /* Get the next character, testing for UTF-8 mode, and advancing the pointer */  /* Get the next character, testing for UTF-8 mode, and advancing the pointer.
479    This is called when we don't know if we are in UTF-8 mode. */
480    
481  #define GETCHARINCTEST(c, eptr) \  #define GETCHARINCTEST(c, eptr) \
482    c = *eptr++; \    c = *eptr++; \
# Line 512  if there are extra bytes. This is called Line 513  if there are extra bytes. This is called
513    
514  /* Get the next UTF-8 character, testing for UTF-8 mode, not advancing the  /* Get the next UTF-8 character, testing for UTF-8 mode, not advancing the
515  pointer, incrementing length if there are extra bytes. This is called when we  pointer, incrementing length if there are extra bytes. This is called when we
516  know we are in UTF-8 mode. */  do not know if we are in UTF-8 mode. */
517    
518  #define GETCHARLENTEST(c, eptr, len) \  #define GETCHARLENTEST(c, eptr, len) \
519    c = *eptr; \    c = *eptr; \
# Line 580  time, run time, or study time, respectiv Line 581  time, run time, or study time, respectiv
581     PCRE_DOTALL|PCRE_DOLLAR_ENDONLY|PCRE_EXTRA|PCRE_UNGREEDY|PCRE_UTF8| \     PCRE_DOTALL|PCRE_DOLLAR_ENDONLY|PCRE_EXTRA|PCRE_UNGREEDY|PCRE_UTF8| \
582     PCRE_NO_AUTO_CAPTURE|PCRE_NO_UTF8_CHECK|PCRE_AUTO_CALLOUT|PCRE_FIRSTLINE| \     PCRE_NO_AUTO_CAPTURE|PCRE_NO_UTF8_CHECK|PCRE_AUTO_CALLOUT|PCRE_FIRSTLINE| \
583     PCRE_DUPNAMES|PCRE_NEWLINE_BITS|PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE| \     PCRE_DUPNAMES|PCRE_NEWLINE_BITS|PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE| \
584     PCRE_JAVASCRIPT_COMPAT)     PCRE_JAVASCRIPT_COMPAT|PCRE_UCP)
585    
586  #define PUBLIC_EXEC_OPTIONS \  #define PUBLIC_EXEC_OPTIONS \
587    (PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NOTEMPTY_ATSTART| \    (PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NOTEMPTY_ATSTART| \
# Line 875  so that PCRE works on both ASCII and EBC Line 876  so that PCRE works on both ASCII and EBC
876  #define STRING_COMMIT0              "COMMIT\0"  #define STRING_COMMIT0              "COMMIT\0"
877  #define STRING_F0                   "F\0"  #define STRING_F0                   "F\0"
878  #define STRING_FAIL0                "FAIL\0"  #define STRING_FAIL0                "FAIL\0"
879    #define STRING_MARK0                "MARK\0"
880  #define STRING_PRUNE0               "PRUNE\0"  #define STRING_PRUNE0               "PRUNE\0"
881  #define STRING_SKIP0                "SKIP\0"  #define STRING_SKIP0                "SKIP\0"
882  #define STRING_THEN                 "THEN"  #define STRING_THEN                 "THEN"
# Line 904  so that PCRE works on both ASCII and EBC Line 906  so that PCRE works on both ASCII and EBC
906  #define STRING_BSR_ANYCRLF_RIGHTPAR "BSR_ANYCRLF)"  #define STRING_BSR_ANYCRLF_RIGHTPAR "BSR_ANYCRLF)"
907  #define STRING_BSR_UNICODE_RIGHTPAR "BSR_UNICODE)"  #define STRING_BSR_UNICODE_RIGHTPAR "BSR_UNICODE)"
908  #define STRING_UTF8_RIGHTPAR        "UTF8)"  #define STRING_UTF8_RIGHTPAR        "UTF8)"
909    #define STRING_UCP_RIGHTPAR         "UCP)"
910    
911  #else  /* SUPPORT_UTF8 */  #else  /* SUPPORT_UTF8 */
912    
# Line 1127  only. */ Line 1130  only. */
1130  #define STRING_COMMIT0              STR_C STR_O STR_M STR_M STR_I STR_T "\0"  #define STRING_COMMIT0              STR_C STR_O STR_M STR_M STR_I STR_T "\0"
1131  #define STRING_F0                   STR_F "\0"  #define STRING_F0                   STR_F "\0"
1132  #define STRING_FAIL0                STR_F STR_A STR_I STR_L "\0"  #define STRING_FAIL0                STR_F STR_A STR_I STR_L "\0"
1133    #define STRING_MARK0                STR_M STR_A STR_R STR_K "\0"
1134  #define STRING_PRUNE0               STR_P STR_R STR_U STR_N STR_E "\0"  #define STRING_PRUNE0               STR_P STR_R STR_U STR_N STR_E "\0"
1135  #define STRING_SKIP0                STR_S STR_K STR_I STR_P "\0"  #define STRING_SKIP0                STR_S STR_K STR_I STR_P "\0"
1136  #define STRING_THEN                 STR_T STR_H STR_E STR_N  #define STRING_THEN                 STR_T STR_H STR_E STR_N
# Line 1156  only. */ Line 1160  only. */
1160  #define STRING_BSR_ANYCRLF_RIGHTPAR STR_B STR_S STR_R STR_UNDERSCORE STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS  #define STRING_BSR_ANYCRLF_RIGHTPAR STR_B STR_S STR_R STR_UNDERSCORE STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
1161  #define STRING_BSR_UNICODE_RIGHTPAR STR_B STR_S STR_R STR_UNDERSCORE STR_U STR_N STR_I STR_C STR_O STR_D STR_E STR_RIGHT_PARENTHESIS  #define STRING_BSR_UNICODE_RIGHTPAR STR_B STR_S STR_R STR_UNDERSCORE STR_U STR_N STR_I STR_C STR_O STR_D STR_E STR_RIGHT_PARENTHESIS
1162  #define STRING_UTF8_RIGHTPAR        STR_U STR_T STR_F STR_8 STR_RIGHT_PARENTHESIS  #define STRING_UTF8_RIGHTPAR        STR_U STR_T STR_F STR_8 STR_RIGHT_PARENTHESIS
1163    #define STRING_UCP_RIGHTPAR         STR_U STR_C STR_P STR_RIGHT_PARENTHESIS
1164    
1165  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF8 */
1166    
# Line 1188  only. */ Line 1193  only. */
1193    
1194  #define PT_ANY        0    /* Any property - matches all chars */  #define PT_ANY        0    /* Any property - matches all chars */
1195  #define PT_LAMP       1    /* L& - the union of Lu, Ll, Lt */  #define PT_LAMP       1    /* L& - the union of Lu, Ll, Lt */
1196  #define PT_GC         2    /* General characteristic (e.g. L) */  #define PT_GC         2    /* Specified general characteristic (e.g. L) */
1197  #define PT_PC         3    /* Particular characteristic (e.g. Lu) */  #define PT_PC         3    /* Specified particular characteristic (e.g. Lu) */
1198  #define PT_SC         4    /* Script (e.g. Han) */  #define PT_SC         4    /* Script (e.g. Han) */
1199    #define PT_ALNUM      5    /* Alphanumeric - the union of L and N */
1200    #define PT_SPACE      6    /* Perl space - Z plus 9,10,12,13 */
1201    #define PT_PXSPACE    7    /* POSIX space - Z plus 9,10,11,12,13 */
1202    #define PT_WORD       8    /* Word - L plus N plus underscore */
1203    
1204  /* Flag bits and data types for the extended class (OP_XCLASS) for classes that  /* Flag bits and data types for the extended class (OP_XCLASS) for classes that
1205  contain UTF-8 characters with values greater than 255. */  contain UTF-8 characters with values greater than 255. */
# Line 1207  contain UTF-8 characters with values gre Line 1216  contain UTF-8 characters with values gre
1216  /* These are escaped items that aren't just an encoding of a particular data  /* These are escaped items that aren't just an encoding of a particular data
1217  value such as \n. They must have non-zero values, as check_escape() returns  value such as \n. They must have non-zero values, as check_escape() returns
1218  their negation. Also, they must appear in the same order as in the opcode  their negation. Also, they must appear in the same order as in the opcode
1219  definitions below, up to ESC_z. There's a dummy for OP_ANY because it  definitions below, up to ESC_z. There's a dummy for OP_ALLANY because it
1220  corresponds to "." rather than an escape sequence, and another for OP_ALLANY  corresponds to "." in DOTALL mode rather than an escape sequence. It is also
1221  (which is used for [^] in JavaScript compatibility mode).  used for [^] in JavaScript compatibility mode. In non-DOTALL mode, "." behaves
1222    like \N.
1223    
1224    The special values ESC_DU, ESC_du, etc. are used instead of ESC_D, ESC_d, etc.
1225    when PCRE_UCP is set, when replacement of \d etc by \p sequences is required.
1226    They must be contiguous, and remain in order so that the replacements can be
1227    looked up from a table.
1228    
1229  The final escape must be ESC_REF as subsequent values are used for  The final escape must be ESC_REF as subsequent values are used for
1230  backreferences (\1, \2, \3, etc). There are two tests in the code for an escape  backreferences (\1, \2, \3, etc). There are two tests in the code for an escape
# Line 1219  put in between that don't consume a char Line 1234  put in between that don't consume a char
1234  */  */
1235    
1236  enum { ESC_A = 1, ESC_G, ESC_K, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s,  enum { ESC_A = 1, ESC_G, ESC_K, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s,
1237         ESC_W, ESC_w, ESC_dum1, ESC_dum2, ESC_C, ESC_P, ESC_p, ESC_R, ESC_H,         ESC_W, ESC_w, ESC_N, ESC_dum, ESC_C, ESC_P, ESC_p, ESC_R, ESC_H,
1238         ESC_h, ESC_V, ESC_v, ESC_X, ESC_Z, ESC_z, ESC_E, ESC_Q, ESC_g, ESC_k,         ESC_h, ESC_V, ESC_v, ESC_X, ESC_Z, ESC_z,
1239           ESC_E, ESC_Q, ESC_g, ESC_k,
1240           ESC_DU, ESC_du, ESC_SU, ESC_su, ESC_WU, ESC_wu,
1241         ESC_REF };         ESC_REF };
1242    
   
1243  /* Opcode table: Starting from 1 (i.e. after OP_END), the values up to  /* Opcode table: Starting from 1 (i.e. after OP_END), the values up to
1244  OP_EOD must correspond in order to the list of escapes immediately above.  OP_EOD must correspond in order to the list of escapes immediately above.
1245    
# Line 1247  enum { Line 1263  enum {
1263    OP_WHITESPACE,         /*  9 \s */    OP_WHITESPACE,         /*  9 \s */
1264    OP_NOT_WORDCHAR,       /* 10 \W */    OP_NOT_WORDCHAR,       /* 10 \W */
1265    OP_WORDCHAR,           /* 11 \w */    OP_WORDCHAR,           /* 11 \w */
1266    OP_ANY,            /* 12 Match any character (subject to DOTALL) */    OP_ANY,            /* 12 Match any character except newline */
1267    OP_ALLANY,         /* 13 Match any character (not subject to DOTALL) */    OP_ALLANY,         /* 13 Match any character */
1268    OP_ANYBYTE,        /* 14 Match any byte (\C); different to OP_ANY for UTF-8 */    OP_ANYBYTE,        /* 14 Match any byte (\C); different to OP_ANY for UTF-8 */
1269    OP_NOTPROP,        /* 15 \P (not Unicode property) */    OP_NOTPROP,        /* 15 \P (not Unicode property) */
1270    OP_PROP,           /* 16 \p (Unicode property) */    OP_PROP,           /* 16 \p (Unicode property) */
# Line 1378  enum { Line 1394  enum {
1394    
1395    /* These are backtracking control verbs */    /* These are backtracking control verbs */
1396    
1397    OP_PRUNE,          /* 107 */    OP_MARK,           /* 107 always has an argument */
1398    OP_SKIP,           /* 108 */    OP_PRUNE,          /* 108 */
1399    OP_THEN,           /* 109 */    OP_PRUNE_ARG,      /* 109 same, but with argument */
1400    OP_COMMIT,         /* 110 */    OP_SKIP,           /* 110 */
1401      OP_SKIP_ARG,       /* 111 same, but with argument */
1402      OP_THEN,           /* 112 */
1403      OP_THEN_ARG,       /* 113 same, but with argument */
1404      OP_COMMIT,         /* 114 */
1405    
1406    /* These are forced failure and success verbs */    /* These are forced failure and success verbs */
1407    
1408    OP_FAIL,           /* 111 */    OP_FAIL,           /* 115 */
1409    OP_ACCEPT,         /* 112 */    OP_ACCEPT,         /* 116 */
1410    OP_CLOSE,          /* 113 Used before OP_ACCEPT to close open captures */    OP_CLOSE,          /* 117 Used before OP_ACCEPT to close open captures */
1411    
1412    /* This is used to skip a subpattern with a {0} quantifier */    /* This is used to skip a subpattern with a {0} quantifier */
1413    
1414    OP_SKIPZERO,       /* 114 */    OP_SKIPZERO,       /* 118 */
1415    
1416    /* This is not an opcode, but is used to check that tables indexed by opcode    /* This is not an opcode, but is used to check that tables indexed by opcode
1417    are the correct length, in order to catch updating errors - there have been    are the correct length, in order to catch updating errors - there have been
# Line 1402  enum { Line 1422  enum {
1422    
1423  /* *** NOTE NOTE NOTE *** Whenever the list above is updated, the two macro  /* *** NOTE NOTE NOTE *** Whenever the list above is updated, the two macro
1424  definitions that follow must also be updated to match. There are also tables  definitions that follow must also be updated to match. There are also tables
1425  called "coptable" cna "poptable" in pcre_dfa_exec.c that must be updated. */  called "coptable" and "poptable" in pcre_dfa_exec.c that must be updated. */
1426    
1427    
1428  /* This macro defines textual names for all the opcodes. These are used only  /* This macro defines textual names for all the opcodes. These are used only
# Line 1427  for debugging. The macro is referenced o Line 1447  for debugging. The macro is referenced o
1447    "Once", "Bra", "CBra", "Cond", "SBra", "SCBra", "SCond",        \    "Once", "Bra", "CBra", "Cond", "SBra", "SCBra", "SCond",        \
1448    "Cond ref", "Cond nref", "Cond rec", "Cond nrec", "Cond def",   \    "Cond ref", "Cond nref", "Cond rec", "Cond nrec", "Cond def",   \
1449    "Brazero", "Braminzero",                                        \    "Brazero", "Braminzero",                                        \
1450    "*PRUNE", "*SKIP", "*THEN", "*COMMIT", "*FAIL", "*ACCEPT",      \    "*MARK", "*PRUNE", "*PRUNE", "*SKIP", "*SKIP",                  \
1451      "*THEN", "*THEN", "*COMMIT", "*FAIL", "*ACCEPT",                \
1452    "Close", "Skip zero"    "Close", "Skip zero"
1453    
1454    
# Line 1493  in UTF-8 mode. The code that uses this t Line 1514  in UTF-8 mode. The code that uses this t
1514    3, 3,                          /* RREF, NRREF                            */ \    3, 3,                          /* RREF, NRREF                            */ \
1515    1,                             /* DEF                                    */ \    1,                             /* DEF                                    */ \
1516    1, 1,                          /* BRAZERO, BRAMINZERO                    */ \    1, 1,                          /* BRAZERO, BRAMINZERO                    */ \
1517    1, 1, 1, 1,                    /* PRUNE, SKIP, THEN, COMMIT,             */ \    3, 1, 3,                       /* MARK, PRUNE, PRUNE_ARG                 */ \
1518    1, 1, 3, 1                     /* FAIL, ACCEPT, CLOSE, SKIPZERO          */    1, 3,                          /* SKIP, SKIP_ARG                         */ \
1519      1+LINK_SIZE, 3+LINK_SIZE,      /* THEN, THEN_ARG                         */ \
1520      1, 1, 1, 3, 1                  /* COMMIT, FAIL, ACCEPT, CLOSE, SKIPZERO  */
1521    
1522    
1523  /* A magic value for OP_RREF and OP_NRREF to indicate the "any recursion"  /* A magic value for OP_RREF and OP_NRREF to indicate the "any recursion"
# Line 1512  enum { ERR0,  ERR1,  ERR2,  ERR3,  ERR4, Line 1535  enum { ERR0,  ERR1,  ERR2,  ERR3,  ERR4,
1535         ERR30, ERR31, ERR32, ERR33, ERR34, ERR35, ERR36, ERR37, ERR38, ERR39,         ERR30, ERR31, ERR32, ERR33, ERR34, ERR35, ERR36, ERR37, ERR38, ERR39,
1536         ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49,         ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49,
1537         ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59,         ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59,
1538         ERR60, ERR61, ERR62, ERR63, ERR64, ERR65, ERRCOUNT };         ERR60, ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERRCOUNT };
1539    
1540  /* The real format of the start of the pcre block; the index of names and the  /* The real format of the start of the pcre block; the index of names and the
1541  code vector run on as long as necessary after the end. We store an explicit  code vector run on as long as necessary after the end. We store an explicit
# Line 1655  typedef struct match_data { Line 1678  typedef struct match_data {
1678    BOOL   noteol;                /* NOTEOL flag */    BOOL   noteol;                /* NOTEOL flag */
1679    BOOL   utf8;                  /* UTF8 flag */    BOOL   utf8;                  /* UTF8 flag */
1680    BOOL   jscript_compat;        /* JAVASCRIPT_COMPAT flag */    BOOL   jscript_compat;        /* JAVASCRIPT_COMPAT flag */
1681      BOOL   use_ucp;               /* PCRE_UCP flag */
1682    BOOL   endonly;               /* Dollar not before final \n */    BOOL   endonly;               /* Dollar not before final \n */
1683    BOOL   notempty;              /* Empty string match not wanted */    BOOL   notempty;              /* Empty string match not wanted */
1684    BOOL   notempty_atstart;      /* Empty string match at start not wanted */    BOOL   notempty_atstart;      /* Empty string match at start not wanted */
# Line 1674  typedef struct match_data { Line 1698  typedef struct match_data {
1698    int    eptrn;                 /* Next free eptrblock */    int    eptrn;                 /* Next free eptrblock */
1699    recursion_info *recursive;    /* Linked list of recursion data */    recursion_info *recursive;    /* Linked list of recursion data */
1700    void  *callout_data;          /* To pass back to callouts */    void  *callout_data;          /* To pass back to callouts */
1701      const uschar *mark;           /* Mark pointer to pass back */
1702  } match_data;  } match_data;
1703    
1704  /* A similar structure is used for the same purpose by the DFA matching  /* A similar structure is used for the same purpose by the DFA matching

Legend:
Removed from v.507  
changed lines
  Added in v.550

  ViewVC Help
Powered by ViewVC 1.1.5